In [None]:
import pandas as pd
import numpy as np
import random

# Pin the global NumPy RNG state so every run reproduces the same samples.
np.random.seed(42)

# Sample size: one row per simulated batch.
n = 100

# --- Independent variables (process parameters) ---------------------------
# NOTE: for a fixed seed, the values each column receives depend on the order
# of the np.random calls below, so that order must not be rearranged.
batch_ids = [f'B-{1001 + offset}' for offset in range(n)]  # B-1001, B-1002, ...
temperature_c = np.random.normal(180, 5, n)       # centred on 180 C, std dev 5
pressure_psi = np.random.normal(48, 2, n)         # centred on 48 PSI, std dev 2
concentration_m = np.random.uniform(1.15, 1.25, n)  # flat spread over 1.15-1.25
level_m = np.random.normal(2.5, 0.05, n)          # centred on 2.5, std dev 0.05
catalyst_ratio = np.random.uniform(0.80, 0.95, n)   # flat spread over 0.80-0.95

# Column name -> values; insertion order defines the DataFrame column order.
data = {
    'Batch_ID': batch_ids,
    'Temperature_C': temperature_c,
    'Pressure_PSI': pressure_psi,
    'Concentration_M': concentration_m,
    'Level_m': level_m,
    'Catalyst_Ratio': catalyst_ratio,
}

df = pd.DataFrame(data)

# --- Target variable (product purity) -------------------------------------
# Purity peaks when temperature and pressure sit exactly on their set points
# and falls off quadratically on either side (an inverted parabola). That
# non-linear shape is the motivation for fitting a polynomial model later.
#
#   purity = 98.5 - 0.05 * (T - 180)^2 - 0.1 * (P - 48)^2 + noise
temp_deviation = df['Temperature_C'] - 180
pressure_deviation = df['Pressure_PSI'] - 48
sensor_noise = np.random.normal(0, 0.5, n)  # zero-mean measurement noise, std dev 0.5

raw_purity = (
    98.5
    - 0.05 * temp_deviation**2
    - 0.1 * pressure_deviation**2
    + sensor_noise
)

# A percentage cannot leave the 0-100 range, so clamp any outliers.
df['Product_Purity_Pct'] = raw_purity.clip(0, 100)

# Quick sanity check of the generated table.
print(df.head())

# Persisting the data set for the next phase is intentionally left disabled:
# df.to_csv('quality_control_data.csv', index=False)

  Batch_ID  Temperature_C  Pressure_PSI  Concentration_M   Level_m  \
0   B-1001     182.483571     45.169259         1.237737  2.492411   
1   B-1002     179.308678     47.158709         1.224077  2.529416   
2   B-1003     183.238443     47.314571         1.219702  2.514050   
3   B-1004     187.615149     46.395445         1.220248  2.468865   
4   B-1005     178.829233     47.677429         1.185949  2.489594   

   Catalyst_Ratio  Product_Purity_Pct  
0        0.946159           97.125155  
1        0.947932           98.008890  
2        0.904724           97.875128  
3        0.880414           94.825394  
4        0.846429           98.144235  
