In [1]:
# =============================================================
# 📘 Capstone Project 1: Manufacturing Equipment Output Prediction
# Linear Regression Model (Fully Compatible with app.py & main.py)
# =============================================================

# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import pickle
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# -------------------------------------------------------------
# Step 2: Load Dataset
# -------------------------------------------------------------
# Read the raw samples from the CSV export. Point this path at the
# location of the dataset on your own machine.
df = pd.read_csv("D:\\MUFG\\manufacturing_dataset_1000_samples project1 Capstone.csv")

# Discard the Timestamp column when the file has one; a file without
# it is left untouched.
df = df.drop(columns=["Timestamp"], errors="ignore")

# -------------------------------------------------------------
# Step 3: Handle Missing Values
# -------------------------------------------------------------
# Fill the gaps in each of these columns with that column's own mean.
# NOTE(review): the means are computed over the whole frame, i.e. before
# the train/test split further down, so test rows influence the fill
# value. Consider imputing from training rows only.
for col in ("Material_Viscosity", "Ambient_Temperature", "Operator_Experience"):
    df[col] = df[col].fillna(df[col].mean())

# -------------------------------------------------------------
# Step 4: Encode Categorical Features (Label Encoding)
# -------------------------------------------------------------
# Every categorical column gets its OWN LabelEncoder. Refitting a single
# shared encoder would overwrite its classes_ on each iteration, leaving
# only the last column's category -> integer mapping available, so the
# encoding could not be reproduced at inference time.
categorical_features = ["Shift", "Machine_Type", "Material_Grade", "Day_of_Week"]
label_encoders = {}

for col in categorical_features:
    # `le` ends up bound to the Day_of_Week encoder, as it was before.
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Persist the fitted encoders (keyed by column name) so code that loads
# the model can apply exactly the same integer codes to new inputs.
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

# -------------------------------------------------------------
# Step 5: Define Features and Target
# -------------------------------------------------------------
# Parts_Per_Hour is the quantity to predict; every other column that is
# still in the frame becomes a model input.
y = df["Parts_Per_Hour"]
X = df.drop(columns=["Parts_Per_Hour"])

# -------------------------------------------------------------
# Step 6: Split Data
# -------------------------------------------------------------
# Hold out 20% of the rows for evaluation. The fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# -------------------------------------------------------------
# Step 7: Scale Numerical Columns
# -------------------------------------------------------------
# Columns that are standardized. The label-encoded categorical columns
# are not listed and therefore pass through unscaled. The order here is
# the order the scaler is fitted with, so keep it stable.
numeric_features = [
    "Injection_Temperature", "Injection_Pressure",
    "Cycle_Time", "Cooling_Time",
    "Material_Viscosity", "Ambient_Temperature",
    "Machine_Age", "Operator_Experience", "Maintenance_Hours",
    "Temperature_Pressure_Ratio", "Total_Cycle_Time",
    "Efficiency_Score", "Machine_Utilization",
]

# Learn the scaling statistics from the training rows only, then apply
# that same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train[numeric_features])
for part in (X_train, X_test):
    part[numeric_features] = scaler.transform(part[numeric_features])

# -------------------------------------------------------------
# Step 8: Train Linear Regression Model
# -------------------------------------------------------------
# Fit an ordinary least-squares model on the training partition;
# fit() returns the estimator itself, so it can be bound directly.
model = LinearRegression().fit(X_train, y_train)

# -------------------------------------------------------------
# Step 9: Evaluate Model
# -------------------------------------------------------------
# Score the model on the held-out rows.
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to three decimals.
print("\n📊 Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.3f}")
print(f"R² Score: {r2:.3f}")

# -------------------------------------------------------------
# Step 10: Save Model, Scaler, and Feature Columns
# -------------------------------------------------------------
# Pickle the fitted estimator and the fitted scaler, one file each,
# into the current working directory.
for filename, artifact in (
    ("linear_regression_model.pkl", model),
    ("scaler.pkl", scaler),
):
    with open(filename, "wb") as fh:
        pickle.dump(artifact, fh)

# Record the column names, in training order, as a JSON list.
feature_columns = X.columns.tolist()
with open("feature_columns.json", "w") as fh:
    fh.write(json.dumps(feature_columns))

print("\n✅ Model, scaler, and feature columns saved successfully!")
print("\n💡 Columns used for training:")
print(feature_columns)



📊 Model Evaluation:
Mean Squared Error (MSE): 13.716
R² Score: 0.895

✅ Model, scaler, and feature columns saved successfully!

💡 Columns used for training:
['Injection_Temperature', 'Injection_Pressure', 'Cycle_Time', 'Cooling_Time', 'Material_Viscosity', 'Ambient_Temperature', 'Machine_Age', 'Operator_Experience', 'Maintenance_Hours', 'Shift', 'Machine_Type', 'Material_Grade', 'Day_of_Week', 'Temperature_Pressure_Ratio', 'Total_Cycle_Time', 'Efficiency_Score', 'Machine_Utilization']
