# WITH STITCHING UNIT

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw dataset from a CSV file referenced by a relative path.
df=pd.read_csv("manufacturing_data.csv")

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# Number of missing values in each column.
df.isnull().sum()


In [None]:
# Summary statistics of the frame's columns (pandas defaults).
df.describe()

In [None]:
# Data type of each column.
df.dtypes

# Feature selection


In [None]:
# Drop the columns that are not used as model features.
df = df.drop(columns=["idleMinutes", "idleWorkers", "recordDate"])

#keeping only stitching unit
# .copy() makes the filtered frame independent of the frame it was sliced
# from, so the column assignments done later in the notebook (label encoding)
# write to this frame directly instead of raising SettingWithCopyWarning.
df = df[df['productionDept'] == 'Stitching Unit'].copy()

# Fail fast with a clear message if the filter matched nothing (for example a
# differently spelled department label); otherwise the failure would only
# surface later, during scaling/training, with a less obvious error.
assert not df.empty, "No rows with productionDept == 'Stitching Unit'"

df.head(10)

# Encoding


In [None]:
from sklearn.preprocessing import LabelEncoder

# List of all categorical columns
# NOTE(review): if df has already been filtered to a single productionDept
# value, that column encodes to a constant 0 and carries no information.
# NOTE(review): styleChangeCount looks like a numeric count by its name -
# confirm that label-encoding it is intended.
categorical_cols = ['fiscalQuarter', 'productionDept', 'dayOfWeek', 'team', 'styleChangeCount']

# One fitted encoder per column, keyed by column name. Re-fitting a single
# shared encoder would overwrite its learned classes on every iteration, so
# only the last column's mapping would survive and the other columns could
# not be decoded (inverse_transform) or encoded consistently for new data.
label_encoders = {}

# Apply label encoding to each categorical column
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# After the loop `le` is the encoder fitted on the last column in
# categorical_cols, which matches what a single re-fitted encoder would hold.

# Check the updated dataset
df.head()


In [None]:
# One bar chart per categorical column showing its (at most) 12 most frequent
# values; missing entries, if any, are counted under the label "Missing".
for col in categorical_cols:
    fig, ax = plt.subplots(figsize=(6, 3))
    top_counts = df[col].fillna("Missing").value_counts().nlargest(12)
    top_counts.plot(kind="bar", ax=ax)
    ax.set_title(f"Counts for {col}")
    fig.tight_layout()
    plt.show()

# Scaling and Splitting Data

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


In [None]:
# Separate features and target
X = df.drop(columns=['efficiencyScore'])
y = df['efficiencyScore']

# Split into train and test sets BEFORE scaling, so that the scaler never
# sees the held-out rows. With the same random_state the row assignment is
# the same as when splitting already-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the Data
# Fit on the training split only; fitting on the full dataset would leak the
# test set's mean/variance into training and make test metrics optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training-set statistics. Kept so any
# code that refers to X_scaled keeps working.
X_scaled = scaler.transform(X)


In [None]:
# Shapes of the train and test feature matrices, as a sanity check on the split.
X_train.shape,X_test.shape

# Train Models


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error


## Linear Regression

In [None]:

# Fit a linear regression model on the training split.
lr_model = LinearRegression().fit(X_train, y_train)

# Predictions on both splits, so train and test error can be compared.
y_train_pred_lr, y_test_pred_lr = (
    lr_model.predict(features) for features in (X_train, X_test)
)

print("Linear Regression Results:")

# (label, value) pairs in print order: R², MAE, then RMSE, train before test.
lr_report = [
    ("Train R²:", r2_score(y_train, y_train_pred_lr)),
    ("Test R²:", r2_score(y_test, y_test_pred_lr)),
    ("Train MAE:", mean_absolute_error(y_train, y_train_pred_lr)),
    ("Test MAE:", mean_absolute_error(y_test, y_test_pred_lr)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred_lr))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred_lr))),
]
for metric_label, metric_value in lr_report:
    print(metric_label, metric_value)



In [None]:
# Actual vs. predicted test-set scores for the linear model; the dashed red
# diagonal marks where predicted equals actual.
fig_lr, ax_lr = plt.subplots(figsize=(6, 6))
ax_lr.scatter(y_test, y_test_pred_lr, alpha=0.6, color='royalblue')
ax_lr.set_xlabel("Actual Efficiency Score")
ax_lr.set_ylabel("Predicted Efficiency Score")
ax_lr.set_title("Linear Regression: Actual vs Predicted Efficiency")
lr_low, lr_high = y_test.min(), y_test.max()
ax_lr.plot([lr_low, lr_high], [lr_low, lr_high], 'r--')
plt.show()


## Random Forest


In [None]:
# Random Forest
# Fit with a fixed random_state so repeated runs build the same forest.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predictions on both splits, so train and test error can be compared.
y_train_pred_rf, y_test_pred_rf = (
    rf_model.predict(features) for features in (X_train, X_test)
)

print("\nRandom Forest Results:")

# (label, value) pairs in print order: R², MAE, then RMSE, train before test.
rf_report = [
    ("Train R²:", r2_score(y_train, y_train_pred_rf)),
    ("Test R²:", r2_score(y_test, y_test_pred_rf)),
    ("Train MAE:", mean_absolute_error(y_train, y_train_pred_rf)),
    ("Test MAE:", mean_absolute_error(y_test, y_test_pred_rf)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred_rf))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred_rf))),
]
for metric_label, metric_value in rf_report:
    print(metric_label, metric_value)


In [None]:
# Actual vs. predicted test-set scores for the random forest; the dashed red
# diagonal marks where predicted equals actual.
fig_rf, ax_rf = plt.subplots(figsize=(6, 6))
ax_rf.scatter(y_test, y_test_pred_rf, alpha=0.6, color='forestgreen')
ax_rf.set_xlabel("Actual Efficiency Score")
ax_rf.set_ylabel("Predicted Efficiency Score")
ax_rf.set_title("Random Forest: Actual vs Predicted Efficiency")
rf_low, rf_high = y_test.min(), y_test.max()
ax_rf.plot([rf_low, rf_high], [rf_low, rf_high], 'r--')
plt.show()


## Gradient Boosting

In [None]:

# Fit gradient boosting with a fixed random_state for repeatable results.
gb_model = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predictions on both splits, so train and test error can be compared.
y_train_pred_gb, y_test_pred_gb = (
    gb_model.predict(features) for features in (X_train, X_test)
)

print("\nGradient Boosting Results:")

# (label, value) pairs in print order: R², MAE, then RMSE, train before test.
gb_report = [
    ("Train R²:", r2_score(y_train, y_train_pred_gb)),
    ("Test R²:", r2_score(y_test, y_test_pred_gb)),
    ("Train MAE:", mean_absolute_error(y_train, y_train_pred_gb)),
    ("Test MAE:", mean_absolute_error(y_test, y_test_pred_gb)),
    ("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred_gb))),
    ("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred_gb))),
]
for metric_label, metric_value in gb_report:
    print(metric_label, metric_value)


In [None]:
# Actual vs. predicted test-set scores for gradient boosting; the dashed red
# diagonal marks where predicted equals actual.
fig_gb, ax_gb = plt.subplots(figsize=(6, 6))
ax_gb.scatter(y_test, y_test_pred_gb, alpha=0.6, color='darkorange')
ax_gb.set_xlabel("Actual Efficiency Score")
ax_gb.set_ylabel("Predicted Efficiency Score")
ax_gb.set_title("Gradient Boosting: Actual vs Predicted Efficiency")
gb_low, gb_high = y_test.min(), y_test.max()
ax_gb.plot([gb_low, gb_high], [gb_low, gb_high], 'r--')
plt.show()


# Result comparison

In [None]:
# NOTE: disabled draft of the comparison table, kept for reference only.
# As written it would not run at this point of the notebook: r2_lr / r2_rf /
# r2_gb are only assigned in the following cell, and mse_lr / mse_rf / mse_gb
# are not assigned anywhere in the visible notebook.
# results_df = pd.DataFrame({
#     "Model": ["Linear Regression", "Random Forest", "Gradient Boosting"],
#     "R2 Score": [r2_lr, r2_rf, r2_gb],
#     "MSE": [mse_lr, mse_rf, mse_gb]
# })

# print("\nComparison of models:")
# print(results_df)



In [None]:

# Test-set R2 of each fitted model, in the order LR, RF, GB.
r2_lr, r2_rf, r2_gb = (
    r2_score(y_test, test_predictions)
    for test_predictions in (y_test_pred_lr, y_test_pred_rf, y_test_pred_gb)
)

# One row per model: display name and its test R2.
results_df = pd.DataFrame(
    [
        ("Linear Regression", r2_lr),
        ("Random Forest", r2_rf),
        ("Gradient Boosting", r2_gb),
    ],
    columns=["Model", "R2 Score"],
)

print("\nModel Comparison:")
print(results_df)

# Bar chart of the same scores, one bar per model.
r2_by_model = results_df.set_index("Model")["R2 Score"]
comparison_ax = r2_by_model.plot(
    kind="bar",
    color=["royalblue", "green", "orange"],
    figsize=(6,4),
    title="Model Comparison (R2 Score)",
)
comparison_ax.set_ylabel("R2 Score")
plt.show()
