In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder
import joblib

# Load the dataset from the CSV file
file_path = 'Proddata.csv'  # Replace with the actual path to your CSV file
data = pd.read_csv(file_path)

# Convert 'Intensity' and 'Quantity' to numeric
data['Intensity'] = pd.to_numeric(data['Intensity'], errors='coerce')
data['Quantity'] = pd.to_numeric(data['Quantity'], errors='coerce')

# Feature Engineering: Create a new feature combining "Intensity" and "Quantity"
data['Intensity_Quantity'] = data['Intensity'] * data['Quantity']  # Adjust the combination method as needed

# Feature Engineering: Create another new feature 'Total_Intensity'
data['Total_Intensity'] = data['Intensity'] + data['Quantity']

# Split the data into features (X) and target variables (y)
X = data[['STYLE', 'Intensity_Quantity', 'Total_Intensity']]  # Include other relevant features as needed
y_factory = data['FACTORY']
y_team = data['TEAM']
y_defect = data['DefectName']

# Perform one-hot encoding for categorical variables
encoder = OneHotEncoder()
X_encoded = encoder.fit_transform(X)

# Save the encoder
joblib.dump(encoder, 'encoder.joblib')

# Split the data into training and testing sets
X_train, X_test, y_factory_train, y_factory_test, y_team_train, y_team_test, y_defect_train, y_defect_test = train_test_split(
    X_encoded, y_factory, y_team, y_defect, test_size=0.2, random_state=42
)

# Train a Random Forest Classifier for each prediction task
factory_model = RandomForestClassifier()
team_model = RandomForestClassifier()
defect_model = RandomForestClassifier()

factory_model.fit(X_train, y_factory_train)
team_model.fit(X_train, y_team_train)
defect_model.fit(X_train, y_defect_train)

# Save the trained models to disk
joblib.dump(factory_model, 'factory_model.joblib')
joblib.dump(team_model, 'team_model.joblib')
joblib.dump(defect_model, 'defect_model.joblib')

# Make predictions
y_factory_pred = factory_model.predict(X_test)
y_team_pred = team_model.predict(X_test)
y_defect_pred = defect_model.predict(X_test)

# Evaluate the models
factory_accuracy = accuracy_score(y_factory_test, y_factory_pred)
team_accuracy = accuracy_score(y_team_test, y_team_pred)
defect_accuracy = accuracy_score(y_defect_test, y_defect_pred)

print(f'Factory Prediction Accuracy: {factory_accuracy}')
print(f'Team Prediction Accuracy: {team_accuracy}')
print(f'Defect Prediction Accuracy: {defect_accuracy}')

# You can also print classification reports for more detailed evaluation
print('Factory Classification Report:')
print(classification_report(y_factory_test, y_factory_pred))


Factory Prediction Accuracy: 0.9149600156036669
Team Prediction Accuracy: 0.5266237565827969
Defect Prediction Accuracy: 0.2763799492880827
Factory Classification Report:
              precision    recall  f1-score   support

        CMCD       0.98      0.99      0.98       535
        CMCG       0.87      0.99      0.93      1465
         CME       0.93      0.92      0.92      1239
        CMGM       0.86      0.81      0.83       723
        CMPK       0.94      0.67      0.78       513
         CMW       1.00      1.00      1.00       652

    accuracy                           0.91      5127
   macro avg       0.93      0.89      0.91      5127
weighted avg       0.92      0.91      0.91      5127

