In [7]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the production records. The columns read below are STYLE (the only
# feature) and FACTORY, TEAM, DefectName (one classification target each).
df = pd.read_csv('Proddata.csv')

X = df[['STYLE']]
y_factory = df['FACTORY']
y_team = df['TEAM']
y_defect = df['DefectName']

# One-hot encode STYLE. handle_unknown='ignore' makes transform() encode a
# style that was absent at fit time as an all-zero row instead of raising
# ValueError, so the prediction step below cannot crash on a new style.
column_transformer = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), ['STYLE'])],
    remainder='passthrough'
)

X_encoded = column_transformer.fit_transform(X)

# A single 80/20 split shared by all three targets so their rows stay aligned.
# The *_test parts are not used in this cell.
(
    X_train, X_test,
    y_factory_train, y_factory_test,
    y_team_train, y_team_test,
    y_defect_train, y_defect_test,
) = train_test_split(
    X_encoded, y_factory, y_team, y_defect, test_size=0.2, random_state=42
)


def train_style_classifier(features, target):
    """Fit and return a 100-tree random forest (seed 42) predicting `target` from `features`."""
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    return model.fit(features, target)  # fit() returns the fitted estimator itself


# One independent classifier per target, all trained on the same encoded styles.
factory_model = train_style_classifier(X_train, y_factory_train)
team_model = train_style_classifier(X_train, y_team_train)
defect_model = train_style_classifier(X_train, y_defect_train)

# Make predictions for a single style
style_to_predict = '11 LINUL6 6898048G2-AW22'
if style_to_predict not in set(X['STYLE']):
    # The encoder will emit an all-zero row for this style, so the forests
    # fall back to whatever they predict when no style indicator is set.
    print(f"Warning: style '{style_to_predict}' does not occur in the data; "
          "the predictions below are a fallback guess.")
style_input = column_transformer.transform(pd.DataFrame({'STYLE': [style_to_predict]}))
predicted_factory = factory_model.predict(style_input)[0]
predicted_team = team_model.predict(style_input)[0]
predicted_defect = defect_model.predict(style_input)[0]

# Print the predictions
print(f"For the style '{style_to_predict}':")
print(f"Predicted Best Factory: {predicted_factory}")
print(f"Predicted Team: {predicted_team}")
print(f"Predicted Most Common Defect: {predicted_defect}")

# Persist the fitted encoder together with the models: the forests only accept
# input encoded by this exact transformer, so without it the saved models
# cannot be used from a fresh session.
joblib.dump(column_transformer, 'style_encoder.joblib')
joblib.dump(factory_model, 'factory_model.joblib')
joblib.dump(team_model, 'team_model.joblib')
joblib.dump(defect_model, 'defect_model.joblib')


For the style '11 LINUL6 6898048G2-AW22':
Predicted Best Factory: CMCD
Predicted Team: SERIYUNKA3
Predicted Most Common Defect: 599.21 - HIGH LOW


['defect_model.joblib']

In [8]:
import pandas as pd

# Read the production data from the CSV file
df = pd.read_csv('Proddata.csv')

# Collect each distinct value of the STYLE column once
unique_styles = pd.unique(df['STYLE'])

# Wrap the distinct styles in a single-column DataFrame named STYLE
unique_styles_df = pd.Series(unique_styles, name='STYLE').to_frame()

# Write the styles to a new CSV file, leaving out the row index
unique_styles_df.to_csv(path_or_buf='unique_styles.csv', index=False)
