In [80]:
#importing the necessary libraries

In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix

In [82]:
# Re-importing numpy, pandas and matplotlib (already imported in the cell above)

In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [84]:
#importing the dataset

In [85]:
# Load the labelled historical items from disk.
historic_data = pd.read_csv(filepath_or_buffer='historic.csv')
# Preview the first five rows to sanity-check the loaded columns.
historic_data.head(n=5)

Unnamed: 0,item_no,category,main_promotion,color,stars,success_indicator
0,739157,Tunic,Catalog,Green,3.1,flop
1,591846,Hoodie,Category_Highlight,Red,1.5,flop
2,337574,Sweatshirt,Catalog,Red,4.4,top
3,401933,Polo-Shirt,Category_Highlight,Blue,3.1,flop
4,812151,Hoodie,Category_Highlight,Green,4.1,top


In [86]:
# Load the items that are to be scored by the trained model.
prediction_data = pd.read_csv(filepath_or_buffer='prediction_input.csv')
# Preview the first five rows to sanity-check the loaded columns.
prediction_data.head(n=5)

Unnamed: 0,item_no,category,main_promotion,color,stars
0,405901,Sweatshirt,Catalog,Blue,3.1
1,644275,Polo-Shirt,Frontpage_Header,Yellow,2.6
2,533070,Tunic,Catalog,Green,2.7
3,829436,Polo-Shirt,Catalog,Yellow,2.6
4,801722,Tunic,Catalog,Yellow,4.9


In [87]:
# Preprocessing the data

In [88]:
# Separate the predictors from the label column 'success_indicator'.
# Every other column of historic_data stays in X_historic.
X_historic = historic_data.drop(columns=['success_indicator'])
y_historic = historic_data.loc[:, 'success_indicator']

In [89]:
#preprocessing for numeric features

In [90]:
# Numeric branch of the preprocessing, applied to the 'stars' column.
numeric_features = ['stars']
numeric_transformer = Pipeline([
    # Fill missing values with the column mean.
    ('imputer', SimpleImputer(strategy='mean')),
    # Standardise the imputed values.
    ('scaler', StandardScaler()),
])

In [91]:
# Preprocessing for categorical features

In [92]:
# Categorical branch of the preprocessing.
categorical_features = ['category', 'main_promotion', 'color']
categorical_transformer = Pipeline([
    # Fill missing values with the most frequent value of each column.
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # One-hot encode; categories not seen during fit are ignored, not an error.
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [93]:
#combining the above preprocesses

In [94]:
# Route each group of columns through its own transformer.
# remainder='drop' is the library default, spelled out here to make it visible
# that columns not listed in either feature list are discarded.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
)


In [95]:
# Splitting the historic data into train and test sets

In [96]:
# Hold out 20% of the historic rows for evaluation; the fixed random_state
# keeps the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_historic,
    y_historic,
    test_size=0.2,
    random_state=42,
)

In [97]:
# Logistic Regression model

In [98]:
# Logistic regression stacked on top of the preprocessing step.
logistic_model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])
# Fit on the training split, then predict labels for the held-out split.
# Pipeline.fit returns the fitted pipeline itself, so the calls can be chained.
y_pred_logistic = logistic_model.fit(X_train, y_train).predict(X_test)

In [99]:
# Random Forest model

In [100]:
# Random forest stacked on top of the preprocessing step.
# The forest is seeded so that its fit, the reported metrics and the
# predictions made with this model are reproducible across kernel restarts.
# Without a seed every run trains a different forest.
rf_model = Pipeline(steps=[('preprocessor', preprocessor),
                            ('classifier', RandomForestClassifier(random_state=42))])
# Fit on the training split, then predict labels for the held-out split.
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [101]:
#Decision tree model

In [102]:
# Decision tree stacked on top of the preprocessing step.
# The tree is seeded because scikit-learn permutes the candidate features at
# every split; without a fixed random_state two runs can produce different
# trees and therefore different metrics.
dt_model = Pipeline(steps=[('preprocessor', preprocessor),
                            ('classifier', DecisionTreeClassifier(random_state=42))])
# Fit on the training split, then predict labels for the held-out split.
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [103]:
# Evaluation metrics for logistic regression model

In [104]:
# Report accuracy and the per-class precision/recall/F1 table for the
# logistic-regression predictions on the held-out split.
print(
    "Logistic Regression Model Evaluation:",
    f"Accuracy: {accuracy_score(y_test, y_pred_logistic)!s}",
    "Classification Report:",
    classification_report(y_test, y_pred_logistic),
    sep="\n",
)

Logistic Regression Model Evaluation:
Accuracy: 0.818125
Classification Report:
              precision    recall  f1-score   support

        flop       0.80      0.66      0.72       571
         top       0.83      0.91      0.87      1029

    accuracy                           0.82      1600
   macro avg       0.81      0.78      0.79      1600
weighted avg       0.82      0.82      0.81      1600



In [105]:
# Evaluation metrics for random forest model

In [106]:
# Report accuracy and the per-class precision/recall/F1 table for the
# random-forest predictions on the held-out split.
print(
    "\nRandom Forest Model Evaluation:",
    f"Accuracy: {accuracy_score(y_test, y_pred_rf)!s}",
    "Classification Report:",
    classification_report(y_test, y_pred_rf),
    sep="\n",
)


Random Forest Model Evaluation:
Accuracy: 0.8225
Classification Report:
              precision    recall  f1-score   support

        flop       0.79      0.69      0.74       571
         top       0.84      0.90      0.87      1029

    accuracy                           0.82      1600
   macro avg       0.81      0.79      0.80      1600
weighted avg       0.82      0.82      0.82      1600



In [107]:
# Evaluation metrics for decision tree model

In [108]:
# Report accuracy and the per-class precision/recall/F1 table for the
# decision-tree predictions on the held-out split.
print(
    "\ndecisiontree Model Evaluation:",
    f"Accuracy: {accuracy_score(y_test, y_pred_dt)!s}",
    "Classification Report:",
    classification_report(y_test, y_pred_dt),
    sep="\n",
)


decisiontree Model Evaluation:
Accuracy: 0.8075
Classification Report:
              precision    recall  f1-score   support

        flop       0.73      0.72      0.73       571
         top       0.85      0.86      0.85      1029

    accuracy                           0.81      1600
   macro avg       0.79      0.79      0.79      1600
weighted avg       0.81      0.81      0.81      1600



In [None]:
#making the predictions using the random forest model

In [109]:
# Score the new items with the random-forest pipeline.
selected_model = rf_model
# Reuse the frame already loaded from 'prediction_input.csv' rather than
# reading the file a second time: the predictions are attached to
# prediction_data afterwards, so predicting on that same frame guarantees the
# rows line up. Any 'predicted_success' column left by an earlier run is
# removed so this cell can be re-executed safely; drop() returns a new frame
# and leaves prediction_data untouched.
prediction_input = prediction_data.drop(columns=['predicted_success'], errors='ignore')
prediction_results = selected_model.predict(prediction_input)


In [112]:
# Attach the model's predictions to the prediction frame as a new column.
prediction_data['predicted_success'] = prediction_results

# Export item_no, category, color, stars and the predicted label to CSV,
# without writing the DataFrame index.
prediction_data.to_csv(
    'prediction_output.csv',
    columns=['item_no', 'category', 'color', 'stars', 'predicted_success'],
    index=False,
)

In [114]:
# Preview the first five rows, now including the 'predicted_success' column.
prediction_data.head(n=5)

Unnamed: 0,item_no,category,main_promotion,color,stars,predicted_success
0,405901,Sweatshirt,Catalog,Blue,3.1,top
1,644275,Polo-Shirt,Frontpage_Header,Yellow,2.6,flop
2,533070,Tunic,Catalog,Green,2.7,flop
3,829436,Polo-Shirt,Catalog,Yellow,2.6,flop
4,801722,Tunic,Catalog,Yellow,4.9,top
