In [25]:
#importing the basic required libraries

In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.utils import check_array
from sklearn.preprocessing import LabelEncoder


In [27]:
#loading the dataset

In [28]:
historic_data = pd.read_csv('historic.csv')
prediction_input = pd.read_csv('prediction_input.csv')

In [29]:
historic_data.head()

Unnamed: 0,item_no,category,main_promotion,color,stars,success_indicator
0,739157,Tunic,Catalog,Green,3.1,flop
1,591846,Hoodie,Category_Highlight,Red,1.5,flop
2,337574,Sweatshirt,Catalog,Red,4.4,top
3,401933,Polo-Shirt,Category_Highlight,Blue,3.1,flop
4,812151,Hoodie,Category_Highlight,Green,4.1,top


In [30]:
prediction_input.head()

Unnamed: 0,item_no,category,main_promotion,color,stars
0,405901,Sweatshirt,Catalog,Blue,3.1
1,644275,Polo-Shirt,Frontpage_Header,Yellow,2.6
2,533070,Tunic,Catalog,Green,2.7
3,829436,Polo-Shirt,Catalog,Yellow,2.6
4,801722,Tunic,Catalog,Yellow,4.9


In [31]:
#seperating the features and the target variables

In [32]:
X_historic = historic_data.drop('success_indicator', axis=1)
y_historic = historic_data['success_indicator']

In [33]:
#preprocessing for numeric features

In [34]:
numeric_features = ['stars']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

In [35]:
#preprocessing for categorical features

In [36]:
categorical_features = ['category', 'main_promotion', 'color']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [37]:
#combining the above processes

In [38]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [39]:
#preprocessing on historic data

In [40]:
X_historic_preprocessed = preprocessor.fit_transform(X_historic)

In [41]:
#splitting into training and testing

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X_historic_preprocessed, y_historic, test_size=0.2, random_state=42)

In [43]:
#building the model using ann
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [44]:
#compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [45]:
# Convert sparse matrix to dense array
X_train_dense = check_array(X_train, accept_sparse=True)

# Splitting the historic data into train and validation sets
X_train_split, X_val, y_train_split, y_val = train_test_split(X_train_dense, y_train, test_size=0.2, random_state=42)

In [46]:
# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Encode target variable
y_train_split_encoded = label_encoder.fit_transform(y_train_split)
y_val_encoded = label_encoder.transform(y_val)

In [47]:
# Training the model
history = model.fit(X_train_split, y_train_split_encoded, epochs=20, batch_size=64, validation_data=(X_val, y_val_encoded))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [48]:
# Encoding target variable for test data
y_test_encoded = label_encoder.transform(y_test)

# Evaluating the model
loss, accuracy = model.evaluate(X_test, y_test_encoded)
print("Test Accuracy:", accuracy)


Test Accuracy: 0.8431249856948853
