In [3]:
!pip install tensorflow pandas scikit-learn

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [10]:
# Load the dataset
data = pd.read_csv("C:/Users/Asus/Downloads/myntra202305041052.csv")
data =data.iloc[:5000,:]

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           5000 non-null   int64  
 1   name         5000 non-null   object 
 2   img          5000 non-null   object 
 3   asin         5000 non-null   object 
 4   price        5000 non-null   float64
 5   mrp          5000 non-null   float64
 6   rating       5000 non-null   float64
 7   ratingTotal  5000 non-null   int64  
 8   discount     5000 non-null   int64  
 9   seller       5000 non-null   object 
 10  purl         5000 non-null   object 
dtypes: float64(3), int64(3), object(5)
memory usage: 429.8+ KB


In [12]:
# Normalization
scaler = MinMaxScaler()
data['rating_normalized'] = scaler.fit_transform(data[['rating']])
data['score'] = data['rating_normalized']

In [27]:
# user & id mapping
user_ids = data['id'].unique().tolist()
item_ids = data['asin'].unique().tolist()

user_id_map = {id: idx for idx, id in enumerate(user_ids)}
item_id_map = {id: idx for idx, id in enumerate(item_ids)}

data['user'] = data['id'].map(user_id_map)
data['item'] = data['asin'].map(item_id_map)

In [28]:
# Split the data into training and testing
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [30]:
num_users = len(user_ids)
num_items = len(item_ids)
embedding_size = 50
#define model
user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

user_embedding = tf.keras.layers.Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
item_embedding = tf.keras.layers.Embedding(input_dim=num_items, output_dim=embedding_size, name='item_embedding')(item_input)

user_vecs = tf.keras.layers.Flatten()(user_embedding)
item_vecs = tf.keras.layers.Flatten()(item_embedding)

concat = tf.keras.layers.Concatenate()([user_vecs, item_vecs])

dense = tf.keras.layers.Dense(128, activation='relu')(concat)
dense = tf.keras.layers.Dense(64, activation='relu')(dense)
output = tf.keras.layers.Dense(1)(dense)

model = tf.keras.models.Model([user_input, item_input], output)
model.compile(optimizer='adam', loss='mse')

In [31]:
# Training
train_user_data = train_data['user'].values
train_item_data = train_data['item'].values
train_score_data = train_data['score'].values

model.fit([train_user_data, train_item_data], train_score_data, epochs=7, batch_size=64, validation_split=0.1)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.src.callbacks.History at 0x2ea51003fd0>

In [36]:
def recommend_products(model, user_id, user_id_map, item_id_map, num_recommendations=10):
    user = user_id_map[user_id]
    item = np.array(list(item_id_map.values()))
    user_array = np.array([user] * len(item))
    scores = model.predict([user_array, item]).flatten()
    
    indices = scores.argsort()[-num_recommendations:1][::-1]
    to_items = [list(item_id_map.keys())[idx] for idx in indices]
    return to_items

#Example recommendation
user_id = 313
recommended_item_ids = recommend_products(model, user_id, user_id_map, item_id_map, num_recommendations=10)


recommendations = data[data['asin'].isin(recommended_item_ids)][['name', 'score', 'purl']]
recommendations =recommendations.iloc[:11,:]
for idx, row in recommendations.iterrows():
    print(f"Product Name: {row['name']}, Score: {row['score']}, URL: {row['purl']}")


Product Name: Men Solid Oversized Cotton, Score: 0.82, URL: https://www.myntra.com/tshirts/difference-of-opinion/difference-of-opinion-men-mint-green-round-neck-drop-shoulder-sleeves-cotton-loose-t-shirt/16407468/buy
Product Name: Men Cotton Pure Cotton T-shirt, Score: 0.8400000000000001, URL: https://www.myntra.com/tshirts/roadster/roadster-men-black-cotton-pure-cotton-t-shirt/1996777/buy
Product Name: Women Pure Cotton T-shirt, Score: 0.9, URL: https://www.myntra.com/tshirts/dillinger/dillinger-women-navy-blue-typography-printed-pure-cotton-t-shirt/15598180/buy
Product Name: Typography Print T-shirt, Score: 0.8, URL: https://www.myntra.com/tshirts/huetrap/huetrap-men-beige--black-typography-printed-sustainable-t-shirt/11148764/buy
Product Name: Printed Round Neck Pure Cotton T-shirt, Score: 0.8400000000000001, URL: https://www.myntra.com/tshirts/roadster/roadster-men-green--white-printed-round-neck-pure-cotton-t-shirt/11545192/buy
Product Name: Boys Pack of 5 T-shirt, Score: 0.82, UR

In [33]:
#Evaluation
import numpy as np
from sklearn.metrics import mean_squared_error

def predict_scores(model, test_data, user_id_map, item_id_map):
    user_internal_ids = test_data['id'].map(user_id_map).values
    item_internal_ids = test_data['asin'].map(item_id_map).values
    
    predicted_scores = model.predict([user_internal_ids, item_internal_ids]).flatten()
    return predicted_scores

predicted_scores = predict_scores(model, test_data, user_id_map, item_id_map)
actual_scores = test_data['score'].values

#RMSE
rmse = np.sqrt(mean_squared_error(actual_scores, predicted_scores))
print(f'RMSE: {rmse}')
# MAE
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(actual_scores, predicted_scores)
print(f'MAE: {mae}')


RMSE: 0.44508917287956734
MAE: 0.4185788988259435
