In [5]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split


In [43]:
data_path = '/content/drive/MyDrive/dataset/Final-Fashion-Dataset.csv'
data = pd.read_csv(data_path)


In [45]:
df = data[['user_id', 'id', 'gender', 'masterCategory', 'subCategory', 'articleType',
           'baseColour', 'season', 'year', 'usage', 'ratings', 'Price (USD)']]
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

df['user_id'] = user_encoder.fit_transform(df['user_id'])
df['id'] = item_encoder.fit_transform(df['id'])

# One-hot encode other categorical features
df = pd.get_dummies(df, columns=['gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'usage'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['user_id'] = user_encoder.fit_transform(df['user_id'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['id'] = item_encoder.fit_transform(df['id'])


In [33]:
print(df.columns)

Index(['user_id', 'id', 'year', 'ratings', 'Price (USD)', 'gender_Boys',
       'gender_Girls', 'gender_Men', 'gender_Unisex', 'gender_Women',
       ...
       'season_Spring', 'season_Summer', 'season_Winter', 'usage_Casual',
       'usage_Ethnic', 'usage_Formal', 'usage_Party', 'usage_Smart Casual',
       'usage_Sports', 'usage_Travel'],
      dtype='object', length=159)


In [34]:
print(df.head())


   user_id     id  year  ratings  Price (USD)  gender_Boys  gender_Girls  \
0     4511   5456  2011        5           46        False         False   
1      771  20750  2012        4           29        False         False   
2      139  10313  2011        3           39        False         False   
3     1904  31195  2012        5           39        False         False   
4     1694   2356  2016        4           33        False         False   

   gender_Men  gender_Unisex  gender_Women  ...  season_Spring  season_Summer  \
0       False          False          True  ...          False          False   
1        True          False         False  ...          False           True   
2       False          False          True  ...          False           True   
3        True          False         False  ...          False           True   
4       False           True         False  ...          False          False   

   season_Winter  usage_Casual  usage_Ethnic  usage_Form

In [20]:
# Normalize numerical features
scaler = MinMaxScaler()
df[['year', 'Price (USD)']] = scaler.fit_transform(df[['year', 'Price (USD)']])


In [46]:
# Split data into train and test
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Prepare training and test data
X_train = [train['user_id'].values, train['id'].values, train.drop(columns=['user_id', 'id', 'ratings']).values]
X_test = [test['user_id'].values, test['id'].values, test.drop(columns=['user_id', 'id', 'ratings']).values]
y_train = train['ratings'].values
y_test = test['ratings'].values

# Convert to float32
X_train = [np.array(arr, dtype=np.float32) for arr in X_train]
X_test = [np.array(arr, dtype=np.float32) for arr in X_test]
y_train = np.array(y_train, dtype=np.float32)
y_test = np.array(y_test, dtype=np.float32)

In [49]:
from keras.layers import Embedding, Input, Flatten, Dense, Dropout, Concatenate, BatchNormalization
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.optimizers import Adam

# Define embedding size
embedding_size = 64
l2_reg = 0.01

In [57]:
from tensorflow.keras.layers import Input, Embedding, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2


# User input
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(input_dim=len(user_encoder.classes_), output_dim=embedding_size, embeddings_regularizer=l2(l2_reg))(user_input)
user_vec = Flatten()(user_embedding)

# Item input
item_input = Input(shape=(1,), name='item_input')
item_embedding = Embedding(input_dim=len(item_encoder.classes_), output_dim=embedding_size, embeddings_regularizer=l2(l2_reg))(item_input)
item_vec = Flatten()(item_embedding)

In [58]:
# Additional features input
additional_input = Input(shape=(X_train[2].shape[1],), name='additional_input')

In [59]:
# Concatenate embeddings and additional features
concat = Concatenate()([user_vec, item_vec, additional_input])


In [61]:
# Dense layers
dense1 = Dense(128, activation='relu', kernel_regularizer=l2(l2_reg))(concat)
dropout1 = Dropout(0.3)(dense1)
batch1 = BatchNormalization()(dropout1)
dense2 = Dense(64, activation='relu', kernel_regularizer=l2(l2_reg))(batch1)
dropout2 = Dropout(0.3)(dense2)
output = Dense(1, activation='linear')(dropout2)

In [62]:
# Build and compile model
model = Model(inputs=[user_input, item_input, additional_input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=256)

# Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {loss:.4f}")
print(f"Test MAE: {mae:.4f}")


Epoch 1/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 43ms/step - loss: 14.9205 - mae: 1.9723 - val_loss: 2.0785 - val_mae: 0.8779
Epoch 2/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 50ms/step - loss: 2.3655 - mae: 1.0430 - val_loss: 3.5286 - val_mae: 1.4210
Epoch 3/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - loss: 2.0685 - mae: 1.0084 - val_loss: 3.0133 - val_mae: 1.4561
Epoch 4/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 52ms/step - loss: 1.8117 - mae: 0.9544 - val_loss: 4.9380 - val_mae: 1.9445
Epoch 5/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 40ms/step - loss: 1.6361 - mae: 0.9175 - val_loss: 2.1936 - val_mae: 1.2531
Epoch 6/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 54ms/step - loss: 1.4848 - mae: 0.8755 - val_loss: 1.7294 - val_mae: 1.0969
Epoch 7/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms

In [66]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_compatible_products(input_item_id, num_recommendations=5):
    try:
        # Encode the input item ID
        input_item_idx = item_encoder.transform([input_item_id])[0]

        # Extract the correct embedding layer for items
        # Assuming 'embedding_2' corresponds to the item embeddings
        item_embedding_layer = model.get_layer('embedding_2')
        item_embedding_weights = item_embedding_layer.get_weights()[0]  # Shape: (num_items, embedding_size)

        # Compute similarity for the input item's embedding
        input_item_embedding = item_embedding_weights[input_item_idx].reshape(1, -1)
        similarities = cosine_similarity(input_item_embedding, item_embedding_weights)[0]

        # Get top N recommendations (excluding the input item itself)
        top_indices = similarities.argsort()[-(num_recommendations + 1):][::-1][1:]
        recommended_item_ids = item_encoder.inverse_transform(top_indices)

        # Debugging type consistency for filtering
        print("Recommended Item IDs (encoded):", recommended_item_ids)
        print("Type of 'id' column:", df['id'].dtype)

        # Ensure type consistency for filtering (if necessary)
        df['id'] = df['id'].astype(int)
        recommended_item_ids = [int(item) for item in recommended_item_ids]

        # Retrieve details of the recommended items
        recommended_items = df[df['id'].isin(recommended_item_ids)].reset_index(drop=True)

        # Select relevant columns for display
        recommendation_details = recommended_items[['id', 'masterCategory', 'subCategory', 'articleType', 'Price (USD)']]

        return recommendation_details, similarities[top_indices]
    except Exception as e:
        print(f"Error during recommendation: {e}")
        return pd.DataFrame(), []

# Example usage
input_item_id = 3940  # Replace with a valid item ID from your dataset
recommended_items, similarity_scores = recommend_compatible_products(input_item_id, num_recommendations=5)

# Display recommendations with similarity scores
print(f"Recommended Products for Item ID {input_item_id}:")
for item, score in zip(recommended_items.values, similarity_scores):
    print(f"Item ID: {item[0]}, Master Category: {item[1]}, SubCategory: {item[2]}, "
          f"Article Type: {item[3]}, Price: {item[4]}, Similarity: {score:.4f}")


Recommended Item IDs (encoded): [52112 43259 16067  6752  5618]
Type of 'id' column: int64
Error during recommendation: "['masterCategory', 'subCategory', 'articleType'] not in index"
Recommended Products for Item ID 3940:


In [67]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Predict ratings
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)

# Calculate metrics
precision = precision_score(y_test, y_pred_classes, average='weighted')
recall = recall_score(y_test, y_pred_classes, average='weighted')
f1 = f1_score(y_test, y_pred_classes, average='weighted')
accuracy = accuracy_score(y_test, y_pred_classes)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")


[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
Precision: 0.0111
Recall: 0.1054
F1-Score: 0.0201
Accuracy: 0.1054


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
