In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [8]:
data = pd.read_csv('Final_Fashion_Dataset.csv')
data.head()

Unnamed: 0,user_id,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,filename,link,ratings,review,Month,Price (USD)
0,93810,8493,Women,Apparel,Bottomwear,Skirts,Black,Fall,2011.0,Casual,Forever New Women Black Skirts,8493.jpg,http://assets.myntassets.com/v1/images/style/p...,5,Amazing quality! Definitely recommend. Would r...,November,46
1,24592,30757,Men,Apparel,Topwear,Kurtas,Grey,Summer,2012.0,Ethnic,Fabindia Men Grey Mangalgiri Kurta,30757.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Perfect for any occasion, great buy..",September,29
2,13278,14881,Women,Accessories,Bags,Handbags,Black,Summer,2011.0,Casual,United Colors of Benetton Women Solid Black Ha...,14881.jpg,http://assets.myntassets.com/v1/images/style/p...,3,"It's okay, nothing special.!",October,39
3,46048,48449,Men,Apparel,Topwear,Tshirts,Blue,Summer,2012.0,Casual,French Connection Men Blue T-shirt,48449.jpg,http://assets.myntassets.com/v1/images/style/p...,5,"Comfortable and stylish, worth the price. and ...",August,39
4,42098,4697,Unisex,Accessories,Watches,Watches,Black,Winter,2016.0,Sports,ADIDAS Unisex Digital Duramo Black Watch,4697.jpg,http://assets.myntassets.com/v1/images/style/p...,4,"Perfect for any occasion, great buy.!",March,33


In [9]:
categorical_cols = ['user_id', 'gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'year', 'usage']
numerical_cols = ['ratings', 'Price (USD)']

In [10]:
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col].astype(str))
    label_encoders[col] = le

In [11]:
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

In [12]:
X = data[categorical_cols]
y = data['Price (USD)']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Build the DNN model
def create_recommendation_model(input_dim):
    model = keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)  # Output layer for regression
    ])

    return model

# Create and compile the model
model = create_recommendation_model(X_train.shape[1])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [15]:
# Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=32, verbose=1)

Epoch 1/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 94.3293 - mae: 3.7968 - val_loss: 4.1629 - val_mae: 1.7040
Epoch 2/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 4.2614 - mae: 1.4730 - val_loss: 2.2195 - val_mae: 1.2081
Epoch 3/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 5.0192 - mae: 1.5806 - val_loss: 5.0788 - val_mae: 1.8946
Epoch 4/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 6.2321 - mae: 1.7267 - val_loss: 1.7408 - val_mae: 1.0831
Epoch 5/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2.7242 - mae: 1.2723 - val_loss: 1.2518 - val_mae: 0.9033
Epoch 6/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2.7453 - mae: 1.2789 - val_loss: 1.3039 - val_mae: 0.9374
Epoch 7/100
[1m730/730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms

In [16]:
# Evaluate the model
evaluation = model.evaluate(X_test, y_test)
print("Test Loss:", evaluation[0])
print("Test MAE:", evaluation[1])

[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 884us/step - loss: 0.9148 - mae: 0.7917
Test Loss: 0.921700656414032
Test MAE: 0.7926486730575562


In [18]:
import numpy as np

def recommend_products(user_id, top_n=5):
    # Filter data for the specific user
    user_data = data[data['user_id'] == label_encoders['user_id'].transform([user_id])[0]]

    # Predict prices for user data
    predictions = model.predict(user_data[categorical_cols])

    # Add predictions to the user_data DataFrame
    user_data['Predicted Price (USD)'] = scaler.inverse_transform(np.column_stack((user_data['ratings'], predictions)))[1]

    # Sort by predicted price to get recommendations
    recommended_products = user_data.sort_values(by='Predicted Price (USD)', ascending=False).head(top_n)

    return recommended_products[['id', 'masterCategory', 'subCategory', 'Predicted Price (USD)']]

# Example usage
sample_recommendations = recommend_products(user_id=93810)
print(sample_recommendations)


ValueError: y contains previously unseen labels: 93810

In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Calculate predictions on the test set
y_pred = model.predict(X_test)

# Inverse transform to get actual price values
y_test_actual = scaler.inverse_transform(np.column_stack((np.zeros(len(y_test)), y_test)))[1]
y_pred_actual = scaler.inverse_transform(np.column_stack((np.zeros(len(y_pred)), y_pred)))[1]

# Calculate MAE and RMSE
mae = mean_absolute_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual))

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)


[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Mean Absolute Error (MAE): 9.64870694794027
Root Mean Squared Error (RMSE): 13.645332225140642


Available columns in the DataFrame: Index(['user_id', 'id', 'gender', 'masterCategory', 'subCategory',
       'articleType', 'baseColour', 'season', 'year', 'usage',
       'productDisplayName', 'filename', 'link', 'ratings', 'review', 'Month',
       'Price (USD)', 'user_id_encoded', 'product_id_encoded'],
      dtype='object')


KeyError: "'purchased' column not found in DataFrame."