In [None]:
import pandas as pd
import os
from PIL import Image
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors

# Directory that holds the product images
images_path = 'fashion_images/'

# Read the synthetic user/item/rating table from disk
synthetic_df = pd.read_csv('synthetic_fashion_data.csv')

# Show the first five rows as a quick sanity check of the loaded columns
print(synthetic_df.head(5))


    user_id  item_id  rating    id gender masterCategory subCategory  \
0    user_1     2209       2  2209    Men        Apparel     Topwear   
1   user_44     2209       2  2209    Men        Apparel     Topwear   
2  user_177     2209       4  2209    Men        Apparel     Topwear   
3  user_267     2209       3  2209    Men        Apparel     Topwear   
4  user_271     2209       1  2209    Men        Apparel     Topwear   

  articleType baseColour season  year   usage  \
0     Tshirts      White   Fall  2010  Sports   
1     Tshirts      White   Fall  2010  Sports   
2     Tshirts      White   Fall  2010  Sports   
3     Tshirts      White   Fall  2010  Sports   
4     Tshirts      White   Fall  2010  Sports   

                       productDisplayName  
0  Adidas Men 3 Stripe White Polo T-shirt  
1  Adidas Men 3 Stripe White Polo T-shirt  
2  Adidas Men 3 Stripe White Polo T-shirt  
3  Adidas Men 3 Stripe White Polo T-shirt  
4  Adidas Men 3 Stripe White Polo T-shirt  


In [None]:
# Build the image feature extractor: a VGG16 network with ImageNet weights,
# truncated so that its output is the activation of the 'fc1' layer.
base_model = VGG16(weights='imagenet')
fc1_output = base_model.get_layer('fc1').output
model = Model(inputs=base_model.input, outputs=fc1_output)

def extract_features(image_path):
    """Compute the feature vector of one image using the module-level ``model``.

    The image is converted to RGB, resized to 224x224, turned into a batch of
    one, run through ``preprocess_input`` and then through ``model``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array holding the flattened model output for the image.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # Open inside a context manager so the file handle is released
    # deterministically; convert()/resize() return new in-memory images that
    # stay valid after the source file is closed.
    with Image.open(image_path) as source_image:
        img = source_image.convert('RGB').resize((224, 224))

    # Convert the image to an array and add the leading batch dimension
    img_array = np.expand_dims(img_to_array(img), axis=0)

    # Apply the VGG16-specific input preprocessing
    img_array = preprocess_input(img_array)

    # verbose=0: this function is called once per product, so the default
    # per-call progress bar would flood the notebook output.
    features = model.predict(img_array, verbose=0)

    # Flatten the (1, n_features) output to a 1-D vector
    return features.flatten()

# Build one feature vector per distinct product that has an image on disk.
# Products whose "<item_id>.jpg" file is missing are left out of the result.
unique_item_ids = synthetic_df['item_id'].unique()
product_features = {}
for item_id in unique_item_ids:
    image_file = os.path.join(images_path, f"{item_id}.jpg")
    if not os.path.exists(image_file):
        continue
    product_features[item_id] = extract_features(image_file)

# One row per item_id (the index), one column per feature dimension
features_df = pd.DataFrame.from_dict(product_features, orient='index')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 574ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 523ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 534ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 516ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 718ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 503ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 590ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 747ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 577ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 586ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [None]:
# Create a user-item matrix: one row per user, one column per item.
# pivot_table averages duplicate (user, item) ratings; pairs without a rating
# are filled with 0.
user_item_matrix = synthetic_df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)

# Apply TruncatedSVD for dimensionality reduction.
# - n_components is capped at the number of item columns, because TruncatedSVD
#   rejects a value larger than the number of features.
# - random_state is fixed so the (randomized) decomposition, and therefore the
#   neighbours found below, are reproducible across kernel restarts.
svd = TruncatedSVD(n_components=min(50, user_item_matrix.shape[1]), random_state=42)
user_item_matrix_svd = svd.fit_transform(user_item_matrix)

# Use NearestNeighbors for collaborative filtering: rows of
# user_item_matrix_svd are users, in the same order as user_item_matrix.index.
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_item_matrix_svd)

# Function to recommend items based on collaborative filtering
def collaborative_filtering(user_id, n_recommendations=5):
    """Recommend items to a user from the ratings of their nearest-neighbour users.

    The user's row in ``user_item_matrix_svd`` is used to query ``model_knn``
    for the most similar users. Each item is then scored by the
    similarity-weighted sum of those neighbours' ratings, and the best-scoring
    items the user has not rated yet are returned.

    Args:
        user_id: A label from ``user_item_matrix.index``.
        n_recommendations: Maximum number of items to return. As before,
            ``n_recommendations + 1`` neighbours are requested (capped at the
            number of users) so that the user themself can be discarded.

    Returns:
        A list of item ids (labels of ``user_item_matrix.columns``), best
        first. It may be shorter than ``n_recommendations`` when the
        neighbours rated too few items the user has not seen.

    Raises:
        ValueError: If ``user_id`` is not present in ``user_item_matrix``.
    """
    if user_id not in user_item_matrix.index:
        raise ValueError(f"{user_id!r} is not a known user_id")
    if n_recommendations <= 0:
        return []

    user_index = user_item_matrix.index.get_loc(user_id)

    # Never ask for more neighbours than there are fitted users.
    n_neighbors = min(n_recommendations + 1, user_item_matrix_svd.shape[0])
    distances, indices = model_knn.kneighbors(
        user_item_matrix_svd[user_index].reshape(1, -1), n_neighbors=n_neighbors
    )
    distances = distances.flatten()
    indices = indices.flatten()

    # `indices` are row positions of USERS. Drop the query user by identity
    # rather than by position: with tied distances it need not come first.
    is_other_user = indices != user_index
    neighbor_indices = indices[is_other_user]
    if neighbor_indices.size == 0:
        return []

    # Cosine distance -> cosine similarity; negative similarities carry no
    # positive evidence, so they are clipped to zero weight.
    weights = np.clip(1.0 - distances[is_other_user], 0.0, None)

    # Score every item by the similarity-weighted sum of neighbour ratings.
    neighbor_ratings = user_item_matrix.iloc[neighbor_indices].to_numpy(dtype=float)
    scores = weights @ neighbor_ratings

    # Items the user already rated (non-zero entry) are not recommended again.
    already_rated = user_item_matrix.iloc[user_index].to_numpy() > 0
    scores[already_rated] = -np.inf

    ranked_positions = np.argsort(-scores, kind='stable')[:n_recommendations]
    # Keep only items that at least one weighted neighbour actually rated.
    ranked_positions = [pos for pos in ranked_positions if scores[pos] > 0]

    return user_item_matrix.columns[ranked_positions].tolist()


In [None]:
# Function to recommend items based on image similarity
def content_based_filtering(item_id, n_recommendations=5):
    """Return the items whose image features are most similar to ``item_id``.

    Similarity is the cosine similarity between rows of ``features_df``.

    Args:
        item_id: A label from ``features_df.index``.
        n_recommendations: Maximum number of similar items to return.

    Returns:
        A list of item ids from ``features_df.index``, most similar first,
        never containing ``item_id`` itself. It is shorter than
        ``n_recommendations`` when fewer other items have features.

    Raises:
        KeyError: If ``item_id`` has no row in ``features_df``.
    """
    if n_recommendations <= 0:
        return []

    item_features = features_df.loc[item_id].values.reshape(1, -1)
    similarities = cosine_similarity(features_df, item_features).flatten()

    # Exclude the query item by label. Assuming it sorts last is unsafe: a
    # tied similarity (duplicate image, float rounding) can place another
    # item there, which would return the query item and drop a real match.
    candidate_positions = np.flatnonzero(features_df.index != item_id)
    best_first = np.argsort(-similarities[candidate_positions], kind='stable')
    similar_indices = candidate_positions[best_first[:n_recommendations]]

    recommended_items = features_df.index[similar_indices].tolist()
    return recommended_items

# Hybrid recommendation system
def hybrid_recommendation_system(user_id, n_recommendations=5):
    """Combine collaborative and image-based recommendations for a user.

    Items are first obtained from ``collaborative_filtering``; each of them is
    then expanded with visually similar items from ``content_based_filtering``.

    Args:
        user_id: User to recommend for; passed to ``collaborative_filtering``.
        n_recommendations: Passed to both underlying recommenders.

    Returns:
        A dict mapping each collaboratively recommended item id to a list of
        visually similar item ids. An item that has no row in ``features_df``
        (no image features were extracted for it) maps to an empty list.
    """
    collaborative_recommendations = collaborative_filtering(user_id, n_recommendations)
    hybrid_recommendations = {}

    for item_id in collaborative_recommendations:
        # Features exist only for items whose image file was found, so a
        # recommended item may be absent here; keep it in the result instead
        # of letting the lookup raise KeyError and abort everything.
        if item_id not in features_df.index:
            hybrid_recommendations[item_id] = []
            continue
        hybrid_recommendations[item_id] = content_based_filtering(item_id, n_recommendations)

    return hybrid_recommendations

# Demo: run the hybrid recommender for a single user and print the mapping
# of collaboratively recommended items to their visually similar items.
user_id = 'user_1'
recommendations = hybrid_recommendation_system(user_id=user_id)
print(f"Recommendations for {user_id}: {recommendations}")


Recommendations for user_1: {2217: [2364, 2695, 2727, 2700, 1542], 1997: [1591, 2117, 2691, 2719, 2590], 1810: [1537, 1617, 2559, 1811, 1533], 1995: [2004, 2349, 2200, 2253, 2089], 2195: [2572, 2584, 2107, 1607, 2112]}
