In [29]:
import pandas as pd
data = pd.read_csv("fashion_products.csv")
data.head(10)

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M
5,98,6,Dress,Adidas,Men's Fashion,47,1.379566,Yellow,L
6,16,7,Jeans,Gucci,Men's Fashion,37,1.35675,White,XL
7,63,8,Sweater,Zara,Kids' Fashion,64,4.360303,Blue,XL
8,96,9,Sweater,H&M,Men's Fashion,53,4.466182,Green,XL
9,36,10,T-shirt,Zara,Kids' Fashion,55,4.093234,White,XL


In [30]:
data.isnull().sum()

User ID         0
Product ID      0
Product Name    0
Brand           0
Category        0
Price           0
Rating          0
Color           0
Size            0
dtype: int64

In [31]:
%pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


hybrid(content+collaborative)
1.content based-> movie, music, books
2.collaborative filtering-> user behaviour, pattern, rating

In [32]:
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [33]:
content_df = data[['Product ID', 'Product Name', 'Brand', 
                   'Category', 'Color', 'Size']]
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

content_df['Content']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


0          1 Dress Adidas Men's Fashion Black XL
1            2 Shoes H&M Women's Fashion Black L
2       3 Dress Adidas Women's Fashion Yellow XL
3             4 Shoes Zara Men's Fashion White S
4         5 T-shirt Adidas Men's Fashion Black M
                         ...                    
995       996 Shoes Zara Women's Fashion Black M
996       997 Sweater Nike Kids' Fashion Green L
997       998 Sweater Zara Men's Fashion White L
998      999 T-shirt Zara Women's Fashion Blue S
999    1000 Shoes Adidas Women's Fashion White L
Name: Content, Length: 1000, dtype: object

In [None]:
# Content-Based Filtering

# Use TF-IDF vectorizer to convert content into a matrix of TF-IDF features
tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['User ID', 
                                  'Product ID', 
                                  'Rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    return recommendations

In [35]:
content_df

Unnamed: 0,Product ID,Product Name,Brand,Category,Color,Size,Content
0,1,Dress,Adidas,Men's Fashion,Black,XL,1 Dress Adidas Men's Fashion Black XL
1,2,Shoes,H&M,Women's Fashion,Black,L,2 Shoes H&M Women's Fashion Black L
2,3,Dress,Adidas,Women's Fashion,Yellow,XL,3 Dress Adidas Women's Fashion Yellow XL
3,4,Shoes,Zara,Men's Fashion,White,S,4 Shoes Zara Men's Fashion White S
4,5,T-shirt,Adidas,Men's Fashion,Black,M,5 T-shirt Adidas Men's Fashion Black M
...,...,...,...,...,...,...,...
995,996,Shoes,Zara,Women's Fashion,Black,M,996 Shoes Zara Women's Fashion Black M
996,997,Sweater,Nike,Kids' Fashion,Green,L,997 Sweater Nike Kids' Fashion Green L
997,998,Sweater,Zara,Men's Fashion,White,L,998 Sweater Zara Men's Fashion White L
998,999,T-shirt,Zara,Women's Fashion,Blue,S,999 T-shirt Zara Women's Fashion Blue S


In [41]:
get_content_based_recommendations(5, 10)

[0.67858152 0.39430987 0.26140725 0.20988127 1.         0.45271172
 0.18965297 0.03341093 0.20413502 0.1626155  0.4103517  0.02145292
 0.02163341 0.0215532  0.11991258 0.16819266 0.02070968 0.02078757
 0.02167632 0.11828328 0.1255975  0.02154298 0.02171602 0.02093412
 0.02237172 0.0213533  0.31371478 0.02164768 0.02160372 0.02072757
 0.02270675 0.16287904 0.02178182 0.31012809 0.02166183 0.02240082
 0.02148314 0.16810392 0.26804871 0.19040402 0.02161779 0.0215692
 0.41397372 0.02182797 0.02151244 0.11499905 0.11957436 0.16679753
 0.02065643 0.17503816 0.16400865 0.02144135 0.26439227 0.12534164
 0.16749426 0.02085769 0.16718791 0.17016122 0.02154566 0.02184461
 0.16958993 0.26568412 0.02178976 0.02152572 0.02184461 0.02152917
 0.02154298 0.16625493 0.02141335 0.02143886 0.1686482  0.17515325
 0.26341732 0.02082434 0.02243942 0.33432567 0.02245553 0.25761768
 0.26439227 0.02089666 0.16710734 0.02248496 0.02167198 0.16718791
 0.02094328 0.11885255 0.11945966 0.02163074 0.11914952 0.12025

array([  1, 221, 989, 331, 570, 817, 890,   6, 423, 720], dtype=int64)

In [37]:
# Collaborative Filtering
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

In [38]:
get_collaborative_filtering_recommendations(79,10)

[104, 127, 715, 939, 681, 29, 579, 800, 343, 983]

In [39]:
# Hybrid Approach
def get_hybrid_recommendations(user_id, product_id, top_n):
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))
    return hybrid_recommendations[:top_n]

In [40]:
user_id = 79
product_id = 5
top_n = 10
recommendations = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}:")
for i, recommendation in enumerate(recommendations):
    print(f"{i + 1}. Product ID: {recommendation}")

[0.67858152 0.39430987 0.26140725 0.20988127 1.         0.45271172
 0.18965297 0.03341093 0.20413502 0.1626155  0.4103517  0.02145292
 0.02163341 0.0215532  0.11991258 0.16819266 0.02070968 0.02078757
 0.02167632 0.11828328 0.1255975  0.02154298 0.02171602 0.02093412
 0.02237172 0.0213533  0.31371478 0.02164768 0.02160372 0.02072757
 0.02270675 0.16287904 0.02178182 0.31012809 0.02166183 0.02240082
 0.02148314 0.16810392 0.26804871 0.19040402 0.02161779 0.0215692
 0.41397372 0.02182797 0.02151244 0.11499905 0.11957436 0.16679753
 0.02065643 0.17503816 0.16400865 0.02144135 0.26439227 0.12534164
 0.16749426 0.02085769 0.16718791 0.17016122 0.02154566 0.02184461
 0.16958993 0.26568412 0.02178976 0.02152572 0.02184461 0.02152917
 0.02154298 0.16625493 0.02141335 0.02143886 0.1686482  0.17515325
 0.26341732 0.02082434 0.02243942 0.33432567 0.02245553 0.25761768
 0.26439227 0.02089666 0.16710734 0.02248496 0.02167198 0.16718791
 0.02094328 0.11885255 0.11945966 0.02163074 0.11914952 0.12025