In [7]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [11]:
data = pd.read_csv('/kaggle/input/fashionproduct-1-csv/fashion_products.csv')
data.head()

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M


In [12]:
data.isnull().sum()

User ID         0
Product ID      0
Product Name    0
Brand           0
Category        0
Price           0
Rating          0
Color           0
Size            0
dtype: int64

In [13]:
# Content-Based Filtering
content_df = data[['Product ID', 'Product Name', 'Brand',
                  'Category', 'Color', 'Size']]
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

# Use the TF-IDF vectorizer to convert content into a matrix of TF-IDF features
tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])
content_similarity = linear_kernel(content_matrix, content_matrix)

reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(data[['User ID',
                                  'Product ID',
                                  'Rating']], reader)

def get_content_based_recommendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n+1]
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    return recommendations

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)


In [14]:
get_content_based_recommendations(5, 10)

array([  1, 221, 989, 331, 570, 817, 890,   6, 423, 720])

In [15]:
# Collaborative Filtering

algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key = lambda x: x.est, reverse = True)
    recommendations = [prediction.iid for prediction in predictions [:top_n]]
    return recommendations

In [18]:
get_collaborative_filtering_recommendations(79, 10)

[494, 970, 501, 71, 631, 450, 302, 142, 418, 231]

In [19]:
def get_hybrid_recommendations(user_id, product_id, top_n, content_weight=0.5):
    content_based_recommendations = get_content_based_recommendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    hybrid_recommendations = list(set(content_based_recommendations +  collaborative_filtering_recommendations))
    return hybrid_recommendations[:top_n]

In [22]:
user_id = 80
product_id = 7
top_n = 10
recommendations = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid Recommendations for User {user_id} based on Product {product_id}")
for i, recommendation in enumerate(recommendations):
    print(f"{i +1}. Product ID: {recommendation}")

Hybrid Recommendations for User 80 based on Product 7
1. Product ID: 1474
2. Product ID: 1086
3. Product ID: 1605
4. Product ID: 713
5. Product ID: 1844
6. Product ID: 1268
7. Product ID: 1080
8. Product ID: 1049
9. Product ID: 698
10. Product ID: 446
