In [36]:
# !pip install scikit-surprise

In [37]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import warnings
warnings.filterwarnings('ignore')

### Content Based Filtering --> based on the content of the movie, music, books, etc.
### Collaborative Filtering --> based on the ratings, behaviour, pattern of the users
### Hybrid Recommendation Systems --> combination of content based and collaborative filtering

In [38]:
data = pd.read_csv('fashion_products.csv')
data.head(5)

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M


In [39]:
data.columns

Index(['User ID', 'Product ID', 'Product Name', 'Brand', 'Category', 'Price',
       'Rating', 'Color', 'Size'],
      dtype='object')

In [40]:
data.isna().sum()

User ID         0
Product ID      0
Product Name    0
Brand           0
Category        0
Price           0
Rating          0
Color           0
Size            0
dtype: int64

## Content Based Filtering

In [41]:
content_df = data[['Product ID', 'Product Name', 'Brand', 'Category', 'Color', 'Size']]
content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df['Content'])

content_similarity = linear_kernel(content_matrix, content_matrix)

In [42]:
def get_content_based_recommendations(product_id, top_k):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1: top_k + 1]
    recommendations = content_df.loc[similar_indices, 'Product ID'].values
    return recommendations

In [43]:
get_content_based_recommendations(5, 5)

array([  1, 221, 989, 331, 570], dtype=int64)

## Collaborative Filtering

In [44]:
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(data[['User ID', 'Product ID', 'Rating']], reader)

In [45]:
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_k):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [pred.iid for pred in predictions[:top_k]]
    return recommendations

In [46]:
get_collaborative_filtering_recommendations(1, 5)

[213, 772, 765, 792, 45]

## Hybrid Recommendation System

In [47]:
def get_hybrid_recommendations(user_id, product_id, top_k):
    content_based_filtering = get_content_based_recommendations(product_id, top_k)
    collaborative_filering = get_collaborative_filtering_recommendations(user_id, top_k)
    hybrid_recommendations = list(set(content_based_filtering + collaborative_filering))
    return hybrid_recommendations[:top_k]

In [48]:
user_id = 1
product_id = 19
top_k = 10
recommendations = get_hybrid_recommendations(user_id, product_id, top_k)

In [49]:
print(f"The Hybrid Recommendations for the User {user_id} based on the product {product_id}:")
for i, recommendation in enumerate(recommendations):
    print(f"{i+1}. Product Id : {recommendation}")

The Hybrid Recommendations for the User 1 based on the product 19:
1. Product Id : 769
2. Product Id : 742
3. Product Id : 1039
4. Product Id : 915
5. Product Id : 52
6. Product Id : 629
7. Product Id : 246
8. Product Id : 695
9. Product Id : 633
10. Product Id : 1371
