Hybrid(content + collaburative)
1.content based-> movie, music, books
2.collaburative filtering -> user behaviour, patterns, ratings 

In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel 

In [2]:
data = pd.read_csv("fashion_products.csv")

In [3]:
data.shape

(1000, 9)

In [4]:
data.head()

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M


In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   User ID       1000 non-null   int64  
 1   Product ID    1000 non-null   int64  
 2   Product Name  1000 non-null   object 
 3   Brand         1000 non-null   object 
 4   Category      1000 non-null   object 
 5   Price         1000 non-null   int64  
 6   Rating        1000 non-null   float64
 7   Color         1000 non-null   object 
 8   Size          1000 non-null   object 
dtypes: float64(1), int64(3), object(5)
memory usage: 70.4+ KB


In [5]:
data.isnull().sum()

User ID         0
Product ID      0
Product Name    0
Brand           0
Category        0
Price           0
Rating          0
Color           0
Size            0
dtype: int64

In [6]:
data.describe()

Unnamed: 0,User ID,Product ID,Price,Rating
count,1000.0,1000.0,1000.0,1000.0
mean,50.419,500.5,55.785,2.993135
std,28.78131,288.819436,26.291748,1.153185
min,1.0,1.0,10.0,1.000967
25%,26.0,250.75,33.0,1.992786
50%,50.0,500.5,57.0,2.984003
75%,75.0,750.25,78.25,3.985084
max,100.0,1000.0,100.0,4.987964


In [None]:
# Content based Filtering

content_df = data[["Product ID", "Product Name", "Brand", "Category", "Color", "Size"]]

content_df['Content'] = content_df.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

#Use TF-IDF vectorizer to convert content into a matrix of TF-IDF features

tfidf_vectorizer = TfidfVectorizer()
content_matrix = tfidf_vectorizer.fit_transform(content_df["Content"])

content_similarity = linear_kernel(content_matrix, content_matrix) 

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data[['User ID', 'Product ID','Rating']], reader)

def get_content_based_recomendations(product_id, top_n):
    index = content_df[content_df['Product ID'] == product_id].index[0]
    similarity_scores = content_similarity[index]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    recomendations = content_df.loc[similar_indices,'Product ID'].values
    return recomendations

In [8]:
get_content_based_recomendations(5, 10)

array([  1, 221, 989, 331, 570, 817, 890,   6, 423, 720], dtype=int64)

In [11]:
# Collaborative Filtering
algo = SVD()
trainset = data.build_full_trainset()
algo.fit(trainset)

def get_collaborative_filtering_recommendations(user_id, top_n):
    testset = trainset.build_anti_testset()
    testset = filter(lambda x: x[0] == user_id, testset)
    predictions = algo.test(testset)
    predictions.sort(key=lambda x: x.est, reverse=True)
    recommendations = [prediction.iid for prediction in predictions[:top_n]]
    return recommendations

In [12]:
get_collaborative_filtering_recommendations(79, 10)

[534, 231, 126, 986, 199, 695, 877, 343, 445, 829]

In [13]:
# Hybrid Approach
def get_hybrid_recommendations(user_id, product_id, top_n):
    content_based_recommendations = get_content_based_recomendations(product_id, top_n)
    collaborative_filtering_recommendations = get_collaborative_filtering_recommendations(user_id, top_n)
    hybrid_recommendations = list(set(content_based_recommendations + collaborative_filtering_recommendations))
    return hybrid_recommendations[:top_n]

In [15]:
user_id = 7
product_id = 12
top_n = 10
recomendations = get_hybrid_recommendations(user_id, product_id, top_n)

print(f"Hybrid recomendations for User {user_id} based on Product {product_id}: ")
for i, recomendation in enumerate(recomendations):
    print(f"{i + 1}. Product ID : {recomendation}")

Hybrid recomendations for User 7 based on Product 12: 
1. Product ID : 1216
2. Product ID : 546
3. Product ID : 1195
4. Product ID : 1515
5. Product ID : 494
6. Product ID : 175
7. Product ID : 1007
8. Product ID : 1457
9. Product ID : 1722
10. Product ID : 1436
