# Recommender Systems (рекомендаційні системи)

![types](https://assets-global.website-files.com/60f03643ffba6a48a3bda298/62690b50cf469a4a8f4f35b7_qj772QmOASv8t2tqRbTaQLmEsnO5dEgD0rIRqsOXh8K8qaCCplZaB2wHTnc5h5oePrXmbW4lLVyrHEI9ybjjBiz3KpmdUl4QNkkY9m3TMvu5IPQngtibC2J4WhKTAk7nXEubAOhq.jpeg)

In [1]:
# !pwd

In [1]:
import pandas as pd

# Source: https://cseweb.ucsd.edu/~jmcauley/datasets.html#clothing_fit

df = pd.read_json("../../data/modcloth_final_data.json", lines=True)

In [2]:
df.head()

Unnamed: 0,item_id,waist,size,quality,cup size,hips,bra size,category,bust,height,user_name,length,fit,user_id,shoe size,shoe width,review_summary,review_text
0,123373,29.0,7,5.0,d,38.0,34.0,new,36.0,5ft 6in,Emily,just right,small,991571,,,,
1,123373,31.0,13,3.0,b,30.0,36.0,new,,5ft 2in,sydneybraden2001,just right,small,587883,,,,
2,123373,30.0,7,2.0,b,,32.0,new,,5ft 7in,Ugggh,slightly long,small,395665,9.0,,,
3,123373,,21,5.0,dd/e,,,new,,,alexmeyer626,just right,fit,875643,,,,
4,123373,,18,5.0,b,,36.0,new,,5ft 2in,dberrones1,slightly long,small,944840,,,,


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82790 entries, 0 to 82789
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   item_id         82790 non-null  int64  
 1   waist           2882 non-null   float64
 2   size            82790 non-null  int64  
 3   quality         82722 non-null  float64
 4   cup size        76535 non-null  object 
 5   hips            56064 non-null  float64
 6   bra size        76772 non-null  float64
 7   category        82790 non-null  object 
 8   bust            11854 non-null  object 
 9   height          81683 non-null  object 
 10  user_name       82790 non-null  object 
 11  length          82755 non-null  object 
 12  fit             82790 non-null  object 
 13  user_id         82790 non-null  int64  
 14  shoe size       27915 non-null  float64
 15  shoe width      18607 non-null  object 
 16  review_summary  76065 non-null  object 
 17  review_text     76065 non-null 

In [4]:
df = df[~df["user_id"].isna()]  # selects only the rows where user_id is NOT NaN
df = df[~df["item_id"].isna()]
df = df[~df["quality"].isna()]  # rating
df = df[~df["review_text"].isna()]
df = df.reset_index()

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76000 entries, 0 to 75999
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   index           76000 non-null  int64  
 1   item_id         76000 non-null  int64  
 2   waist           2687 non-null   float64
 3   size            76000 non-null  int64  
 4   quality         76000 non-null  float64
 5   cup size        70190 non-null  object 
 6   hips            51599 non-null  float64
 7   bra size        70392 non-null  float64
 8   category        76000 non-null  object 
 9   bust            10894 non-null  object 
 10  height          75001 non-null  object 
 11  user_name       76000 non-null  object 
 12  length          75976 non-null  object 
 13  fit             76000 non-null  object 
 14  user_id         76000 non-null  int64  
 15  shoe size       25810 non-null  float64
 16  shoe width      17279 non-null  object 
 17  review_summary  76000 non-null 

In [6]:
len(df.item_id.unique())

1322

In [7]:
len(df.user_id.unique())

44811

In [8]:
len(df)

76000

## Неперсоналізовані рекомендаційні системи

Popularity-based recommender systems: Popularity-based recommenders work by suggesting the most frequently purchased products to customers. As the name suggests, Popularity based recommendation system works with the trend. It basically uses the items which are in trend right now. 

### Frequency of purchase
Popularity-based recommenders work by suggesting the most frequently purchased products to customers. This vague idea can be turned into at least two concrete implementations:
- Check which articles are bought most often across all customers. Recommend these articles to each customer.
Source: https://towardsdatascience.com/how-to-build-popularity-based-recommenders-with-polars-cc7920ad3f68#:~:text=Popularity%2Dbased%20recommenders%20work%20by,these%20articles%20to%20each%20customer.

In [9]:
items_popularity = df.groupby("item_id")["user_id"].count().sort_values(ascending=False)
items_popularity = items_popularity.reset_index()
items_popularity

Unnamed: 0,item_id,user_id
0,539980,2007
1,668696,1555
2,397005,1506
3,175771,1438
4,407134,1437
...,...,...
1317,513606,1
1318,535950,1
1319,536646,1
1320,504440,1


In [10]:
items_popularity.iloc[:3]["item_id"].to_list()

[539980, 668696, 397005]

In [11]:
import random
popular_items = items_popularity.iloc[:3]["item_id"].to_list()

def present_recommended_products(popular_items: list):
    print("**Currently trending products**")
    print("")

    for index, item_id_ in enumerate(popular_items):
        slice_df = df[df["item_id"] == item_id_]
        print(f"Recommended item {index+1}/{len(popular_items)}: product {item_id_}")
    
        category = slice_df["category"].unique()[0]
        print(f"{category=}")
    
        slice_with_reviews = slice_df[~slice_df["review_text"].isna()]
        reviews_for_slice = list(slice_with_reviews["review_text"].unique())
        if len(reviews_for_slice) > 0:
            reviews = random.sample(reviews_for_slice, min(len(reviews_for_slice), 3))
            print(f"User reviews:")
            for review in reviews:
                print("-", review)
            print("...")
        else:
            print("There are no reviews for this product yet.")
        print("")

present_recommended_products(popular_items)

**Currently trending products**

Recommended item 1/3: product 539980
category='tops'
User reviews:
- This is a beautiful cardigan, and well worth the money I paid for it.  I can't wait to wear it out! Also, the color is bright and vibrant without being loud.
- Lovely soft material, and the color is gorgeous!! The material quality is excellent, but I do wish it was slightly longer. The bottom hits just at my hip bone.
- I cannot stop wearing this cardigan. It's perfect! The mustard definitely isn't as bright as it is in the photos and it makes for an amazing fall color. I'll definitely be snagging this in other shades!
...

Recommended item 2/3: product 668696
category='bottoms'
User reviews:
- This skirt fits perfectly.  It sits snugly around my natural waist before flaring out, which gives it a very retro look.  I think it will quickly become one of my favorite pieces of clothing.
- Usually get a L, got an XL because reviews indicated it was a little small. I wasn't able to zip it up

# Content-based personalized systems

In [12]:
df_reviews = df[["item_id", "review_text", "category"]][~df["review_text"].isna()]
df_reviews.head()

Unnamed: 0,item_id,review_text,category
0,152702,"I liked the color, the silhouette, and the fab...",new
1,152702,From the other reviews it seems like this dres...,new
2,152702,I love the design and fit of this dress! I wo...,new
3,152702,I bought this dress for work it is flattering...,new
4,152702,This is a very professional look. It is Great ...,new


In [13]:
len(df_reviews)

76000

In [14]:
df_grouped = df_reviews.groupby(["item_id", "category"]).agg({'review_text': ' '.join})
df_grouped = df_grouped.reset_index()
df_grouped.head()

Unnamed: 0,item_id,category,review_text
0,152702,new,"I liked the color, the silhouette, and the fab..."
1,153494,new,I wanted to fit in this dress so bad so I made...
2,153798,new,Unfortunately the fabric is soooo thin and wri...
3,154411,new,My only complaint is that people notice when I...
4,154882,new,Most of the other reviews said size up one but...


In [15]:
len(df_grouped)

1322

Creating recommendations based on TF-IDF score:

![if-idf2](https://miro.medium.com/v2/resize:fit:860/format:webp/1*dug-uXDMOD6H5JMnYNpgfQ.png)
![tf-idf](https://miro.medium.com/v2/resize:fit:4800/format:webp/1*Uucq42G4ntPGJKzI84b3aA.png)

https://medium.com/@imamun/creating-a-tf-idf-in-python-e43f05e4d424

In [16]:
# TF-IDF Vectorization
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df_grouped['review_text'])
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 420695 stored elements and shape (1322, 22088)>

In [17]:
len(df_grouped['review_text'])

1322

In [18]:
pd.DataFrame(tfidf_matrix.toarray())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22078,22079,22080,22081,22082,22083,22084,22085,22086,22087
0,0.001489,0.0,0.0,0.0,0.0016,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.001636,0.0,0.0,0.0,0.0
2,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
3,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
4,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,0.000949,0.0,0.0,0.0,0.0000,0.0,0.001298,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1318,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1319,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
1320,0.000000,0.0,0.0,0.0,0.0000,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [19]:
tfidf_vectorizer.vocabulary_

{'liked': 10972,
 'the': 19327,
 'color': 4221,
 'silhouette': 17207,
 'and': 1434,
 'fabric': 7073,
 'of': 13143,
 'this': 19449,
 'dress': 6173,
 'but': 3253,
 'ruching': 16247,
 'just': 10353,
 'looked': 11200,
 'bunchy': 3188,
 'ruined': 16261,
 'whole': 21489,
 'thing': 19418,
 'was': 21178,
 'so': 17764,
 'disappointed': 5804,
 'really': 15414,
 'waned': 21119,
 'to': 19692,
 'like': 10971,
 'runs': 16283,
 'little': 11066,
 'small': 17583,
 'would': 21798,
 'need': 12635,
 'size': 17309,
 'up': 20643,
 'make': 11502,
 'it': 10080,
 'workappropriate': 21735,
 'from': 8011,
 'other': 13428,
 'reviews': 15992,
 'seems': 16695,
 'either': 6473,
 'works': 21756,
 'for': 7818,
 'your': 22023,
 'body': 2703,
 'type': 20212,
 'or': 13370,
 'doesn': 5978,
 'have': 8970,
 'waist': 21058,
 'flabby': 7543,
 'tummy': 20099,
 'is': 10058,
 'perfect': 14002,
 'me': 11738,
 'detail': 5615,
 'around': 1702,
 'front': 8017,
 'hides': 9154,
 'everything': 6827,
 'clingyness': 4058,
 'makes': 11507

In [20]:
# Compute cosine similarity between user input and items
user_input = 'Knee-long skirt made of silk'
user_tfidf = tfidf_vectorizer.transform([user_input])
user_df = pd.DataFrame(user_tfidf.toarray())
user_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22078,22079,22080,22081,22082,22083,22084,22085,22086,22087
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
tfidf_vectorizer.vocabulary_["silk"]

17211

In [22]:
user_df[tfidf_vectorizer.vocabulary_["silk"]]

0    0.726013
Name: 17211, dtype: float64

In [23]:
tfidf_vectorizer.idf_[tfidf_vectorizer.vocabulary_["silk"]]

np.float64(4.855452653939752)

Similarity: https://scikit-learn.org/stable/modules/metrics.html#cosine-similarity

In [24]:
from sklearn.metrics.pairwise import cosine_similarity 

cosine_similarities = cosine_similarity(user_tfidf, tfidf_matrix).flatten()
cosine_similarities

array([0.04200172, 0.04524237, 0.        , ..., 0.02718421, 0.03061408,
       0.04886547])

In [25]:
len(cosine_similarities)

1322

In [26]:
# Get indices of items sorted by similarity
top_n = 3
item_indices = cosine_similarities.argsort()[:-top_n-1:-1]
print("indices:", item_indices)
# Get recommended item names
recommendations = df_grouped['item_id'].iloc[item_indices].tolist()
print("item_ids:", recommendations)

indices: [ 915 1105 1133]
item_ids: [605558, 701811, 714723]


In [27]:
def content_based_recommender(df, user_input, top_n=3):
    """
    Content-based recommender system using TF-IDF.

    Parameters:
    - df: pandas DataFrame with 'item_id' and 'review_text' columns.
    - user_input: textual input representing user preferences.
    - top_n: number of top items to recommend.

    Returns:
    - recommendations: a list of top_n recommended item names.
    """
    # Combine relevant text features into a single string
    df['text_features'] = df['category'] + ' ' + df['review_text']

    # TF-IDF Vectorization
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(df['text_features'])

    # Compute cosine similarity between user input and items
    user_tfidf = tfidf_vectorizer.transform([user_input])
    cosine_similarities = cosine_similarity(user_tfidf, tfidf_matrix).flatten()

    # Get indices of items sorted by similarity
    item_indices = cosine_similarities.argsort()[:-top_n-1:-1]

    # Get recommended item names
    recommendations = df['item_id'].iloc[item_indices].tolist()

    return recommendations

# Example usage
user_preferences = 'Knee-long skirt made of silk'
recommended_products = content_based_recommender(df_grouped, user_preferences, top_n=3)
present_recommended_products(recommended_products)

**Currently trending products**

Recommended item 1/3: product 605558
category='tops'
User reviews:
- Just as pictured; soft material; looks and feels good quality. I usually wear an XL or L (I'm an apple shape) and got an L for this item and it fits well.
- Very good quality. The fabric feels like silk. :)
...

Recommended item 2/3: product 701811
category='bottoms'
User reviews:
- When I received this skirt, I was pleasantly surprised on the quality of the skirt. The buckle by the waist is a cute feature that makes the skirt unique. My only complaint is that the skirt is too long for my 5'4 stature. It reached about twothree inches above my ankles. Unfortunately, this skirt will be going back.
- The skirt is made very well but is comically long. I am only 4'10 so I expect things to be bigger on me but it goes to my ankles. The material is sturdy and thick but not like wool so it can be used in spring and early summer too. Overall a great find just very long.
- I liked the librarian l

## Collaborative Filtering (Колаборативні рекомендаційні системи)

## Item-based filtering

Item-based collaborative filtering recommends items based on the similarity between items, particularly their past interactions with users. 

In [28]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Sample dataset (user_id, item_id, rating)
data = {'user_id': [1, 1, 2, 2, 3, 3, 4, 4],
        'item_id': ['A', 'B', 'A', 'C', 'C', 'D', 'B', 'D'],
        'rating': [5, 4, 3, 2, 4, 5, 1, 3]}

df_sample = pd.DataFrame(data)
df_sample

Unnamed: 0,user_id,item_id,rating
0,1,A,5
1,1,B,4
2,2,A,3
3,2,C,2
4,3,C,4
5,3,D,5
6,4,B,1
7,4,D,3


In [29]:
# Pivot the DataFrame to create a user-item matrix
user_item_matrix = df_sample.pivot_table(index='user_id', columns='item_id', values='rating', fill_value=0)
user_item_matrix

item_id,A,B,C,D
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,4.0,0.0,0.0
2,3.0,0.0,2.0,0.0
3,0.0,0.0,4.0,5.0
4,0.0,1.0,0.0,3.0


In [30]:
user_item_matrix.T

user_id,1,2,3,4
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,5.0,3.0,0.0,0.0
B,4.0,0.0,0.0,1.0
C,0.0,2.0,4.0,0.0
D,0.0,0.0,5.0,3.0


In [31]:
# Calculate cosine similarity between items
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity

array([[1.        , 0.83189033, 0.2300895 , 0.        ],
       [0.83189033, 1.        , 0.        , 0.12478355],
       [0.2300895 , 0.        , 1.        , 0.76696499],
       [0.        , 0.12478355, 0.76696499, 1.        ]])

In [32]:
# Retrieve the target item's similarity scores with all other items
target_item = 'A'
target_item_index = user_item_matrix.columns.get_loc(target_item)
print("item:", target_item, ", index:", target_item_index)
target_item_similarity = item_similarity[target_item_index]
print("item:", target_item, ", similarity scores:", target_item_similarity)

item: A , index: 0
item: A , similarity scores: [1.         0.83189033 0.2300895  0.        ]


In [33]:
# Get indices of items sorted by similarity
top_n = 2
item_indices = target_item_similarity.argsort()[:-top_n-1:-1]
print(f"{item_indices}")

# Get recommended item ids
recommendations = user_item_matrix.columns[item_indices].tolist()
print("Recommended Items:", recommendations)

[0 1]
Recommended Items: ['A', 'B']


In [34]:
def item_based_recommender(df, target_item, top_n=2):
    """
    Item-based collaborative filtering recommender system.

    Parameters:
    - df: pandas DataFrame with 'user_id', 'item_id', and 'rating' columns.
    - target_item: item for which recommendations are sought.
    - top_n: number of top items to recommend.

    Returns:
    - recommendations: a list of top_n recommended items.
    """
    # Pivot the DataFrame to create a user-item matrix
    user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='quality', fill_value=0)  # df["rating"] = df["quality"]
    # Calculate cosine similarity between items
    item_similarity = cosine_similarity(user_item_matrix.T)

    # Retrieve the target item's similarity scores with all other items
    target_item_index = user_item_matrix.columns.get_loc(target_item)
    target_item_similarity = item_similarity[target_item_index]

    # Get indices of items sorted by similarity
    item_indices = target_item_similarity.argsort()[:-top_n-1:-1]

    # Get recommended item ids
    recommendations = user_item_matrix.columns[item_indices].tolist()

    return recommendations

In [35]:
target_item = 714723
recommended_products = item_based_recommender(df, target_item, top_n=4)
present_recommended_products(recommended_products)

**Currently trending products**

Recommended item 1/4: product 714723
category='bottoms'
User reviews:
- Adorable skirt. Beautiful fabric and flow to it. My new fave.
- This skirt fit me exactly right  it was snug but not too tight around my waist and then laid perfectly down to my knees.  The skirt swishes and sways when you walk and is so fun to twirl in.  My only problem with the skirt is that it has a lot a lot a lot of fabric (but of course I don't really mind that because that gives the skirt all the life it has).  This is my new favorite skirt and it's my favorite thing to dance in.
- Love this skirt! It's even better than I hoped for and I'm very pleased with the quality.
...

Recommended item 2/4: product 731618
category='bottoms'
User reviews:
- I have been an almost completely consistent medium for as long as I have been shopping on Modcloth.  So of course, I ordered a medium in this skirt.  This skirt had a completely inelastic waistband and zipper closure and I could barel

## User-based filtering

In [36]:
df_sample

Unnamed: 0,user_id,item_id,rating
0,1,A,5
1,1,B,4
2,2,A,3
3,2,C,2
4,3,C,4
5,3,D,5
6,4,B,1
7,4,D,3


In [37]:
# Pivot the DataFrame to create a user-item matrix
user_item_matrix = df_sample.pivot_table(index='user_id', columns='item_id', values='rating', fill_value=0)

# Calculate cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)
user_similarity

array([[1.        , 0.64972212, 0.        , 0.19754592],
       [0.64972212, 1.        , 0.34651847, 0.        ],
       [0.        , 0.34651847, 1.        , 0.7407972 ],
       [0.19754592, 0.        , 0.7407972 , 1.        ]])

In [38]:
user_item_matrix

item_id,A,B,C,D
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,4.0,0.0,0.0
2,3.0,0.0,2.0,0.0
3,0.0,0.0,4.0,5.0
4,0.0,1.0,0.0,3.0


In [39]:
item_similarity

array([[1.        , 0.83189033, 0.2300895 , 0.        ],
       [0.83189033, 1.        , 0.        , 0.12478355],
       [0.2300895 , 0.        , 1.        , 0.76696499],
       [0.        , 0.12478355, 0.76696499, 1.        ]])

In [40]:
print("item matrix")
print(user_item_matrix.T)
print("")
print("user matrix")
print(user_item_matrix)

item matrix
user_id    1    2    3    4
item_id                    
A        5.0  3.0  0.0  0.0
B        4.0  0.0  0.0  1.0
C        0.0  2.0  4.0  0.0
D        0.0  0.0  5.0  3.0

user matrix
item_id    A    B    C    D
user_id                    
1        5.0  4.0  0.0  0.0
2        3.0  0.0  2.0  0.0
3        0.0  0.0  4.0  5.0
4        0.0  1.0  0.0  3.0


In [41]:
# Retrieve the target user's similarity scores with all other users
target_user = 1
target_user_index = user_item_matrix.index.get_loc(target_user)
target_user_similarity = user_similarity[target_user_index]

# Get indices of users sorted by similarity
top_n = 3
user_indices = target_user_similarity.argsort()[:-top_n-1:-1]
user_indices

array([0, 1, 3])

In [42]:
user_item_matrix.iloc[user_indices].mean()

item_id
A    2.666667
B    1.666667
C    0.666667
D    1.000000
dtype: float64

In [43]:
# Get recommended item ids based on similar users

#recommendations = 
user_item_matrix.iloc[user_indices].mean().sort_values(ascending=False).index.tolist()


['A', 'B', 'D', 'C']

In [44]:
user_item_matrix

item_id,A,B,C,D
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,4.0,0.0,0.0
2,3.0,0.0,2.0,0.0
3,0.0,0.0,4.0,5.0
4,0.0,1.0,0.0,3.0


In [None]:
def user_based_recommender(df, target_user, top_n=2):
    """
    User-based collaborative filtering recommender system.

    Parameters:
    - df: pandas DataFrame with 'user_id', 'item_id', and 'rating' columns.
    - target_user: user for whom recommendations are sought.
    - top_n: number of top items to recommend.

    Returns:
    - recommendations: a list of top_n recommended items.
    """
    # Pivot the DataFrame to create a user-item matrix
    df = df[~df["quality"].isna()]
    user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='quality', fill_value=0)

    # Calculate cosine similarity between users
    user_similarity = cosine_similarity(user_item_matrix)

    # Retrieve the target user's similarity scores with all other users
    target_user_index = user_item_matrix.index.get_loc(target_user)
    target_user_similarity = user_similarity[target_user_index]

    # Get indices of users sorted by similarity
    user_indices = target_user_similarity.argsort()[:-top_n-1:-1]

    # Get recommended item ids based on similar users
    recommendations = user_item_matrix.iloc[user_indices].mean().sort_values(ascending=False).index.tolist()

    return recommendations

# Example usage
target_user = 320458
recommended_items = user_based_recommender(df, target_user, top_n=3)
print("Recommended Items:", recommended_items[:3])

In [None]:
present_recommended_products(recommended_items[:3])

## Hybrid Filtering

In [None]:
# Collaborative Filtering
collaborative_recommendations = user_based_recommender(df, target_user=320458, top_n=3)

In [None]:
collaborative_recommendations = collaborative_recommendations[:3]

In [None]:
collaborative_recommendations

In [None]:
# Content-Based Filtering

content_recommendations = content_based_recommender(df_grouped, user_input='Knee-long skirt made of silk', top_n=len(df_grouped["item_id"].unique()))

In [None]:
content_recommendations = content_recommendations[:3]

In [None]:
content_recommendations

In [None]:
# Hybrid Recommendations (Combining Collaborative and Content-Based)
# Union - https://uk.wikipedia.org/wiki/Об%27єднання_множин
top_n = 3

hybrid_recommendations = set(collaborative_recommendations).union(set(content_recommendations)) 

hybrid_recommendations = [item for item in hybrid_recommendations]

print("Recommended Items:", hybrid_recommendations[:top_n])


In [None]:
present_recommended_products(hybrid_recommendations[:top_n])