# Description

# 0. Setting Up the Env

## 0.1 Import Packages

In [14]:
import numpy as np
import pandas as pd
import random

from sklearn.metrics.pairwise import cosine_similarity

## 0.2 Constants

In [5]:
np.random.seed(99)

# 1. Data

**Data Descriptions**
* There are 10 Users
* 10 Items
* Users with a history of buying items(Only Once) & 1 Rating

In [18]:
 
users = ["User" + str(i) for i in range(1,11)]
items = ["Item" + str(i) for i in range(1,11)]
ratings = np.random.randint(1, 6, size=(10, 10))  

df = pd.DataFrame(ratings, index=users, columns=items)
print("Ratings Matrix:\n", df)

Ratings Matrix:
         Item1  Item2  Item3  Item4  Item5  Item6  Item7  Item8  Item9  Item10
User1       3      5      4      2      5      1      4      5      5       3
User2       4      1      2      1      3      2      2      1      3       5
User3       2      2      3      3      1      1      1      5      4       2
User4       4      3      4      3      5      2      4      2      4       5
User5       2      3      3      2      3      1      4      1      2       1
User6       5      1      3      4      4      3      1      5      3       2
User7       2      5      1      1      5      5      5      1      1       4
User8       3      4      4      2      2      2      1      2      5       4
User9       4      5      4      1      3      1      5      3      5       5
User10      3      3      4      3      3      4      2      2      4       2


**Data Descriptions for lvl 2 data**
* There are 10 Users
* 10 Items
* Users with a history of buying items, mre than once, and not consistent across users

In [16]:
num_rows = 1000
num_users = len(users)

users_list = users.copy()

users_list += random.choices(users, k=num_rows - len(users_list))

random.shuffle(users_list)

items = random.choices(items, k=num_rows)
ratings = np.random.randint(1, 6, size=num_rows)  # 1 to 5 inclusive

df2 = pd.DataFrame({
    'users_list': users_list,
    'items': items,
    'ratings': ratings
})



In [19]:

print(df2.head())
print("\nUsers count:\n", df2['users_list'].value_counts())

  users_list  items  ratings
0      User8  Item2        2
1      User4  Item8        5
2     User10  Item5        4
3      User1  Item3        1
4      User5  Item3        1

Users count:
 users_list
User6     118
User5     111
User3     109
User1     100
User9     100
User4      99
User7      98
User2      97
User10     85
User8      83
Name: count, dtype: int64


In [21]:
df2_piv = df2.pivot_table(index='users_list', columns='items', values='ratings', aggfunc='mean', fill_value=0)
df2_piv.head()

items,Item1,Item10,Item2,Item3,Item4,Item5,Item6,Item7,Item8,Item9
users_list,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
User1,3.333333,5.0,2.9,2.916667,3.4,2.625,3.333333,3.0,3.363636,2.9
User10,3.25,2.727273,3.25,2.25,4.166667,3.714286,2.666667,3.625,2.9,2.181818
User2,2.8,2.818182,2.461538,4.0,2.0,3.333333,3.636364,3.578947,4.428571,2.714286
User3,2.8,3.25,2.625,2.785714,2.777778,3.363636,3.0625,3.083333,3.125,3.083333
User4,2.5,3.333333,3.090909,3.357143,3.0,2.9,3.666667,2.6,3.909091,2.888889


# 2. Similarity Lvl 1

In [10]:
user_similarity = cosine_similarity(df)
user_similarity_df = pd.DataFrame(user_similarity, index=users, columns=users)
user_similarity_df.head(10)

Unnamed: 0,User1,User2,User3,User4,User5,User6,User7,User8,User9,User10
User1,1.0,0.868037,0.836455,0.822474,0.707107,0.847459,0.874044,0.7,0.746203,0.847935
User2,0.868037,1.0,0.847625,0.876762,0.844672,0.887904,0.855193,0.840961,0.74366,0.975024
User3,0.836455,0.847625,1.0,0.808171,0.852058,0.957824,0.968246,0.745356,0.81574,0.816122
User4,0.822474,0.876762,0.808171,1.0,0.847791,0.809641,0.812605,0.820157,0.726105,0.830223
User5,0.707107,0.844672,0.852058,0.847791,1.0,0.858008,0.867528,0.789603,0.741215,0.850492
User6,0.847459,0.887904,0.957824,0.809641,0.858008,1.0,0.954097,0.83205,0.827837,0.881906
User7,0.874044,0.855193,0.968246,0.812605,0.867528,0.954097,1.0,0.721688,0.748807,0.814152
User8,0.7,0.840961,0.745356,0.820157,0.789603,0.83205,0.721688,1.0,0.88123,0.873742
User9,0.746203,0.74366,0.81574,0.726105,0.741215,0.827837,0.748807,0.88123,1.0,0.813512
User10,0.847935,0.975024,0.816122,0.830223,0.850492,0.881906,0.814152,0.873742,0.813512,1.0


In [11]:
# Users similar to User 1
user1_ratings = df.loc['User1']
similar_users = user_similarity_df['User1'].sort_values(ascending=False).index[1:]  # Exclude User1 itself
similar_users

Index(['User7', 'User2', 'User10', 'User6', 'User3', 'User4', 'User9', 'User5',
       'User8'],
      dtype='object')

In [12]:
# Get weighted ratings from similar users
recommendations = []
for item in df.columns:
    weighted_sum = 0
    total_similarity = 0
    for user in similar_users:
        if pd.notna(df.loc[user, item]):
            weighted_sum += user_similarity_df.loc['User1', user] * df.loc[user, item]
            total_similarity += user_similarity_df.loc['User1', user]
    if total_similarity > 0:
        recommendations.append((item, weighted_sum / total_similarity))
    else:
        recommendations.append((item, 0))
 
recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)
print("\nRecommendations for User1:")
for item, score in recommendations:
    print(f"{item}: {score:.2f}")


Recommendations for User1:
Item10: 3.62
Item7: 3.51
Item5: 3.37
Item6: 3.19
Item9: 3.14
Item8: 3.07
Item1: 3.06
Item3: 2.85
Item4: 2.61
Item2: 2.16


# 3. Similarity Lvl 2

In [22]:
user_similarity = cosine_similarity(df2_piv)
user_similarity_df = pd.DataFrame(user_similarity, index=users, columns=users)
user_similarity_df.head(10)

Unnamed: 0,User1,User2,User3,User4,User5,User6,User7,User8,User9,User10
User1,1.0,0.958544,0.952111,0.982845,0.980174,0.980379,0.973894,0.971778,0.969638,0.982492
User2,0.958544,1.0,0.94045,0.976893,0.960683,0.986213,0.980654,0.97554,0.967683,0.973015
User3,0.952111,0.94045,1.0,0.979174,0.982911,0.972138,0.951128,0.980083,0.965969,0.976702
User4,0.982845,0.976893,0.979174,1.0,0.989793,0.995873,0.98604,0.991287,0.98573,0.989553
User5,0.980174,0.960683,0.982911,0.989793,1.0,0.986151,0.982215,0.993137,0.987753,0.993414
User6,0.980379,0.986213,0.972138,0.995873,0.986151,1.0,0.987773,0.987332,0.986431,0.991445
User7,0.973894,0.980654,0.951128,0.98604,0.982215,0.987773,1.0,0.988645,0.988238,0.978579
User8,0.971778,0.97554,0.980083,0.991287,0.993137,0.987332,0.988645,1.0,0.986637,0.985911
User9,0.969638,0.967683,0.965969,0.98573,0.987753,0.986431,0.988238,0.986637,1.0,0.986603
User10,0.982492,0.973015,0.976702,0.989553,0.993414,0.991445,0.978579,0.985911,0.986603,1.0


In [23]:
# Users similar to User 1
user1_ratings = df2_piv.loc['User1']
similar_users = user_similarity_df['User1'].sort_values(ascending=False).index[1:]  # Exclude User1 itself
similar_users

Index(['User4', 'User10', 'User6', 'User5', 'User7', 'User8', 'User9', 'User2',
       'User3'],
      dtype='object')

In [24]:
# Get weighted ratings from similar users
recommendations = []
for item in df.columns:
    weighted_sum = 0
    total_similarity = 0
    for user in similar_users:
        if pd.notna(df.loc[user, item]):
            weighted_sum += user_similarity_df.loc['User1', user] * df.loc[user, item]
            total_similarity += user_similarity_df.loc['User1', user]
    if total_similarity > 0:
        recommendations.append((item, weighted_sum / total_similarity))
    else:
        recommendations.append((item, 0))
 
recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)
print("\nRecommendations for User1:")
for item, score in recommendations:
    print(f"{item}: {score:.2f}")


Recommendations for User1:
Item9: 3.44
Item10: 3.33
Item5: 3.23
Item1: 3.22
Item3: 3.11
Item2: 3.00
Item7: 2.78
Item8: 2.44
Item6: 2.34
Item4: 2.23


# End