In [1]:
import pandas as pd
from pathlib import Path
from sklearn.metrics.pairwise import cosine_similarity

> Source: https://developers.google.com/machine-learning/recommendation/overview/candidate-generation

| Type                   | Definition                                                                                                     | Example                                                                                       |
|------------------------|----------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------|
| content-based filtering | Uses similarity between items to recommend items similar to what the user likes.                         | If user A watches two cute cat videos, then the system can recommend cute animal videos to that user. |
| collaborative filtering| Uses similarities between queries and items simultaneously to provide recommendations.                   | If user A is similar to user B, and user B likes video 1, then the system can recommend video 1 to user A (even if user A hasn’t seen any videos similar to video 1). |

In [2]:
# Root folder of the Steam dataset, given relative to the working directory.
DATA_DIR = Path("../data/steam")
# Sub-folder from which this notebook reads its input.
PREPRO_WORK_DIR = DATA_DIR.joinpath("preprocessed")

# Pickled users x games pivot table loaded in the next cell.
PIVOT_USERS_GAMES_RECSCORE_DF_PATH = PREPRO_WORK_DIR.joinpath(
    "pivot_users_games_recscore_df.pkl"
)

In [3]:
# Load the users x games pivot table (rows: steam_id, columns: item_id).
# Unpickling can execute arbitrary code, so only load a file that was
# produced by a trusted source.
df = pd.read_pickle(filepath_or_buffer=PIVOT_USERS_GAMES_RECSCORE_DF_PATH)
df

item_id,10,100,10000,1002,100400,100410,10080,10090,100970,10100,...,9970,99700,9980,99810,99830,99890,9990,99900,99910,99920
steam_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960304530,165023.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,161700.0,...,-1.0,161700.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561197960493731,40514.0,-1.0,40500.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,40523.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561197960540939,110300.0,110300.0,110300.0,-1.0,-1.0,-1.0,-1.0,110300.0,-1.0,110300.0,...,-1.0,110300.0,-1.0,110300.0,110300.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561197960889906,59036.0,59000.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,59019.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561197961040696,41206.0,41200.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561198280059944,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561198295803313,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1101.0,-1.0,-1.0
76561198297906261,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
76561198299095634,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [4]:
# Column labels of the pivot table, i.e. the item_id of every game.
df.columns

Index(['10', '100', '10000', '1002', '100400', '100410', '10080', '10090',
       '100970', '10100',
       ...
       '9970', '99700', '9980', '99810', '99830', '99890', '9990', '99900',
       '99910', '99920'],
      dtype='object', name='item_id', length=9750)

In [5]:
# Pairwise cosine similarity between users: each row of `df` (one steam_id)
# is compared with every other row, giving a square users x users array.
# NOTE(review): `df` contains -1.0 entries and they take part in the dot
# products and norms like any other value. If -1.0 is a "no interaction"
# placeholder, consider mapping it to 0 before this step - confirm against
# the preprocessing notebook.
users_similarity_matrix = cosine_similarity(df)

# Label both axes with the steam_ids in a single construction step instead of
# building an unlabeled frame and mutating it afterwards with `inplace=True`.
# Each axis gets its own copy of the index so that neither shares state with
# `df` or with the other axis.
users_similarity_matrix_df = pd.DataFrame(
    users_similarity_matrix,
    index=df.index.copy(),
    columns=df.index.copy(),
)
users_similarity_matrix_df

steam_id,76561197960304530,76561197960493731,76561197960540939,76561197960889906,76561197961040696,76561197961397085,76561197962231295,76561197963586191,76561197963904076,76561197964273287,...,76561198236893796,76561198255348595,76561198255497369,76561198256756969,76561198267779591,76561198280059944,76561198295803313,76561198297906261,76561198299095634,76561198308665434
steam_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960304530,1.000000,0.321423,0.406916,0.421869,0.337723,0.059089,0.180903,0.245719,0.357208,0.230440,...,0.032709,0.031759,0.111616,0.073518,0.044215,0.013690,0.034342,0.068080,0.073661,0.150407
76561197960493731,0.321423,1.000000,0.354757,0.310554,0.235436,0.127672,0.153857,0.321687,0.318094,0.254813,...,0.058294,0.018160,0.126429,0.108746,0.097926,0.059366,0.079019,0.048051,0.091338,0.120522
76561197960540939,0.406916,0.354757,1.000000,0.401291,0.290515,0.066260,0.181876,0.335350,0.386085,0.214414,...,0.039111,0.008286,0.108439,0.061138,0.050458,0.021097,0.040012,0.055466,0.077102,0.133507
76561197960889906,0.421869,0.310554,0.401291,1.000000,0.370875,0.093861,0.164392,0.262713,0.315749,0.286430,...,0.056922,0.057140,0.136484,0.131478,0.086908,0.068541,0.067918,0.050888,0.108809,0.177006
76561197961040696,0.337723,0.235436,0.290515,0.370875,1.000000,0.100510,0.168780,0.246494,0.384994,0.223243,...,0.049608,0.015884,0.105722,0.152502,0.105237,0.052352,0.090280,0.025901,0.074715,0.186417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561198280059944,0.013690,0.059366,0.021097,0.068541,0.052352,-0.001044,0.131011,0.040377,0.034863,0.069218,...,0.260914,-0.000276,0.168565,0.592343,0.471239,1.000000,0.577135,0.039937,0.093603,-0.000402
76561198295803313,0.034342,0.079019,0.040012,0.067918,0.090280,-0.000530,0.200952,0.061756,0.048125,0.070952,...,0.305969,-0.000247,0.202759,0.369767,0.724533,0.577135,1.000000,0.077634,0.042761,-0.000735
76561198297906261,0.068080,0.048051,0.055466,0.050888,0.025901,-0.000989,-0.001196,0.021430,0.084611,0.023282,...,0.085556,-0.000297,0.159633,0.071508,-0.000365,0.039937,0.077634,1.000000,0.078047,0.014646
76561198299095634,0.073661,0.091338,0.077102,0.108809,0.074715,0.153992,0.077995,0.073852,0.069870,0.054704,...,0.062206,-0.000314,0.125550,0.134271,0.020230,0.093603,0.042761,0.078047,1.000000,-0.000373


In [6]:
# Pairwise cosine similarity between games: transposing `df` turns every
# item_id column into a row, so the result is a square games x games array.
# NOTE(review): `df` contains -1.0 entries and they take part in the dot
# products and norms like any other value. If -1.0 is a "no interaction"
# placeholder, consider mapping it to 0 before this step - confirm against
# the preprocessing notebook.
games_similarity_matrix = cosine_similarity(df.T)

# Label both axes with the item_ids in a single construction step instead of
# building an unlabeled frame and mutating it afterwards with `inplace=True`.
# Each axis gets its own copy of the column index so that neither shares
# state with `df` or with the other axis.
games_similarity_matrix_df = pd.DataFrame(
    games_similarity_matrix,
    index=df.columns.copy(),
    columns=df.columns.copy(),
)
games_similarity_matrix_df

item_id,10,100,10000,1002,100400,100410,10080,10090,100970,10100,...,9970,99700,9980,99810,99830,99890,9990,99900,99910,99920
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,1.000000,0.924861,0.022490,0.449665,-0.000614,0.080107,-0.000165,0.620438,0.015398,0.685553,...,0.606150,0.752650,0.655018,0.489799,0.080986,0.615927,0.613826,0.702233,0.746313,0.586221
100,0.924861,1.000000,0.022484,0.506052,-0.000598,0.068133,-0.000161,0.689477,0.017406,0.757256,...,0.681961,0.760154,0.693718,0.534951,0.048248,0.690537,0.690350,0.714516,0.757977,0.659576
10000,0.022490,0.022484,1.000000,0.003623,-0.000090,-0.000017,-0.000026,0.245918,0.073416,0.068372,...,-0.000007,0.246246,0.221046,0.299504,0.170324,-0.000008,-0.000008,0.178232,-0.000013,-0.000008
1002,0.449665,0.506052,0.003623,1.000000,-0.000056,-0.000010,-0.000016,0.605565,-0.000046,0.631355,...,0.731393,0.529152,0.605030,0.001583,-0.000010,0.716236,0.712071,0.495327,0.591294,0.722563
100400,-0.000614,-0.000598,-0.000090,-0.000056,1.000000,-0.000092,-0.000054,-0.000395,-0.000076,-0.000132,...,-0.000078,-0.000296,-0.000140,0.021656,-0.000117,-0.000098,-0.000102,-0.000530,-0.000189,-0.000099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99890,0.615927,0.690537,-0.000008,0.716236,-0.000098,-0.000015,-0.000027,0.799122,-0.000081,0.849991,...,0.964074,0.725584,0.814553,0.464009,0.003944,1.000000,0.938608,0.621702,0.741774,0.886079
9990,0.613826,0.690350,-0.000008,0.712071,-0.000102,-0.000015,-0.000028,0.823156,-0.000084,0.853863,...,0.973589,0.715008,0.792960,0.486254,-0.000014,0.938608,1.000000,0.614936,0.720842,0.880848
99900,0.702233,0.714516,0.178232,0.495327,-0.000530,0.059554,-0.000143,0.603264,-0.000436,0.676072,...,0.631043,0.727844,0.739703,0.427343,0.001935,0.621702,0.614936,1.000000,0.832646,0.688910
99910,0.746313,0.757977,-0.000013,0.591294,-0.000189,-0.000026,-0.000051,0.623633,-0.000156,0.806658,...,0.740399,0.785748,0.806699,0.382866,-0.000024,0.741774,0.720842,0.832646,1.000000,0.818042


In [7]:
# Range check of the user-user similarities. The diagonal (a user compared
# with themselves) is 1 up to floating-point rounding; values below 0 are
# possible because `df` holds negative entries.
(
    users_similarity_matrix.min(),
    users_similarity_matrix.max(),
)

(-0.006081349794668391, 1.0000000000000142)

In [8]:
# Range check of the game-game similarities. The diagonal (a game compared
# with itself) is 1 up to floating-point rounding; values below 0 are
# possible because `df` holds negative entries.
(
    games_similarity_matrix.min(),
    games_similarity_matrix.max(),
)

(-0.009969048501835305, 1.000000000000014)

In [9]:
# NOTE(review): this whole cell is a disabled draft (every line is commented
# out), so nothing here runs. Known issues to fix before re-enabling it:
#   * `model.kneighbors(...)` returns a (distances, indices) pair (it is
#     unpacked that way in the next cell), so `for neighbor_id in neighbors`
#     loops over those two arrays rather than over individual neighbours.
#   * the returned indices are row positions, while `df.loc[...]` expects
#     steam_id labels - presumably `df.iloc` / `df.index[...]` is needed; confirm.
#   * the `== 1` / `== 0` checks do not match the values displayed for `df`
#     above (-1.0 and large positive scores).
#   * the import belongs in the imports cell at the top of the notebook.
# from sklearn.neighbors import NearestNeighbors

# # Step 1: Prepare your user-item matrix and similarity matrix
# # Step 2: Create a NearestNeighbors model
# k = 10  # Number of neighbors to consider
# model = NearestNeighbors(n_neighbors=k, metric='cosine')
# model.fit(df)  # Use user-user or item-item similarity matrix

# # Step 3: For a new user (cold start), recommend popular items or use demographics/context
# # For existing users, find K-nearest neighbors and recommend items

# # Replace with the target user's ID
# user_id = "76561197960304530"
# user_history = df.loc[user_id, :].to_numpy()  # User's interaction history
# neighbors = model.kneighbors([user_history], n_neighbors=k)

# # Step 4: Calculate recommendations based on neighbors' interactions
# recommended_items = []
# for neighbor_id in neighbors:
#     neighbor_history = df.loc[neighbor_id, :].to_numpy()
#     for item_id in range(len(neighbor_history)):
#         if neighbor_history[item_id] == 1 and user_history[item_id] == 0:
#             recommended_items.append(item_id)

# # Step 5: Return the top 10 recommended items
# top_n_recommendations = recommended_items[:10]
# top_n_recommendations

In [10]:
# NOTE(review): disabled draft - it depends on `model`, which is only created
# in the commented-out cell above, so both cells must be re-enabled together.
# `model` is fitted on the rows of `df` (users), so `kneighbors_indices` are
# presumably user row positions; the last line indexes `df.keys()` (the game
# columns) with them - `df.index` looks like the intended lookup; confirm.
# # game_id = "10"
# # game_scores = df.loc[:, game_id].to_numpy()

# user_id = "76561197960304530"
# user_per_game_scores = df.loc[user_id, :].to_numpy()

# kneighbors_distances, kneighbors_indices = model.kneighbors(
#     X=[user_per_game_scores],
#     n_neighbors=5
# )

# print(f"{kneighbors_distances=}")
# print(f"{kneighbors_indices=}")
# print(f"{df.keys()[kneighbors_indices]}")