In [446]:
import sys
sys.path.append("..")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from reco_systems.filter import filter_df
from reco_systems.user_game_matrix import *

from surprise import NMF
from surprise import Dataset
from surprise.reader import Reader
from sklearn.cluster import KMeans
import seaborn as sns
from sklearn.manifold import TSNE

from PIL import ImageColor
import plotly.express as px

sns.set_theme()

In [447]:
folder = "../database_cleaned"
jeux_clean  = pd.read_csv(f"{folder}/jeux_clean.csv", index_col=0)
avis_clean  = pd.read_csv(f"{folder}/avis_clean.csv", index_col=0)
users       = pd.read_csv(f"{folder}/users.csv", index_col=0)

min_reviews = 10 # change to set one
# filter data with the minimum reviews
rev_filter = filter_df(avis_clean, min_reviews)

11201 10549
2252 2676
1957 2618
1947 2616
1946 2615
1945 2614


## NNMF -> 20 latent factors

In [448]:
model = NMF(n_factors=20, random_state=42, biased=False, reg_pu= 0.1, reg_qi= 0.1)
data = Dataset.load_from_df(rev_filter[["User id", "Game id", "Rating"]], reader=Reader(rating_scale=(0, 10)))
trainset = data.build_full_trainset()
nmf = model.fit(trainset)

# Extract matrices
U = nmf.pu  # User-feature matrix (W)
G = nmf.qi  # Item-feature matrix (H)

games_ids = np.array([trainset.to_raw_iid(i) for i in range(len(G))])
users_ids = np.array([trainset.to_raw_uid(u) for u in range(len(U))])
G = G[np.argsort(games_ids), :]
U = U[np.argsort(users_ids), :]

### 30 Clusters and t-SNE (perplexity = $40$)

In [449]:
sns.set_theme(rc={"figure.figsize":(6, 5)})
NB_CLUSTERS = 30
kmeans = KMeans(n_clusters=NB_CLUSTERS, random_state=42) 
distances = kmeans.fit_transform(G) 
game_labels = kmeans.labels_ 
G_embedded = TSNE(n_components=3, perplexity=40, max_iter=2000, random_state=1).fit_transform(G)

### Pushing clusters away from each other

In [233]:
centers_pos = {

    24 : [0, 0, 10],  # best  
    10 : [0, 0, -10], # worst 

    1  : [-20, -20, -20], # collecte 
    26 : [-20+4, -20+4, -20],
    17 : [-20+4, -20+4, -20+4],

    28 : [20, -20, -20],  # atmosphere, settings 
    7  : [16, -16, -20],
    6  : [18, -18, -16],

    12  : [-20, 20, -20], # civilisation
    18  : [-16, 16, -20], 
    21  : [-18, 18, -16], 

    5   : [-20, 20, 20], # guerre

    3   : [0, 20, 0], # capture territoire 
    11  : [0, 16, -2], 
    27  : [-2, 20, 2], 

    2   :[20-4, 20+4, 20], # complexe
    9   :[20-2, 20-2, 20-2], 
    13  :[20+4, 20-4, 20], 
    25  :[20+4, 20+4, 20-2], 
    29  :[20, 20, 20+4], 

    22  : [20, -20, 23], # logique, déduction
    23  : [20, -20, 20],

    4 : [0, -20, -2], # great visuals
    8 : [0, -20, 2], 

    14 : [-20, -2, -2], # rapide & tactique
    20 : [-20, 2, 2],
 
    15 : [20, -2, 0], # eurogames 
    16 : [20, 2, 0], 

    0 : [20, 20, -16], # construction & expansion
    19 : [20, 20, -20],
}

clusters_groups = [[0, 19], [22, 23], [3, 11, 27], [2, 9, 13, 25, 29], [14, 20], [15, 16], [4, 8], [12, 18, 21], [1, 8, 17, 26], [6, 7, 28]]

In [234]:
def translate(centers_pos, centers, points, labels):
    for cluster, center in enumerate(centers):
        translate_vector = np.array(centers_pos[cluster]) * 10 - center

        mask = (labels == cluster)
        points[mask] += translate_vector
        centers[cluster] += translate_vector

def recalc_centers(clusters_groups, translated_centers):
    groups_centers = []
    for group in clusters_groups:
        groups_centers.append(translated_centers[group, :].mean(axis=0))
    return np.array(groups_centers)

def push_points(centers, centers_groups, points, labels):
    for group_idx, group in enumerate(clusters_groups):
        for cluster in group:
            cluster_mask = (labels == cluster)
            direction = centers[cluster] - centers_groups[group_idx]
            if np.linalg.norm(direction) == 0:
                continue
            unit_vector = direction / np.linalg.norm(direction)
            points[cluster_mask] += unit_vector * 20

centers = []
for cluster in np.sort(np.unique(kmeans.labels_)):
    mask = (kmeans.labels_ == cluster)
    centers.append(G_embedded[mask].mean(axis=0))
centers = np.array(centers)

In [None]:
# Move centers
centers_copy = centers.copy()
translated_points = G_embedded.copy()

translate(centers_pos, centers_copy, translated_points, kmeans.labels_)

# Calc center for each cluster group
centers_groups = recalc_centers(clusters_groups, centers_copy)

# Push points away from each other
push_points(centers_copy, centers_groups, translated_points, kmeans.labels_)
np.save("tsne_pushed.npy", translated_points)

### Generate data for app (t-SNE)

In [268]:
matrix_ratings, mask_ratings, users_table, games_table = get_matrix_user_game(rev_filter)
users_table

0          0
1          1
2          2
3          3
4          4
        ... 
1940    9509
1941    9583
1942    9596
1943    9701
1944    9965
Name: User id, Length: 1945, dtype: int64

In [239]:
clusters = kmeans.labels_
themes = {
    "Tout les clusters": [],
    "🧩🕵️ Logique et Déduction": [22, 23],
    "🏗️🏰 Construction & expansion": [0, 19],
    "🧠⚡ Rapide & Tactique": [14, 20],
    "📚⏳ Longs & complexes": [2, 9, 13, 25, 29],
    "💎🃏 Collecte":  [1, 17, 26],
    "🪖💣 Guerre": [5],
    "🏛️🎲 Eurogames": [15, 16],
    "🌍🏺 Civilisation": [12, 18, 21],
    "🗡️🚩 Capture territoire": [3, 11, 27],
    "🌅🖼️ Superbes visuels": [4, 8],
    "🐉📜 Culture | Fantaisie": [6, 7, 28],
    "👎 Coups de blues": [10],
    "🏆 Coups de coeurs": [24],
}
themes_inverse = {cluster: theme for theme, clusters in themes.items() for cluster in clusters}


colors = {22: "#6B4C9A", 23: "#B39DDB",  # logique, déductions
          0: "#99582a", 19: "#ffe6a7",  # construction expansion
          14: "#fb8500", 20: "#ffb703",  # rapide, tactique
          2: "#48cae4", 9: "#00b4d8", 13: "#0096c7", 25: "#023e8a", 29: "#0077b6",
          1: "#1e96fc", 17: "#fcf300", 26: "#a2d6f9",  # collecte
          5: "#4B4B4B",  # guerre
          12: "#8C6D31", 18: "#C2B280", 21: "#4C6B5C",  # civilisation
          15: "#4E79A7", 16: "#A6C8E0",  # eurogames
          3: "#E15759", 11: "#F28E8E", 27: "#B63A3A",  # capture territoire
          4: "#ffc300", 8: "#ffd60a",  # super visuals
          6: "#00509d", 7: "#38b000", 28: "#A77DC2",  # cultural, fantastic settings
          24: "#f00000",
          10: "#000000",
          }

colors_points = [list(ImageColor.getcolor(colors[cluster], "RGB")) for cluster in clusters]

In [None]:
color_palette_30 = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b",
    "#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#393b79", "#637939",
    "#8c6d31", "#843c39", "#7b4173", "#5254a3", "#6b6ecf", "#9c9ede",
    "#8ca252", "#b5cf6b", "#cedb9c", "#bd9e39", "#e7ba52", "#e7969c",
    "#ad494a", "#a55194", "#ce6dbd", "#de9ed6", "#6b6b6b", "#c7c7c7"
]
label_names = {str(i): f"Cluster {i}" for i in range(30)}

colors = [color_palette_30[i] for i in kmeans.labels_]
games_info = pd.DataFrame(data={"x":translated_points[:, 0], "y":translated_points[:, 1], "z":translated_points[:, 2],
                                "color":[color_palette_30[i] for i in kmeans.labels_],
                                "size":[1] * translated_points.shape[0], "cluster":kmeans.labels_})

df_all = pd.DataFrame(data={
    "game_id": games_table.values,

    "x": translated_points[:, 0].tolist(),
    "y": translated_points[:, 1].tolist(),
    "z": translated_points[:, 2].tolist(),

    "color": colors_points,
    "cluster": clusters,
    "name": [themes_inverse[cluster] for cluster in clusters],
    "game index":  games_table.index
})
df_all.to_json("games_info.json", orient="records", force_ascii=False)



'{"columns":["game_id","x","y","z","color","cluster","name","game index"],"index":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256

### Games for app

In [295]:
jeux_clean = jeux_clean[jeux_clean["Game id"].isin(rev_filter["Game id"])]
games = jeux_clean[["Game id", "Game name year", "Description", "Type", "Min number of players", "Max number of players", "Age min", "Age max"]]
np.sort(games["Max number of players"].unique())

array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  14.,  15.,  16.,  18.,  20.,  23.,  24.,  29.,  36.,  50.,
        69.,  99., 100., 200., 999.,  nan])

In [296]:
ages = games[["Age min", "Age max"]]
ages_text = []
for age_min, age_max in ages.itertuples(index=False):
    txt = "-"
    if (not np.isnan(age_max)) and (not np.isnan(age_min)):
        if age_max <= 25:
            txt = f"{int(age_min)}-{int(age_max)}"
        else:
            txt = f"{int(age_min)}+"
    
    if np.isnan(age_max) and (not np.isnan(age_min)):
        txt = f"{int(age_min)}+"
    
    ages_text.append(txt)

In [297]:
players = games[["Min number of players", "Max number of players"]]
players_text = []
for pl_min, pl_max in players.itertuples(index=False):
    txt = "-"
    if (not np.isnan(pl_min)) and (not np.isnan(pl_max)):
        if pl_max <= 18:
            txt = f"{int(pl_min)}-{int(pl_max)}"
        else:
            txt = f"{int(pl_min)}+"
    
    if np.isnan(pl_max) and (not np.isnan(pl_min)):
        txt = f"{int(pl_min)}+"
    
    players_text.append(txt)
print(len(players_text))
players_text

2614


['2-6',
 '2-4',
 '2-7',
 '2-2',
 '2-4',
 '2-5',
 '3-6',
 '1-3',
 '1-2',
 '2-5',
 '1-2',
 '3-5',
 '1-2',
 '1-2',
 '1-2',
 '3-6',
 '2-6',
 '2-5',
 '2-10',
 '3-7',
 '2-7',
 '2-8',
 '2-8',
 '2-8',
 '3-7',
 '2-7',
 '2-7',
 '2-2',
 '2-2',
 '1-4',
 '2-2',
 '2-8',
 '3-5',
 '2-5',
 '1-2',
 '2-4',
 '1-2',
 '1-4',
 '1-2',
 '2-4',
 '2-5',
 '2-4',
 '2-4',
 '2-5',
 '2-5',
 '2-6',
 '3-5',
 '2-5',
 '3-5',
 '2-8',
 '1-5',
 '1-2',
 '3-6',
 '3-10',
 '3-6',
 '2-4',
 '2-4',
 '2-5',
 '2-5',
 '2-4',
 '1-2',
 '3-6',
 '2-6',
 '1-2',
 '2-2',
 '3-8',
 '1-2',
 '1-4',
 '2-2',
 '1-5',
 '1-5',
 '2+',
 '2-5',
 '2-5',
 '2-4',
 '2-2',
 '3-5',
 '2-5',
 '2-4',
 '2-4',
 '3-4',
 '2-5',
 '2-4',
 '2-5',
 '2-5',
 '2-5',
 '2-4',
 '2-6',
 '2-6',
 '2-6',
 '2-6',
 '3-5',
 '2-4',
 '1-2',
 '3-6',
 '2-4',
 '2-6',
 '2-6',
 '2-6',
 '2-4',
 '2-5',
 '1-5',
 '3-4',
 '3-5',
 '2-4',
 '1-1',
 '2-4',
 '2-10',
 '3-8',
 '1-2',
 '2-4',
 '1-2',
 '1-2',
 '1-4',
 '2-6',
 '2-2',
 '3-5',
 '2-2',
 '3-5',
 '2-4',
 '2-5',
 '2-5',
 '2-6',
 '2-5',
 '2-4'

In [298]:
means = rev_filter.groupby("Game id")["Rating"].mean().reset_index()
games = games.assign(Players=players_text, Age=ages_text)
games = games.merge(means, on="Game id")
games = games.drop(columns=["Age min", "Age max", "Min number of players", "Max number of players"])
games.loc[:, "Type"] = games["Type"].fillna("-")

games.loc[:, "Type"] = games["Type"].str.split("|").apply(lambda row:", ".join(row[:2]))
games = games.sort_values(by="Game id")
games["Game index"] = list(range(games.shape[0]))
#games
games

Unnamed: 0,Game id,Game name year,Description,Type,Players,Age,Rating,Game index
0,6,"1, 2 Truie ! (2010)",Chaque joueur essaie de se débarrasser le plus...,Jeux de cartes,2-6,6+,5.827273,0
1,15,10' to kill (2015),"""Dans quelle galère je me suis encore fourrée ...","Animaux, Bande dessinée",2-4,10+,7.428000,1
2,25,11 nimmt (2010),Aucune description,Jeux de cartes,2-7,8+,7.420000,2
3,28,"13 jours, la crise des missiles de Cuba, 1962 ...",Incarnez les deux surpuissances de la Guerre F...,"Card-driven, Politique",2-2,14+,8.460000,3
4,34,1775 - la révolution américaine (2013),Nous sommes en 1775. Outragées par les nouvell...,"Points d'action, Affrontement",2-4,14+,8.420000,4
...,...,...,...,...,...,...,...,...
2609,10507,À l'École des Fantômes (2003),But du jeu Être le premier joueur à effrayer l...,-,1-6,5+,6.060714,2609
2610,10508,À la Carte (2010),Ce jeu est la réactualisation (2009) de la pre...,"Jeux de plateau, Cuisine",3-4,8+,7.222000,2610
2611,10514,À la gloire d'Odin (2016),«À la Gloire d’Odin» raconte une saga épique à...,-,1-4,12+,8.529412,2611
2612,10531,Échecs,"Mettez en échec le roi de votre adversaire, mi...","Affrontement, Bluff",1-2,6+,7.436232,2612


## Users

In [485]:
users = pd.read_csv(f"{folder}/users.csv", index_col=0)
true_ratings, mask_ratings, users_table, games_table = get_matrix_user_game(rev_filter)

users_table = users_table.reset_index().rename(columns={"index":"User index"})
print(users_table)
users_count = rev_filter.groupby("User id")["Game id"].count().reset_index().sort_values("Game id").rename(columns={"Game id":"Number reviews"})
special_id = 9701
nb_users = {0:9, 1:10, 2:10, 3:7}

slices_max = [50, 100, 200, 1700]
slices_min = [10, 50, 100, 200]

users_ids = [special_id, 208, 201, 1191]
users_count.tail(10)
np.random.seed(1)
for i in range(len(slices_max)):
    mx, mn = slices_max[i], slices_min[i]
    slice_users = users_count[(users_count["Number reviews"] < mx) & (users_count["Number reviews"] >= mn) & (~users_count["User id"].isin(users_count))]["User id"]
    users_ids += slice_users.sample(nb_users[i], replace=False).values.tolist()

users_indices = users_table[users_table["User id"].isin(users_ids)].index
users_indices

      User index  User id
0              0        0
1              1        1
2              2        2
3              3        3
4              4        4
...          ...      ...
1940        1940     9509
1941        1941     9583
1942        1942     9596
1943        1943     9701
1944        1944     9965

[1945 rows x 2 columns]


Index([  40,   53,   62,   63,  117,  121,  159,  229,  305,  362,  364,  386,
        400,  416,  418,  485,  550,  561,  570,  617,  624,  636,  657,  690,
        722,  723,  856,  896,  982, 1032, 1037, 1104, 1114, 1385, 1470, 1505,
       1632, 1774, 1829, 1943],
      dtype='int64')

### Predicted ratings

In [506]:
pred = U @ G.T - 2
pred = np.clip(pred, a_min=0, a_max=10) / (10 - 2)
pred = pred[users_indices]
np.save("nnmf_prediction.npy", pred)
pred

array([[0.3184538 , 0.52102016, 0.66611314, ..., 0.74587768, 0.59816444,
        0.6166772 ],
       [0.45316331, 0.56937741, 0.5383043 , ..., 0.88282491, 0.73944015,
        0.52022415],
       [0.52480835, 0.60787463, 0.69184887, ..., 0.74459157, 0.58499953,
        0.65373782],
       ...,
       [0.34309405, 0.4976013 , 0.56969413, ..., 0.82456165, 0.74331712,
        0.55048076],
       [0.36149858, 0.61725551, 0.77966745, ..., 0.85989479, 0.70315946,
        0.6741162 ],
       [0.45720069, 0.41706234, 0.51188721, ..., 0.61161684, 0.65993214,
        0.4835151 ]])

In [486]:
users = users[users["User id"].isin(rev_filter["User id"])].sort_values("User id")
users = users.merge(users_table, on="User id").merge(users_count, on="User id")
users = users[users["User id"].isin(users_ids)]
users

Unnamed: 0,Username,User id,User index,Number reviews
40,limp,83,40,781
53,Oystercult,98,53,254
62,jquelin,107,62,78
63,shaudron,108,63,64
117,Gigi,201,117,619
121,PtitJu,208,121,1111
159,adesco,334,159,12
229,harveyfox,553,229,113
305,léo(nard),732,305,263
362,Samy974,934,362,296


In [487]:
# ratings = pred[0, :]
# red, green, blue = (255 * (1 - ratings)).astype(int), (255 * ratings).astype(int), np.zeros(ratings.shape, dtype=int)

# games["color"] = [[r, g, b] for r, g, b in zip(red, green, blue)]
# green[green > 255]

In [488]:
users_games = rev_filter[["User id", "Game id"]].merge(games[["Game id", "Game index"]], on="Game id")[["User id", "Game index"]]
users_games["Game index"] = users_games["Game index"].astype(int)
users_games = users_games.groupby("User id").agg(list).reset_index().rename(columns={"Game index":"Rated games index"})
users = users.merge(users_games, on="User id")
users

Unnamed: 0,Username,User id,User index,Number reviews,Rated games index
0,limp,83,40,781,"[2301, 705, 2320, 264, 2548, 626, 37, 1087, 19..."
1,Oystercult,98,53,254,"[2280, 2435, 731, 2302, 410, 2127, 621, 1166, ..."
2,jquelin,107,62,78,"[2280, 2127, 2336, 2090, 1926, 1693, 2064, 842..."
3,shaudron,108,63,64,"[2280, 1785, 1503, 450, 428, 364, 2329, 1296, ..."
4,Gigi,201,117,619,"[2280, 2021, 491, 641, 1776, 1722, 621, 2489, ..."
5,PtitJu,208,121,1111,"[1125, 1938, 2320, 427, 626, 101, 1906, 1923, ..."
6,adesco,334,159,12,"[1090, 2320, 2533, 2587, 1617, 1225, 1764, 252..."
7,harveyfox,553,229,113,"[2418, 2472, 1812, 1902, 1860, 2309, 1015, 201..."
8,léo(nard),732,305,263,"[1602, 1673, 410, 1903, 2054, 1579, 1248, 499,..."
9,Samy974,934,362,296,"[2419, 2559, 2566, 1854, 1376, 2493, 2564, 100..."


In [None]:
pred = U @ G.T
#print(pred.shape)
users_top_games = []
users_top_ratings = []
for user in users_indices:
    
    indices = np.arange(0, pred.shape[1]) # all indices
    rated_games = users[users["User index"] == user]["Rated games index"].item() # already rated games
    not_rated_games = np.setdiff1d(indices, rated_games)
    
    ratings = pred[user, :]
    ratings[rated_games] = 0

    top_games = np.argpartition(-ratings, kth=5)[:5]
    sorted_top_games = list(top_games[np.argsort(ratings[top_games])[::-1]])
    print(ratings[sorted_top_games])
    users_top_games.append([game for game in sorted_top_games])
    print(users[users["User index"] == user]["Username"].item(), sorted_top_games)
    games_ids = games_table[games_table.index.isin(sorted_top_games)].values

    print(jeux_clean[jeux_clean["Game id"].isin(games_ids)]["Game name year"])

    pred[user, rated_games] = true_ratings[user, rated_games].toarray()
    users_top_ratings.append([rating for rating in ratings])

#type(users_top_games[0])

[9.64534492 9.57323361 9.42244814 9.30627921 9.28515077]
limp [2247, 1713, 1703, 1867, 30]
['A Few Acres of Snow (2011)' 'Neuroshima Hex ! : Babel 13 (2008)'
 'Nieuw Amsterdam (2012)' 'Pique Prune (2010)'
 'Summoner Wars : le royaume déchu (2013)']
[10.07212633  9.79482371  9.72364569  9.66210001  9.65268943]
Oystercult [405, 2345, 440, 2327, 876]
['Cast News (2006)' 'Cheesy Gonzola (2008)'
 'Galaxy Trucker : La Grosse Extension (2010)' 'The New Era (2011)'
 'Tide of Iron (2007)']
[9.84975359 9.54199313 9.24925887 9.17635079 9.16814939]
jquelin [2434, 1381, 2267, 516, 2133]
['Concordia (2017)' 'Le Trône de Fer - JdS : la bataille des rois (2006)'
 'Shogun (2006)' 'TIKAL (2016)'
 'Twilight Imperium : Shattered Empire (2006)']
[10.9272204  10.72200774 10.63860353 10.34513921 10.27447129]
shaudron [231, 1195, 1799, 2467, 1867]
['Battle Cry (2000)' 'Kubb' 'Pandemic Legacy Saison 1 (2015)'
 'Pique Prune (2010)' 'Ursuppe : Frisch Abgeschmeckt (1998)']
[9.19355876 9.06388081 8.9383692  8.9078

In [497]:
pred = pred[users_indices, :]

In [500]:
indices = [2247, 1713, 1703, 1867, 30]
#[games_info["game index"].isin(indices)]
pred[0, indices]

array([9.64534492, 9.57323361, 9.42244814, 9.30627921, 9.28515077])

In [505]:
pred = U @ G.T - 2
pred = np.clip(pred, a_min=0, a_max=10) / (10 - 2)
pred = pred[users_indices]
pred[0, indices] * 8 + 2

array([9.64534492, 9.57323361, 9.42244814, 9.30627921, 9.28515077])

In [416]:
pred = np.clip(pred[users_indices], a_min=0, a_max=10) / 10
np.save("nnmf_prediction.npy", pred)

#pd.DataFrame({"Ratings":pred[users_indices].tolist(), "User index":users_indices})
#np.save("nnmf_prediction.npy", pred)
#print(pred)

#np.save("nnmf_prediction.npy", pred)

In [321]:
users_indices

Index([  40,   53,   62,   63,  117,  121,  159,  229,  305,  362,  364,  386,
        400,  416,  418,  485,  550,  561,  570,  617,  624,  636,  657,  690,
        722,  723,  856,  896,  982, 1032, 1037, 1104, 1114, 1385, 1470, 1505,
       1632, 1774, 1829, 1943],
      dtype='int64')

In [481]:
mapping = {user_index : i for i, user_index in enumerate(users_indices)}
users.loc[:, "User index"] = users["User index"].map(mapping)

In [482]:
users["Top games"] = users_top_games
users["Username"] = users.apply(lambda row : f"{row['Username']} ({row['Number reviews']} avis)", axis=1)
users = users.sort_values("Number reviews", ascending=False).drop(columns="Number reviews")
users

Unnamed: 0,Username,User id,User index,Rated games index,Top games
5,PtitJu (1111 avis),208,5,"[1125, 1938, 2320, 427, 626, 101, 1906, 1923, ...","[871, 985, 2434, 405, 1073]"
0,limp (781 avis),83,0,"[2301, 705, 2320, 264, 2548, 626, 37, 1087, 19...","[2247, 1713, 1703, 1867, 30]"
4,Gigi (619 avis),201,4,"[2280, 2021, 491, 641, 1776, 1722, 621, 2489, ...","[2434, 2221, 154, 1681, 61]"
13,glouglou (405 avis),1191,13,"[2435, 707, 463, 2127, 1850, 621, 1707, 1166, ...","[676, 2007, 1073, 1043, 2029]"
15,Frenchcrusader (323 avis),1434,15,"[100, 327, 1812, 1918, 1080, 2100, 436, 1542, ...","[1966, 1560, 676, 1159, 2434]"
9,Samy974 (296 avis),934,9,"[2419, 2559, 2566, 1854, 1376, 2493, 2564, 100...","[2434, 1195, 61, 1043, 2417]"
8,léo(nard) (263 avis),732,8,"[1602, 1673, 410, 1903, 2054, 1579, 1248, 499,...","[1713, 1826, 2099, 211, 1867]"
1,Oystercult (254 avis),98,1,"[2280, 2435, 731, 2302, 410, 2127, 621, 1166, ...","[405, 2345, 440, 2327, 876]"
17,ybkam (229 avis),1815,17,"[707, 497, 1785, 1384, 2107, 351, 2421, 2576, ...","[2434, 2345, 2221, 985, 503]"
21,JJL (214 avis),1939,21,"[497, 1785, 1227, 507, 1049, 2107, 1559, 2477,...","[1043, 2434, 1140, 51, 6]"


In [483]:
users.to_json("users_info.json", orient="records", force_ascii=False)

In [None]:
users

Unnamed: 0,Username,User id,User index,Rated games index,Top games
5,PtitJu (1111 avis),208,5,"[1125, 1938, 2320, 427, 626, 101, 1906, 1923, ...","[871, 985, 2434, 405, 1073]"
0,limp (781 avis),83,0,"[2301, 705, 2320, 264, 2548, 626, 37, 1087, 19...","[2247, 1713, 1703, 1867, 30]"
4,Gigi (619 avis),201,4,"[2280, 2021, 491, 641, 1776, 1722, 621, 2489, ...","[2434, 2221, 154, 1681, 61]"
13,glouglou (405 avis),1191,13,"[2435, 707, 463, 2127, 1850, 621, 1707, 1166, ...","[676, 2007, 1073, 1043, 2029]"
15,Frenchcrusader (323 avis),1434,15,"[100, 327, 1812, 1918, 1080, 2100, 436, 1542, ...","[1966, 1560, 676, 1159, 2434]"
9,Samy974 (296 avis),934,9,"[2419, 2559, 2566, 1854, 1376, 2493, 2564, 100...","[2434, 1195, 61, 1043, 2417]"
8,léo(nard) (263 avis),732,8,"[1602, 1673, 410, 1903, 2054, 1579, 1248, 499,...","[1713, 1826, 2099, 211, 1867]"
1,Oystercult (254 avis),98,1,"[2280, 2435, 731, 2302, 410, 2127, 621, 1166, ...","[405, 2345, 440, 2327, 876]"
17,ybkam (229 avis),1815,17,"[707, 497, 1785, 1384, 2107, 351, 2421, 2576, ...","[2434, 2345, 2221, 985, 503]"
21,JJL (214 avis),1939,21,"[497, 1785, 1227, 507, 1049, 2107, 1559, 2477,...","[1043, 2434, 1140, 51, 6]"


***
### For optimization

In [444]:
# df_all = pd.read_json("games_info.json", orient="records")
# games_info = pd.read_csv("games_info.csv", index_col=0).drop(columns="Description")
# games_info = games_info.rename(columns={"Game id":"game id", "Game index":"game index", "Game name year":"game name year",
#                             "Type":"type", "Players":"players", "Age":"age", "Rating":"rating"}).merge(df_all, on="game index")
# games_info = games_info.drop(columns=["game_id"])
# games_info.loc[:, "theme"] = games_info["name"]
# games_info.to_json("games_info.json", orient="records", force_ascii=False)