# Filtrage Collaboratif

## Importation des librairies et des datasets

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds # SVD
import tensorflow as tf # NMF
import seaborn as sns

In [None]:
# Load the dataset of placed bets ("," is already read_csv's default separator)
paris_df = pd.read_csv("paris.csv")
paris_df.head()

Unnamed: 0,userId,matchId,pari,timestamp
0,1,349,13.0,964982563
1,1,423,18.0,964982363
2,1,457,2.0,964981909
3,1,527,16.0,964984002
4,1,954,11.0,964983219


In [None]:
# Summary statistics of the numeric columns of the bets dataset
paris_df.describe()

Unnamed: 0,userId,matchId,pari,timestamp
count,16697.0,16697.0,16697.0,16697.0
mean,332.822004,19456.480146,9.830149,1192111000.0
std,183.047299,36117.474168,5.399528,213100600.0
min,1.0,4.0,1.0,828124600.0
25%,182.0,1246.0,5.0,1002403000.0
50%,339.0,3247.0,10.0,1171832000.0
75%,477.0,8014.0,15.0,1415714000.0
max,610.0,193585.0,19.0,1537799000.0


In [None]:
# Load the dataset of played matches ("," is already read_csv's default separator)
matchs_df = pd.read_csv("matchs.csv")
matchs_df

Unnamed: 0,matchId,sport,team_1,team_2
0,4,Football,5.0,8.0
1,9,Basketball,3.0,18.0
2,11,Football,8.0,1.0
3,14,Rubgy,15.0,12.0
4,26,Rubgy,18.0,3.0
...,...,...,...,...
2521,190213,Rubgy,15.0,13.0
2522,190215,Rubgy,11.0,12.0
2523,190221,Baseball,18.0,6.0
2524,193579,Baseball,12.0,17.0


In [None]:
# Summary statistics of the numeric columns of the matches dataset
matchs_df.describe()

Unnamed: 0,matchId,team_1,team_2
count,2526.0,2526.0,2526.0
mean,38268.67696,9.874901,9.689232
std,50129.194702,5.571301,5.404987
min,4.0,1.0,1.0
25%,3141.5,5.0,5.0
50%,6806.5,10.0,10.0
75%,66773.25,15.0,14.0
max,193585.0,19.0,19.0


## Nettoyage des données

In [None]:
# Number of bets placed by each user; its distribution helps choose an
# activity threshold per user
bets_by_user = paris_df.groupby('userId')['pari'].count()
count_per_user = bets_by_user.rename('pari_count').reset_index()
count_per_user.describe()

Unnamed: 0,userId,pari_count
count,600.0,600.0
mean,306.248333,27.828333
std,175.949625,54.311341
min,1.0,1.0
25%,155.75,5.0
50%,307.5,12.0
75%,457.25,27.0
max,610.0,606.0


In [None]:
# Number of bets placed on each match; its distribution helps choose a
# popularity threshold per match
bets_by_match = paris_df.groupby('matchId')['pari'].count()
count_per_match = bets_by_match.rename('pari_count').reset_index()
count_per_match.describe()

Unnamed: 0,matchId,pari_count
count,2519.0,2519.0
mean,38349.317189,6.628424
std,50172.411909,14.535162
min,4.0,1.0
25%,3139.5,1.0
50%,6809.0,2.0
75%,67347.0,5.0
max,193585.0,220.0


In [None]:
# Make sure the sport column holds plain strings
matchs_df['sport'] = matchs_df['sport'].astype("str")

In [None]:
# Keep only the matches on which at least 2 bets were placed (the filter below is inclusive: >=)
match_threshold = 2
count_per_match = count_per_match.query('pari_count >= @match_threshold')

In [None]:
# Bets restricted to the matches that passed the threshold
paris = count_per_match.merge(paris_df, on='matchId', how='left')

# Share of bets kept after filtering (about 94%); bare expression, not displayed mid-cell
len(paris) / len(paris_df)

# Drop the helper columns that are not needed downstream
cols = ['timestamp', 'pari_count']
paris = paris.drop(columns=cols)

num_matchs = paris['matchId'].nunique()
num_users = paris['userId'].nunique()
print('Nombre de matchs uniques : ', num_matchs)
print('Nombre d\'utilisateurs uniques : ', num_users)

Nombre de matchs uniques :  1458
Nombre d'utilisateurs uniques :  599


In [None]:
# création de mapping pour les ID
# comme on a filtré les données, les id ne sont pas continus, ainsi on rend les id continus

def generate_id_mappings(ids_list):
    """Build contiguous 0-based ids for a collection of original ids.

    Args:
        ids_list: iterable of hashable original ids (users or matches).
            It is consumed only once, so one-shot iterators are supported.

    Returns:
        A tuple ``(id_map, inverse_id_map)`` where ``id_map`` maps
        new id -> original id and ``inverse_id_map`` maps
        original id -> new id. If an original id appears several times,
        the inverse map keeps its last position.
    """
    id_map = dict(enumerate(ids_list))
    # Derive the inverse from the forward map instead of iterating the input again.
    inverse_id_map = {old_id: new_id for new_id, old_id in id_map.items()}
    return id_map, inverse_id_map

# Build the (new id -> original id, original id -> new id) dictionaries
# for users and for matches
userId_map, inverse_userId_map = generate_id_mappings(paris['userId'].unique())
matchId_map, inverse_matchId_map = generate_id_mappings(paris['matchId'].unique())

# Attach the contiguous ids to every bet
paris['mMatchId'] = paris['matchId'].map(inverse_matchId_map)
paris.insert(paris.columns.get_loc('mMatchId'), 'mUserId', paris['userId'].map(inverse_userId_map))

## Recommandation basée sur le contenu (content-based)

In [None]:
# Example input for the content-based approach

# Matches on which user 1 placed a bet whose value is 13
liked_matchs = paris.query('userId == 1 and pari == 13')['matchId']
matchs_df.query('matchId in @liked_matchs')

Unnamed: 0,matchId,sport,team_1,team_2
83,349,Hockey,13.0,9.0
468,2353,Boxe,6.0,13.0
752,3729,Hockey,1.0,13.0


In [None]:
# Importation de la métrique de similarité
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# One-hot encode the sport of each match
sport_dummies = matchs_df['sport'].str.get_dummies()
matchs_sports_df = matchs_df.join(sport_dummies)
matchs_sports_df['mMatchId'] = matchs_sports_df['matchId'].map(inverse_matchId_map)

# Index by mMatchId, drop the matches that have no mapped id (removed by the
# threshold filter) and sort, so that row position i holds mMatchId i
# (assuming every mapped match is present in matchs_df). cos_sim is addressed
# positionally with mMatchId afterwards.
matchs_sports_df.set_index('mMatchId', inplace=True)
matchs_sports_df = matchs_sports_df[matchs_sports_df.index.notna()].sort_index()

# Cosine similarity on the one-hot sport columns only. They are selected by
# name: the previous positional slice `iloc[:, 3:]` started at `team_2`, so a
# team identifier leaked into the sport similarity.
cos_sim = cosine_similarity(matchs_sports_df[sport_dummies.columns])

In [None]:
# Display the one-hot encoded matches indexed by mMatchId
matchs_sports_df

Unnamed: 0_level_0,matchId,sport,team_1,team_2,Badminton,Baseball,Basketball,Boxe,Football,Handball,Hockey,Judo,Ping-Pong,Rubgy,Tennis,Volleyball,Water Polo
mMatchId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0.0,4,Football,5.0,8.0,0,0,0,0,1,0,0,0,0,0,0,0,0
1.0,9,Basketball,3.0,18.0,0,0,1,0,0,0,0,0,0,0,0,0,0
2.0,11,Football,8.0,1.0,0,0,0,0,1,0,0,0,0,0,0,0,0
3.0,14,Rubgy,15.0,12.0,0,0,0,0,0,0,0,0,0,1,0,0,0
4.0,26,Rubgy,18.0,3.0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1453.0,178061,Rubgy,10.0,13.0,0,0,0,0,0,0,0,0,0,1,0,0,0
1454.0,179817,Tennis,1.0,2.0,0,0,0,0,0,0,0,0,0,0,1,0,0
1455.0,180095,Rubgy,1.0,13.0,0,0,0,0,0,0,0,0,0,1,0,0,0
1456.0,180497,Volleyball,11.0,4.0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [None]:
# Extract the 5 matches most similar to matchId = 11.
# NOTE: despite the "basketball" variable names, matchId 11 is listed as a
# Football match in matchs_df; the names are kept as they are.
# The query match itself can appear among the results.
cos_sim.shape

Basketball_mMatchId = inverse_matchId_map[11]
# argsort is ascending: keep the last 5 positions and reverse them to get the
# most similar first (the previous `[::1]` slice was a no-op)
basketball_top5 = np.argsort(cos_sim[Basketball_mMatchId])[-5:][::-1]
basketball_top5_matchId = [matchId_map[x] for x in basketball_top5]

matchs_df.query('matchId in @basketball_top5_matchId')

Unnamed: 0,matchId,sport,team_1,team_2
2,11,Football,8.0,1.0
189,898,Football,12.0,1.0
1815,56941,Football,16.0,1.0
2034,86817,Football,16.0,1.0
2452,164909,Football,15.0,1.0


## Factorisation de matrice par SVD (collaborative filtering by model)

In [None]:
# SVD
from scipy.sparse import csr_matrix
from scipy.linalg import sqrtm

# User x match matrix of bet values; user/match pairs without a bet are set to 0
R_df = paris.pivot(index='mUserId', columns='mMatchId', values='pari')
R_df = R_df.fillna(0)
R_df.head()

mMatchId,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443,1444,1445,1446,1447,1448,1449,1450,1451,1452,1453,1454,1455,1456,1457
mUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,8.0,0.0,1.0,0.0,18.0,9.0,0.0,15.0,0.0,2.0,0.0,0.0,4.0,13.0,0.0,0.0,0.0,9.0,11.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,6.0,3.0,0.0,13.0,0.0,0.0,0.0,11.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8.0,0.0,8.0,15.0,0.0,9.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Center each user's row on that row's mean (the mean is taken over every
# column of R_df)
R = R_df.to_numpy()
user_paris_mean = R.mean(axis=1)
R_demeaned = R - user_paris_mean[:, np.newaxis]

In [None]:
# Truncated SVD keeping `latent_dimension` singular values
# (alternatives: np.linalg.svd, tf.linalg.svd, sklearn.decomposition.TruncatedSVD)
latent_dimension = 30
U, sigma, Vt = svds(R_demeaned, k=latent_dimension)

# Each shape is already a tuple, so it can feed the % formatting directly
print("Dimension de U : %d * %d" % U.shape)
print("Dimension de sigma : %d" % sigma.shape)
print("Dimension de Vt : %d * %d" % Vt.shape)

Dimension de U : 599 * 30
Dimension de sigma : 30
Dimension de Vt : 30 * 1458


In [None]:
# Turn the vector of singular values into a square diagonal matrix
# (latent_dimension x latent_dimension) and take its matrix square root
sigma = np.diag(sigma)
s_root = sqrtm(sigma)

# Lower-dimensional latent representations of the users and of the matches
Usk = U @ s_root
skV = s_root @ Vt

# Reconstruct the matrix, then add back the per-user mean removed before the SVD
predicted_paris = Usk @ skV
predicted_paris = predicted_paris + user_paris_mean[:, np.newaxis]

In [None]:
# Wrap the reconstructed matrix in a DataFrame that reuses R_df's column labels
# (no index is passed, so rows get pandas' default index)
preds_df = pd.DataFrame(predicted_paris, columns = R_df.columns)
preds_df.head()

mMatchId,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443,1444,1445,1446,1447,1448,1449,1450,1451,1452,1453,1454,1455,1456,1457
0,2.911764,2.417175,5.201711,5.10756,4.593324,11.997951,0.303767,5.236157,0.35217,0.974625,4.635204,-1.060972,1.424989,16.136575,0.580413,3.277495,1.067089,2.929118,2.66906,1.962825,0.304546,1.219354,0.347875,0.315855,1.994006,0.103451,-1.567724,1.67862,2.642368,0.401522,-2.026618,3.650692,4.66284,3.825652,0.628435,2.689369,0.214744,0.646742,1.198235,1.852078,...,-0.454343,0.063291,0.356816,-0.443061,0.279483,1.017654,0.235189,0.540466,-0.185405,0.390911,0.071633,0.007799,0.516741,0.744239,0.55355,0.232909,0.778216,0.171883,0.311202,0.913284,1.675381,0.345819,0.368539,-0.049478,-0.097395,0.665301,0.579023,0.719048,0.327444,0.291938,0.35872,-0.134128,0.154822,0.386479,0.580475,1.126775,0.361057,0.202084,0.293658,0.272183
1,1.210985,0.225983,1.482067,0.986072,1.730445,3.56621,0.041283,1.483674,0.023417,0.311548,1.527282,-0.434586,0.311186,2.225893,-0.160585,1.545327,0.359088,0.625965,0.604995,0.830198,-0.025792,0.195913,0.001116,-0.099815,0.284097,0.033468,-0.82704,0.216658,0.488529,-0.155638,-0.935536,1.409112,1.506047,1.317871,0.107415,0.80157,0.040642,0.194839,0.087518,0.576532,...,-0.312463,-0.010176,-0.075904,-0.269037,0.229787,0.0682,-0.226649,-0.087961,-0.081554,0.155085,0.322044,0.248284,0.111211,0.270824,-0.1896,0.05908,0.249858,-0.432435,0.037838,0.170805,0.206765,0.042693,-0.044362,0.285027,0.095615,0.126375,-0.160597,0.157317,0.097557,-0.059867,-0.087973,0.022557,0.126756,-0.022454,0.118823,0.216935,0.065818,0.065514,0.054325,-0.240472
2,2.297969,3.45706,5.965821,8.154115,2.963324,9.848282,-0.046199,3.927213,-0.062518,0.411103,7.613808,4.218383,0.868091,27.577452,1.796133,2.301707,0.89404,2.181567,1.641958,1.655208,0.741383,0.568818,-0.175548,1.163374,0.448791,1.166083,2.727709,2.499448,2.381748,1.248757,-0.333668,3.832956,4.583432,0.0134,0.850822,1.070589,-0.003991,0.714977,4.005083,0.362332,...,-1.073997,-0.21714,0.180104,-0.560776,0.140953,-1.231723,-0.444361,0.284687,-0.477158,-0.098562,-0.047726,-1.415101,-0.693004,-0.293383,1.355378,-0.15616,-0.069956,0.153012,-0.055859,0.802274,0.45933,0.049857,0.692404,0.135689,-1.146967,-0.068976,0.233876,0.05952,-0.22588,-0.241127,-0.000102,-0.682576,-0.663909,-0.432714,-0.042728,0.265575,-0.115806,-0.277742,-0.110473,0.003564
3,0.801573,0.815004,2.28024,2.970765,1.307983,5.436186,-0.021771,1.76406,-0.186498,0.175519,1.400773,0.536181,0.176532,16.149746,0.584937,0.345405,0.428136,0.483495,-0.038825,0.972247,0.093327,0.171039,0.024989,0.221014,-0.050137,0.075692,0.857172,0.499382,0.798409,0.272388,0.337351,1.523882,0.753817,0.75897,0.082267,0.068965,0.313646,0.243593,0.499807,0.365348,...,-0.500456,-0.094437,0.018809,0.053711,0.390279,0.945384,-0.04925,0.076314,-0.103198,0.268702,0.237523,0.104361,0.01374,-0.014183,-0.036066,0.089647,0.123602,0.192127,-0.004858,-0.070602,0.191234,-0.03095,-0.194927,0.281331,-0.047718,-0.055733,0.289429,-0.064954,0.066585,-0.029154,-0.118762,0.027256,-0.010265,0.160508,-0.05924,-0.276839,-0.094387,-0.042402,-0.015166,0.175777
4,0.189402,0.68746,0.995072,0.772806,0.931561,0.6327,0.05824,0.547332,0.200006,0.304351,1.164935,1.874044,0.1237,3.352198,0.267766,0.098471,0.20023,0.229415,0.084264,0.456549,0.207606,0.336896,0.023258,0.384232,0.456736,0.223596,0.805863,0.313455,0.51137,0.282484,0.640398,1.009903,0.658825,1.02684,0.139457,0.30468,0.027091,0.56944,0.20077,0.255961,...,0.285121,0.234195,0.130193,0.272362,0.044145,0.676088,0.09898,0.047614,0.192967,0.149477,0.074917,0.244835,-0.015362,0.150528,0.206949,0.056107,0.212878,0.052419,0.130771,0.23675,0.073743,0.075395,0.295686,0.161291,0.227508,0.137415,0.179039,0.03041,0.052866,-0.000259,0.168904,0.218783,0.242609,-0.062041,0.077537,0.2949,0.100629,0.001912,0.105148,0.13545


In [None]:
def recommend_matchsSVD(predictions_df, userId, matchs_df, original_paris_df,
                        num_recommendations=10, user_id_map=None):
    """Recommend matches a user has not bet on yet, ranked by predicted bet.

    Args:
        predictions_df: DataFrame of predicted bets, one row per mapped user
            (row position = mUserId) and one column per mMatchId.
        userId: original (unmapped) id of the user.
        matchs_df: matches DataFrame; must contain the columns ``matchId``,
            ``sport`` and ``mMatchId``.
        original_paris_df: bets DataFrame; must contain the columns
            ``mUserId``, ``matchId`` and ``pari``.
        num_recommendations: maximum number of matches to return.
        user_id_map: optional mapping original userId -> mUserId. When omitted,
            the module-level ``inverse_userId_map`` is used.

    Returns:
        A tuple ``(user_full, recommendations)``: ``user_full`` lists the
        ``matchId``/``sport`` of the user's existing bets, highest bet first;
        ``recommendations`` holds the rows of ``matchs_df`` for the best
        predicted matches not yet bet on (prediction column removed).

    Raises:
        KeyError: if ``userId`` is not present in the id mapping.
    """
    if user_id_map is None:
        user_id_map = inverse_userId_map
    mUserId = user_id_map[userId]

    # Use the predictions passed by the caller (the global `preds_df` was read
    # here before, silently ignoring the `predictions_df` argument).
    sorted_user_predictions = predictions_df.iloc[mUserId].sort_values(ascending=False)

    # Bets already placed by the user, with the sport of each match
    user_data = original_paris_df[original_paris_df.mUserId == mUserId]
    user_full = (
        user_data.merge(matchs_df, how='left', on='matchId')
        .sort_values(['pari'], ascending=False)
    )[['matchId', 'sport']]

    print('L\'utilisateur %d a déjà parié sur %d matchs.' % (userId, user_full.shape[0]))

    # Name the axis and the values explicitly so that the merge and the sort
    # below do not depend on the labels carried by predictions_df.
    user_predictions = (
        sorted_user_predictions.rename_axis('mMatchId')
        .to_frame('Predictions')
        .reset_index()
    )

    # Matches not bet on yet, best predictions first; matches without a
    # prediction get NaN and are sorted last.
    not_bet_yet = matchs_df[~matchs_df['matchId'].isin(user_full['matchId'])]
    recommendations = (
        not_bet_yet.merge(user_predictions, how='left', on='mMatchId')
        .sort_values('Predictions', ascending=False)
        .head(num_recommendations)
        .drop(columns='Predictions')
    )
    return user_full, recommendations

In [None]:
# From here on matchs_df also carries the contiguous match id
matchs_df['mMatchId'] = matchs_df['matchId'].map(inverse_matchId_map)

# Show 10 recommendations for each of the first 3 users of the id mapping
for user in list(inverse_userId_map)[:3]:
    already_paris, predictions = recommend_matchsSVD(preds_df, user, matchs_df, paris, 10)
    print(already_paris.head(10).to_string())
    print(predictions.to_string())

L'utilisateur 6 a déjà parié sur 70 matchs.
    matchId      sport
58      839       Boxe
48      540  Badminton
33      361   Football
23      279      Rubgy
69     1082      Rubgy
2        26      Rubgy
25      282      Rubgy
18      240  Badminton
40      491      Rubgy
31      354      Rubgy
     matchId       sport  team_1  team_2  mMatchId
40       280  Volleyball    12.0    16.0      54.0
60       376        Boxe    16.0     2.0      80.0
1         14       Rubgy    15.0    12.0       3.0
148     1079    Handball    14.0    18.0     174.0
4         52    Football    14.0     2.0      10.0
82       529       Rubgy    16.0     5.0     108.0
32       227        Boxe    18.0     6.0      41.0
25       193       Rubgy     8.0     7.0      31.0
494     2840  Volleyball     2.0    16.0     449.0
36       246    Baseball    16.0     4.0      47.0
L'utilisateur 14 a déjà parié sur 13 matchs.
    matchId      sport
2       282      Rubgy
3       337      Rubgy
7       491      Rubgy
9    

In [None]:
# Top-10 recommendations for user 610, printed after the first 10 of that user's existing bets
already_paris, predictions = recommend_matchsSVD(preds_df, 610, matchs_df, paris, 10)
print(already_paris.head(10).to_string())
print(predictions.to_string())

L'utilisateur 610 a déjà parié sur 187 matchs.
     matchId       sport
90      8784    Football
184   161634   Badminton
3        412       Rubgy
172   118702      Tennis
96     30812       Rubgy
9       1128  Water Polo
89      8781   Badminton
88      7367    Handball
31      3246       Rubgy
167   104944       Rubgy
      matchId       sport  team_1  team_2  mMatchId
1033     5784  Water Polo    10.0    15.0     824.0
1256     7265       Rubgy    19.0    14.0     946.0
1513    33085        Judo    17.0    11.0    1075.0
184       910    Handball    10.0    19.0     156.0
676      3510  Volleyball    19.0     1.0     556.0
1011     5609        Boxe     5.0    19.0     807.0
1799    72489      Hockey     8.0    19.0    1258.0
5          31       Rubgy    16.0     9.0       5.0
1354     8529    Football    16.0    12.0     994.0
1372     8815        Judo    12.0    16.0    1003.0


In [None]:
# Order the bets by user id and renumber the rows from 0
paris = paris.sort_values('userId').reset_index(drop=True)

In [None]:
# Top-5 recommendations for every user.
# Each user's frame is tagged with its userId before concatenation, so the
# result stays correct even when a user gets fewer than 5 recommendations.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
per_user_predictions = []
for user in paris['userId'].unique():
    all_already_paris, all_predictions = recommend_matchsSVD(preds_df, user, matchs_df, paris, 5)
    per_user_predictions.append(all_predictions.assign(userId=user))

# pd.concat rejects an empty list, so fall back to an empty frame
if per_user_predictions:
    preds = pd.concat(per_user_predictions, ignore_index=True)
else:
    preds = pd.DataFrame()

L'utilisateur 1 a déjà parié sur 20 matchs.
L'utilisateur 2 a déjà parié sur 5 matchs.
L'utilisateur 3 a déjà parié sur 8 matchs.
L'utilisateur 4 a déjà parié sur 40 matchs.
L'utilisateur 5 a déjà parié sur 9 matchs.
L'utilisateur 6 a déjà parié sur 70 matchs.
L'utilisateur 7 a déjà parié sur 17 matchs.
L'utilisateur 8 a déjà parié sur 6 matchs.
L'utilisateur 9 a déjà parié sur 9 matchs.
L'utilisateur 10 a déjà parié sur 28 matchs.
L'utilisateur 11 a déjà parié sur 15 matchs.
L'utilisateur 12 a déjà parié sur 5 matchs.
L'utilisateur 13 a déjà parié sur 7 matchs.
L'utilisateur 14 a déjà parié sur 13 matchs.
L'utilisateur 15 a déjà parié sur 14 matchs.
L'utilisateur 16 a déjà parié sur 15 matchs.
L'utilisateur 17 a déjà parié sur 17 matchs.
L'utilisateur 18 a déjà parié sur 59 matchs.
L'utilisateur 19 a déjà parié sur 100 matchs.
L'utilisateur 20 a déjà parié sur 18 matchs.
L'utilisateur 21 a déjà parié sur 28 matchs.
L'utilisateur 22 a déjà parié sur 30 matchs.
L'utilisateur 23 a déjà p

In [None]:
# Keep only the descriptive columns of the predictions and move userId to the front
preds = preds.drop(columns=['team_1', 'team_2', 'mMatchId'])
preds.insert(0, 'userId', preds.pop('userId'))

In [None]:
# Display the recommendations gathered for all users
preds

Unnamed: 0,userId,matchId,sport
0,1,1608,Boxe
1,1,3301,Handball
2,1,2194,Ping-Pong
3,1,4022,Rubgy
4,1,4262,Ping-Pong
...,...,...,...
2990,610,5784,Water Polo
2991,610,7265,Rubgy
2992,610,33085,Judo
2993,610,910,Handball
