In [0]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
from sklearn.metrics import mean_squared_error
import warnings
# Report each matching warning only the first time it occurs, so repeated
# library warnings do not flood the cell outputs.
warnings.filterwarnings(action='once')

# Render matplotlib figures inline in the notebook, using the 'retina'
# figure format for the inline backend.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Use seaborn's 'white' style for the plots created in this notebook.
sns.set_style('white')

In [0]:
# Load the three ratings tables, in order: the complete set, then the train
# and test files. Each is later pivoted on its 'username', 'gameid' and
# 'rating' columns.
df, df_train, df_test = (
    pd.read_csv(csv_name)
    for csv_name in ('preprocessed_ratings.csv', 'train_data.csv', 'test_data.csv')
)

In [3]:
# Reshape the long ratings table into a user x game matrix:
# one row per username, one column per gameid, cells hold the rating
# (NaN where the user has no rating for that game).
df_pivot = df.pivot_table(
    index='username',
    columns='gameid',
    values='rating',
)
# Preview the first five users.
df_pivot.head()

gameid,1,3,5,7,10,11,12,13,14,15,...,232918,233078,233247,233371,234277,234487,236191,237182,244522,244992
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000rpm,,8.0,8.0,,6.0,7.0,8.0,7.0,8.0,,...,,,,,,,,,,8.0
549sd,,7.0,7.0,,7.0,7.0,,7.0,,,...,7.0,,,7.0,,7.0,,,,
Abso,,9.0,,,5.0,4.0,7.0,3.0,,,...,,,,,8.0,8.0,,,7.0,5.0
Acido,,6.75,7.2,,,7.1,6.8,7.0,,,...,5.8,6.2,7.0,6.0,6.0,6.35,,,6.7,
Adonai_80,,6.0,6.0,,,,7.0,4.0,,,...,7.0,10.0,,,,,,6.0,,


In [4]:
# Persist the full user x game pivot to disk as a pickle, then display its
# (n_users, n_games) shape as the cell output.
df_pivot.to_pickle(path='df_pivotPickle')
df_pivot.shape

(1322, 1493)

In [5]:
# Build the user x game matrix for the training ratings only:
# rows are usernames, columns are gameids, cells hold the rating
# (NaN where the training set has no rating for that pair).
df_train_pivot = df_train.pivot_table(
    index='username',
    columns='gameid',
    values='rating',
)
# Preview the first five users.
df_train_pivot.head()

gameid,1,3,5,7,10,11,12,13,14,15,...,232918,233078,233247,233371,234277,234487,236191,237182,244522,244992
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000rpm,,8.0,8.0,,6.0,7.0,8.0,,,,...,,,,,,,,,,8.0
549sd,,7.0,7.0,,7.0,7.0,,7.0,,,...,7.0,,,7.0,,,,,,
Abso,,9.0,,,5.0,4.0,7.0,3.0,,,...,,,,,8.0,8.0,,,7.0,5.0
Acido,,,7.2,,,7.1,6.8,7.0,,,...,5.8,6.2,7.0,,,6.35,,,6.7,
Adonai_80,,6.0,6.0,,,,,4.0,,,...,7.0,10.0,,,,,,6.0,,


In [6]:
# Persist the training user x game pivot to disk as a pickle, then display
# its (n_users, n_games) shape as the cell output.
df_train_pivot.to_pickle(path='df_train_pivotPickle')
df_train_pivot.shape

(1322, 1493)

In [7]:
# Build the user x game matrix for the held-out test ratings only:
# rows are usernames, columns are gameids, cells hold the rating
# (NaN where the test set has no rating for that pair).
df_test_pivot = df_test.pivot_table(
    index='username',
    columns='gameid',
    values='rating',
)
# Preview the first five users.
df_test_pivot.head()

gameid,1,3,5,7,10,11,12,13,14,15,...,232918,233078,233247,233371,234277,234487,236191,237182,244522,244992
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000rpm,,,,,,,,7.0,8.0,,...,,,,,,,,,,
549sd,,,,,,,,,,,...,,,,,,7.0,,,,
Abso,,,,,,,,,,,...,,,,,,,,,,
Acido,,6.75,,,,,,,,,...,,,,6.0,6.0,,,,,
Adonai_80,,,,,,,7.0,,,,...,,,,,,,,,,


In [8]:
# Persist the test user x game pivot to disk as a pickle, then display its
# (n_users, n_games) shape as the cell output.
df_test_pivot.to_pickle(path='df_test_pivotPickle')
df_test_pivot.shape

(1322, 1493)

In [0]:
# Convert the user x game pivots to CSR matrices, encoding "no rating" (NaN)
# as 0 so that only actual ratings are stored.
# NOTE: a genuine rating of exactly 0 would be indistinguishable from a
# missing one after this step.
#
# A CSR matrix carries no row/column labels, so position (i, j) must mean the
# same (user, game) pair in all three matrices. The train and test pivots are
# therefore reindexed onto the full pivot's labels before conversion: a user
# or game that has no rating in one split keeps an all-zero row/column there
# instead of disappearing and silently shifting every later position.
# When the three pivots already share identical labels this is a no-op.
# NOTE(review): assumes every user/game in the train and test files also
# appears in 'preprocessed_ratings.csv'; labels unknown to df_pivot would be
# dropped by the reindex -- confirm against how the split was produced.
ratings_sparse = sparse.csr_matrix(df_pivot.fillna(0))
train_sparse = sparse.csr_matrix(
    df_train_pivot.reindex(index=df_pivot.index, columns=df_pivot.columns).fillna(0)
)
test_sparse = sparse.csr_matrix(
    df_test_pivot.reindex(index=df_pivot.index, columns=df_pivot.columns).fillna(0)
)

In [10]:
# Print the stored entries of the full ratings matrix as
# (row, column) -> rating pairs, then write the matrix to disk with
# SciPy's sparse .npz writer.
print(ratings_sparse)
sparse.save_npz(file='ratings_sparse', matrix=ratings_sparse)

  (0, 1)	8.0
  (0, 2)	8.0
  (0, 4)	6.0
  (0, 5)	7.0
  (0, 6)	8.0
  (0, 7)	7.0
  (0, 8)	8.0
  (0, 10)	9.0
  (0, 15)	7.0
  (0, 16)	6.0
  (0, 18)	7.0
  (0, 21)	7.0
  (0, 22)	8.0
  (0, 23)	6.0
  (0, 26)	9.0
  (0, 28)	6.0
  (0, 32)	6.0
  (0, 33)	7.0
  (0, 35)	9.0
  (0, 36)	7.0
  (0, 40)	6.0
  (0, 41)	7.0
  (0, 42)	7.0
  (0, 44)	7.0
  (0, 45)	6.0
  :	:
  (1321, 1413)	9.0
  (1321, 1416)	9.0
  (1321, 1420)	8.0
  (1321, 1430)	8.0
  (1321, 1431)	8.8
  (1321, 1437)	7.8
  (1321, 1440)	9.5
  (1321, 1441)	9.0
  (1321, 1443)	8.8
  (1321, 1445)	8.0
  (1321, 1446)	8.2
  (1321, 1448)	9.0
  (1321, 1449)	8.0
  (1321, 1458)	8.1
  (1321, 1459)	7.0
  (1321, 1461)	8.0
  (1321, 1464)	8.3
  (1321, 1467)	7.0
  (1321, 1468)	6.7
  (1321, 1473)	9.2
  (1321, 1478)	9.0
  (1321, 1480)	8.1
  (1321, 1485)	9.2
  (1321, 1486)	9.1
  (1321, 1492)	7.9


In [11]:
# Print the stored entries of the training matrix as
# (row, column) -> rating pairs, then write the matrix to disk with
# SciPy's sparse .npz writer.
print(train_sparse)
sparse.save_npz(file='train_sparse', matrix=train_sparse)

  (0, 1)	8.0
  (0, 2)	8.0
  (0, 4)	6.0
  (0, 5)	7.0
  (0, 6)	8.0
  (0, 10)	9.0
  (0, 15)	7.0
  (0, 16)	6.0
  (0, 18)	7.0
  (0, 21)	7.0
  (0, 22)	8.0
  (0, 23)	6.0
  (0, 26)	9.0
  (0, 33)	7.0
  (0, 35)	9.0
  (0, 36)	7.0
  (0, 40)	6.0
  (0, 41)	7.0
  (0, 44)	7.0
  (0, 45)	6.0
  (0, 46)	6.0
  (0, 51)	7.0
  (0, 55)	6.0
  (0, 60)	7.0
  (0, 65)	9.0
  :	:
  (1321, 1402)	8.6
  (1321, 1406)	9.2
  (1321, 1409)	8.5
  (1321, 1411)	8.5
  (1321, 1413)	9.0
  (1321, 1416)	9.0
  (1321, 1420)	8.0
  (1321, 1430)	8.0
  (1321, 1431)	8.8
  (1321, 1437)	7.8
  (1321, 1440)	9.5
  (1321, 1441)	9.0
  (1321, 1445)	8.0
  (1321, 1446)	8.2
  (1321, 1448)	9.0
  (1321, 1449)	8.0
  (1321, 1458)	8.1
  (1321, 1459)	7.0
  (1321, 1461)	8.0
  (1321, 1464)	8.3
  (1321, 1468)	6.7
  (1321, 1473)	9.2
  (1321, 1485)	9.2
  (1321, 1486)	9.1
  (1321, 1492)	7.9


In [12]:
# Print the stored entries of the test matrix as
# (row, column) -> rating pairs, then write the matrix to disk with
# SciPy's sparse .npz writer.
print(test_sparse)
sparse.save_npz(file='test_sparse', matrix=test_sparse)

  (0, 7)	7.0
  (0, 8)	8.0
  (0, 28)	6.0
  (0, 32)	6.0
  (0, 42)	7.0
  (0, 56)	6.0
  (0, 71)	8.0
  (0, 87)	9.0
  (0, 116)	7.0
  (0, 129)	7.0
  (0, 156)	7.0
  (0, 178)	5.0
  (0, 180)	7.0
  (0, 206)	7.0
  (0, 223)	7.0
  (0, 224)	7.0
  (0, 263)	6.0
  (0, 277)	5.0
  (0, 285)	7.0
  (0, 290)	9.0
  (0, 315)	7.0
  (0, 336)	7.0
  (0, 341)	6.0
  (0, 386)	7.0
  (0, 394)	7.0
  :	:
  (1321, 1215)	7.0
  (1321, 1219)	7.5
  (1321, 1222)	8.8
  (1321, 1255)	8.5
  (1321, 1259)	7.0
  (1321, 1275)	8.3
  (1321, 1281)	7.0
  (1321, 1289)	7.5
  (1321, 1307)	9.4
  (1321, 1311)	6.8
  (1321, 1334)	7.5
  (1321, 1335)	7.5
  (1321, 1340)	8.8
  (1321, 1346)	8.2
  (1321, 1347)	9.9
  (1321, 1364)	7.0
  (1321, 1368)	8.5
  (1321, 1375)	8.5
  (1321, 1379)	8.4
  (1321, 1381)	8.8
  (1321, 1407)	9.2
  (1321, 1443)	8.8
  (1321, 1467)	7.0
  (1321, 1478)	9.0
  (1321, 1480)	8.1
