#  Recommendation - Loading

In [None]:
import os
import zipfile
import pandas as pd
import dask.dataframe as dd
import matplotlib.pyplot as plt
import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse import vstack
import pickle
from pathlib import Path

In [None]:
# Render every float with exactly one decimal place in DataFrame/Series output.
pd.set_option("display.float_format", "{:.1f}".format)

In [None]:
def loading_recommendations(path):
    """Load the recommendations CSV and keep only the positive reviews.

    Parameters
    ----------
    path : str, path-like or file-like
        Passed unchanged to ``pd.read_csv``. The data must contain the
        columns ``is_recommended``, ``user_id`` and ``app_id``.

    Returns
    -------
    pandas.DataFrame
        Only the rows whose ``is_recommended`` equals True, sorted by
        ``user_id_categorical``, with:

        * ``is_recommended`` recoded to the integer 1;
        * ``user_id_categorical``: integer code of ``user_id``, starting at 0;
        * ``app_id_categorical``: integer code of ``app_id``, starting at 1.
    """
    df = pd.read_csv(path)

    # Keep only the users who recommend the game and drop the others.
    # The explicit .copy() makes the filtered frame independent of `df`, so the
    # column assignments below never write into a slice of the original frame.
    recommended = df.loc[df["is_recommended"].eq(True)].copy()

    # Every remaining row passed the filter above, so the flag is always 1.
    recommended["is_recommended"] = 1

    # Codes are computed after filtering, so they only cover the kept rows.
    recommended["user_id_categorical"] = pd.Categorical(recommended["user_id"]).codes
    # app_id codes start from 1; the value 0 is left to the user_id column.
    recommended["app_id_categorical"] = pd.Categorical(recommended["app_id"]).codes + 1

    return recommended.sort_values(by="user_id_categorical")

In [None]:
# Read the raw CSV once and keep the prepared frame for the rest of the notebook.
recommendations = loading_recommendations(path="resources/recommendations.csv")
recommendations

Unnamed: 0,app_id,helpful,funny,date,is_recommended,hours,user_id,review_id,user_id_categorical,app_id_categorical
30478477,235540,28,2,2015-10-17,1,21.9,0,30478477,0,1698
24437527,49520,35,16,2015-08-12,1,2.1,0,24437527,0,895
34510797,627690,8,0,2022-03-06,1,103.3,0,34510797,0,12333
22036352,1454400,6,2,2022-02-18,1,762.6,0,22036352,0,28682
36809055,317400,15,0,2015-08-01,1,18.0,0,36809055,0,3676
...,...,...,...,...,...,...,...,...,...,...
5228835,397540,0,0,2020-11-25,1,160.9,14306059,5228835,12663129,6218
39735230,1112830,0,0,2020-10-10,1,1.0,14306060,39735230,12663130,22680
23151356,1407200,0,0,2022-09-02,1,171.0,14306061,23151356,12663131,27909
25486974,1987080,0,0,2022-08-28,1,15.5,14306062,25486974,12663132,35826


In [None]:
# Distinct users and games, counted on their integer-coded columns.
NUMBER_OF_UNIQUE_USERS, NUMBER_OF_UNIQUE_GAMES = (
    recommendations[coded_column].nunique()
    for coded_column in ("user_id_categorical", "app_id_categorical")
)

In [None]:
# Report both totals with thousands separators and no decimals.
print("There are", format(NUMBER_OF_UNIQUE_USERS, ",.0f"), "users")
print("There are", format(NUMBER_OF_UNIQUE_GAMES, ",.0f"), "games")

There are 12,663,134 users
There are 37,419 games


**Average number of reviews per user**

In [None]:
# Number of reviews written by each user (non-null review_id values per user_id).
# reset_index(name=...) yields flat column labels, so review_user["#_reviews"]
# is a Series; assigning a nested list to .columns would build a MultiIndex and
# make that lookup return a DataFrame instead.
review_user = (
    recommendations.groupby("user_id")["review_id"]
    .count()
    .reset_index(name="#_reviews")
)
review_user.head()

Unnamed: 0,user_id,#_reviews
0,0,11
1,2,4
2,3,2
3,4,1
4,5,1


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the number of
# reviews per user.
review_user["#_reviews"].describe()

Unnamed: 0,#_reviews
count,13781059.0
mean,3.0
std,8.1
min,1.0
25%,1.0
50%,1.0
75%,3.0
max,6045.0


In [None]:
# Example (disabled): display every review written by user_id == 0.
# Uncomment the next line to run it.
# recommendations[recommendations["user_id"]==0]

**Average number of reviews per game**

In [None]:
# Number of reviews received by each game (non-null review_id values per app_id).
# reset_index(name=...) yields flat column labels, so review_game["#_reviews"]
# is a Series; assigning a nested list to .columns would build a MultiIndex and
# make that lookup return a DataFrame instead.
review_game = (
    recommendations.groupby("app_id")["review_id"]
    .count()
    .reset_index(name="#_reviews")
)
review_game.head()

Unnamed: 0,app_id,#_reviews
0,10,41043
1,20,4284
2,30,4432
3,40,1610
4,50,9721


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the number of
# reviews per game.
review_game["#_reviews"].describe()

Unnamed: 0,#_reviews
count,37610.0
mean,1094.3
std,7689.3
min,1.0
25%,13.0
50%,39.0
75%,179.8
max,319492.0


In [None]:
# Example: display every review of the game whose app_id is 10.
recommendations.loc[recommendations["app_id"].eq(10)]

Unnamed: 0,app_id,helpful,funny,date,is_recommended,hours,user_id,review_id
6175295,10,0,0,2021-12-06,1,22.6,2524579,6175295
6175299,10,0,0,2021-11-25,1,3.5,7761084,6175299
6175300,10,0,0,2021-11-25,1,5.4,9377644,6175300
6175304,10,0,0,2021-10-16,1,211.4,2713079,6175304
6175307,10,0,0,2021-07-26,1,359.6,10214996,6175307
...,...,...,...,...,...,...,...,...
38989335,10,2,0,2020-01-11,1,698.0,8010358,38989335
38989815,10,0,0,2021-07-17,1,13.0,6258273,38989815
38990023,10,2,0,2021-11-15,1,194.0,166934,38990023
38991478,10,0,0,2020-12-18,1,30.0,13780121,38991478


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=78c133f5-defd-458d-ba8f-cbdc9ae58cfb' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>