# RQ5

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def dateparse(time_in_secs):
    """Convert Unix timestamps given in seconds to pandas datetimes.

    Args:
        time_in_secs: a scalar or array-like of epoch values expressed in seconds.

    Returns:
        Whatever pd.to_datetime returns for the input (a Timestamp for a scalar,
        a datetime index/series for array-like input).
    """
    parsed = pd.to_datetime(time_in_secs, unit='s')
    return parsed

In [None]:
# Load the first 20,000 rows of the reviews file. The epoch-second columns are converted
# after loading because read_csv's `date_parser` argument is deprecated since pandas 2.0.
date_columns = ['timestamp_created', 'timestamp_updated', 'author.last_played']
dataset = pd.read_csv('./steam_reviews.csv', header='infer', nrows=20000)
for date_column in date_columns:
    dataset[date_column] = dateparse(dataset[date_column])


In [None]:
# Interpret the stored playtime values as minutes and keep them as timedeltas.
dataset['author.playtime_forever'] = pd.to_timedelta(
    dataset['author.playtime_forever'],
    unit='m',
)

In [None]:
# Print the column overview (dtypes, non-null counts) of the loaded frame.
dataset.info()

In [None]:
# Rows whose author reports exactly 2063 reviews.
dataset.loc[dataset["author.num_reviews"].eq(2063)]

#### 1 - Plot the top 10 most popular reviewers and the number of reviews.

I look at the "author.num_reviews" variable to see how many reviews a person has done for a given application. I add up the various values that the variable takes for the various applications to see the total number of reviews that a given person has made. Finally, I group these values by the author's name (or better, by the author's identification code -> "author.steamid").

I sort in descending order to see which are the most popular authors.

In [None]:
# One "author.num_reviews" value per author (the first one found), ten largest shown.
(
    dataset
    .groupby(["author.steamid"])["author.num_reviews"]
    .first()
    .sort_values(ascending=False)
    .head(10)
)

In [None]:
def mostPopularReviewers(n):
    """Return the `n` authors with the highest "author.num_reviews" value.

    Args:
        n: number of authors to keep.

    Returns:
        A DataFrame indexed by "author.steamid" with a single "author.num_reviews"
        column, sorted in descending order and truncated to `n` rows.
    """
    author_counts = dataset[["author.steamid", "author.num_reviews"]]
    # Keep one value per author: the first one encountered in the group.
    first_per_author = author_counts.groupby(["author.steamid"]).first()
    ranked = first_per_author.sort_values("author.num_reviews", ascending=False)
    return ranked.head(n)

In [None]:
# Index label (the "author.steamid") of the top-ranked reviewer.
mostPopularReviewers(1).index[0]

In [None]:
# Distinct app names reviewed by the top-ranked reviewer.
dataset.loc[
    dataset['author.steamid'].eq(mostPopularReviewers(1).index[0]),
    'app_name',
].unique()

In [None]:
def mostPopularReviewers_plot(n):
    """Draw a bar chart of the review counts of the `n` top-ranked reviewers.

    Args:
        n: number of reviewers to plot; forwarded to mostPopularReviewers.

    Returns:
        The object returned by plt.bar for the drawn bars.
    """
    top_reviewers = mostPopularReviewers(n)

    review_counts = top_reviewers["author.num_reviews"].array
    author_labels = [str(steamid) for steamid in top_reviewers.index]

    # Grey shades: 20% of the largest count is already normalised to 1.0,
    # so every bar at or above that level gets the darkest shade.
    grey_map = plt.get_cmap('Greys')
    scale = plt.Normalize(vmin=0, vmax=(top_reviewers["author.num_reviews"].max())*0.2)
    bar_colors = grey_map(scale(review_counts))

    plt.figure(figsize=(30,5))
    bars = plt.bar(author_labels, review_counts, color=bar_colors)

    plt.xlabel('author.steamid', labelpad=25.0, size="xx-large")
    plt.ylabel('Number of reviews', labelpad=25.0, size="xx-large")
    plt.title('Reviews by author')

    # Rotate the tick labels once more than ten authors are shown
    # (presumably so the long ids do not overlap).
    if n > 10:
        plt.xticks(rotation="vertical")

    return bars
 

In [None]:
# Use .first() rather than .sum(), consistent with mostPopularReviewers() and the top-10
# listing: "author.num_reviews" appears to be an author-level total (values such as 2063
# occur), so summing it over an author's rows would count it once per row.
(
    dataset
    .groupby(["author.steamid"])["author.num_reviews"]
    .first()
    .sort_values(ascending=False)
    .head(10)
    .plot.bar(
        figsize=(18, 9),
        xlabel='author.steamid',
        ylabel='Number of reviews',
        title='Reviews by author',
    )
);

In [None]:
# Plot the 11 top-ranked reviewers; the trailing ';' hides the returned object's repr.
mostPopularReviewers_plot(11);

#### 2 - What applications did the most popular author review?

I consider the identification code of the most popular author and I check which applications he has reviewed.

In [None]:
# Look up the most popular author from the loaded data instead of hard-coding a steamid,
# so the cell stays correct when the sample size (nrows) or the data changes.
top_author_id = mostPopularReviewers(1).index[0]
dataset.loc[dataset['author.steamid'] == top_author_id, 'app_name'].unique()

In [None]:
# Column overview again (same call as earlier in the notebook).
dataset.info()

In [None]:
# Per-author totals of the two flags (True counts as 1 when summed).
(
    dataset[['author.steamid', "steam_purchase", "received_for_free"]]
    .groupby('author.steamid', as_index=False)
    .sum()
)

In [None]:
# Narrow view of the columns inspected in the next cells.
t = dataset[[
    'author.steamid',
    "app_name",
    "timestamp_created",
    "author.num_reviews",
    "steam_purchase",
    'received_for_free',
    "author.playtime_forever",
    "author.num_games_owned",
]]

In [None]:
# Rows of the narrowed frame that belong to steamid 76561198125392509.
t.loc[dataset['author.steamid'].eq(76561198125392509)]

#### 3 - How many applications did he purchase, and how many did he get as free? Provide the number (count) and the percentage.

In [None]:
def percentageOfReceived():
    """Summarise how the top-ranked reviewer obtained the apps they reviewed.

    Sums the "steam_purchase" and "received_for_free" flags per author, keeps the
    row of the author returned by mostPopularReviewers(1), and adds two ratio columns.

    Returns:
        A one-row DataFrame with columns "author.steamid", "steam_purchase",
        "received_for_free", "percentage_of_purchase" and "percentage_of_free".
        The two "percentage" columns are fractions of
        (steam_purchase + received_for_free), i.e. values between 0 and 1; they are
        NaN when both totals are zero.
    """
    top_author_id = mostPopularReviewers(1).index[0]

    totals = (
        dataset[['author.steamid', "steam_purchase", "received_for_free"]]
        .groupby('author.steamid', as_index=False)
        .sum()
    )

    # .copy() makes the filtered row an independent frame, so adding columns below
    # neither triggers SettingWithCopyWarning nor depends on view/copy semantics.
    final = totals[totals['author.steamid'] == top_author_id].copy()

    # Plain column arithmetic replaces the row-wise apply(); results are the same.
    denominator = final["steam_purchase"] + final["received_for_free"]
    final["percentage_of_purchase"] = final["steam_purchase"] / denominator
    final["percentage_of_free"] = final["received_for_free"] / denominator

    return final

In [None]:
# Show the purchase / free summary row for the top-ranked reviewer.
percentageOfReceived()

Among the apps he reviewed, I check which ones he bought on Steam and which ones he did not, and I count them.

In [None]:
# Distinct apps reviewed by steamid 76561197974092119 with steam_purchase == True.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['steam_purchase'].eq(True),
    'app_name',
].unique()

In [None]:
# Distinct apps reviewed by steamid 76561197974092119 with steam_purchase == False.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['steam_purchase'].eq(False),
    'app_name',
].unique()

In [None]:
# Number of distinct apps reviewed by steamid 76561197974092119 with steam_purchase == False.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['steam_purchase'].eq(False),
    'app_name',
].nunique()

I see that this person hasn't bought any apps on steam.

I check which apps the author received for free and how many there are.

In [None]:
# Distinct apps reviewed by steamid 76561197974092119 with received_for_free == True.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(True),
    'app_name',
].unique()

In [None]:
# Number of distinct apps reviewed by steamid 76561197974092119 with received_for_free == True.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(True),
    'app_name',
].nunique()

I calculate the percentage of apps received for free.

In [None]:
# ds1: distinct apps reviewed by steamid 76561197974092119 (also reused by a later cell).
ds1 = dataset.loc[
    dataset['author.steamid'].eq(76561197974092119),
    'app_name',
].nunique()
# ds2: the subset of those apps flagged received_for_free == True.
ds2 = dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(True),
    'app_name',
].nunique()
# Share of free apps, expressed in percent.
perc1 = ds2/ds1*100
perc1

I check which apps the author did not receive for free and how many there are.

In [None]:
# Distinct apps reviewed by steamid 76561197974092119 with received_for_free == False.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(False),
    'app_name',
].unique()

In [None]:
# Number of distinct apps reviewed by steamid 76561197974092119 with received_for_free == False.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(False),
    'app_name',
].nunique()

I calculate the percentage of apps not received for free.

In [None]:
# ds3: distinct apps reviewed by steamid 76561197974092119 with received_for_free == False.
ds3 = dataset.loc[
    dataset['author.steamid'].eq(76561197974092119) & dataset['received_for_free'].eq(False),
    'app_name',
].nunique()
# Share of non-free apps in percent; ds1 comes from the earlier percentage cell.
perc2 = ds3/ds1*100
perc2

#### 4 - How many of the applications he purchased did he review positively, and how many negatively? How about the applications he received for free?

I check among the apps that he has reviewed and received for free which apps he recommended and which he did not recommend.

In [None]:
# Apps of steamid 76561197974092119 that were received for free and recommended.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119)
    & dataset['received_for_free'].eq(True)
    & dataset['recommended'].eq(True),
    'app_name',
].unique()

In [None]:
# Apps of steamid 76561197974092119 that were received for free and not recommended.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119)
    & dataset['received_for_free'].eq(True)
    & dataset['recommended'].eq(False),
    'app_name',
].unique()

I check among the apps that he has reviewed and he has not received for free which apps he has recommended and which he has not recommended

In [None]:
# Apps of steamid 76561197974092119 that were not received for free and were recommended.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119)
    & dataset['received_for_free'].eq(False)
    & dataset['recommended'].eq(True),
    'app_name',
].unique()

In [None]:
# Apps of steamid 76561197974092119 that were neither received for free nor recommended.
dataset.loc[
    dataset['author.steamid'].eq(76561197974092119)
    & dataset['received_for_free'].eq(False)
    & dataset['recommended'].eq(False),
    'app_name',
].unique()

In [None]:
def reviewedApplicationFromTheMostPopularReviewer():
    """Return the "app_name" entries of every review written by the top-ranked reviewer.

    Returns:
        A Series of app names (one entry per review row, duplicates kept) for the
        author returned by mostPopularReviewers(1).
    """
    top_author_id = mostPopularReviewers(1).index[0]
    written_by_top_author = dataset['author.steamid'] == top_author_id
    return dataset.loc[written_by_top_author, 'app_name']

In [None]:
# NOTE(review): this rebinds `t`, which earlier in the notebook held a DataFrame of
# selected columns, to a Series of app names.
t = reviewedApplicationFromTheMostPopularReviewer()

In [None]:
# Author id plus the three boolean flags used in the recommendation breakdown.
h = dataset[[
    'author.steamid',
    "recommended",
    "steam_purchase",
    "received_for_free",
]]

In [None]:
# Keep only the rows written by the top-ranked reviewer.
h = h.loc[h['author.steamid'].eq(mostPopularReviewers(1).index[0])]

In [None]:
# Display the filtered rows.
h

In [None]:
# Sum only the two flag columns per "recommended" value. "author.steamid" is an
# identifier, so it is left out instead of being summed and then dropped; the
# resulting columns are the same as before.
h.groupby("recommended", as_index=False)[["steam_purchase", "received_for_free"]].sum()