# Imports and formatting helpers

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

import pandas_profiling as pp

plt.style.use('fivethirtyeight')

In [2]:
def mil_format(x):
    """Render a dollar amount as a string in millions, e.g. 1500000 -> '$1.5M'."""
    millions = x / 1000000
    return f"${millions:.1f}M"

In [3]:
def k_format(x):
    """Render a dollar amount as a string in thousands, e.g. 1500 -> '$1.5K'."""
    thousands = x / 1000
    return f"${thousands:.1f}K"

# Loading data: the '_df' frames are untouched backups; the unsuffixed frames are working copies meant to be modified

In [5]:
# Read the players CSV once. players_df is kept as the untouched backup;
# players is an independent copy, so changes to it never reach the backup
# and the file is not parsed a second time.
players_df = pd.read_csv('data/highest_earning_players.csv')
players = players_df.copy()

In [6]:
# Read the teams CSV once. teams_df is kept as the untouched backup;
# teams is an independent copy, so changes to it never reach the backup
# and the file is not parsed a second time.
teams_df = pd.read_csv('data/highest_earning_teams.csv')
teams = teams_df.copy()

In [7]:
# Read the country/continent code list once. countries_df is kept as the
# untouched backup; countries is an independent copy, so changes to it never
# reach the backup and the file is not parsed a second time.
countries_df = pd.read_csv('data/country-and-continent-codes-list.csv')
countries = countries_df.copy()

In [8]:
# Note from the author: every game appears the same number of times (100).
# Keep only the prize, game and genre columns of the working players frame.
games = players[
    ['TotalUSDPrize', 'Game', 'Genre']
]
# Group the rows by game title; .head() shows the first rows of each group.
games_grouped = games.groupby(by='Game')
games_grouped.head()

Unnamed: 0,TotalUSDPrize,Game,Genre
0,1822989.41,Counter-Strike: Global Offensive,First-Person Shooter
1,1799288.57,Counter-Strike: Global Offensive,First-Person Shooter
2,1787489.88,Counter-Strike: Global Offensive,First-Person Shooter
3,1652350.75,Counter-Strike: Global Offensive,First-Person Shooter
4,1416448.64,Counter-Strike: Global Offensive,First-Person Shooter
100,6952596.58,Dota 2,Multiplayer Online Battle Arena
101,6470000.02,Dota 2,Multiplayer Online Battle Arena
102,6000411.96,Dota 2,Multiplayer Online Battle Arena
103,5554297.41,Dota 2,Multiplayer Online Battle Arena
104,5470902.57,Dota 2,Multiplayer Online Battle Arena


In [9]:
# Build a profiling report over the backup players frame and render it
# as interactive notebook widgets.
profile = pp.ProfileReport(
    players_df,
    title='Pandas Profiling Report',
    explorative=True,
)
profile.to_widgets()

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Render widgets'), FloatProgress(value=0.0, max=1.0), HTML(value='')))

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

# Plotting individual games

In [None]:
# Order every row by prize money, largest first.
games_individual_sorted = games.sort_values(by='TotalUSDPrize', ascending=False)
# Prize column of the sorted frame (left as raw numbers, not formatted).
games_individual_sorted_k = games_individual_sorted['TotalUSDPrize']
games_individual_sorted_k.head()

In [None]:
# Distinct game titles taken from the prize-sorted frame.
# The frame is named games_individual_sorted; the previously referenced
# games_grouped_individual_sorted is never defined and raised NameError
# on a fresh kernel.
unique_names = games_individual_sorted['Game'].unique()

unique_names

In [None]:
# Map each game title to the rows of the prize-sorted frame belonging to it.
# This cell used to be wrapped in a string literal, so df_dict was never
# created even though a later cell reads it, and the disabled code referred
# to a frame name that does not exist. Iterating a groupby yields
# (game, rows) pairs directly; sort=False keeps games in first-appearance
# order, and rows inside each group keep their prize-sorted order.
df_dict = {
    game: game_rows
    for game, game_rows in games_individual_sorted.groupby('Game', sort=False)
}

In [None]:
# Display the 'PUBG' entry of df_dict; presumably the per-game DataFrame for
# that title — TODO confirm df_dict is populated before this cell runs.
df_dict['PUBG']

In [None]:
# Histogram of individual player prize totals across all games.
# Uses games_individual_sorted (the frame actually defined above); the old
# name games_grouped_individual_sorted does not exist.
ax = games_individual_sorted['TotalUSDPrize'].hist(figsize=(14, 14), bins=50)

ax.set_title('Prize Earnings Per Game')
# In a histogram the x axis carries the prize value and the y axis the number
# of rows per bin, so the dollar ticks and the prize label belong on x.
# Putting million-dollar ticks on y stretched the count axis to 7,000,000
# and flattened the bars.
ax.set_xlabel('Prize_USD')
ax.set_ylabel('Number of players')
ax.set_xticks([1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000])
ax.set_xticklabels(['$1m', '$2m', '$3m', '$4m', '$5m', '$6m', '$7m'])

fig = ax.figure
fig.tight_layout(pad=1)

# Analyzing prize money grouped by game in Players DataFrame

In [None]:
# Largest single prize total recorded for each game.
# Call the groupby reduction directly: passing NumPy callables such as np.max
# to .agg() is deprecated in recent pandas releases and emits a FutureWarning.
games_max_prize = games.groupby('Game')['TotalUSDPrize'].max()
# Sort numerically first, then format the values as "$x.xM" strings.
games_max_prize_sorted = games_max_prize.sort_values(ascending=False).apply(mil_format)

In [None]:
# Smallest single prize total recorded for each game.
# Call the groupby reduction directly: passing NumPy callables such as np.min
# to .agg() is deprecated in recent pandas releases and emits a FutureWarning.
games_min_prize = games.groupby('Game')['TotalUSDPrize'].min()
# Sort numerically first, then format the values as "$x.xK" strings.
games_min_prize_sorted = games_min_prize.sort_values(ascending=False).apply(k_format)

In [None]:
# Sum of all prize money per game.
# Call the groupby reduction directly: passing np.sum to .agg() is deprecated
# in recent pandas releases and emits a FutureWarning.
game_total_prizes = games.groupby('Game')['TotalUSDPrize'].sum()
# Sort while the values are still numeric, then format. Sorting the formatted
# strings compared them lexicographically (e.g. "$10.0M" before "$9.0M"),
# which does not match the numeric order.
game_total_prizes_mil = game_total_prizes.sort_values(ascending=True).apply(mil_format)

In [None]:
# Horizontal bar chart of summed prize money per game, in ascending order.
ax = game_total_prizes.sort_values(ascending=True).plot.barh()

ax.set(
    title='Total Prize Per Game',
    xlabel='Prize_USD (Millions)',
    ylabel='Game',
)
# Tick every $25m from $25m to $200m.
ax.set_xticks(ticks=[
    25_000_000, 50_000_000, 75_000_000, 100_000_000,
    125_000_000, 150_000_000, 175_000_000, 200_000_000,
])
ax.set_xticklabels([
    '$25m', '$50m', '$75m', '$100m',
    '$125m', '$150m', '$175m', '$200m',
])

fig = ax.figure
fig.set_size_inches(14, 6)
fig.tight_layout(pad=1)
# Optional export: fig.savefig('images/hist_prize_per_game.png')

In [None]:
# Pie chart of each game's share of the summed prize money,
# with percentages printed to one decimal place.
ax = game_total_prizes.sort_values().plot.pie(autopct='%1.1f%%')

# Blank y label: pandas would otherwise print the series name beside the pie.
ax.set(title='Prize Share Per Game', ylabel='')

fig = ax.figure
fig.set_size_inches(18, 10)
# Optional export: fig.savefig('images/pie_prize_share.png')

# Analyzing prize money grouped by Country in Players DataFrame

In [None]:
# Summed prize money per country code.
country_total_earnings = (
    players
    .groupby('CountryCode')['TotalUSDPrize']
    .sum()
)
# Largest total first, formatted as "$x.xM" strings.
country_total_earnings_sorted = (
    country_total_earnings
    .sort_values(ascending=False)
    .apply(mil_format)
)

In [None]:
# Mean prize money per country code.
country_average_earnings = (
    players
    .groupby('CountryCode')['TotalUSDPrize']
    .mean()
)
# Largest average first, formatted as "$x.xK" strings.
country_average_earnings_sorted = (
    country_average_earnings
    .sort_values(ascending=False)
    .apply(k_format)
)

In [None]:
# Summary statistics of the per-country prize totals.
country_total_earnings.describe()

In [None]:
# Horizontal bar chart of summed prize money per country, in ascending order.
ax = country_total_earnings.sort_values().plot.barh()

ax.set_title('Total Prize Earnings Per Country')
ax.set_xlabel('Prize_USD (Millions)')
# The bars are indexed by CountryCode, so the axis shows countries, not games.
ax.set_ylabel('Country')
ax.set_xticks(ticks=[25000000, 50000000, 75000000, 100000000])
ax.set_xticklabels(['$25m', '$50m', '$75m', '$100m'])

fig = ax.figure
fig.set_size_inches(14, 14)
fig.tight_layout(pad=1)

In [None]:
# Horizontal bar chart of mean prize money per country, in ascending order.
ax = country_average_earnings.sort_values().plot.barh()

# NOTE(review): the series is a mean of TotalUSDPrize grouped by CountryCode;
# the "Per Game" wording in the title may not match that — confirm the intent.
ax.set_title('Average Prize Per Game Grouped By Country')
ax.set_xlabel('Prize_USD (Millions)')
# The bars are indexed by CountryCode, so the axis shows countries, not games.
ax.set_ylabel('Country')
ax.set_xticks(ticks=[1000000, 2000000, 3000000, 4000000, 5000000])
ax.set_xticklabels(['$1m', '$2m', '$3m', '$4m', '$5m'])

fig = ax.figure
fig.set_size_inches(14, 14)
fig.tight_layout(pad=1)