# Imports and formatting helpers

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

In [2]:
def mil_format(x):
    """Return ``x`` expressed in millions of dollars with one decimal place.

    Example: ``mil_format(1500000)`` gives ``"$1.5M"``.
    """
    millions = x / 1000000
    return "${:.1f}M".format(millions)

In [3]:
def k_format(x):
    """Return ``x`` expressed in thousands of dollars with one decimal place.

    Example: ``k_format(1500)`` gives ``"$1.5K"``.
    """
    thousands = x / 1000
    return "${:.1f}K".format(thousands)

# Pulling data: each `_df` frame is an untouched backup; the un-suffixed copy is the one meant to be modified

In [4]:
# Read the players CSV once. `players_df` is kept as the untouched backup and
# `players` is an independent copy that later cells are free to modify, so the
# file does not have to be parsed twice.
players_df = pd.read_csv('data/highest_earning_players.csv')
players = players_df.copy()
players_df.head()

Unnamed: 0,PlayerId,NameFirst,NameLast,CurrentHandle,CountryCode,TotalUSDPrize,Game,Genre
0,3883,Peter,Rasmussen,dupreeh,dk,1822989.41,Counter-Strike: Global Offensive,First-Person Shooter
1,3679,Andreas,Højsleth,Xyp9x,dk,1799288.57,Counter-Strike: Global Offensive,First-Person Shooter
2,3885,Nicolai,Reedtz,dev1ce,dk,1787489.88,Counter-Strike: Global Offensive,First-Person Shooter
3,3672,Lukas,Rossander,gla1ve,dk,1652350.75,Counter-Strike: Global Offensive,First-Person Shooter
4,17800,Emil,Reif,Magisk,dk,1416448.64,Counter-Strike: Global Offensive,First-Person Shooter


In [7]:
# Note from the original author: every game is said to appear the same number
# of times (100 rows each) — TODO confirm against the data.
prize_columns = ['TotalUSDPrize', 'Game', 'Genre']

# Prize/game/genre columns of the players frame, plus a per-game grouping of it.
games = players[prize_columns]
games_grouped = games.groupby('Game')
#games_grouped.head(1)

In [8]:
# Read the teams CSV once. `teams_df` is kept as the untouched backup and
# `teams` is an independent copy that later cells modify (columns get added),
# so the file does not have to be parsed twice.
teams_df = pd.read_csv('data/highest_earning_teams.csv')
teams = teams_df.copy()
teams_df.head()

Unnamed: 0,TeamId,TeamName,TotalUSDPrize,TotalTournaments,Game,Genre
0,760,San Francisco Shock,3105000.0,7,Overwatch,First-Person Shooter
1,776,London Spitfire,1591136.5,13,Overwatch,First-Person Shooter
2,768,New York Excelsior,1572618.5,18,Overwatch,First-Person Shooter
3,773,Philadelphia Fusion,1186278.5,15,Overwatch,First-Person Shooter
4,766,Seoul Dynasty,1130000.0,6,Overwatch,First-Person Shooter


In [9]:
# Read the country/continent lookup CSV once. `countries_df` is kept as the
# untouched backup and `countries` is an independent working copy, so the file
# does not have to be parsed twice.
countries_df = pd.read_csv('data/country-and-continent-codes-list.csv')
countries = countries_df.copy()
countries_df.head()

Unnamed: 0,Continent_Name,Continent_Code,Country_Name,Two_Letter_Country_Code,Three_Letter_Country_Code,Country_Number
0,Asia,AS,"Afghanistan, Islamic Republic of",AF,AFG,4.0
1,Europe,EU,"Albania, Republic of",AL,ALB,8.0
2,Antarctica,AN,Antarctica (the territory South of 60 deg S),AQ,ATA,10.0
3,Africa,AF,"Algeria, People's Democratic Republic of",DZ,DZA,12.0
4,Oceania,OC,American Samoa,AS,ASM,16.0


# Adding Columns

In [10]:
# New `PrizePerTournament` column on the teams frame: total prize money divided
# by the number of tournaments, rounded to two decimal places.
prize_per_tournament = teams['TotalUSDPrize'] / teams['TotalTournaments']
teams['PrizePerTournament'] = round(prize_per_tournament, 2)
teams.head()

Unnamed: 0,TeamId,TeamName,TotalUSDPrize,TotalTournaments,Game,Genre,PrizePerTournament
0,760,San Francisco Shock,3105000.0,7,Overwatch,First-Person Shooter,443571.43
1,776,London Spitfire,1591136.5,13,Overwatch,First-Person Shooter,122395.12
2,768,New York Excelsior,1572618.5,18,Overwatch,First-Person Shooter,87367.69
3,773,Philadelphia Fusion,1186278.5,15,Overwatch,First-Person Shooter,79085.23
4,766,Seoul Dynasty,1130000.0,6,Overwatch,First-Person Shooter,188333.33


# Team Data

In [13]:
# Count the non-null values of every remaining column for each
# (TeamName, Game) combination.
team_game_keys = ['TeamName', 'Game']
teams_grouped = teams.groupby(team_game_keys).count()
teams_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,TeamId,TotalUSDPrize,TotalTournaments,Genre,PrizePerTournament
TeamName,Game,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
(monkey) Business,Dota 2,1,1,1,1,1
100 Thieves,Counter-Strike: Global Offensive,1,1,1,1,1
100 Thieves,Fortnite,1,1,1,1,1
100 Thieves,League of Legends,1,1,1,1,1
303 Esports,PUBG,1,1,1,1,1
...,...,...,...,...,...,...
paiN Gaming,League of Legends,1,1,1,1,1
vsBANDITS,Fortnite,1,1,1,1,1
wisdom Nerve victory,PUBG,1,1,1,1,1
x6tence,Arena of Valor,1,1,1,1,1


In [12]:
# For each column of `teams_grouped`, count its non-null entries and flag
# whether that count is greater than 1.
# NOTE(review): if the goal is to detect a (TeamName, Game) pair that occurs
# more than once, this expression does not test that — confirm the intent.
non_null_counts = teams_grouped.count()
non_null_counts > 1

TeamId                True
TotalUSDPrize         True
TotalTournaments      True
PrizePerTournament    True
dtype: bool

In [None]:
# Teams ordered from the highest to the lowest prize per tournament;
# display the first 50 rows.
teams_sorted = teams.sort_values(by='PrizePerTournament', ascending=False)
teams_sorted.head(50)

In [None]:
# Mean prize per tournament for each genre.
# The built-in groupby `.mean()` is used instead of `.agg(np.mean)`: passing the
# NumPy callable to `agg` is deprecated in recent pandas and emits a
# FutureWarning, while the computed result is the same.
teams_genre_grouped_per_tournament = teams_sorted.groupby('Genre')['PrizePerTournament'].mean()
teams_genre_grouped_per_tournament.head()

In [None]:
# Horizontal bar chart of the per-genre means, sorted ascending before plotting.
genre_bar_colors = ['firebrick', 'darkorange', 'forestgreen', 'dodgerblue', 'darkblue']
genre_means_ascending = teams_genre_grouped_per_tournament.sort_values(ascending=True)
ax = genre_means_ascending.plot.barh(color=genre_bar_colors)

# Axis text first, then the hand-picked dollar ticks and their short labels.
ax.set_ylabel('Genre')
ax.set_xlabel('Prize_USD (Thousands)')
ax.set_title('Average Team Prize Per Tournament by Genre')
ax.set_xticks(ticks=[5000 * step for step in range(1, 8)])
ax.set_xticklabels(['$5k','$10k','$15k','$20k','$25k','$30k','$35k'])

# Resize the figure that owns the axes and tighten the layout.
fig = ax.figure
fig.set_size_inches(10, 6)
fig.tight_layout(pad=1)
#fig.savefig('images/avg_prize_per_team_by_genre.png')

#plt.close()

In [None]:
# Mean prize per tournament for each game.
# The built-in groupby `.mean()` is used instead of `.agg(np.mean)`: passing the
# NumPy callable to `agg` is deprecated in recent pandas and emits a
# FutureWarning, while the computed result is the same.
teams_genre_grouped_per_tournament_game = teams_sorted.groupby('Game')['PrizePerTournament'].mean()
teams_genre_grouped_per_tournament_game

In [None]:
# Horizontal bar chart of the per-game means, sorted ascending before plotting.
# One colour is listed per bar (ten entries).
game_bar_colors = [
    'firebrick', 'darkblue', 'forestgreen', 'darkorange', 'dodgerblue',
    'dodgerblue', 'forestgreen', 'darkblue', 'darkblue', 'darkblue',
]
game_means_ascending = teams_genre_grouped_per_tournament_game.sort_values(ascending=True)
ax = game_means_ascending.plot.barh(color=game_bar_colors)

# Axis text first, then the hand-picked dollar ticks and their short labels.
ax.set_ylabel('Game')
ax.set_xlabel('Prize_USD (Thousands)')
ax.set_title('Average Team Prize Per Tournament by Game')
ax.set_xticks(ticks=[10000 * step for step in range(1, 6)])
ax.set_xticklabels(['$10k','$20k','$30k','$40k','$50k'])

# Resize the figure that owns the axes and tighten the layout.
fig = ax.figure
fig.set_size_inches(10, 6)
fig.tight_layout(pad=1)
#fig.savefig('images/average_team_prize_per_game.png')

#plt.close()

# Analyzing prize money in Players DataFrame

## Using Averages grouped by game

In [None]:
# Averages
# Mean player prize for each game. The built-in groupby `.mean()` replaces
# `.agg(np.mean)`, which is deprecated in recent pandas (FutureWarning); the
# result is the same.
games_mean_prize = games.groupby('Game')['TotalUSDPrize'].mean()
# Sort numerically (largest first) and only then format as "$…K" strings for display.
games_mean_prize_sorted = games_mean_prize.sort_values(ascending=False).apply(k_format)
games_mean_prize_sorted

In [None]:
# Averages
# Ten bar colours, one per entry.
# NOTE(review): presumably meant for the per-game averages bar chart — confirm.
color_games_averages = [
    'darkblue',
    'darkblue',
    'forestgreen',
    'dodgerblue',
    'darkorange',
    'firebrick',
    'darkblue',
    'dodgerblue',
    'forestgreen',
    'darkblue',
]

In [None]:
# Averages
# Horizontal bar chart of the mean player prize per game, sorted ascending.
# The colour list defined in this notebook is `color_games_averages`; the name
# `color_games` is not defined in any visible cell and raises NameError on a
# fresh kernel.
ax = games_mean_prize.sort_values(ascending=True).plot.barh(color=color_games_averages)

# The plotted series is a per-game mean, so the title says "Average", not "Total".
ax.set_title('Average Prize Per Game by Player')
ax.set_xlabel('Prize_USD (Millions)')
ax.set_ylabel('Game')
ax.set_xticks(ticks=[250000, 500000, 750000, 1000000, 1250000, 1500000, 1750000, 2000000])
ax.set_xticklabels(['$250k','$500k','$750k','$1m','$1.25m','$1.50m','$1.75m','$2m'])

fig = ax.figure
fig.set_size_inches(14,6)
fig.tight_layout(pad=1)
#fig.savefig('images/barh_players_prize_per_game.png')

#plt.close()

## Using averages grouped by genre

In [None]:
# Averages by Genre
# Mean player prize for each genre. The built-in groupby `.mean()` replaces
# `.agg(np.mean)`, which is deprecated in recent pandas (FutureWarning); the
# result is the same.
genre_mean_prize = games.groupby('Genre')['TotalUSDPrize'].mean()
# Largest mean first; values are left numeric (k_format is not applied here).
genre_mean_prize_sorted = genre_mean_prize.sort_values(ascending=False)#.apply(k_format)
genre_mean_prize_sorted

In [None]:
# Averages
# Five bar colours passed to the per-genre averages bar chart.
color_genre_averages = [
    'darkorange',
    'firebrick',
    'dodgerblue',
    'forestgreen',
    'darkblue',
]

In [None]:
# Averages
# Horizontal bar chart of the mean player prize per genre, sorted ascending.
ax = genre_mean_prize.sort_values(ascending=True).plot.barh(color=color_genre_averages)

ax.set_title('Average Player Earnings Per Genre')
ax.set_xlabel('Prize_USD (Thousands)')
# The series is grouped by Genre, so the category axis is labelled 'Genre'
# (it previously read 'Game').
ax.set_ylabel('Genre')
ax.set_xticks(ticks=[100000, 200000, 300000, 400000, 500000, 600000])
ax.set_xticklabels(['$100k','$200k','$300k','$400k','$500k','$600k'])

fig = ax.figure
fig.set_size_inches(14,6)
fig.tight_layout(pad=1)
#fig.savefig('images/barh_players_prize_per_genere.png')

## Using totals grouped by Game

In [None]:
# Totals
# Sum of player prize money for each game. The built-in groupby `.sum()`
# replaces `.agg(np.sum)`, which is deprecated in recent pandas (FutureWarning).
game_total_prizes = games.groupby('Game')['TotalUSDPrize'].sum()
# Sort on the numeric totals FIRST and format afterwards: sorting the already
# formatted "$…M" strings would order them lexicographically (e.g. "$10.0M"
# before "$2.0M") rather than by amount.
game_total_prizes_mil = game_total_prizes.sort_values(ascending=True).apply(mil_format)
game_total_prizes_mil

In [None]:
# Totals
# Horizontal bar chart of the summed prize money per game, sorted ascending.
totals_ascending = game_total_prizes.sort_values(ascending=True)
ax = totals_ascending.plot.barh()

# Axis text first, then dollar ticks every $25m up to $200m with short labels.
ax.set_ylabel('Game')
ax.set_xlabel('Prize_USD (Millions)')
ax.set_title('Total Prize Per Game')
ax.set_xticks(ticks=[25000000 * step for step in range(1, 9)])
ax.set_xticklabels(['$25m','$50m','$75m','$100m','$125m','$150m','$175m','$200m'])

# Resize the figure that owns the axes and tighten the layout.
fig = ax.figure
fig.set_size_inches(14, 6)
fig.tight_layout(pad=1)
#fig.savefig('images/hist_prize_per_game.png')

#plt.close()

In [None]:
# Totals
# Pie chart of each game's share of the summed prize money; wedges are
# annotated with their percentage to one decimal place.
prize_share_ascending = game_total_prizes.sort_values()
ax = prize_share_ascending.plot.pie(autopct='%1.1f%%')

# Blank out the y-label and set the title.
ax.set_ylabel('')
ax.set_title('Prize Share Per Game')

fig = ax.figure
fig.set_size_inches(18, 10)
#fig.savefig('images/pie_prize_share.png')

# Plotting individual games

In [None]:
# All player rows ordered from the highest to the lowest prize.
games_individual_sorted = games.sort_values(by='TotalUSDPrize', ascending=False)

# Just the prize column of that ordering. Despite the `_k` suffix the values
# stay numeric: the k_format step is commented out.
games_individual_sorted_k = games_individual_sorted['TotalUSDPrize']#.apply(k_format)
games_individual_sorted_k.head()

In [None]:
# Distinct game names, in order of first appearance in the prize-sorted frame.
# The sorted frame built in this notebook is `games_individual_sorted`; the name
# `games_grouped_individual_sorted` is not defined in any visible cell and
# raises NameError on a fresh kernel.
unique_names = games_individual_sorted['Game'].unique()

unique_names

In [None]:
# Histogram (50 bins) of the individual prize amounts, one value per player row.
# The sorted frame built in this notebook is `games_individual_sorted`; the name
# `games_grouped_individual_sorted` is not defined in any visible cell and
# raises NameError on a fresh kernel.
ax = games_individual_sorted['TotalUSDPrize'].hist(figsize=(14,14), bins=50)

ax.set_title('Prize Earnings Per Game')
# A histogram has the measured values on the x-axis and the bin counts on the
# y-axis, so the dollar label and the million-dollar ticks belong on x. Placing
# those ticks on y forces the count axis out to millions and flattens the bars.
ax.set_xlabel('Prize_USD')
ax.set_ylabel('Number of Players')
ax.set_xticks([1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000])
ax.set_xticklabels(['$1m','$2m','$3m','$4m', '$5m', '$6m', '$7m'])

fig = ax.figure
fig.set_size_inches(14,14)
fig.tight_layout(pad=1)

# Analyzing prize money by Country in Players DataFrame

## Total Player Earnings Per Country:

In [None]:
# Sum of player prize money for each country code.
prize_by_country_code = players.groupby('CountryCode')['TotalUSDPrize']
country_total_earnings = prize_by_country_code.sum()

# Largest total first; values stay numeric (mil_format is commented out).
country_total_earnings_sorted = country_total_earnings.sort_values(ascending=False)#.apply(mil_format)
#country_total_earnings_sorted

In [None]:
# Horizontal bar chart of total prize money per country code, sorted ascending.
country_totals_ascending = country_total_earnings.sort_values()
ax = country_totals_ascending.plot.barh()

# Axis text first, then dollar ticks every $25m up to $100m with short labels.
ax.set_ylabel('Country Code')
ax.set_xlabel('Prize_USD (Millions)')
ax.set_title('Total Prize Earnings Per Country')
ax.set_xticks(ticks=[25000000 * step for step in range(1, 5)])
ax.set_xticklabels(['$25m','$50m','$75m','$100m'])

# Resize the figure that owns the axes and tighten the layout.
fig = ax.figure
fig.set_size_inches(14, 14)
fig.tight_layout(pad=1)
#fig.savefig('images/barh_conrties_prize_total.png')

## Average Player Earnings Per Country

In [None]:
# Mean player prize for each country code.
prize_per_country_code = players.groupby('CountryCode')['TotalUSDPrize']
country_average_earnings = prize_per_country_code.mean()

# Largest mean first, then formatted as "$…K" strings for display.
averages_descending = country_average_earnings.sort_values(ascending=False)
country_average_earnings_sorted = averages_descending.apply(k_format)
#country_average_earnings_sorted

In [None]:
# Horizontal bar chart of the mean player prize per country code, sorted ascending.
country_means_ascending = country_average_earnings.sort_values()
ax = country_means_ascending.plot.barh()

# Axis text first, then dollar ticks every $1m up to $5m with short labels.
ax.set_ylabel('Country')
ax.set_xlabel('Prize_USD (Millions)')
ax.set_title('Average Prize Per Game Grouped By Country')
ax.set_xticks(ticks=[1000000 * step for step in range(1, 6)])
ax.set_xticklabels(['$1m','$2m','$3m','$4m','$5m'])

# Resize the figure that owns the axes and tighten the layout.
fig = ax.figure
fig.set_size_inches(14, 14)
fig.tight_layout(pad=1)
#fig.savefig('images/barh_conrties_prize_per_game.png')

# Where are the top 100 players?

In [None]:
# Number of player rows (non-null `Game` values) for each country code.
games_by_country_code = players.groupby('CountryCode')['Game']
player_by_country = games_by_country_code.count()
#player_by_country = player_by_country.sort_values(ascending=True)
#player_by_country

In [None]:
# Horizontal bar chart of how many player rows each country code has, sorted ascending.
ax = player_by_country.sort_values().plot.barh()

# `player_by_country` holds row counts, not prize money. The title and x-label
# describe a count, and no million-dollar ticks are set: such ticks would
# stretch the x-axis out to 5,000,000 and make the count bars invisible.
ax.set_title('Number of Top-Earning Players by Country')
ax.set_xlabel('Number of Players')
ax.set_ylabel('Country')

fig = ax.figure
fig.set_size_inches(14,14)
fig.tight_layout(pad=1)