In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from collections import defaultdict
import pickle
%matplotlib inline

In [2]:
# Directory holding the raw CSV exports (trailing slash is required because
# paths are built by plain string concatenation further down).
folder = 'D:/OMSCS/CSE6242/Project/Plot1/data/'
# Base names (without the .csv extension) of the tables to load.
files = [
    'hero_names',
    'match_outcome',
    'player_ratings',
    'players',
]

In [3]:
# Read each listed CSV into a DataFrame and keep them in a dict keyed by the
# table's base name, reporting progress after every file.
data = {}
for table_name in files:
    csv_path = '{}{}.csv'.format(folder, table_name)
    data[table_name] = pd.read_csv(csv_path)
    print('Done reading {}'.format(table_name))

Done reading hero_names
Done reading match_outcome
Done reading player_ratings
Done reading players


### Target: one summary table keyed by player id
account_id | total_matches | total_wins | trueskill_mu | trueskill_sigma

In [4]:
# Start from the raw ratings table and discard its 'Unnamed: 0' column
# (presumably a leftover index column from the CSV export -- TODO confirm).
ratings_raw = data['player_ratings']
player_ratings = ratings_raw.drop(columns='Unnamed: 0')
player_ratings.head()

In [6]:
# Keep only players with public profiles (per the author's note, an
# account_id of 0 marks a private profile), ordered by most matches played
# first and by account_id as the tie-breaker.
player_ratings = (
    player_ratings
    .sort_values(by=['total_matches', 'account_id'], axis=0, ascending=[False, True])
    .loc[lambda ratings: ratings['account_id'] != 0]
    .reset_index(drop=True)
)
player_ratings.head()

Unnamed: 0,account_id,total_wins,total_matches,trueskill_mu,trueskill_sigma
0,6647,396,785,25.383555,1.463347
1,12357,341,623,27.023187,1.51144
2,77866,302,570,26.43428,1.683808
3,35770,290,561,27.056765,1.588747
4,28485,262,526,26.190546,1.57963


In [22]:
# Per-match player rows (including the hero each player picked), without the
# 'Unnamed: 0' column.
picks_raw = data['players'].drop(columns='Unnamed: 0')
# Same public-profile filter as for the ratings table: drop account_id == 0.
is_public = picks_raw['account_id'] != 0
player_picks = picks_raw.loc[is_public].reset_index(drop=True)
player_picks.head()

Unnamed: 0,match_id,account_id,hero_id,player_slot,gold,gold_spent,gold_per_min,xp_per_min,kills,deaths,...,unit_order_glyph,unit_order_eject_item_from_stash,unit_order_cast_rune,unit_order_ping_ability,unit_order_move_to_direction,unit_order_patrol,unit_order_vector_target_position,unit_order_radar,unit_order_set_item_combine_lock,unit_order_continue
0,0,1,51,1,2954,17760,494,659,13,3,...,,,,14.0,,,,,,
1,0,2,11,3,1179,22505,599,605,8,4,...,1.0,,,13.0,,,,,,
2,0,3,67,4,3307,23825,613,762,20,3,...,3.0,,,23.0,,,,,,
3,0,4,106,128,476,12285,397,524,5,6,...,,,,2.0,,,,,,
4,0,5,46,130,2390,13395,452,517,4,8,...,,,,4.0,110.0,,,,,


In [64]:
# For every (player, hero) pair: number of matches played and mean XP per minute.
hero_usage = (
    player_picks
    .groupby(['account_id', 'hero_id'])
    .agg({'match_id': 'count', 'xp_per_min': 'mean'})
    .reset_index()
)
# Within each player, put the most-played hero first; ties on match count are
# broken by the higher mean XP per minute.
hero_usage = hero_usage.sort_values(
    by=['account_id', 'match_id', 'xp_per_min'],
    ascending=[True, False, False],
)
# After the sort, the first row of each player's group is the favourite hero.
top_pick = hero_usage.groupby('account_id').agg({'match_id': 'first', 'hero_id': 'first'})
# 'match_id' now holds a count of matches, so give it a name that says so.
fav_heros = top_pick.reset_index().rename(columns={'match_id': 'match_count'})
fav_heros.head()

Unnamed: 0,account_id,hero_id,match_id,xp_per_min
1,1,67,1,676.0
0,1,51,1,659.0
2,2,11,1,605.0
4,2,82,1,503.0
3,2,51,1,452.0


In [72]:
# Attach the human-readable hero name to each player's favourite hero.
# NOTE: merge defaults to an inner join, so any player whose hero_id has no
# entry in hero_names is dropped from the result.
fav_heros = (
    pd.merge(
        fav_heros,
        data['hero_names'][['hero_id', 'localized_name']],
        on='hero_id',
    )
    .reset_index(drop=True)
    .rename(columns={'localized_name': 'hero_name'})
)
fav_heros.head()

Unnamed: 0,account_id,match_count,hero_id,hero_name
0,1,1,67,Spectre
1,3,1,67,Spectre
2,5,4,67,Spectre
3,180,1,67,Spectre
4,236,1,67,Spectre


In [73]:
# Create the summary table: one row per player with his/her favorite hero
# joined to the player's rating statistics.
# The join key is stated explicitly instead of relying on merge's default of
# "every column name the two frames share"; account_id is the only shared
# column here, so the result is unchanged, but a future column added to either
# frame can no longer silently change the join.
summary = fav_heros.merge(player_ratings, on='account_id', how='inner')
# Win rate = fraction of the player's total matches that were wins.
summary['win_rate'] = summary['total_wins'] / summary['total_matches']
summary.head()

Unnamed: 0,account_id,match_count,hero_id,hero_name,total_wins,total_matches,trueskill_mu,trueskill_sigma
0,1,1,67,Spectre,14,24,26.232905,4.854238
1,3,1,67,Spectre,5,16,20.221006,5.961434
2,5,4,67,Spectre,72,108,32.190551,2.93714
3,180,1,67,Spectre,46,85,27.108094,3.323146
4,236,1,67,Spectre,2,2,28.043292,7.891427


In [75]:
# Persist the summary table next to the input data as a pickle (file name has
# no extension), using the highest pickle protocol available.
summary_path = folder + 'summary'
with open(summary_path, 'wb') as out_file:
    pickle.dump(summary, out_file, protocol=pickle.HIGHEST_PROTOCOL)