# Loading packages

In [19]:
# Root folder holding matches.txt, info_cards.csv and the images/ directory.
# Raw string: "\H", "\C" and "\d" are not valid escape sequences, so the
# non-raw literal only kept its backslashes by accident and triggers an
# invalid-escape warning on recent Python versions.
Data_path = r"D:\Huge datasets\Clash_royale\data"

import pandas as pd
from os import path
import json
import codecs
import ast
import numpy as np
import seaborn as sns
from itertools import chain
from sklearn.linear_model import LogisticRegression

# Loading Files

In [20]:
file_path = path.join(Data_path, 'matches.txt')

# load card info to get elixir
cards_info = pd.read_csv(path.join(Data_path, 'info_cards.csv'))

games = []   # one dict per successfully parsed line of matches.txt
errors = 0   # number of lines that could not be parsed

# use codecs to read the file as UTF-8 (another encoding problem);
# the context manager guarantees the handle is closed once parsing is done
with codecs.open(file_path, encoding='utf-8') as file:
    for line in file:
        try:
            # each line is expected to be the literal repr of a dict
            games.append(dict(ast.literal_eval(line.strip())))
        except Exception:
            # best effort: count the malformed line and keep going, but do not
            # swallow KeyboardInterrupt/SystemExit like a bare `except:` would
            errors += 1
        
def parse(line):
    """Flatten the nested per-player record so it fits a pandas row.

    ``line`` maps a side label to a player dict holding the keys 'clan',
    'deck', 'name' and 'trophy', where 'deck' is a sequence of
    (card name, level) pairs. For each side the result gets the columns
    ``<side>_clan``, ``<side>_deck``, ``<side>_name``, ``<side>_trophy``,
    ``<side>_deck_list`` (card names only) and ``<side>_level_list``
    (levels only).
    """
    flat = {}
    for side, player in line.items():
        flat.update({
            side + '_clan': player['clan'],
            side + "_deck": player['deck'],
            side + '_name': player['name'],
            side + '_trophy': player['trophy'],
            # split the (name, level) pairs into two parallel lists
            side + '_deck_list': [card for card, _ in player['deck']],
            side + '_level_list': [lvl for _, lvl in player['deck']],
        })
    return flat

# Raw frame: one row per parsed match, still holding the nested
# "players" and "result" fields.
games_df = pd.DataFrame(games)

# Expand the nested "players" field into flat per-side columns.
add_var = pd.DataFrame(games_df.players.map(parse).tolist())
games_data = pd.concat(
    [games_df.drop("players", axis=1), add_var],
    axis=1,
)

# Split the two-element "result" field into one crown column per side.
res = pd.DataFrame(
    games_data.result.tolist(),
    columns=['left_crowns', 'right_crowns'],
)
games_data = pd.concat(
    [games_data.drop('result', axis=1), res],
    axis=1,
)
games_data.head()

Unnamed: 0,time,type,left_clan,left_deck,left_deck_list,left_level_list,left_name,left_trophy,right_clan,right_deck,right_deck_list,right_level_list,right_name,right_trophy,left_crowns,right_crowns
0,2017-07-12,ladder,battusai,"[[Fireball, 9], [Archers, 12], [Goblins, 12], ...","[Fireball, Archers, Goblins, Minions, Bomber, ...","[9, 12, 12, 11, 12, 2, 12, 13]",Supr4,4325,TwoFiveOne,"[[Mega Minion, 9], [Electro Wizard, 3], [Arrow...","[Mega Minion, Electro Wizard, Arrows, Lightnin...","[9, 3, 11, 5, 9, 2, 9, 5]",gpa raid,4258,2,0
1,2017-07-12,ladder,battusai,"[[Royal Giant, 13], [Ice Wizard, 2], [Bomber, ...","[Royal Giant, Ice Wizard, Bomber, Knight, Fire...","[13, 2, 12, 12, 9, 12, 2, 12]",Supr4,4296,The Wolves,"[[Ice Spirit, 10], [Valkyrie, 9], [Hog Rider, ...","[Ice Spirit, Valkyrie, Hog Rider, Inferno Towe...","[10, 9, 9, 9, 12, 9, 12, 9]",TITAN,4237,1,0
2,2017-07-12,ladder,battusai,"[[Royal Giant, 13], [Ice Wizard, 2], [Bomber, ...","[Royal Giant, Ice Wizard, Bomber, Knight, Fire...","[13, 2, 12, 12, 9, 12, 2, 12]",Supr4,4267,@LA PERLA NEGRA,"[[Miner, 3], [Ice Golem, 9], [Spear Goblins, 1...","[Miner, Ice Golem, Spear Goblins, Minion Horde...","[3, 9, 12, 12, 8, 2, 6, 10]",Victor,4300,0,1
3,2017-07-12,ladder,battusai,"[[Royal Giant, 13], [Ice Wizard, 2], [Bomber, ...","[Royal Giant, Ice Wizard, Bomber, Knight, Fire...","[13, 2, 12, 12, 9, 12, 2, 12]",Supr4,4299,ADAMLAR,"[[Golem, 6], [Zap, 12], [The Log, 2], [Elixir ...","[Golem, Zap, The Log, Elixir Collector, Mega M...","[6, 12, 2, 9, 9, 13, 12, 6]",toprak,4240,1,0
4,2017-07-12,ladder,Nova l Imperio,"[[Graveyard, 3], [Ice Wizard, 3], [Zap, 13], [...","[Graveyard, Ice Wizard, Zap, Mega Minion, Gian...","[3, 3, 13, 10, 10, 9, 13, 13]",ACM1PT!,4598,帝愛,"[[Baby Dragon, 6], [Ice Spirit, 12], [Goblins,...","[Baby Dragon, Ice Spirit, Goblins, Golem, Torn...","[6, 12, 12, 6, 6, 6, 3, 3]",兵藤 和尊,4628,1,1


# Preprocessing Data

In [21]:
# keep ladder games only
ids = games_data.type == "ladder"
games_data = games_data[ids]

# subset best players only (the filter looks at the left player's trophies)
good_players = games_data.left_trophy.astype(int) > 3000
# .copy(): the columns added below must land on an independent frame, not on a
# slice of the unfiltered data (avoids SettingWithCopyWarning)
games_data = games_data[good_players].copy()

# convert once instead of re-casting in every expression
left_trophy = games_data.left_trophy.astype(int)
right_trophy = games_data.right_trophy.astype(int)
left_crowns = games_data.left_crowns.astype(int)
right_crowns = games_data.right_crowns.astype(int)

games_data["mean_trophies"] = (left_trophy + right_trophy) / 2  # mean trophy of the two players
# trophy difference seen from the left player (probably not so informative);
# this used to be computed with "+", i.e. it was the sum, not the difference
games_data["diff_trophies"] = left_trophy - right_trophy
games_data['left_diff'] = left_crowns - right_crowns    # crown difference as target
games_data['right_diff'] = right_crowns - left_crowns   # same for right player

# subset useful columns
left_deck = games_data[["left_level_list", "left_deck_list", "left_crowns", "left_diff", "mean_trophies", "diff_trophies"]]
right_deck = games_data[["right_level_list", "right_deck_list", "right_crowns", 'right_diff', "mean_trophies", "diff_trophies"]].copy()
# flip the sign so that, like left_diff/right_diff, the trophy difference is
# expressed from the point of view of the player the row describes
right_deck["diff_trophies"] = -right_deck["diff_trophies"]
right_deck.columns = left_deck.columns  # give the right-player rows the left-player column names

# concatenate rows of right players with rows of left players as if they were independent
# (might be a problem...). DataFrame.append no longer exists in pandas 2.x, so
# use pd.concat; ignore_index=True gives a fresh 0..n-1 index to avoid future problems.
data = pd.concat([left_deck, right_deck], ignore_index=True)

In [22]:
# Two collections describing every card played in the dataset:
#   liste   - the set of distinct card names found in any deck
#   liste_2 - the matching "<card>_level" feature names
a = data.left_deck_list
liste = {card for deck in a for card in deck}
liste_2 = [card + "_level" for card in liste]

In [23]:
# map elixir to each card
# Build the name -> elixir lookup once instead of filtering cards_info for every
# card of every deck; this also avoids calling int() on a one-element Series,
# which newer pandas versions deprecate.
elixir_by_name = dict(zip(cards_info["name"], cards_info["elixir"]))


def map_elixir(x):
    """Return the elixir cost (as int) of each card name in the deck ``x``.

    Raises KeyError when a card name is not listed in ``cards_info``.
    """
    return [int(elixir_by_name[a]) for a in x]

In [24]:
# Per-deck list of elixir costs, then the average cost of each deck.
elixir_list = data["left_deck_list"].map(map_elixir)
elixir_mean = elixir_list.map(np.mean)

In [25]:
# Average elixir cost per deck, also stored as a column of `data`
# (the objective was to find the optimal mean level of elixir: no robust results obtained)
elixir_mean = elixir_list.map(np.mean)
data["elixir_mean"] = elixir_mean

Questions:
    - What is the impact of a card's level on performance?
    - What is the optimal rarity level?
    - What is the relationship between mean elixir cost and performance (find the optimal level of elixir)?
    - Give personal stats on the past few games/decks.

# Build one feature per card

In [26]:
from collections import Counter
def map(deck):
    """Count how often each known card appears in ``deck``.

    Returns a dict with one key per card in ``liste``; the value is the number
    of occurrences of that card in ``deck`` (0 when the card is absent).

    NOTE(review): this name shadows the builtin ``map`` for the rest of the notebook.
    """
    counts = Counter(deck)
    return {card: counts[card] for card in liste}
# One dict of card counts per deck, expanded into one column per card.
flat_deck = data["left_deck_list"].map(map)
cards = pd.DataFrame(flat_deck.tolist())

# Build one feature per level

In [27]:
%%time

def map_again(tup):
    """Build the per-card level features for one deck.

    ``tup`` is an iterable of (level, card name) pairs. The result has one
    entry per feature name in ``liste_2``: the level (as int) of the card whose
    name precedes the first "_" of the feature name, or 0 when that card is not
    in the deck.
    """
    level_by_card = {card: int(level) for level, card in tup}
    features = {}
    for feature in liste_2:
        card = feature.split("_")[0]
        features[feature] = level_by_card.get(card, 0)
    return features

def format_row(x):
    """Pair each level with its card name for one row: [(level, name), ...]."""
    return list(zip(x['left_level_list'], x["left_deck_list"]))


# one list of (level, name) pairs per deck ...
merged = data.apply(format_row, axis=1)
# ... turned into one dict of level features per deck
# ("marged" (sic) is the name read by the cell that builds `levels`)
marged = merged.map(map_again)

Wall time: 1min 4s


In [28]:
# one column per "<card>_level" feature, one row per deck
levels = pd.DataFrame(marged.tolist())

In [29]:
# concatenate all features (around 200 features at the end, seems reasonable compared to nb of lines)
# elixir_mean still carries the name of the column it was mapped from
# ("left_deck_list"); label it explicitly so the feature gets a meaningful name.
X = pd.concat(
    [cards, levels, data.mean_trophies, elixir_mean.rename("elixir_mean")],
    axis=1,
)

In [34]:
# Targets for 4 different regressions (3 ordinal logit, and one binary logistic).
# The coefficients obtained cannot directly be interpreted as odds ratios
# (marginal probability), but their magnitude does not really matter; only
# their comparison is important (add odds ratios later).

y_attack = data["left_crowns"].astype(int)  # ordinal: crowns won, from 0 to 3
y_diff = data["left_diff"].astype(int)      # ordinal: crowns won - crowns lost, from -3 to +3
y_defense = y_attack - y_diff               # ordinal: crowns lost, from 0 to 3
y_won = y_diff > 0                          # logistic regression: True/False
targets = [y_diff, y_attack, y_defense, y_won]

In [None]:
%%time
from sklearn.linear_model import LogisticRegression
import mord
# train the models (would be good to add cross-validation? not sure)

# one ordinal model per ordinal target (every target but the last one) ...
lrs = [mord.LogisticAT(verbose=1) for _ in targets[:-1]]
# ... and a plain logistic regression for the last, binary target
lrs.append(LogisticRegression(n_jobs=-1, C=1e9))
# fit each estimator on its own target, in the order of `targets`
models = [estimator.fit(X, target) for target, estimator in zip(targets, lrs)]

In [None]:
# gather coefficients for each regression: one {feature name: coefficient} dict per model
coef_dict = [dict(zip(X.columns, m.coef_)) for m in models[:-1]]
# the last model keeps its coefficients in the first row of a 2-D coef_
coef_dict.append(dict(zip(X.columns, models[-1].coef_[0])))

In [None]:
# map coefs to dictionaries
# card-presence coefficients, one dict per regression
coefs_cards = [
    {name: coef for name, coef in d.items() if name in liste}
    for d in coef_dict
]
# level coefficients: take just the first (diff) regression, keyed by card name
coefs_level = {
    name.split('_')[0]: coef
    for name, coef in coef_dict[0].items()
    if name in liste_2
}
coefs_cards.append(coefs_level)

In [None]:
# Build {card: {attribute: coefficient}} from the five coefficient dicts.

from collections import defaultdict
final_dic = defaultdict(dict)
for attribute, dico in zip(["perf", "attack", "defense", "win_ratio", "level_bonus"], coefs_cards):
    for card, value in dico.items():
        final_dic[card][attribute] = value

Save the results to disk.

In [None]:
import json
import os

# Make sure the output folder exists, and let the context manager close the
# file even if serialisation fails part-way through.
out_dir = path.join(Data_path, "informative_results")
os.makedirs(out_dir, exist_ok=True)
with open(path.join(out_dir, "informative_regression.json"), "w") as out_file:
    json.dump(final_dic, out_file, indent=0)

### interpretation:
   * Performance: the presence of card x increases the probability of gaining +1 in crown difference (crowns won - crowns lost) by n % (n being the value in the JSON).
   * Attack & Defense: the presence of card x in the deck increases the probability of destroying/losing one crown by n %.
   * win_ratio: the presence of card x in the deck increases the chance of winning the game by n % (independently of the number of crowns).
   * level_bonus: adding one level to the specific card increases the difference of crowns by n %. It is probably wise to compare only cards sharing the same rarity (common with common, epic with epic...), since levels are not equally difficult to obtain across rarities.
   

In order to know which card to invest in, it would be appropriate to compute the ratio between the gain in performance (measured by level_bonus) and the cost of achieving the level upgrade (expressing this cost in a unit that is comparable across rarity levels).

---
   
Notes: 
- Some of the results might not be robust (since fewer games were played with those specific cards); some indicator of robustness could be computed (bootstrapped confidence intervals...). Nonetheless, the results obtained for win_ratio and perf are very likely to be robust (thousands of games for those cards).
- These results may only be valid for good players (only games in which the left player has more than 3000 trophies are included in the analysis).
- The analysis should be performed again after a change in the characteristics of a card.

In [None]:
def plotting_result(series, name = '', col ="b"):
    """Draw a horizontal bar chart of ``series`` sorted in ascending order.

    ``series`` is anything ``pd.Series`` accepts (e.g. a {card: coefficient}
    dict), ``name`` is used as the plot title and ``col`` as the bar colour.
    """
    ordered = pd.Series(series).sort_values()
    ordered.plot(kind="barh", figsize=(8, 15), color=col, title=name)

In [None]:
# coefficients of the first regression (crown difference)
plotting_result(coefs_cards[0], name="analysis of card overall performance")

In [None]:
# note: the attack analysis just measures the number of towers destroyed when this card is in the deck
plotting_result(coefs_cards[1], name="Attack", col='red')

In [None]:
# coefficients of the third regression (crowns lost)
plotting_result(coefs_cards[2], name="Defense", col="darkgrey")

In [None]:
# coefficients of the binary (won / not won) regression
plotting_result(coefs_cards[3], name="Win ratio", col="violet")

In [None]:
# per-card level coefficients of the crown-difference regression
plotting_result(
    coefs_level,
    name="Impact on performance of adding a level to the given card",
    col="lime",
)

In [None]:
# pprint is not imported in any visible cell, so bring it into scope here
from pprint import pprint

pprint(final_dic)

In [None]:
import matplotlib.image as mpimg

In [None]:
from matplotlib.offsetbox import (OffsetImage,AnnotationBbox)

In [None]:
import glob

# One image per PNG file, keyed by the title-cased file name without extension.
# path.basename/splitext replace the split on '\\' so the key is the bare file
# name whatever path separator the platform uses.
image_path = glob.glob(path.join(Data_path, "images", "*.png"))
images = {
    path.splitext(path.basename(i))[0].title(): mpimg.imread(i, format="png")
    for i in image_path
}
# .title() lower-cases all but the first letter of "PEKKA"; register the
# upper-case spellings as aliases (presumably the spelling used by the card
# names in the match data - verify).
images["Mini-PEKKA"] = images["Mini-Pekka"]
images["PEKKA"] = images["Pekka"]

In [None]:
def res_plot(data, images,title = ""):
    """Plot one horizontal line per card, ending with the card's image.

    Parameters
    ----------
    data : mapping or Series of {card name: value}; rows are drawn in
        ascending order of value.
    images : dict of {image key: image array}; the key of a card is its name
        with spaces replaced by '-' and dots removed.
    title : title of the axes.

    Raises KeyError when a card has no entry in ``images``.
    """
    # matplotlib.pyplot is not imported in any visible cell, so import it here
    import matplotlib.pyplot as plt

    # select the style BEFORE creating the figure so that it applies to it;
    # newer matplotlib only ships this style as 'seaborn-v0_8-white'
    try:
        plt.style.use('seaborn-white')
    except OSError:
        plt.style.use('seaborn-v0_8-white')

    f, ax = plt.subplots(figsize = (8,35))
    dat = pd.Series(data).sort_values()
    sorted_images = [images[i.replace(' ','-').replace('.','')] for i in dat.index]
    my_range = range(len(dat.index))
    ax.hlines(y=my_range, xmin=0, xmax=dat.values, color='skyblue',linewidth = 3)
    # size the y axis from the number of rows instead of a hard-coded 76
    ax.set_ylim(-1, len(dat))
    # leave room on the right for the image drawn at the end of the longest line
    ax.set_xlim(None,max(dat) +0.2)
    ax.set_title(title)
    ax.grid(color='r', linestyle='--', linewidth=2)
    for image, y, x in zip(sorted_images, my_range, dat.values):
        # anchor each image at the end of its line, half a row lower
        build_artist(ax, image, [x , y-0.5])

In [None]:
def build_artist(ax, image, xy):
    """Add ``image`` to ``ax`` as an unframed annotation box located at ``xy``.

    ``xy`` is given in data coordinates; the image is drawn at 10 % zoom with
    no padding and with box alignment (0, 0).
    """
    thumbnail = OffsetImage(image, zoom=0.1)
    thumbnail.image.axes = ax
    box = AnnotationBbox(
        thumbnail,
        xy,
        xycoords='data',
        box_alignment=(0, 0),
        frameon=False,
        pad=0,
    )
    ax.add_artist(box)

In [None]:
# image plot of the first regression's card coefficients
res_plot(coefs_cards[0], images, title="Overall Performance")

In [None]:
# image plot of the second regression's card coefficients (no title)
res_plot(coefs_cards[1], images)

In [None]:
# (leftover interactive help lookup on AnnotationBbox removed: it is IPython-only
# syntax and produces no result the analysis depends on)

In [None]:

# image keys with dashes turned back into spaces
[key.replace('-', ' ') for key in images]