In [739]:
from bs4 import BeautifulSoup
import requests
import json
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import seaborn as sns

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import cross_validation, metrics   #Additional scklearn functions
from sklearn.grid_search import GridSearchCV   #Performing grid search
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.feature_selection import RFE

%matplotlib inline

#styles
mpl.rcParams['figure.figsize'] = (10,10)

# Crawler

## Zieltasks

1. [ ] gehe auf die [offizielle LoL eSport-Seite](https://eu.lolesports.com/en)
2. [ ] navigiere durch die einzelnen Ligen (Comboboxen unter dem AufmacherTeaser) und Spielwochen (Tabs)
3. [ ] Erstelle Liste von allen Spielen, die angezeigt werden
4. [ ] Iteriere über diese Liste (_klicke_ auf "Watch match")
5. [ ] Erstelle Liste der einzelnen Submatches (Spielmodus ist meistens entweder "Best of 3" oder "Best of 5"
5. [ ] Iteriere über die Liste der Submatches (Klicke weiter auf "View Stats")
6. [ ] Schneide aus der URL jeweils den Teil hinter ".../#match-details/" aus und füge ihn an "https://acs.leagueoflegends.com/v1/stats/game/" an. Dies ergibt eine neue Liste
7. [x] Downloade jeweils die JSONs hinter diesen URLS


## rudimentärer (!) Beispielcode:
Manuell beschaffte Liste von 5 ("Best of Five") Games. Die JSONs sollen in ein Array gespeichert werden, damit sie per .insert_many() hochgeladen werden kann (Ziel ist die zukünftige Reduzierung der benötigten Verbindungen und Operations auf dem Mongo Cluster)

# Some more serious stuff 🦄

In [740]:
curWeek = 9

if curWeek == 1:
    dataWeek = 1
else:
    dataWeek = curWeek - 1

In [741]:
# Datenpfad anpassen falls notwendig
DataFile = open("./Week{}{}".format(dataWeek, ".txt"), "r") # "Week9.txt" -> anpassen
match_list = DataFile.read().split(',\n')
#print(match_list)
print(len(match_list))
DataFile.close()

355


In [742]:
r  = requests.get("https://acs.leagueoflegends.com/v1/stats/game/ESPORTSTMNT06/550608?gameHash=c7ef2f0afb99c4cd&tab=overview")
j = json.loads(r.text)
j["_id"] = j["gameId"]

In [743]:
weekArray = [41, 41, 90, 139, 180, 224, 267, 309, 355]

In [744]:
def get_json(url):
    
    r  = requests.get(url)
    print("aktuelle URL: ", url)
    j = json.loads(r.text)
    
    # Die Zeile ruft >manchmal< einen "KeyError: gameId" hervor, ohne ersichtlichen Grund. Bei nochmaligem Durchlauf
    # ohne Änderung kann es danach funktionieren -> wo liegt der Bug ? gameId ist offensichtlich in jedem Link
    # vorhanden, und die links sind nich corrupted? Vielleicht Serverabfrage?
    j["_id"] = j["gameId"] #set "primary key" on gameID => prevents multiple uploads of same Match <3
    #j["week"] = j[""] # für später :)
               
    #print("Sleeping... zzz")
    time.sleep(0.5)
    #print("I am awake!")    
    
    # scheint eine lösung zu sein -> hat direkt geklappt beim ersten Mal. 
    # Vermuteter obiger "Bug": wird durch eine zu schnelle Serverabfrage hervorgerufen, sodass die Links
    # nicht korrekt 'angezeigt' werden, bzw. Serverseitig ein Fehler entsteht, der dazu führt, dass kein JSON vorhanden
    # ist, und demnach auch kein "gameId" 
    
    print("rufe JSON der gameID:", j["_id"], " ab ...")
    return j

List Comprehensions sind großartig!

In [745]:
#jsonsWeek = [1, 2]

#for i in range(1,10):
#    jsonsWeek(i) = i
    
#jsonsWeek

#strs = ["" for x in range(1,10)]

In [746]:
if 'jsons' in locals():
    print('bla')
else:
    print('jsons dead')

bla


In [747]:
import time                      # Zeitmessung für die json Abfrage

start = time.time()

# -------------------------------------------
# Methode auswählen / andere auskommentieren
# -------------------------------------------

# Methode 1: Von .txt file einlesen (schnell)    
#f = open("../../../Daten/LCS_Spring_Split_2018/currentJSONS.txt", "r")
#jsons = f.read()

#Methode 2: jsons von URL's erzeugen (langsam)
if 'jsons' not in locals():
    jsons = []
if len(jsons) < len(match_list):
    jsons = [get_json(url) for url in match_list] # alle jsons hintereinandergeklatscht
    

    
end = time.time()
print(end - start, len(jsons))

0.0 355


In [748]:
jsons_weeks = [jsons[:x] for x in weekArray]

len(jsons_weeks[curWeek - 1])

355

In [749]:
client = pymongo.MongoClient("mongodb://bth84:ch!m3R42@cluster0-shard-00-00-rafzz.mongodb.net:27017,cluster0-shard-00-01-rafzz.mongodb.net:27017,cluster0-shard-00-02-rafzz.mongodb.net:27017/test?ssl=true&replicaSet=Cluster0-shard-0&authSource=admin")
db = client.test

In [750]:
games = db.get_collection("games")
games.drop()
games.count()

0

In [751]:
for el in jsons_weeks[curWeek - 1]:
    try:
        games.insert_one(el)
        print(el["_id"], " wird hinzugefügt")
    except:
        print(el["_id"], " bereits vorhanden")
#games.insert_many(jsons)
#games.count()

1002440062  wird hinzugefügt
1002440106  wird hinzugefügt
1002440095  wird hinzugefügt
1002440084  wird hinzugefügt
1002440076  wird hinzugefügt
1002440127  wird hinzugefügt
1002440132  wird hinzugefügt
1002440143  wird hinzugefügt
1002440150  wird hinzugefügt
1002440161  wird hinzugefügt
1002460019  wird hinzugefügt
1002450027  wird hinzugefügt
1002450032  wird hinzugefügt
1002450042  wird hinzugefügt
1002450048  wird hinzugefügt
1002460073  wird hinzugefügt
1002460082  wird hinzugefügt
1002460089  wird hinzugefügt
1002460098  wird hinzugefügt
1002450084  wird hinzugefügt
1002460113  wird hinzugefügt
550247  wird hinzugefügt
550246  wird hinzugefügt
550275  wird hinzugefügt
550268  wird hinzugefügt
550356  wird hinzugefügt
550351  wird hinzugefügt
550331  wird hinzugefügt
550374  wird hinzugefügt
550369  wird hinzugefügt
550360  wird hinzugefügt
550433  wird hinzugefügt
550432  wird hinzugefügt
550425  wird hinzugefügt
550454  wird hinzugefügt
550441  wird hinzugefügt
550590  wird hin

610592  wird hinzugefügt
610634  wird hinzugefügt
610638  wird hinzugefügt
610658  wird hinzugefügt
610665  wird hinzugefügt
1002500096  wird hinzugefügt
1002500103  wird hinzugefügt
1002500110  wird hinzugefügt
1002500116  wird hinzugefügt
1002500123  wird hinzugefügt
1002500146  wird hinzugefügt
1002500150  wird hinzugefügt
1002500155  wird hinzugefügt
1002500163  wird hinzugefügt
1002500172  wird hinzugefügt
1002440102  wird hinzugefügt
1002440110  wird hinzugefügt
1002440121  wird hinzugefügt
1002430172  wird hinzugefügt
1002430189  wird hinzugefügt
1002430197  wird hinzugefügt
1002430202  wird hinzugefügt
1002430210  wird hinzugefügt
1002440088  wird hinzugefügt
1002440096  wird hinzugefügt
610806  wird hinzugefügt
610809  wird hinzugefügt
610818  wird hinzugefügt
610830  wird hinzugefügt
610849  wird hinzugefügt
610854  wird hinzugefügt
600924  wird hinzugefügt
600927  wird hinzugefügt
600940  wird hinzugefügt
600955  wird hinzugefügt
600967  wird hinzugefügt
600973  wird hinzuge

In [752]:
games.count()

326

In [753]:
cursor = games.aggregate([
    {"$group" : {"_id" : "$gameId", "count": {"$sum" : 1}}}
])
for el in cursor:
    print(el)

{'_id': 601342, 'count': 1}
{'_id': 601330, 'count': 1}
{'_id': 601273, 'count': 1}
{'_id': 601072, 'count': 1}
{'_id': 600967, 'count': 1}
{'_id': 600927, 'count': 1}
{'_id': 600924, 'count': 1}
{'_id': 610854, 'count': 1}
{'_id': 610818, 'count': 1}
{'_id': 1002440096, 'count': 1}
{'_id': 1002430210, 'count': 1}
{'_id': 1002430202, 'count': 1}
{'_id': 1002430172, 'count': 1}
{'_id': 1002440121, 'count': 1}
{'_id': 1002440110, 'count': 1}
{'_id': 1002440102, 'count': 1}
{'_id': 1002500172, 'count': 1}
{'_id': 1002500155, 'count': 1}
{'_id': 1002500146, 'count': 1}
{'_id': 1002500116, 'count': 1}
{'_id': 1002500110, 'count': 1}
{'_id': 601045, 'count': 1}
{'_id': 1002500096, 'count': 1}
{'_id': 610665, 'count': 1}
{'_id': 610658, 'count': 1}
{'_id': 610634, 'count': 1}
{'_id': 610569, 'count': 1}
{'_id': 610551, 'count': 1}
{'_id': 610520, 'count': 1}
{'_id': 600383, 'count': 1}
{'_id': 600353, 'count': 1}
{'_id': 600340, 'count': 1}
{'_id': 600331, 'count': 1}
{'_id': 610224, 'count':

Liste alle Spieler über alle Matches (mit Ihrer jeweiligen Anzahl Matches) auf:

In [754]:
cursor = games.aggregate([
    {"$unwind": "$participantIdentities"},
    {"$group" : {"_id" : "$participantIdentities.player.summonerName",
                 "game_count": {"$sum" : 1}}},
    {"$project": {"_id" : 0,  #<<< mit '1' und '0' kann man in einem $project einzelne Felder ein/ausblenden
                  "SummonerName" : "$_id",
                  "match_count": "$game_count"
                }}
])
for el in cursor:
    print(el)

{'SummonerName': 'KT Ucal', 'match_count': 8}
{'SummonerName': 'AFs Mowgli', 'match_count': 7}
{'SummonerName': 'OPT Dhokla', 'match_count': 4}
{'SummonerName': 'FLY Fly', 'match_count': 10}
{'SummonerName': ' JAG Grace', 'match_count': 2}
{'SummonerName': 'KZ Khan', 'match_count': 22}
{'SummonerName': 'H2K Santorin', 'match_count': 4}
{'SummonerName': 'JAG Teddy', 'match_count': 35}
{'SummonerName': 'JAG UmTi', 'match_count': 35}
{'SummonerName': 'bbq IgNar', 'match_count': 35}
{'SummonerName': 'JAG Justice', 'match_count': 8}
{'SummonerName': 'ROX Mightybear', 'match_count': 2}
{'SummonerName': 'SKT Faker', 'match_count': 36}
{'SummonerName': 'SKT Untara', 'match_count': 19}
{'SummonerName': 'KT Mata', 'match_count': 32}
{'SummonerName': 'KT PawN', 'match_count': 24}
{'SummonerName': 'SKT Effort', 'match_count': 13}
{'SummonerName': 'SKT Bang', 'match_count': 36}
{'SummonerName': 'KT Smeb', 'match_count': 32}
{'SummonerName': 'ROX Key', 'match_count': 34}
{'SummonerName': 'AFs Kramer

Liste Spieler mit noch mehr Stats auf! Müssen dafür das participants-Array $unwind'en:

**Update:** Das wird ne größere Nummer mit _\$project_ und _\$map_

**Update 2:** hierfür müssen alle "true" und "false" Werte in Nullen und einsen Konvertiert werden

In [755]:
cursor = games.aggregate([
    {"$unwind": "$participantIdentities"},
    {"$unwind": "$participants"},
    {"$project" : {"is_equal_participant" : {"$eq": ["$participants.participantId","$participantIdentities.participantId"]},
                    "d": "$$ROOT"}},
    {"$match" : {   "is_equal_participant" : True }},
    {"$unwind": "$d.teams"},
    {"$project" : {"is_equal_team" : {"$eq": ["$d.teams.teamId","$d.participants.teamId"]},
                    "d": "$d"}},
    {"$match" : {   "is_equal_team" : True }},
    {"$group" : {"_id" : "$d.participantIdentities.player.summonerName",
                 "match_count": {"$sum" : 1},
                                  
                 # TODO: Hier neue Datasets generieren !
                 # -------------------------------------
                 "team_towerkills" : {"$avg": "$d.teams.towerKills"},
                 "team_inhibitorKills" : {"$avg": "$d.teams.inhibitorKills"},
                 "team_baronKills" : {"$avg": "$d.teams.baronKills"},
                 "team_dragonKills" : {"$avg": "$d.teams.dragonKills"},
                 "team_riftHeraldKills" : {"$avg": "$d.teams.riftHeraldKills"},
                 "Kills" : {"$avg": "$d.participants.stats.kills"},
                 "Deaths" : {"$avg": {
                     "$cond" : {
                         "if" : {"$eq" : ["$d.participants.stats.deaths", 0]},
                         "then" : 1,
                         "else" : "$d.participants.stats.deaths"}}
                            },
                 "Assists" : {"$avg": "$d.participants.stats.assists"},
                 "visionScore" : {"$avg" : "$d.participants.stats.visionScore"},
                 "timeCCingOthers" : {"$avg" : "$d.participants.stats.timeCCingOthers"},
                 "goldEarned" : {"$avg" : "$d.participants.stats.goldEarned"},
                 "totalTimeCrowdControlDealt" : {"$avg" : "$d.participants.stats.totalTimeCrowdControlDealt"},
                 "perkPrimaryStyle" : {"$avg" : "$d.participants.stats.perkPrimaryStyle"},
                 "perkSubStyle" : {"$avg" : "$d.participants.stats.perkSubStyle"},
                 "goldPerMinDeltas10-20" : {"$avg" : "$d.participants.timeline.goldPerMinDeltas.10-20"},
                 "goldPerMinDeltas0-10" : {"$avg" : "$d.participants.timeline.goldPerMinDeltas.0-10"},
                 "xpPerMinDeltas10-20" : {"$avg" : "$d.participants.timeline.xpPerMinDeltas.10-20"},
                 "xpPerMinDeltas0-10" : {"$avg" : "$d.participants.timeline.xpPerMinDeltas.0-10"},
                 "creepsPerMinDeltas10-20" : {"$avg" : "$d.participants.timeline.creepsPerMinDeltas.10-20"},
                 "creepsPerMinDeltas0-10" : {"$avg" : "$d.participants.timeline.creepsPerMinDeltas.0-10"},
                 "damageTakenPerMinDeltas10-20" : {"$avg" : "$d.participants.timeline.damageTakenPerMinDeltas.10-20"},
                 "damageTakenPerMinDeltas0-10" : {"$avg" : "$d.participants.timeline.damageTakenPerMinDeltas.0-10"},
                 "gameDuration" : {"$avg" : "$d.gameDuration"},
                 "totalDamageDealtToChampions" : {"$avg" : "$d.participants.stats.totalDamageDealtToChampions"},
                 "damageDealtToObjectives" : {"$avg" : "$d.participants.stats.damageDealtToObjectives"},
                 "damageDealtToTurrets" : {"$avg" : "$d.participants.stats.damageDealtToTurrets"},
                 "totalHeal" : {"$avg" : "$d.participants.stats.totalHeal"},
                 "damageSelfMitigated" : {"$avg" : "$d.participants.stats.damageSelfMitigated"},
                 "totalDamageTaken" : {"$avg" : "$d.participants.stats.totalDamageTaken"},
                 "neutralMinionsKilledTeamJungle" : {"$avg" : "$d.participants.stats.neutralMinionsKilledTeamJungle"},
                 "neutralMinionsKilledEnemyJungle" : {"$avg" : "$d.participants.stats.neutralMinionsKilledEnemyJungle"},
                 
                 # feature Engineering
                 "win_ratio" : {"$avg": {
                     "$cond" : {
                         "if" : {
                             "$eq" : ["$d.participants.stats.win", True]
                         },
                         "then" : 1,
                         "else" : 0}
                     }
                 },                 
                }
    },
    {"$project": {"team_towerkills" : "$team_towerkills",
                  "team_inhibitorkills" : "$team_inhibitorKills",
                  "team_baronkills" : "$team_baronKills",
                  "team_dragonkills" : "$team_dragonKills",
                  "team_riftheraldkills" : "$team_riftHeraldKills",
                  "KDA": { "$divide": [ {"$add": ["$Kills","$Assists"] }, "$Deaths" ] },
                  "Match_Count" : "$match_count",
                  "Kills" : "$Kills", #-> KDA -> Dimensionsreduzierung
                  "Deaths" : "$Deaths", #-> KDA -> Dimensionsreduzierung
                  "Assists" : "$Assists", #-> KDA -> Dimensionsreduzierung
                  "visionScore" : "$visionScore", # -> utilityScore
                  "timeCCingOthers" : "$timeCCingOthers", # -> utilityScore #timeCCingOthers is the new stat (not yet deployed) which offe a more precise "score" of CC, as each CC has a value, hard CC value is the duration the CC as been applied, soft CC only a 1/2 of the duration and slow for 1/6 of the duration.
                  "goldEarned" : "$goldEarned",
                  "totalTimeCrowdControlDealt" : "$totalTimeCrowdControlDealt", # old?! -> timeCCingOthers is the new stat (not yet deployed) which offe a more precise "score" of CC, as each CC has a value, hard CC value is the duration the CC as been applied, soft CC only a 1/2 of the duration and slow for 1/6 of the duration.
                  "perkPrimaryStyle" : "$perkPrimaryStyle",  #keine Ahnung was das sein soll
                  "perkSubStyle" : "$perkSubStyle",         #keine Ahnung was das sein soll
                  "goldPerMinDeltas10-20" : "$goldPerMinDeltas10-20", #-> midPerformance ?!
                  "goldPerMinDeltas0-10" : "$goldPerMinDeltas0-10", #-> earlyPerformance ?!
                  "xpPerMinDeltas10-20" : "$xpPerMinDeltas10-20", #-> midPerformance ?!
                  "xpPerMinDeltas0-10" : "$xpPerMinDeltas0-10", #-> earlyPerformance ?!
                  "creepsPerMinDeltas10-20" : "$creepsPerMinDeltas10-20", #-> midPerformance ?!
                  "creepsPerMinDeltas0-10" : "$creepsPerMinDeltas0-10", #-> earlyPerformance ?!
                  "damageTakenPerMinDeltas10-20" : "$damageTakenPerMinDeltas10-20", #-> midPerformance ?!
                  "damageTakenPerMinDeltas0-10" : "$damageTakenPerMinDeltas0-10", #-> earlyPerformance ?!
                  "win_ratio" : "$win_ratio",
                  "totalHeal" : "$totalHeal", # -> DefScore
                  "damageSelfMitigated" : "$damageSelfMitigated", # -> DefScore
                  "totalDamageTaken" : "$totalDamageTaken", # -> DefScore
                  "gameDuration" : "$gameDuration",    # zur Berechnung anderer Werte sinnvoll
                  "goldPerMin" : {"$divide" : ["$goldEarned", {"$divide" : ["$gameDuration",60]}]},
                  "dmgToChampsPerMin" : {"$divide" : ["$totalDamageDealtToChampions", {"$divide" : ["$gameDuration",60]}]},
                  "damageDealtToObjectivesPerMin" : {"$divide" : ["$damageDealtToObjectives", {"$divide" : ["$gameDuration",60]}]},
                  "damageDealtToTurretsPerMin" : {"$divide" : ["$damageDealtToTurrets", {"$divide" : ["$gameDuration",60]}]},
                  "defenseScore" : {"$multiply" : [ {"$divide" : [ {"$divide" : [ "$totalDamageTaken", {"$add" : ["$totalHeal", "$damageSelfMitigated"]}]}, {"$divide" : ["$gameDuration",60]}]}, 100]},
                  "utilityScore" : {"$divide" : [ {"$divide" : [{"$multiply" : ["$visionScore", "$timeCCingOthers"]}, {"$divide" : ["$gameDuration",60]}]}, 10]},
                  "pushingScore" : {"$multiply" : [ {"$divide" : [ {"$divide" : [ { "$multiply" : [ {"$add" : ["$damageDealtToObjectives", "$damageDealtToTurrets"]}, "$team_dragonKills"] }, {"$divide" : ["$gameDuration",60]}]}, 100]}, "$team_baronKills"]},
                  "JungleControl" : {"$divide" : [ {"$add" : ["$neutralMinionsKilledTeamJungle", "$neutralMinionsKilledEnemyJungle"]}, {"$divide" : ["$gameDuration",60]}]},
                  "dmgScore" : {"$divide" : [ {"$divide" : ["$totalDamageDealtToChampions", {"$divide" : ["$gameDuration",60]}]}, 100]},# = DMGtoChampsPerMin/100
                 }
    }
])

#list(cursor)
df_players = pd.DataFrame(list(cursor))
df_players['Team'] = df_players._id.apply(lambda x: x.split()[0])

# Sortieren?
#df.sort_values(by=['_id'])

In [756]:
df_players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 174 entries, 0 to 173
Data columns (total 40 columns):
Assists                          174 non-null float64
Deaths                           174 non-null float64
JungleControl                    174 non-null float64
KDA                              174 non-null float64
Kills                            174 non-null float64
Match_Count                      174 non-null int64
_id                              174 non-null object
creepsPerMinDeltas0-10           174 non-null float64
creepsPerMinDeltas10-20          174 non-null float64
damageDealtToObjectivesPerMin    174 non-null float64
damageDealtToTurretsPerMin       174 non-null float64
damageSelfMitigated              174 non-null float64
damageTakenPerMinDeltas0-10      174 non-null float64
damageTakenPerMinDeltas10-20     174 non-null float64
defenseScore                     174 non-null float64
dmgScore                         174 non-null float64
dmgToChampsPerMin                1

In [757]:
feature_list = [
      #'team_towerkills',
      #'team_inhibitorkills',
      #'team_baronkills',
      #'team_dragonkills',
      #'team_riftheraldkills',
      'KDA',
      #'Match_Count',
      #'visionScore', # -> utilityScore
      #'timeCCingOthers', # -> utilityScore #timeCCingOthers is the new stat (not yet deployed) which offe a more precise "score" of CC, as each CC has a value, hard CC value is the duration the CC as been applied, soft CC only a 1/2 of the duration and slow for 1/6 of the duration.
      #'goldEarned',
      #'totalTimeCrowdControlDealt', # old?! -> timeCCingOthers is the new stat (not yet deployed) which offe a more precise "score" of CC, as each CC has a value, hard CC value is the duration the CC as been applied, soft CC only a 1/2 of the duration and slow for 1/6 of the duration.
      'perkPrimaryStyle',  #keine Ahnung was das sein soll
      'perkSubStyle',         #keine Ahnung was das sein soll
      #'goldPerMinDeltas10-20', #-> midPerformance ?!
      #'goldPerMinDeltas0-10', #-> earlyPerformance ?!
      #'xpPerMinDeltas10-20', #-> midPerformance ?!
      #'xpPerMinDeltas0-10', #-> earlyPerformance ?!
      #'creepsPerMinDeltas10-20', #-> midPerformance ?!
      #'creepsPerMinDeltas0-10', #-> earlyPerformance ?!
      #'damageTakenPerMinDeltas10-20', #-> midPerformance ?!
      #'damageTakenPerMinDeltas0-10', #-> earlyPerformance ?!
      #'win_ratio',
      #'totalHeal', # -> DefScore
      #'damageSelfMitigated', # -> DefScore
      #'totalDamageTaken', # -> DefScore
      #'gameDuration',    # zur Berechnung anderer Werte sinnvoll
      'goldPerMin',
      'dmgToChampsPerMin',
      'damageDealtToObjectivesPerMin',
      'damageDealtToTurretsPerMin',
      'defenseScore',
      'utilityScore',
      #'pushingScore',
      'JungleControl',
      #'dmgScore',                 
]

In [758]:
teams_wavg = df_players.groupby('Team').apply(lambda x: np.average(x[feature_list], weights=x["Match_Count"], axis=0))
df_teams = pd.DataFrame(columns=feature_list, index=teams_wavg.index)

for team in teams_wavg.index:
    df_teams.at[team] = teams_wavg[team]

In [759]:
df_teams

Unnamed: 0_level_0,KDA,perkPrimaryStyle,perkSubStyle,goldPerMin,dmgToChampsPerMin,damageDealtToObjectivesPerMin,damageDealtToTurretsPerMin,defenseScore,utilityScore,JungleControl
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
100,3.88641,8211.25,8250.0,358.048,375.215,286.303,85.3168,1.72649,3.9696,0.778916
AFs,4.15092,8211.52,8260.0,383.608,364.501,307.668,98.0236,1.74588,4.36908,0.994305
C9,4.13716,8207.5,8205.0,380.737,436.941,339.164,107.824,2.04243,3.8625,0.999047
CG,4.16326,8228.75,8228.75,365.892,370.791,310.399,88.5181,1.89459,4.21205,0.835942
CLG,3.59121,8231.25,8253.75,354.255,450.817,245.451,72.3221,1.99143,4.50475,0.826128
FLY,2.6669,8231.25,8245.0,348.982,386.985,233.086,60.4721,1.82312,4.26186,0.879383
FNC,4.82607,8211.25,8261.25,380.06,413.375,336.007,116.474,1.74564,3.68698,0.946177
FOX,4.13979,8240.0,8210.0,379.735,449.404,345.112,96.0134,1.91506,4.22649,0.958589
G2,4.28812,8228.24,8256.47,370.058,445.316,320.105,103.604,1.80175,4.76069,0.8719
GGS,2.67887,8192.5,8250.0,345.946,384.321,225.3,58.344,1.96489,4.10276,0.837123


In [760]:
#sns.pairplot(df_teams, diag_kind="kde")

In [761]:
column_list = [ 
    '_id',
    'win_team1',
    'player_team1',
    'player_team2'
]

cursor_games = games.aggregate([
    {"$project" : {
        "_id" : "$gameId",
        "player_team1": {"$arrayElemAt" : ["$participantIdentities", 0]},
        "player_team2": {"$arrayElemAt" : ["$participantIdentities", 5]},
        "win_team1" : {"$arrayElemAt" : ["$teams", 0]}
    }},
    {"$project" : {
        "_id" : "$_id",
        "win_team1" : "$win_team1.win",
        "player_team1": "$player_team1.player.summonerName",
        "player_team2": "$player_team2.player.summonerName"
    }}
])

t1_flist = list(map(lambda x: "team1_" + str(x), df_teams.columns))
t2_flist = list(map(lambda x: "team2_" + str(x), df_teams.columns))
column_list = column_list + t1_flist + t2_flist

df_games = pd.DataFrame(list(cursor_games), columns=column_list)

df_games.index = df_games._id
df_games["win_team1"].replace({"Win" : True, "Fail" : False}, inplace=True)
df_games["team1"] = df_games.player_team1.apply(lambda x: x.split()[0])
df_games["team2"] = df_games.player_team2.apply(lambda x: x.split()[0])
df_games.drop(["player_team1", "player_team2", '_id'], axis=1, inplace=True)


#Die Teamstats entsprechend eintragen
for index, game in df_games.iterrows():
    df_games.at[index] = [
        game['win_team1']] + \
        list(df_teams.loc[game['team1']]) + \
        list(df_teams.loc[game['team2']]) + \
        [game['team1']] + \
        [game['team2']]

In [762]:
def modelfit(alg, dtrain, predictors, useTrainCV=True, cv_folds=3, early_stopping_rounds=50, verbose=False, metr="auc"):
    
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain['win_team1'].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
            metrics=metr, early_stopping_rounds=early_stopping_rounds,verbose_eval=verbose)
        
        alg.set_params(n_estimators=cvresult.shape[0])
        print("best fit for n_estimators: ",cvresult.shape[0])
    
    #Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain['win_team1'],eval_metric='auc')
        
    #Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:,1]
    #dtrain_predprob2 = alg.predict_proba(dtrain[predictors])[:,0]
    
    predStats = metrics.accuracy_score(dtrain['win_team1'].values, dtrain_predictions), metrics.roc_auc_score(dtrain['win_team1'], dtrain_predprob)
    
    fileName = open('matchInfo_Week{}{}{}{}'.format(curWeek, '_', curLR, '.txt'), 'w')
    
    for item in predStats:
        fileName.write("%s\n" % item)
        
    fileName.close()
        
    #Print model report:
    #print( "\nModel Report")
    #print( "Accuracy : %.4g" % metrics.accuracy_score(dtrain['win_team1'].values, dtrain_predictions))
    #print( "AUC Score (Train): %f" % metrics.roc_auc_score(dtrain['win_team1'], dtrain_predprob))
                    
    #feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    #feat_imp.plot(kind='bar', title='Feature Importances')
    #plt.ylabel('Feature Importance Score')
    
    #cvresult.shape[0]   <<< number of fitted trees
    return 

    

In [763]:
predictors = t1_flist + t2_flist

In [764]:
df_games.drop("win_team1", axis=1)

Unnamed: 0_level_0,team1_KDA,team1_perkPrimaryStyle,team1_perkSubStyle,team1_goldPerMin,team1_dmgToChampsPerMin,team1_damageDealtToObjectivesPerMin,team1_damageDealtToTurretsPerMin,team1_defenseScore,team1_utilityScore,team1_JungleControl,...,team2_perkSubStyle,team2_goldPerMin,team2_dmgToChampsPerMin,team2_damageDealtToObjectivesPerMin,team2_damageDealtToTurretsPerMin,team2_defenseScore,team2_utilityScore,team2_JungleControl,team1,team2
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1002440062,3.371135,8215.000000,8233.750000,370.681261,341.568630,303.077430,102.864606,2.175912,3.544418,0.929535,...,8256.250000,375.041799,432.702899,310.353242,90.985043,1.798518,3.531775,0.905440,TL,TSM
1002440106,3.591210,8231.250000,8253.750000,354.254920,450.817315,245.450904,72.322055,1.991428,4.504755,0.826128,...,8205.000000,380.736872,436.941434,339.163930,107.824469,2.042431,3.862504,0.999047,CLG,C9
1002440095,4.139790,8240.000000,8210.000000,379.735237,449.403780,345.112138,96.013440,1.915062,4.226487,0.958589,...,8245.000000,348.981639,386.985058,233.085565,60.472126,1.823124,4.261858,0.879383,FOX,FLY
1002440084,4.163263,8228.750000,8228.750000,365.891545,370.790987,310.398767,88.518117,1.894587,4.212055,0.835942,...,8250.000000,345.946412,384.320514,225.300313,58.344011,1.964886,4.102755,0.837123,CG,GGS
1002440076,3.886407,8211.250000,8250.000000,358.047789,375.215369,286.303406,85.316821,1.726486,3.969604,0.778916,...,8238.750000,340.606417,347.932597,257.013220,68.539785,1.811712,4.315569,0.815372,100,OPT
1002440127,2.494165,8212.500000,8238.750000,340.606417,347.932597,257.013220,68.539785,1.811712,4.315569,0.815372,...,8233.750000,370.681261,341.568630,303.077430,102.864606,2.175912,3.544418,0.929535,OPT,TL
1002440132,2.666905,8231.250000,8245.000000,348.981639,386.985058,233.085565,60.472126,1.823124,4.261858,0.879383,...,8256.250000,375.041799,432.702899,310.353242,90.985043,1.798518,3.531775,0.905440,FLY,TSM
1002440143,3.886407,8211.250000,8250.000000,358.047789,375.215369,286.303406,85.316821,1.726486,3.969604,0.778916,...,8253.750000,354.254920,450.817315,245.450904,72.322055,1.991428,4.504755,0.826128,100,CLG
1002440150,2.678872,8192.500000,8250.000000,345.946412,384.320514,225.300313,58.344011,1.964886,4.102755,0.837123,...,8205.000000,380.736872,436.941434,339.163930,107.824469,2.042431,3.862504,0.999047,GGS,C9
1002440161,4.139790,8240.000000,8210.000000,379.735237,449.403780,345.112138,96.013440,1.915062,4.226487,0.958589,...,8228.750000,365.891545,370.790987,310.398767,88.518117,1.894587,4.212055,0.835942,FOX,CG


In [765]:
LR = [0.15, 0.1, 0.05]

for curLR in LR:
    xgb1 = XGBClassifier(
        learning_rate = curLR,
        n_estimators=500,
        max_depth=6,
        min_child_weight=5,
        gamma=0.1,
        subsample=0.75,
        colsample_bytree=0.6,
        objective= 'binary:logistic',
        nthread=6,
        scale_pos_weight=.9,
        seed=5
        )
    #backward feature selection
    #selector = RFE(xgb1, 5, step=1)
    #selector = selector.fit(df_games.drop(["win_team1","team1","team2"], axis=1), df_games["win_team1"])
    #predictors= df_games.drop(["win_team1","team1","team2"], axis=1).columns[selector.support_]
    
    #fitting
    trees = modelfit(xgb1, df_games, predictors, metr="error")

    def getTstats(team):
        return list(df_teams.loc[team])

    matches = pd.read_csv("SpringSplit2018_Matches_Week{}{}".format(curWeek, ".txt"), delimiter=':', encoding='latin1', header=None)
    matches.replace({'SKT1' : 'SKT', 'kgm' : 'KDM', 'MFS' : 'MSF', '´CG' : 'CG'}, inplace=True)
    labels = t1_flist+t2_flist
    preds = pd.DataFrame(columns=labels, index=matches.index)


    for index, match in matches.iterrows():
        stats = pd.Series(getTstats(match[0])+getTstats(match[1]), labels)
        preds.loc[index] = stats


    preds = preds.astype(np.float)
    pred_probs_inv = pd.DataFrame(xgb1.predict_proba(preds)[:,0])
    pred_probs = pd.DataFrame(xgb1.predict_proba(preds)[:,1])

    pred_probs

    pred_probs.to_csv('matchWinPerc_Week{}{}{}{}'.format(curWeek, '_', curLR, '.txt'), index=False, float_format='%.2f', header=False)
    pred_probs_inv.to_csv('matchWinPerc_inv_Week{}{}{}{}'.format(curWeek, '_', curLR, '.txt'), index=False, float_format='%.2f', header=False)
    
    X = df_games.drop(['win_team1','team1', 'team2'], axis=1)
    y = df_games['win_team1']
    
    accs = []
    for i in range(40, 150, 10):
        sss = StratifiedShuffleSplit(n_splits=i)
        accs_i = []
        for train_index, test_index in sss.split(X, y):
            #print("TRAIN:", train_index, "TEST:", test_index)
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            xgb1.fit(X_train, y_train, eval_metric='error')
            accs_i.extend([metrics.accuracy_score(y_test.values, xgb1.predict(X_test))])

        accs.extend([np.average(accs_i)])
    
    sss_values = np.average(accs)
    print(sss_values)
    
    fileName = open('matchSSS_Week{}{}{}{}'.format(curWeek, '_', curLR, '.txt'), 'w')    
    fileName.write("%s\n" % sss_values)        
    fileName.close()
    

best fit for n_estimators:  29
0.623513145104
best fit for n_estimators:  73
0.619436182122
best fit for n_estimators:  15
0.633161328265


In [766]:
np.__version__

'1.12.1'

In [768]:
team_data = {
    'year': 2018,
    'season': [
        {   'league' : 'LCK',
            'teams': [
            {   'team' : 'BBQ',
                'players' : [
                'BBQ Crazy',
                'BBQ Trick',
                'BBQ Tempt',
                'BBQ Ghost',
                'BBQ IgNar',
                'BBQ Bono',
                'BBQ Aries'                    
                ]},
            {   'team' : 'MVP',
                'players': [
                'MVP ADD',
                'MVP Yondu',
                'MVP Ian',
                'MVP MaHa',
                'MVP Max',
                'MVP Pilot',
                'MVP Motive'
                ]},
            {   'team' : 'SKT',
                'players': [
                'SKT Untara',
                'SKT Blank',
                'SKT Faker',
                'SKT Bang',
                'SKT Wolf',
                'SKT Pirean',
                'SKT Thal',
                'SKT Effort',
                'SKT Blossom',
                'SKT Leo'
                ]},
            {   'team' : 'KT',
                'players': [
                'KT Smeb',
                'KT Score',
                'KT Ucal',
                'KT Deft',
                'KT Mata',
                'KT Rush'
                ]},
            {   'team' : 'AFS',
                'players': [
                'AFS Kiin',
                'AFS Spirit',
                'AFS Kuro',
                'AFS Kramer',
                'AFS TusiN',
                'AFS Mowgli',
                'AFS Summit',
                'AFS Ruby',
                'AFS Aiming',
                'AFS Jelly'
                ]},
            {   'team' : 'GRF',
                'players': [
                'GRF Sword',
                'GRF Tarzan',
                'GRF Rather',
                'GRF Viper',
                'GRF Lehends',
                'GRF Chovy'
                ]},
            {   'team' : 'HLE',
                'players': [
                'HLE Lindarang',
                'HLE SeongHwan',
                'HLE Lava',
                'HLE Sangyoon',
                'HLE Key',
                'HLE Kuzan',
                'HLE Mightybear',
                'HLE Brook',
                'HLE Clever',
                'HLE Asper'
                ]},
            {   'team' : 'GEN',
                'players': [
                'GEN CuVee',
                'GEN Ambition',
                'GEN Crown',
                'GEN Ruler',
                'GEN CoreJJ',
                'GEN Fly',
                'GEN Haru',
                'GEN Mong',
                'GEN Life'
                ]},
            {   'team' : 'JAG',
                'players': [
                'JAG SoHwan',
                'JAG Umti',
                'JAG Grace',
                'JAG Teddy',
                'JAG Nova',
                'JAG KaKAO',
                'JAG Justice'
                ]},
            {   'team' : 'KZ',
                'players': [
                'KZ Khan',
                'KZ Peanut',
                'KZ Bdd',
                'KZ PraY',
                'KZ GorillA',
                'KZ Cuzz'
                ]},
            ]},
        {   'league' : 'NA LCS',
            'teams': [
            {   'team' : '100',
                'players' : [
                '100 Ssumday',
                '100 Meteos',
                '100 Ryu',
                '100 Cody Sun',
                '100 Aphromoo',
                '100 Stunt',
                '100 Rikara',
                '100 Brandini',
                '100 Levi',
                '100 Linsanity'
                ]},
            {   'team' : 'TL',
                'players': [
                'TL Impact',
                'TL Xmithie',
                'TL Pobelter',
                'TL Doublelift',
                'TL Olleh',
                'TL Hard',
                'TL Joey',
                'TL Insanity',
                'TL V1PER',
                'TL Shoryu'
                ]},
            {   'team' : 'TSM',
                'players': [
                'TSM Hauntzer',
                'TSM MikeYeung',
                'TSM Bjergsen',
                'TSM Zven',
                'TSM Mithy',
                'TSM MrRallez',
                'TSM Grig',
                'TSM Ablazeolive',
                'TSM Shady',
                'TSM PieCakeLord'
                ]},
            {   'team' : 'CLG',
                'players': [
                'CLG Darshan',
                'CLG Reignover',
                'CLG Huhi',
                'CLG Stixxay',
                'CLG Biofrost',
                'CLG Fallenbandit',
                'CLG Tuesday',
                'CLG Fill',
                'CLG Wiggily',
                'CLG Auto'
                ]},
            {   'team' : 'CG',
                'players': [
                'CG Solo',
                'CG Lira',
                'CG Febiven',
                'CG Apollo',
                'CG Hakuho',
                'CG Piglet',
                'CG Moon',
                'CG Maxtrobo',
                'CG Vulcan',
                'CG Sun'
                ]},
            {   'team' : 'C9',
                'players': [
                'C9 Licorice',
                'C9 Svenskeren',
                'C9 Jensen',
                'C9 Sneaky',
                'C9 Smoothie',
                'C9 Goldenglue',
                'C9 Keith',
                'C9 Zeyzal',
                'C9 Blaber',
                'C9 Shiro'
                ]},
            {   'team' : 'FLY',
                'players': [
                'FLY Flame',
                'FLY Santorin',
                'FLY Keane',
                'FLY WildTurtle',
                'FLY Kwon',
                'FLY PekinWoof',
                'FLY AnDa',
                'FLY Ngo',
                'FLY Erry',
                'FLY JayJ'
                ]},
            {   'team' : 'FOX',
                'players': [
                'FOX Huni',
                'FOX Dardoch',
                'FOX Fenix',
                'FOX Altec',
                'FOX Adrian',
                'FOX Yusui',
                'FOX OddOrange',
                'FOX Feng',
                'FOX Damonte',
                'FOX Lost',
                'FOX NintendudeX'
                ]},
            {   'team' : 'GGS',
                'players': [
                'GGS Lourlo',
                'GGS Contractz',
                'GGS Mickey',
                'GGS Deftly',
                'GGS Matt',
                'GGS Sheep',
                'GGS Benji',
                'GGS Zig',
                'GGS bobqin',
                'GGS Potluck'
                ]},
            {   'team' : 'OPT',
                'players': [
                'OPT Dhokla',
                'OPT Akaadian',
                'OPT PowerOfEvil',
                'OPT Arrow',
                'OPT Big',
                'OPT Gate',
                'OPT Allorim',
                'OPT Eclipse',
                'OPT Fanatiik',
                'OPT Palafox'
                ]},
            ]},
        {   'league' : 'EU LCS',
            'teams': [
            {   'team' : 'VIT',
                'players' : [
                'VIT Cabochard',
                'VIT Gilius',
                'VIT Jizuke',
                'VIT Attila',
                'VIT Jactroll',
                'VIT Shemek',
                'VIT Nji'
                ]},
            {   'team' : 'SPY',
                'players': [
                'SPY Odoamne',
                'SPY Xerxe',
                'SPY Nisqy',
                'SPY Kobbe',
                'SPY kaSing',
                'SPY Kronos'
                ]},
            {   'team' : 'GIA',
                'players': [
                'GIA Ruin',
                'GIA Djok',
                'GIA Betsy',
                'GIA Steeelback',
                'GIA SirNukesAlot',
                'GIA Targamas',
                'GIA Th3Antonio'
                ]},
            {   'team' : 'G2K',
                'players': [
                'H2K Smittyj',
                'H2K Shook',
                'H2K Selfie',
                'H2K Sheriff',
                'H2K Promisq',
                'H2K Candyfloss'
                ]},
            {   'team' : 'UOL',
                'players': [
                'UOL WhiteKnight',
                'UOL Kold',
                'UOL Exileh',
                'UOL Samux',
                'UOL Totoro',
                'UOL AudreyLsSapa',
                'UOL Neon'
                ]},
            {   'team' : 'S04',
                'players': [
                'S04 Vizicsacsi',
                'S04 Amazin',
                'S04 Nukeduck',
                'S04 Upset',
                'S04 Vander',
                'S04 Boris',
                'S04 Pride'
                ]},
            {   'team' : 'G2',
                'players': [
                'G2 Wunder',
                'G2 Jankos',
                'G2 Perkz',
                'G2 Hjarnan',
                'G2 Wadid',
                'G2 Sacre'
                ]},
            {   'team' : 'ROC',
                'players': [
                'ROC Profit',
                'ROC Memento',
                'ROC Blanc',
                'ROC HeaQ',
                'ROC Norskeren',
                'ROC EdinPriqtel'
                ]},
            {   'team' : 'FNC',
                'players': [
                'FNC sOAZ',
                'FNC Broxah',
                'FNC Caps',
                'FNC Rekkles',
                'FNC Hylissang',
                'FNC Bwipo'
                ]},
            {   'team' : 'MSF',
                'players': [
                'MSF Alphari',
                'MSF Maxlore',
                'MSF Sencux',
                'MSF Hans Sama',
                'MSF Mikyx',
                'MSF H1iva',
                'MSF Jesiz',
                'MSF Special',
                'MSF Obsess'
                ]}
            ]}
        ]}

In [769]:
#bei jedem Player den vorangestellten Taem-Tag entfernen

for i_l, league in enumerate(team_data['season']):
    print(league['league'])
    for i_t, team in enumerate(league['teams']):
        print(league['league'] + ' - ' + team['team'])
        for i_p, player in enumerate(team['players']):
            split = player.split()
            player = player.split()[len(split)-1] #damit es auch bei mehrmaligen Aufruf funktioniert
            print(league['league'] + ' - ' + team['team'] + ' - ' + player)
            team_data['season'][i_l]['teams'][i_t]['players'][i_p] = player

LCK
LCK - BBQ
LCK - BBQ - Crazy
LCK - BBQ - Trick
LCK - BBQ - Tempt
LCK - BBQ - Ghost
LCK - BBQ - IgNar
LCK - BBQ - Bono
LCK - BBQ - Aries
LCK - MVP
LCK - MVP - ADD
LCK - MVP - Yondu
LCK - MVP - Ian
LCK - MVP - MaHa
LCK - MVP - Max
LCK - MVP - Pilot
LCK - MVP - Motive
LCK - SKT
LCK - SKT - Untara
LCK - SKT - Blank
LCK - SKT - Faker
LCK - SKT - Bang
LCK - SKT - Wolf
LCK - SKT - Pirean
LCK - SKT - Thal
LCK - SKT - Effort
LCK - SKT - Blossom
LCK - SKT - Leo
LCK - KT
LCK - KT - Smeb
LCK - KT - Score
LCK - KT - Ucal
LCK - KT - Deft
LCK - KT - Mata
LCK - KT - Rush
LCK - AFS
LCK - AFS - Kiin
LCK - AFS - Spirit
LCK - AFS - Kuro
LCK - AFS - Kramer
LCK - AFS - TusiN
LCK - AFS - Mowgli
LCK - AFS - Summit
LCK - AFS - Ruby
LCK - AFS - Aiming
LCK - AFS - Jelly
LCK - GRF
LCK - GRF - Sword
LCK - GRF - Tarzan
LCK - GRF - Rather
LCK - GRF - Viper
LCK - GRF - Lehends
LCK - GRF - Chovy
LCK - HLE
LCK - HLE - Lindarang
LCK - HLE - SeongHwan
LCK - HLE - Lava
LCK - HLE - Sangyoon
LCK - HLE - Key
LCK - HLE - K

In [770]:
def find_team(p):
    for i_l, league in enumerate(team_data['season']):
        #print(league['league'])
        for i_t, team in enumerate(league['teams']):
            #print(league['league'] + ' - ' + team['team'])
            for i_p, player in enumerate(team['players']):
                split = player.split()
                player = player.split()[len(split)-1] #damit es auch bei mehrmaligen Aufruf funktioniert
                if p == player:
                    return(team['team'])
                #print(league['league'] + ' - ' + team['team'] + ' - ' + player)
                #team_data['season'][i_l]['teams'][i_t]['players'][i_p] = player

In [771]:
df_players_s2 = df_players.copy()
df_players_s2['_id'] = df_players_s2._id.apply(lambda x: x.split()[1])
df_players_s2['Team_S2'] = df_players_s2._id.apply(lambda x: find_team(x))

In [772]:
#Wieviele Spieler haben zwischen den Seasons das Team gewechselt?
sum(df_players_s2['Team'] != df_players_s2['Team_S2'])

60

In [773]:
def get_all_players():
    result = []
    for i_l, league in enumerate(team_data['season']):
        #print(league['league'])
        for i_t, team in enumerate(league['teams']):
            #print(league['league'] + ' - ' + team['team'])
            for i_p, player in enumerate(team['players']):
                split = player.split()
                player = player.split()[len(split)-1] #damit es auch bei mehrmaligen Aufruf funktioniert
                result.append(player)
    return result

def find_new_players(players):
    result = []
    for i_l, league in enumerate(team_data['season']):
        #print(league['league'])
        for i_t, team in enumerate(league['teams']):
            #print(league['league'] + ' - ' + team['team'])
            for i_p, player in enumerate(team['players']):
                split = player.split()
                player = player.split()[len(split)-1] #damit es auch bei mehrmaligen Aufruf funktioniert
                if player not in players:
                    result.append([player,team['team']])
                #print(league['league'] + ' - ' + team['team'] + ' - ' + player)
                #team_data['season'][i_l]['teams'][i_t]['players'][i_p] = player
    return result

def find_new_teams(teams):
    result = []
    for i_l, league in enumerate(team_data['season']):
        #print(league['league'])
        for i_t, team in enumerate(league['teams']):
            #print(league['league'] + ' - ' + team['team'])
            if team['team'] not in teams:
                result.extend(team['team'])
    return result

In [774]:
#wieviele neue Spieler?
new_players = find_new_players(df_players_s2._id.values)
print('# neue Spieler: ', len(new_players))

#wieviele neue Teams?
new_teams = find_new_teams(list(set(df_players_s2.Team.values)))
print('# neue Teams: ', len(new_teams))

# neue Spieler:  100
# neue Teams:  18


In [775]:
new_players

[['Aries', 'BBQ'],
 ['Yondu', 'MVP'],
 ['Motive', 'MVP'],
 ['Pirean', 'SKT'],
 ['Leo', 'SKT'],
 ['Rush', 'KT'],
 ['Summit', 'AFS'],
 ['Ruby', 'AFS'],
 ['Jelly', 'AFS'],
 ['Sword', 'GRF'],
 ['Tarzan', 'GRF'],
 ['Rather', 'GRF'],
 ['Viper', 'GRF'],
 ['Lehends', 'GRF'],
 ['Chovy', 'GRF'],
 ['Brook', 'HLE'],
 ['Clever', 'HLE'],
 ['Asper', 'HLE'],
 ['Mong', 'GEN'],
 ['Life', 'GEN'],
 ['Umti', 'JAG'],
 ['Nova', 'JAG'],
 ['KaKAO', 'JAG'],
 ['Sun', '100'],
 ['Aphromoo', '100'],
 ['Rikara', '100'],
 ['Brandini', '100'],
 ['Levi', '100'],
 ['Linsanity', '100'],
 ['Hard', 'TL'],
 ['Joey', 'TL'],
 ['Insanity', 'TL'],
 ['V1PER', 'TL'],
 ['Shoryu', 'TL'],
 ['MrRallez', 'TSM'],
 ['Grig', 'TSM'],
 ['Ablazeolive', 'TSM'],
 ['Shady', 'TSM'],
 ['PieCakeLord', 'TSM'],
 ['Huhi', 'CLG'],
 ['Fallenbandit', 'CLG'],
 ['Tuesday', 'CLG'],
 ['Fill', 'CLG'],
 ['Wiggily', 'CLG'],
 ['Auto', 'CLG'],
 ['Lira', 'CG'],
 ['Piglet', 'CG'],
 ['Moon', 'CG'],
 ['Maxtrobo', 'CG'],
 ['Vulcan', 'CG'],
 ['Sun', 'CG'],
 ['Goldeng

In [776]:
for p in new_players:
    p = pd.Series({'_id':p[0], 'Team_S2':p[1]})
    df_players_s2 = df_players_s2.append(p, ignore_index = True)
    
df_players_s2.fillna(0, inplace=True)

In [777]:
#und jetzt noch alle entfernen, die nicht mehr auftauchen
df_players_s2._id.values

array(['Ucal', 'Mowgli', 'Dhokla', 'Fly', 'Grace', 'Khan', 'Santorin',
       'Teddy', 'UmTi', 'IgNar', 'Justice', 'Mightybear', 'Faker',
       'Untara', 'Mata', 'PawN', 'Effort', 'Bang', 'Smeb', 'Key', 'Kramer',
       'Secret', 'Caedrel', 'Edge', 'Roach', 'MaHa', 'Ian', 'Thal', 'PraY',
       'Cuzz', 'SoHwan', 'Bdd', 'Rascal', 'CoreJJ', 'Crown', 'Blossom',
       'Ambition', 'Pilot', 'sOAZ', 'Ruler', 'Wadid', 'Hjarnan', 'Kiin',
       'Jankos', 'CuVee', 'huhi', 'TusiN', 'Hans', 'aphromoo',
       'Doublelift', 'Cody', 'Betsy', 'Deftly', 'Shrimp', 'Tempt',
       'Febiven', 'LirA', 'Deft', 'LemonNation', 'kaSing', 'Keane',
       'Adrian', 'Contractz', 'Bono', 'Huni', 'Jiizuke', 'Caps', 'Altec',
       'Ghost', 'Seonghwan', 'Solo', 'Raise', 'Ryu', 'Xmithie', 'Lourlo',
       'Flame', 'Beyond', 'Fenix', 'JayJ', 'Pobelter', 'Sangyoon',
       'Smoothie', 'Akaadian', 'Kuzan', 'MikeYeung', 'Stixxay', 'Kuro',
       'Olleh', 'Haru', 'Matt', 'Peanut', 'Impact', 'Lava', 'Spirit',
       'Mi

In [778]:
players_s2 = get_all_players()

In [779]:
len(players_s2)

246

In [780]:
index_players2rm = []
players2rm = []
for i, p in enumerate(df_players_s2._id.values):
    if p not in players_s2:
        players2rm.append(p)
        index_players2rm.append(i)

In [781]:
#plausibilitaetscheck
if (df_players_s2.iloc[index_players2rm]['_id'].values == players2rm).all():
    df_players_s2.drop(index_players2rm, inplace=True)

print(len(df_players_s2))

247


In [782]:
df_players_s2

Unnamed: 0,Assists,Deaths,JungleControl,KDA,Kills,Match_Count,_id,creepsPerMinDeltas0-10,creepsPerMinDeltas10-20,damageDealtToObjectivesPerMin,...,totalDamageTaken,totalHeal,totalTimeCrowdControlDealt,utilityScore,visionScore,win_ratio,xpPerMinDeltas0-10,xpPerMinDeltas10-20,Team,Team_S2
0,3.375000,1.500000,0.543438,3.500000,1.875000,8.0,Ucal,9.400000,10.900000,294.876155,...,10562.000000,1361.875000,351.750000,1.361922,38.375000,0.625000,497.937500,567.075000,KT,KT
1,7.428571,1.857143,3.272293,4.846154,1.571429,7.0,Mowgli,0.200000,0.971429,442.292994,...,29024.714286,16657.000000,738.000000,5.768312,90.000000,0.714286,347.900000,455.900000,AFs,AFS
2,2.500000,2.750000,0.090498,1.090909,0.500000,4.0,Dhokla,7.925000,7.950000,143.680241,...,21590.750000,5038.250000,534.250000,2.597285,30.750000,0.250000,474.550000,458.825000,OPT,OPT
3,4.200000,2.400000,0.491576,2.500000,1.800000,10.0,Fly,8.910000,9.810000,233.982606,...,17184.800000,4382.400000,367.200000,2.312676,41.700000,0.300000,483.210000,515.360000,FLY,GEN
4,1.500000,1.500000,0.578960,2.666667,2.500000,2.0,Grace,8.550000,10.650000,386.660391,...,18116.000000,6630.500000,65.000000,1.740410,42.500000,1.000000,458.500000,532.600000,JAG,JAG
5,4.954545,1.545455,0.527003,4.882353,2.590909,22.0,Khan,7.659091,10.086364,325.592878,...,24634.954545,6172.181818,985.227273,4.487823,63.227273,0.863636,452.981818,559.563636,KZ,KZ
6,1.250000,2.000000,3.066821,0.750000,0.250000,4.0,Santorin,0.250000,1.350000,230.065662,...,26195.000000,11598.500000,516.000000,2.928158,90.250000,0.000000,304.275000,403.325000,H2K,FLY
7,3.714286,1.685714,0.833832,4.627119,4.085714,35.0,Teddy,9.182857,11.371429,451.243155,...,13037.400000,3939.628571,471.600000,2.552022,58.685714,0.428571,322.985714,478.205714,JAG,JAG
9,5.342857,1.657143,0.002111,3.482759,0.428571,35.0,IgNar,1.602857,2.691429,37.897070,...,16001.171429,6072.057143,218.942857,5.732649,115.714286,0.400000,281.877143,278.534286,bbq,BBQ
10,4.625000,1.750000,0.295095,3.857143,2.125000,8.0,Justice,8.025000,10.487500,149.346391,...,17898.125000,3490.250000,1331.000000,3.756310,61.750000,0.375000,463.912500,556.075000,JAG,JAG


In [783]:
teams_plausi = df_players_s2.groupby('Team_S2').sum()

In [784]:
teams_plausi

Unnamed: 0_level_0,Assists,Deaths,JungleControl,KDA,Kills,Match_Count,creepsPerMinDeltas0-10,creepsPerMinDeltas10-20,damageDealtToObjectivesPerMin,damageDealtToTurretsPerMin,...,team_towerkills,timeCCingOthers,totalDamageTaken,totalHeal,totalTimeCrowdControlDealt,utilityScore,visionScore,win_ratio,xpPerMinDeltas0-10,xpPerMinDeltas10-20
Team_S2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100,21.6875,8.732143,3.350078,12.603959,5.125,62.0,18.555357,23.01875,842.204752,204.618548,...,26.892857,104.633929,99040.357143,30575.705357,2997.0,19.273742,299.991071,2.232143,1594.784821,1739.778571
AFS,36.201299,12.088412,9.044138,30.12632,15.370796,165.0,37.232035,47.732451,2633.621421,751.624948,...,51.2003,146.202298,132711.585415,51732.904595,5275.738761,30.380938,582.878788,4.736264,2595.465501,3206.014169
BBQ,24.761905,12.1,7.114181,17.176881,8.952381,175.0,28.380952,36.508571,1935.142019,392.886823,...,31.971429,122.328571,146695.095238,44455.428571,3705.8,22.837563,493.814286,2.233333,2267.140476,2683.491429
C9,31.4375,11.0625,4.995236,20.685775,12.0,80.0,28.95,35.125,1695.819649,539.122345,...,40.0,101.4375,107742.9375,34483.9375,3245.6875,19.312521,333.75,3.4375,1946.7625,2283.2875
CG,17.5,5.8125,1.254574,17.54965,7.5625,64.0,27.2625,33.875,1205.077723,426.05497,...,27.5,99.5625,60649.75,17170.6875,2875.0,16.28827,236.9375,2.5,1534.46875,1851.89375
CLG,25.0625,9.25,3.603493,14.306049,8.0,64.0,18.575,23.1375,1023.746094,290.554651,...,23.5,108.8125,103342.5,38308.8125,2572.125,19.35123,279.625,1.75,1399.45,1633.51875
FLY,18.46131,12.654762,7.145629,9.961905,5.895833,42.0,18.966071,23.56756,1081.374985,174.085768,...,19.732143,137.467262,134933.511905,46883.696429,3846.797619,25.569448,370.127976,1.002976,1887.9375,2136.488393
FNC,36.276786,11.964286,5.015707,29.692826,15.196429,80.0,35.40625,44.180357,1908.03834,662.772408,...,53.357143,151.508929,126567.151786,47555.732143,3932.571429,23.278177,382.964286,5.035714,2351.094643,2885.663393
FOX,34.375,11.8125,4.792944,20.698948,13.25,80.0,28.50625,34.19375,1725.560689,480.067199,...,36.5625,125.625,117465.6875,54346.5625,4160.875,21.132434,310.4375,3.4375,1893.48125,2325.44375
G2,31.294118,9.941176,4.3595,21.440581,11.058824,85.0,28.494118,34.841176,1600.52747,518.020794,...,39.705882,130.941176,113780.941176,36056.352941,3382.352941,23.803474,380.588235,3.235294,1893.935294,2181.5


In [785]:
#GRF muss exkludiert werden, da noch keine Daten vorhanden sind
#der Filter kann entfernt werden, sobald mehr Daten vorhanden sind
teams_wavg = df_players_s2[df_players_s2['Team_S2']!='GRF'].groupby('Team_S2').apply(lambda x: np.average(x[feature_list], weights=x["Match_Count"], axis=0))
df_teams_s2 = pd.DataFrame(columns=feature_list, index=teams_wavg.index)

for team in teams_wavg.index:
    df_teams_s2.at[team] = teams_wavg[team]

In [792]:
df_teams_s2

Unnamed: 0_level_0,KDA,perkPrimaryStyle,perkSubStyle,goldPerMin,dmgToChampsPerMin,damageDealtToObjectivesPerMin,damageDealtToTurretsPerMin,defenseScore,utilityScore,JungleControl
Team_S2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
100,3.1736,8235.48,8261.29,339.513,294.98,215.18,51.847,1.72846,4.76543,0.864073
AFS,4.15092,8211.52,8260.0,383.608,364.501,307.668,98.0236,1.74588,4.36908,0.994305
BBQ,3.04795,8221.71,8252.57,359.471,350.756,297.54,74.1205,1.65042,3.98289,0.867933
C9,4.13716,8207.5,8205.0,380.737,436.941,339.164,107.824,2.04243,3.8625,0.999047
CG,4.38741,8232.81,8231.25,376.594,421.399,301.269,106.514,1.93519,4.07207,0.313643
CLG,3.57651,8228.12,8270.31,343.338,395.259,255.937,72.6387,1.76549,4.83781,0.900873
FLY,2.60057,8245.24,8247.62,341.714,328.053,241.39,40.8413,1.93994,4.54124,1.51624
FNC,4.82607,8211.25,8261.25,380.06,413.375,336.007,116.474,1.74564,3.68698,0.946177
FOX,4.13979,8240.0,8210.0,379.735,449.404,345.112,96.0134,1.91506,4.22649,0.958589
G2,4.28812,8228.24,8256.47,370.058,445.316,320.105,103.604,1.80175,4.76069,0.8719


In [793]:
def predString(t1, t2):
    st1 = df_teams_s2.loc[t1]
    st2 = df_teams_s2.loc[t1]
    return (st1, st2)

In [794]:
t1, t2 = predString('JAG', 'HLE')
t1.index = ["team1_"+x for x in list(t1.index)]
t2.index = ["team2_"+x for x in list(t2.index)]
xgb1.predict_proba(pd.DataFrame([list(t1.append(t2))],columns=t1.index.append(t2.index)))[0][1]

XGBoostError: need to call fit beforehand