In [1]:
# requirements
from pymongo import MongoClient

import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier

from sklearn.decomposition import PCA

import joblib

import tqdm
import heapq
import json

In [2]:
# Connect to MongoDB (server addressed at localhost, port 3999)
client = MongoClient("mongodb://localhost:3999/")

In [3]:
# Load the database handle
matchup_statistics_db = client["matchup_statistics_db"]  # presumably its collections: summoners, indicators, matches

In [49]:
# Map the Roman-numeral division names stored in league.rank to integers
division = {"I": 1, "II": 2, "III": 3, "IV": 4}

my_rank = 1

# Divisions whose numeric value is at most 3, i.e. I, II and III.
# To also include DIAMOND IV, raise the threshold to 4.
selected_ranks = [name for name, level in division.items() if level <= 3]

# Filter on the server and fetch only _id instead of pulling every summoner
# document into Python. Documents without league.tier / league.rank, or with
# a rank outside `division`, are simply not matched (the client-side filter
# raised KeyError on them).
summoner_query = {
    "league.tier": "DIAMOND",
    "league.rank": {"$in": selected_ranks},
}

# _id values of the selected DIAMOND summoners
users = [
    doc["_id"]
    for doc in matchup_statistics_db["summoners"].find(summoner_query, {"_id": 1})
]

print(len(users))
# Show one sample id; prints None instead of raising IndexError when nothing matched
print(users[0] if users else None)

3148
t3C2JzNt9v8ypQ3PtL7v4c6VNvn-QdDDnm8-Pcnllyd65xPT


In [50]:
# Extract training data

# Cursor over the indicator documents whose _id is one of the selected users
indicators = matchup_statistics_db["indicators"].find(
    {"_id": {"$in": users}},
)

# Training data containers: y collects one 1/0 win label per kept match,
# summoners collects the matching feature dict; x starts empty
x, y = [], []
summoners = []

# Counters: times is incremented by imputation() each time it falls back to 0;
# all_zero is presumably meant for matches whose features are all zero
times = all_zero = 0


# Missing-value handling - to be replaced by mean imputation later
def imputation(datas, indicator, record, record_detail):
    """Return ``datas[indicator][record][record_detail]``, or 0 when it is missing.

    A value counts as missing when the key is absent or stored as ``None``.
    Each missing value increments the module-level ``times`` counter.
    Genuine falsy values such as ``0`` or ``0.0`` are returned unchanged and
    are NOT counted as missing.

    Args:
        datas: nested mapping for one match (indexed as datas[indicator][record]).
        indicator: first-level key.
        record: second-level key; the value under it must provide ``.get``.
        record_detail: key of the value to read.

    Returns:
        The stored value, or 0 if it is missing.

    Raises:
        KeyError: if ``indicator`` or ``record`` is not present in ``datas``.
    """
    global times

    # Look the value up once; test for None rather than truthiness so that a
    # real 0 is not mistaken for a missing value.
    value = datas[indicator][record].get(record_detail)
    if value is None:
        times += 1
        return 0
    return value

# Extract one feature dict and one win/loss label per match.

# Ordered feature table: (indicator, record, feature, impute_missing).
# The order defines the key order of each feature dict, so keep it stable.
# Features flagged True go through imputation() (missing -> 0); the others
# are read directly and raise KeyError when absent.
feature_specs = [
    ("laneIndicator", "basicWeight", "csDiffer", False),
    ("laneIndicator", "basicWeight", "expDiffer", False),
    ("laneIndicator", "basicWeight", "turretPlateDestroyDiffer", False),
    ("laneIndicator", "aggresiveLaneAbilility", "dealDiffer", True),
    ("macroIndicator", "splitPoint", "turretKillsPerDeaths", False),
    ("macroIndicator", "splitPoint", "damageDealtToTurretsPerTeamTotalTowerDamageDone", False),
    ("macroIndicator", "initiatingPoint", "totalTimeCCingOthersPerTotalDamageTaken", False),
    ("macroIndicator", "initiatingPoint", "totalDamageTakenPerTeamTotalDamageTaken", False),
    ("macroIndicator", "initiatingPoint", "damageSelfMitigatedPerTotalDamageTaken", False),
    ("macroIndicator", "visionPoint", "visionScorePerDeath", False),
    ("macroIndicator", "jungleHoldPoint", "totalJungleObjectivePerGameDuration", False),
    ("macroIndicator", "objectivePoint", "getObjectiveDifferPerGameDuration", False),
    ("macroIndicator", "totalDealPoint", "damagePerMinute", False),
    ("macroIndicator", "totalDealPoint", "dealPerGold", True),
    ("macroIndicator", "totalDealPoint", "teamDamagePercentage", True),
]

for indicator in indicators:
    for match_indicator in indicator["matchIndicators"]:
        summoner = {}
        for section, record, feature, impute_missing in feature_specs:
            if impute_missing:
                summoner[feature] = imputation(match_indicator, section, record, feature)
            else:
                summoner[feature] = match_indicator[section][record][feature]

        # Skip matches whose every feature is zero, and count them so the
        # all_zero counter reflects how many rows were dropped.
        if not any(value != 0 for value in summoner.values()):
            all_zero += 1
            continue

        # Label: 1 only when isWin is exactly True, otherwise 0
        y.append(1 if match_indicator["metadata"]["isWin"] is True else 0)
        summoners.append(summoner)

In [51]:
# Build the feature table: one row per kept match, one column per feature key
df = pd.DataFrame(summoners)
# Show the frame as the cell output
df

Unnamed: 0,csDiffer,expDiffer,turretPlateDestroyDiffer,dealDiffer,turretKillsPerDeaths,damageDealtToTurretsPerTeamTotalTowerDamageDone,totalTimeCCingOthersPerTotalDamageTaken,totalDamageTakenPerTeamTotalDamageTaken,damageSelfMitigatedPerTotalDamageTaken,visionScorePerDeath,totalJungleObjectivePerGameDuration,getObjectiveDifferPerGameDuration,damagePerMinute,dealPerGold,teamDamagePercentage
0,23,-12,4.666667,564,0,8333,43,28732,45013,275000,1297,-648,31400000,98412,0.171972
1,13,1322,-2.666667,3197,0,3628,43,32223,50395,2000000,0,316,40700000,89925,0.161630
2,-3,161,0.000000,201,16666,36225,28,30416,57116,216666,1819,-629,55500000,127241,0.169418
3,-5,525,-1.666667,50,0,11579,34,26494,64003,528571,1663,231,31200000,81445,0.085888
4,10,-481,-2.666667,-3576,0,0,38,38323,49794,280000,631,105,39000000,91781,0.152747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61769,-12,-1682,-1.000000,-1278,0,11019,4,16074,44438,163636,0,327,61400000,183032,0.179579
61770,6,-239,-0.666667,-935,0,23810,54,26551,102474,1233333,760,597,63100000,137400,0.143167
61771,-4,954,2.333333,4344,0,0,48,35103,90953,442857,982,-327,83000000,166037,0.248919
61772,7,-639,3.000000,-2164,0,0,38,26177,81954,444444,487,-389,46600000,128632,0.129182


In [52]:
# Fit a standardisation scaler on every column of df
scaler = StandardScaler().fit(df)

# Persist the fitted scaler; the returned list of written file names is the cell output
joblib.dump(scaler, '../statistics/scalers/diamond_1_scaler.joblib')

['../statistics/scalers/diamond_1_scaler.joblib']