In [8]:
import pandas as pd
import json
from sklearn.preprocessing import MultiLabelBinarizer
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.decomposition import PCA

In [2]:
# Read the raw match dump from disk. The encoding is pinned explicitly because
# open() otherwise falls back to the platform's locale encoding, which is not
# UTF-8 on every system.
with open('grandmasterData.json', encoding='utf-8') as f:
  gdData = f.read()
# Parsed JSON payload; the next cell walks it as gdDict[summonerID][i].items().
gdDict = json.loads(gdData)

In [3]:
# Flatten the nested dump into four parallel lists with one entry per record.
# Structure as this cell reads it: gdDict maps a summoner ID to a list of
# matches, and each match is a mapping whose values are per-player records
# carrying "placement", "traits", "augments" and "units".
placements = []
traits = []
augments = []
units = []

# Only the values are needed, so iterate them directly instead of going
# through keys and positional indices.
for matches in gdDict.values():
  for match in matches:
    for dataDict in match.values():
      placements.append(dataDict["placement"])
      traits.append(dataDict["traits"])
      augments.append(dataDict["augments"])
      units.append(dataDict["units"])

# Build the frame in a single constructor call; column order follows the dict.
df = pd.DataFrame({
  "placement": placements,
  "traits": traits,
  "augments": augments,
  "units": units,
})

df.head()

Unnamed: 0,placement,traits,augments,units
0,4,"[Set10_Brawler, Set10_Classical, Set10_Country...","[TFT9_Augment_DravenSpoilsOfWar, TFT9_Augment_...","[TFT10_Bard, TFT10_Aphelios, TFT10_MissFortune..."
1,8,"[Set10_Brawler, Set10_Breakout, Set10_Dazzler,...","[TFT7_Augment_AFK, TFT10_Augment_Determinedinv...","[TFT10_KSante, TFT10_Senna, TFT10_Lulu, TFT10_..."
2,3,"[Set10_8Bit, Set10_Brawler, Set10_CrowdDive, S...","[TFT9_Augment_OneTwosThree, TFT9_Augment_Great...","[TFT10_MissFortune, TFT10_Sett, TFT10_Zac, TFT..."
3,6,"[Set10_CrowdDive, Set10_Edgelord, Set10_Execut...","[TFT9_Augment_CustomerIsAlwaysRight, TFT9_Augm...","[TFT10_Lillia, TFT10_Gnar, TFT10_Kayle, TFT10_..."
4,7,"[Set10_Deadeye, Set10_Guardian, Set10_Hyperpop...","[TFT9_Augment_SilverSpoon, TFT9_Augment_Pandor...","[TFT10_Lillia, TFT10_Kennen, TFT10_Seraphine, ..."


In [4]:
# One-hot encode each list-valued column into 0/1 indicator columns.
# Maps source column -> prefix put in front of every generated indicator name.
# The order matters: it fixes the order of the resulting column groups.
LIST_COLUMN_PREFIXES = {
  "traits": "trait_",
  "units": "unit_",
  "augments": "augment_",
}

for column, prefix in LIST_COLUMN_PREFIXES.items():
  if column not in df.columns:
    # The column was already replaced by its indicators on an earlier run of
    # this cell; skipping keeps the cell re-runnable instead of raising KeyError.
    continue
  # A fresh binarizer per column, so classes_ always belongs to the column
  # that was just fitted.
  mlb = MultiLabelBinarizer()
  indicators = pd.DataFrame(mlb.fit_transform(df[column]),
                            columns=mlb.classes_,
                            index=df.index).add_prefix(prefix)
  # drop() returns a new frame, so the frame object built upstream is not
  # mutated in place.
  df = df.drop(columns=column).join(indicators)

df.head()

Unnamed: 0,placement,trait_Astro,trait_Battlecast,trait_Blaster,trait_Chrono,trait_Cybernetic,trait_DarkStar,trait_Demolitionist,trait_Infiltrator,trait_ManaReaver,...,augment_TFT9_Augment_TiniestTitan,augment_TFT9_Augment_TiniestTitanPlus,augment_TFT9_Augment_TonsOfStats,augment_TFT9_Augment_TwoHealthy,augment_TFT9_Augment_UnleashedArcana,augment_TFT9_Augment_WhatDoesntKillYou,augment_TFT9_Augment_WhatTheForge,augment_TFT9_Augment_YouHaveMyBow,augment_TFT9_Augment_YouHaveMySword,augment_TFT9_Augment_YoungAndWildAndFree
0,4,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
def reduceFeatures(df, target, n_components, random_state=None):
    """Project every non-target column of ``df`` onto principal components.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns plus the ``target`` column.
    target : str
        Name of the column to leave out of the PCA and carry over unchanged.
    n_components
        Forwarded to ``PCA(n_components=...)`` as-is. This function does not
        check how much variance the chosen components explain; pick the value
        accordingly (see the scikit-learn PCA documentation for the accepted
        forms, e.g. an integer count or a float variance fraction).
    random_state : optional
        Forwarded to ``PCA(random_state=...)``. Defaults to ``None``, which
        leaves PCA's own default behaviour untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``PC1`` .. ``PCk`` followed by ``target``, where ``k`` is the
        number of columns PCA returned. Rows are in the same order as ``df``
        but carry a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``target`` is not one of ``df``'s columns.
    """
    features = list(df.columns)
    features.remove(target)

    pca = PCA(n_components=n_components, random_state=random_state)
    components = pca.fit_transform(df[features])

    # Take the column count from the transformed array rather than from
    # n_components, so non-integer settings do not break the naming step.
    colNames = [f"PC{i + 1}" for i in range(components.shape[1])]

    pc_df = pd.DataFrame(components, columns=colNames)
    # Attach the target positionally; to_numpy() bypasses index alignment so a
    # non-default index on df cannot turn the target values into NaN.
    pc_df[target] = df[target].to_numpy()

    return pc_df

In [12]:
# Compress the encoded feature columns into 60 principal components;
# "placement" is excluded from the PCA and carried over as the last column.
pc_df = reduceFeatures(df, target="placement", n_components=60)
pc_df.head()

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,...,PC52,PC53,PC54,PC55,PC56,PC57,PC58,PC59,PC60,placement
0,2.472877,-0.516134,-0.423691,1.090205,1.647069,0.461294,0.476847,0.901297,0.322718,-0.074304,...,0.192748,-0.094132,0.054595,0.232655,0.049828,0.069853,0.007567,0.006897,-0.03288,4
1,-0.617901,-1.594665,0.718298,0.053678,-0.698367,-0.3769,0.376411,-0.765911,1.074969,0.675096,...,-0.445461,-0.197357,-0.218318,0.056211,0.146372,0.206243,-0.001379,-0.147774,0.102597,8
2,2.198565,-0.209806,-0.137471,-1.152329,-0.889903,0.451187,-0.199991,-0.263528,-0.18749,0.905182,...,-0.106124,-0.013921,0.112021,0.197203,0.139066,0.012511,-0.039369,-0.077501,0.056628,3
3,-1.219602,2.168358,-0.343408,-0.250049,0.082409,-1.092121,-0.005953,0.251811,0.044836,-0.370573,...,-0.038095,-0.321158,0.070267,0.291201,0.088896,-0.043456,-0.198539,0.115954,0.062575,6
4,-2.155069,-1.364458,0.006958,-0.529021,0.315305,0.163541,-1.04847,0.568991,-0.675832,-0.149855,...,0.1243,-0.291743,0.067682,-0.210613,-0.206132,-0.047861,-0.243655,-0.149227,0.301789,7


In [None]:
# TODO: run k-means clustering on the PC columns of pc_df (pc_df also holds the "placement" target column)