## League of Legends Ranked Game Analysis
This is the notebook of the League of Legends ranked game analysis by using pyspark. In this notebook, we are going to use the League of Legends dataset from https://www.kaggle.com/datasnaek/league-of-legends which created by Mitchell J. This is a collection of over 50,000 ranked EUW games from the game League of Legends, as well as json files containing a way to convert between champion and summoner spell IDs and their names.

In [0]:
#import necessary libs
import numpy as np

#general spark modules
from pyspark.sql import Row
from pyspark.sql.functions import asc, col, lit, when
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, TimestampType, ArrayType

#scikit learn modules
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

#spark ml modules

# spark ml classifier
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

In [0]:
#read csv file
game_df = (sqlContext.read.format("csv").
  option("header", "true").
  option("nullValue", "NA").
  option("inferSchema", True).
  load("/FileStore/tables/Project1/games.csv"))

In [0]:
#drop empty columns
game_df = game_df.dropna()

#display dataframe
game_df.display()

gameId,creationTime,gameDuration,seasonId,winner,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_champ1id,t1_champ1_sum1,t1_champ1_sum2,t1_champ2id,t1_champ2_sum1,t1_champ2_sum2,t1_champ3id,t1_champ3_sum1,t1_champ3_sum2,t1_champ4id,t1_champ4_sum1,t1_champ4_sum2,t1_champ5id,t1_champ5_sum1,t1_champ5_sum2,t1_towerKills,t1_inhibitorKills,t1_baronKills,t1_dragonKills,t1_riftHeraldKills,t1_ban1,t1_ban2,t1_ban3,t1_ban4,t1_ban5,t2_champ1id,t2_champ1_sum1,t2_champ1_sum2,t2_champ2id,t2_champ2_sum1,t2_champ2_sum2,t2_champ3id,t2_champ3_sum1,t2_champ3_sum2,t2_champ4id,t2_champ4_sum1,t2_champ4_sum2,t2_champ5id,t2_champ5_sum1,t2_champ5_sum2,t2_towerKills,t2_inhibitorKills,t2_baronKills,t2_dragonKills,t2_riftHeraldKills,t2_ban1,t2_ban2,t2_ban3,t2_ban4,t2_ban5
3326086514,1504279457970,1949,9,1,2,1,1,1,1,2,8,12,4,432,3,4,96,4,7,11,11,6,112,4,14,11,1,2,3,0,92,40,69,119,141,104,11,4,498,4,7,122,6,4,238,14,4,412,4,3,5,0,0,1,1,114,67,43,16,51
3229566029,1497848803862,1851,9,1,1,1,1,0,1,1,119,7,4,39,12,4,76,4,3,10,4,14,35,4,11,10,4,0,2,1,51,122,17,498,19,54,4,12,25,4,14,120,11,4,157,4,14,92,4,7,2,0,0,0,0,11,67,238,51,420
3327363504,1504360103310,1493,9,1,2,1,1,1,2,0,18,4,7,141,11,4,267,3,4,68,4,12,38,12,4,8,1,1,1,0,117,40,29,16,53,69,4,7,412,14,4,126,4,12,24,4,11,22,7,4,2,0,0,1,0,157,238,121,57,28
3326856598,1504348503996,1758,9,1,1,1,1,1,1,0,57,4,12,63,4,14,29,4,7,61,4,1,36,11,4,9,2,1,2,0,238,67,516,114,31,90,14,4,19,11,4,412,4,3,92,4,14,22,4,7,0,0,0,0,0,164,18,141,40,51
3330080762,1504554410899,2094,9,1,2,1,1,1,1,0,19,4,12,29,11,4,40,4,3,119,4,7,134,7,4,9,2,1,3,0,90,64,412,25,31,37,3,4,59,4,12,141,11,4,38,4,12,51,4,7,3,0,0,1,0,86,11,201,122,18
3287435705,1501667992132,2059,9,1,2,2,1,1,2,0,40,3,4,141,11,4,24,12,4,45,3,4,67,4,7,8,1,1,1,0,117,6,238,122,105,92,4,12,15,4,7,245,12,4,2,4,11,12,4,14,6,0,0,3,0,119,134,154,63,31
3314215542,1503430065724,1993,9,1,1,2,1,1,1,1,74,3,4,17,4,12,412,3,4,18,4,7,57,4,11,10,2,1,2,1,157,11,141,32,35,53,4,14,21,4,7,114,12,4,161,21,4,19,11,4,2,0,0,0,0,75,42,31,40,429
3329224025,1504472363684,1334,9,1,1,1,0,0,2,1,150,12,4,498,7,4,16,4,3,90,4,14,79,11,4,6,0,0,0,1,117,19,157,29,18,112,4,7,64,11,4,497,14,4,67,4,7,57,4,12,0,0,0,2,0,157,38,122,40,238
3318040883,1503686577549,1387,9,2,2,2,2,0,2,2,111,12,4,57,4,11,18,7,4,267,4,3,161,3,4,0,0,0,0,0,154,117,31,126,157,29,7,4,59,4,12,99,21,4,141,4,11,63,14,4,8,1,0,2,1,28,51,53,157,40
3327786881,1504385918871,2681,9,2,2,2,2,2,2,0,427,4,3,11,11,4,75,12,4,429,7,4,131,4,14,10,1,0,2,0,31,238,141,17,119,35,4,11,110,4,7,74,4,14,99,3,4,86,12,4,8,3,1,2,0,64,141,84,67,53


In [0]:
#create champions schema for JSON file
champ_schema = StructType([
      StructField("id",IntegerType(),True),
      StructField("name",StringType(),True),
      StructField("key",StringType(),True),
      StructField("title",StringType(),True),
  ])

In [0]:
#read champion_info JSON file
champions = spark.read.option("multiline","true").json('/FileStore/tables/Project1/champion_info.json')

#select 'data'
champions = champions.select('data')

#convert to pandas
champion_pd = champions.toPandas()

#to_dict to get index
champions_index = champion_pd.to_dict('index')
#load the data in dataframe
champions_jdf = spark.createDataFrame(champions_index[0]['data'], champ_schema)

#ascending orderBy 'id'
champions_jdf = champions_jdf.orderBy(asc("id"))

  Unable to convert the field data. If this column is not necessary, you may consider dropping it or converting to primitive type before the conversion.
Direct cause: Nested StructType not supported in conversion to Arrow
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.


In [0]:
#drop the 'key' & 'title'
champions_jdf = champions_jdf.drop('key','title')

In [0]:
#show json dataframe
champions_jdf.show()

+---+------------+
| id|        name|
+---+------------+
|  1|       Annie|
|  2|        Olaf|
|  3|       Galio|
|  4| TwistedFate|
|  5|     XinZhao|
|  6|       Urgot|
|  7|     Leblanc|
|  8|    Vladimir|
|  9|Fiddlesticks|
| 10|       Kayle|
| 11|    MasterYi|
| 12|     Alistar|
| 13|        Ryze|
| 14|        Sion|
| 15|       Sivir|
| 16|      Soraka|
| 17|       Teemo|
| 18|    Tristana|
| 19|     Warwick|
| 20|        Nunu|
+---+------------+
only showing top 20 rows



In [0]:
#select the ban champion data 
ban_df = game_df.select('gameId','t1_ban1','t1_ban2','t1_ban3','t1_ban4','t1_ban5','t2_ban1','t2_ban2','t2_ban3','t2_ban4','t2_ban5')

ban_df.show()

+----------+-------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|    gameId|t1_ban1|t1_ban2|t1_ban3|t1_ban4|t1_ban5|t2_ban1|t2_ban2|t2_ban3|t2_ban4|t2_ban5|
+----------+-------+-------+-------+-------+-------+-------+-------+-------+-------+-------+
|3326086514|     92|     40|     69|    119|    141|    114|     67|     43|     16|     51|
|3229566029|     51|    122|     17|    498|     19|     11|     67|    238|     51|    420|
|3327363504|    117|     40|     29|     16|     53|    157|    238|    121|     57|     28|
|3326856598|    238|     67|    516|    114|     31|    164|     18|    141|     40|     51|
|3330080762|     90|     64|    412|     25|     31|     86|     11|    201|    122|     18|
|3287435705|    117|      6|    238|    122|    105|    119|    134|    154|     63|     31|
|3314215542|    157|     11|    141|     32|     35|     75|     42|     31|     40|    429|
|3329224025|    117|     19|    157|     29|     18|    157|     38|  

In [0]:
#create view for champions JSON dataframe
champions_jdf.createOrReplaceTempView('champ')
#create view for ban champions dataframe
ban_df.createOrReplaceTempView('ban')

#repalce t1_ban1 by champions name
t1_ban1 = sqlContext.sql('select b.gameId, c.name as t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban b where b.t1_ban1 == c.id')
t1_ban1.createOrReplaceTempView('ban1')

#repalce t1_ban2 by champions name
t1_ban2 = sqlContext.sql('select b.gameId, b.t1_ban1, c.name as t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban1 b where b.t1_ban2 == c.id')
t1_ban2.createOrReplaceTempView('ban2')

#repalce t1_ban3 by champions name
t1_ban3 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, c.name as t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban2 b where b.t1_ban3 == c.id')
t1_ban3.createOrReplaceTempView('ban3')

#repalce t1_ban4 by champions name
t1_ban4 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, c.name as t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban3 b where b.t1_ban4 == c.id')
t1_ban4.createOrReplaceTempView('ban4')

#repalce t1_ban5 by champions name
t1_ban5 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, c.name as t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban4 b where b.t1_ban5 == c.id')
t1_ban5.createOrReplaceTempView('ban5')

#repalce t2_ban1 by champions name
t2_ban1 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, c.name as t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban5 b where b.t2_ban1 == c.id')
t2_ban1.createOrReplaceTempView('ban6')

#repalce t2_ban2 by champions name
t2_ban2 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, c.name as t2_ban2, b.t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban6 b where b.t2_ban2 == c.id')
t2_ban2.createOrReplaceTempView('ban7')

#repalce t2_ban3 by champions name
t2_ban3 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, c.name as t2_ban3, b.t2_ban4, b.t2_ban5 from champ c, ban7 b where b.t2_ban3 == c.id')
t2_ban3.createOrReplaceTempView('ban8')

#repalce t2_ban4 by champions name
t2_ban4 = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, c.name as t2_ban4, b.t2_ban5 from champ c, ban8 b where b.t2_ban4 == c.id')
t2_ban4.createOrReplaceTempView('ban9')

#repalce t2_ban5 by champions name
ban_champs_df = sqlContext.sql('select b.gameId, b.t1_ban1, b.t1_ban2, b.t1_ban3, b.t1_ban4, b.t1_ban5, b.t2_ban1, b.t2_ban2, b.t2_ban3, b.t2_ban4, c.name as t2_ban5 from champ c, ban9 b where b.t2_ban5 == c.id')

In [0]:
#drop 'gameId'
ban_champ_df = ban_champs_df.drop('gameId')
#show ban champions dataframe
ban_champ_df.show()

+-------+--------+----------+----------+----------+--------+--------+--------+----------+----------+
|t1_ban1| t1_ban2|   t1_ban3|   t1_ban4|   t1_ban5| t2_ban1| t2_ban2| t2_ban3|   t2_ban4|   t2_ban5|
+-------+--------+----------+----------+----------+--------+--------+--------+----------+----------+
|  Annie|  Illaoi|    Thresh|   Chogath|     Yasuo|Malzahar|    Nami|     Jax|  JarvanIV|   Caitlyn|
|  Annie|     Zac|   Chogath|     Janna|      Gnar|  Maokai|    Gnar|   Janna|     Vayne|     Brand|
|  Annie|  Darius|  Malzahar|    Maokai|     Fiora|   Janna|  Irelia|  Gragas|  Kassadin|    LeeSin|
|  Annie|   Talon|   Caitlyn|Blitzcrank|Tryndamere|   Riven|    Fizz|  Draven|Blitzcrank|  Malzahar|
|  Annie|    Jinx|Blitzcrank|    Darius|     Galio|   Yasuo|     Zed|    Fizz|     Fiora|     Leona|
|  Annie|Malzahar|  Tristana|   Chogath| Gangplank|  Khazix|     Zed|    Gnar|     Fiora|     Talon|
|  Annie|   Fiora|    Darius|Tryndamere|      Ahri| Caitlyn|     Zac|MasterYi|     Elise|  

In [0]:
#champions and objectives dataframe
champ_df = game_df.select('gameId','gameDuration','winner','firstBlood','t1_dragonKills','t1_baronKills','t1_towerKills','t1_inhibitorKills','t2_dragonKills','t2_baronKills','t2_towerKills','t2_inhibitorKills','t1_champ1id','t1_champ2id','t1_champ3id','t1_champ4id','t1_champ5id','t2_champ1id','t2_champ2id','t2_champ3id','t2_champ4id','t2_champ5id')

In [0]:
#create view for champ_df
champ_df.createOrReplaceTempView('champ_sel')

#replace t1_champ1id by name
t1_champ1 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, c.name as t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ_sel s where s.t1_champ1id == c.id')
t1_champ1.createOrReplaceTempView('champ1')

#replace t1_champ2id by name
t1_champ2 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, c.name as t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ1 s where s.t1_champ2id == c.id')
t1_champ2.createOrReplaceTempView('champ2')

#replace t1_champ3id by name
t1_champ3 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, c.name as t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ2 s where s.t1_champ3id == c.id')
t1_champ3.createOrReplaceTempView('champ3')

#replace t1_champ4id by name
t1_champ4 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, c.name as t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ3 s where s.t1_champ4id == c.id')
t1_champ4.createOrReplaceTempView('champ4')

#replace t1_champ5id by name
t1_champ5 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, c.name as t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ4 s where s.t1_champ5id == c.id')
t1_champ5.createOrReplaceTempView('champ5')

#replace t2_champ1id by name
t2_champ1 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, c.name as t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ5 s where s.t2_champ1id == c.id')
t2_champ1.createOrReplaceTempView('champ6')

#replace t2_champ2id by name
t2_champ2 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, c.name as t2_champ2id, s.t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ6 s where s.t2_champ2id == c.id')
t2_champ2.createOrReplaceTempView('champ7')

#replace t2_champ3id by name
t2_champ3 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, c.name as t2_champ3id, s.t2_champ4id, s.t2_champ5id from champ c, champ7 s where s.t2_champ3id == c.id')
t2_champ3.createOrReplaceTempView('champ8')

#replace t2_champ4id by name
t2_champ4 = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, c.name as t2_champ4id, s.t2_champ5id from champ c, champ8 s where s.t2_champ4id == c.id')
t2_champ4.createOrReplaceTempView('champ9')

#replace t2_champ5id by name
champ_sel_df = sqlContext.sql('select s.gameId, s.gameDuration, s.winner, s.firstBlood, s.t1_dragonKills, s.t1_baronKills, s.t1_towerKills, s.t1_inhibitorKills, s.t2_dragonKills, s.t2_baronKills, s.t2_towerKills, S.t2_inhibitorKills, s.t1_champ1id, s.t1_champ2id, s.t1_champ3id, s.t1_champ4id, s.t1_champ5id, s.t2_champ1id, s.t2_champ2id, s.t2_champ3id, s.t2_champ4id, c.name as t2_champ5id from champ c, champ9 s where s.t2_champ5id == c.id')

In [0]:
#display champ_sel_df
champ_sel_df.display()

gameId,gameDuration,winner,firstBlood,t1_dragonKills,t1_baronKills,t1_towerKills,t1_inhibitorKills,t2_dragonKills,t2_baronKills,t2_towerKills,t2_inhibitorKills,t1_champ1id,t1_champ2id,t1_champ3id,t1_champ4id,t1_champ5id,t2_champ1id,t2_champ2id,t2_champ3id,t2_champ4id,t2_champ5id
3328249843,2386,1,1,3,2,11,6,1,0,5,1,Annie,Yasuo,XinZhao,Jinx,Leona,Jax,Tristana,MonkeyKing,Sona,Malzahar
3329366475,1777,2,1,1,0,0,0,2,1,7,0,Annie,Brand,Shen,Kayn,Tristana,XinZhao,Orianna,Ornn,Rakan,Xayah
3327932127,2358,2,2,0,2,3,0,4,0,11,3,Annie,Tristana,TahmKench,Nidalee,Kayn,Chogath,Lucian,Kindred,Lulu,Cassiopeia
3323445837,1986,2,1,1,0,3,0,3,1,10,2,Annie,Nasus,Tristana,Morgana,Shyvana,RekSai,Galio,Mordekaiser,Twitch,Ahri
3326117500,2696,2,2,2,1,6,1,3,1,10,3,Annie,KogMaw,Garen,Bard,Evelynn,Zed,MasterYi,Jinx,Kennen,Rakan
3322933464,2325,1,2,2,1,11,3,2,0,4,0,Annie,Kayn,Ezreal,Blitzcrank,Trundle,Warwick,Maokai,Xayah,Riven,Brand
3325670666,1273,2,1,0,0,0,0,2,0,8,1,Annie,Udyr,Lulu,Fiora,Vayne,Velkoz,Poppy,Caitlyn,MasterYi,Heimerdinger
3326271651,1876,2,2,1,0,3,0,1,0,9,1,Annie,Xayah,Morgana,Olaf,Renekton,Fiora,Talon,LeeSin,Soraka,Jinx
3269218929,2459,1,2,4,1,11,3,1,0,3,0,Annie,Warwick,Malzahar,Vayne,Darius,MissFortune,Yasuo,Blitzcrank,Hecarim,Ezreal
3326136022,968,2,1,0,0,0,0,1,0,4,0,Annie,Leona,Tristana,LeeSin,Shaco,Ornn,Sivir,Vi,Taliyah,Alistar


In [0]:
a = ban_champ_df.select('t1_ban1')
#renamed it as 'ban'
a = a.withColumnRenamed('t1_ban1','ban')
b = ban_champ_df.select('t1_ban2')
c = ban_champ_df.select('t1_ban3')
d = ban_champ_df.select('t1_ban4')
e = ban_champ_df.select('t1_ban5')
f = ban_champ_df.select('t2_ban1')
g = ban_champ_df.select('t2_ban2')
h = ban_champ_df.select('t2_ban3')
i = ban_champ_df.select('t2_ban4')
j = ban_champ_df.select('t2_ban5')

#union data as 1-dimension dataframe
r = a.union(b)
re = r.union(c)
res = re.union(d)
resu = res.union(e)
resul = resu.union(f)
result = resul.union(g)
result_b = result.union(h)
result_ba = result_b.union(i)
result_ban = result_ba.union(j)

#orderBy 'ban' column
result_ban = result_ban.orderBy('ban')

#Count the data by champions name
sum_ban_result = result_ban.rdd.map(lambda x: (x,1)).reduceByKey(lambda x,y: x+y)
#collect data
sum_ban_result.collect()

Out[264]: [(Row(ban='Aatrox'), 650),
 (Row(ban='Ahri'), 4214),
 (Row(ban='Akali'), 4235),
 (Row(ban='Alistar'), 649),
 (Row(ban='Amumu'), 560),
 (Row(ban='Anivia'), 827),
 (Row(ban='Annie'), 2015),
 (Row(ban='Ashe'), 464),
 (Row(ban='AurelionSol'), 402),
 (Row(ban='Azir'), 396),
 (Row(ban='Bard'), 493),
 (Row(ban='Blitzcrank'), 17717),
 (Row(ban='Brand'), 2709),
 (Row(ban='Braum'), 1295),
 (Row(ban='Caitlyn'), 9689),
 (Row(ban='Camille'), 1642),
 (Row(ban='Cassiopeia'), 1515),
 (Row(ban='Chogath'), 23495),
 (Row(ban='Corki'), 609),
 (Row(ban='Darius'), 21669),
 (Row(ban='Diana'), 1543),
 (Row(ban='DrMundo'), 610),
 (Row(ban='Draven'), 19087),
 (Row(ban='Ekko'), 2634),
 (Row(ban='Elise'), 1149),
 (Row(ban='Evelynn'), 1347),
 (Row(ban='Ezreal'), 756),
 (Row(ban='Fiddlesticks'), 268),
 (Row(ban='Fiora'), 14027),
 (Row(ban='Fizz'), 12614),
 (Row(ban='Galio'), 1956),
 (Row(ban='Gangplank'), 775),
 (Row(ban='Garen'), 1778),
 (Row(ban='Gnar'), 1841),
 (Row(ban='Gragas'), 1634),
 (Row(ban='Gra

In [0]:
#plot 'ban' data
ban_rows = sum_ban_result.map(lambda z: Row(x=str(z[0]), y=int(z[1])))
ban_bar_df = spark.createDataFrame(ban_rows)
display(ban_bar_df)

x,y
Row(ban='Aatrox'),650
Row(ban='Ahri'),4214
Row(ban='Akali'),4235
Row(ban='Alistar'),649
Row(ban='Amumu'),560
Row(ban='Anivia'),827
Row(ban='Annie'),2015
Row(ban='Ashe'),464
Row(ban='AurelionSol'),402
Row(ban='Azir'),396


In [0]:
#drop data
champ_select_df = champ_sel_df.drop('gameId','gameDuration''firstBlood','t1_dragonKills','t1_baronKills','t1_towerKills','t1_inhibitorKills','t2_dragonKills','t2_baronKills','t2_towerKills','t2_inhibitorKills')

c1 = champ_select_df.select('t1_champ1id')
#renamed 't1_champ1id' as 'champ'
c1 = c1.withColumnRenamed('t1_champ1id','pick')
c2 = champ_select_df.select('t1_champ2id')
c3 = champ_select_df.select('t1_champ3id')
c4 = champ_select_df.select('t1_champ4id')
c5 = champ_select_df.select('t1_champ5id')
c6 = champ_select_df.select('t2_champ1id')
c7 = champ_select_df.select('t2_champ2id')
c8 = champ_select_df.select('t2_champ3id')
c9 = champ_select_df.select('t2_champ4id')
c10 = champ_select_df.select('t2_champ5id')

#union data
ch = c1.union(c2)
cha = ch.union(c3)
cham = cha.union(c4)
champ = cham.union(c5)
champ_r = champ.union(c6)
champ_re = champ_r.union(c7)
champ_res = champ_re.union(c8)
champ_resu = champ_res.union(c9)
champ_result = champ_resu.union(c10)

#orderBy 'champ'
champ_result = champ_result.orderBy('pick')

#count the champ
sum_champ_result = champ_result.rdd.map(lambda x: (x,1)).reduceByKey(lambda x,y: x+y)
#collect() retrieve data
sum_champ_result.collect()

Out[266]: [(Row(pick='Aatrox'), 753),
 (Row(pick='Ahri'), 6316),
 (Row(pick='Akali'), 2949),
 (Row(pick='Alistar'), 3476),
 (Row(pick='Amumu'), 2312),
 (Row(pick='Anivia'), 2252),
 (Row(pick='Annie'), 3277),
 (Row(pick='Ashe'), 5391),
 (Row(pick='AurelionSol'), 841),
 (Row(pick='Azir'), 1280),
 (Row(pick='Bard'), 3733),
 (Row(pick='Blitzcrank'), 6847),
 (Row(pick='Brand'), 3876),
 (Row(pick='Braum'), 3744),
 (Row(pick='Caitlyn'), 6776),
 (Row(pick='Camille'), 1886),
 (Row(pick='Cassiopeia'), 2766),
 (Row(pick='Chogath'), 4175),
 (Row(pick='Corki'), 2449),
 (Row(pick='Darius'), 3894),
 (Row(pick='Diana'), 3079),
 (Row(pick='DrMundo'), 1567),
 (Row(pick='Draven'), 5081),
 (Row(pick='Ekko'), 4866),
 (Row(pick='Elise'), 2607),
 (Row(pick='Evelynn'), 1616),
 (Row(pick='Ezreal'), 5337),
 (Row(pick='Fiddlesticks'), 1739),
 (Row(pick='Fiora'), 3078),
 (Row(pick='Fizz'), 3468),
 (Row(pick='Galio'), 2717),
 (Row(pick='Gangplank'), 4204),
 (Row(pick='Garen'), 3893),
 (Row(pick='Gnar'), 3922),
 (R

In [0]:
#plot champ data
champ_rows = sum_champ_result.map(lambda z: Row(x=str(z[0]), y=int(z[1])))
champ_bar_df = spark.createDataFrame(champ_rows)
display(champ_bar_df)

x,y
Row(pick='Aatrox'),753
Row(pick='Ahri'),6316
Row(pick='Akali'),2949
Row(pick='Alistar'),3476
Row(pick='Amumu'),2312
Row(pick='Anivia'),2252
Row(pick='Annie'),3277
Row(pick='Ashe'),5391
Row(pick='AurelionSol'),841
Row(pick='Azir'),1280


In [0]:
duration_900 = game_df.select('gameDuration').filter('gameDuration>=900').orderBy('gameDuration')
sum_duration = duration_900.rdd.map(lambda x: (x,1)).reduceByKey(lambda x,y: x+y)

sum_duration.collect()

duration_rows = sum_duration.map(lambda z: Row(x=str(z[0]), y=int(z[1])))

duration_result_df = spark.createDataFrame(duration_rows)
display(duration_result_df)

x,y
Row(gameDuration=901),1
Row(gameDuration=902),1
Row(gameDuration=903),1
Row(gameDuration=904),2
Row(gameDuration=906),1
Row(gameDuration=908),1
Row(gameDuration=909),1
Row(gameDuration=911),12
Row(gameDuration=912),21
Row(gameDuration=913),14


In [0]:
duration_1700 = game_df.select('gameDuration').filter('gameDuration>=1700').orderBy('gameDuration')
sum_duration_1700 = duration_1700.rdd.map(lambda x: (x,1)).reduceByKey(lambda x,y: x+y)

sum_duration_1700.collect()

duration_rows_1700 = sum_duration_1700.map(lambda z: Row(x=str(z[0]), y=int(z[1])))

duration_result_1700 = spark.createDataFrame(duration_rows_1700)
display(duration_result_1700)

x,y
Row(gameDuration=1700),60
Row(gameDuration=1701),55
Row(gameDuration=1702),42
Row(gameDuration=1703),41
Row(gameDuration=1704),36
Row(gameDuration=1705),24
Row(gameDuration=1706),32
Row(gameDuration=1707),33
Row(gameDuration=1708),32
Row(gameDuration=1709),37


## K-Neighbour Neighbors Algorithm

In [0]:
champ_sel_df = champ_sel_df.withColumn('team',when((col('t1_champ1id') == 'Tristana') | (col('t1_champ2id') == 'Tristana') | (col('t1_champ3id') == 'Tristana')\
            |(col('t1_champ4id') == 'Tristana') | (col('t1_champ5id') == 'Tristana'),\
        1)\
    .otherwise(2))

champ_sel_df = champ_sel_df.withColumn('victory', when((col('team') == col('winner')), 1).otherwise(0))

champ_sel_df = champ_sel_df.withColumn('fBlood', when((col('team') == col('firstBlood')), 1).otherwise(0))

champ_sel_df = champ_sel_df.withColumn('dragon', when((col('team') == 2), col('t2_dragonKills')).otherwise(col('t1_dragonKills')))

champ_sel_df = champ_sel_df.withColumn('baron', when((col('team') == 2), col('t2_baronKills')).otherwise(col('t1_baronKills')))

champ_sel_df = champ_sel_df.withColumn('tower', when((col('team') == 2), col('t2_towerKills')).otherwise(col('t1_towerKills')))

champ_sel_df = champ_sel_df.withColumn('inhibitor', when((col('team') == 2), col('t2_inhibitorKills')).otherwise(col('t1_inhibitorKills')))

In [0]:
tristana_df= champ_sel_df.select('gameDuration','fBlood','dragon','baron','tower','inhibitor')

data_feature = tristana_df.collect()
data_target = champ_sel_df.select('victory').collect()

X_train, X_test, Y_train, Y_test = train_test_split(data_feature,data_target, test_size= 0.33, random_state=21, stratify=data_target)

knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, Y_train)
prediction = knn.predict(X_test)
display(knn.score(X_test, Y_test))

  return self._fit(X, y)
0.92920197740113

In [0]:
#List Hyperparameters that we want to tune.
leaf_size = list(range(0,5))
n_neighbors = list(range(1,10))
p=[1,2]
#Convert to dictionary
hyperparameters = dict(leaf_size=leaf_size, n_neighbors=n_neighbors, p=p)
#Create new KNN object
knn_tuning = KNeighborsClassifier()
#Use GridSearch
clf = GridSearchCV(knn_tuning, hyperparameters, cv=10)
#Fit the model
best_model = clf.fit(X_train,Y_train)
#Print The value of best Hyperparameters
print('Best leaf_size:', best_model.best_estimator_.get_params()['leaf_size'])
print('Best p:', best_model.best_estimator_.get_params()['p'])
print('Best n_neighbors:', best_model.best_estimator_.get_params()['n_neighbors'])

  return self._fit(X, y)
Traceback (most recent call last):
  File "/databricks/python/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/databricks/python_shell/dbruntime/MLWorkloadsInstrumentation/_sklearn.py", line 29, in patch_function
    original_result = original(self, *args, **kwargs)
  File "/databricks/python/lib/python3.8/site-packages/sklearn/neighbors/_classification.py", line 179, in fit
    return self._fit(X, y)
  File "/databricks/python/lib/python3.8/site-packages/sklearn/neighbors/_base.py", line 503, in _fit
    self._tree = KDTree(X, self.leaf_size,
  File "sklearn/neighbors/_binary_tree.pxi", line 1060, in sklearn.neighbors._kd_tree.BinaryTree.__init__
ValueError: leaf_size must be greater than or equal to 1

  return self._fit(X, y)
Traceback (most recent call last):
  File "/databricks/python/lib/python3.8/site-packages/sklearn/model_selection/_validation.py"

In [0]:
#predict 'gameDuration','FirstBlood','Dragon','Baron','Tower','Inhibitor'
predictWin = np.array([[1800,1,3,0,6,1],[1800,0,2,0,2,1],[1800,0,5,1,4,3]])

knn.predict(predictWin)

Out[273]: array([1, 0, 1])

##MLPClassifer

In [0]:
sel_champ = champ_sel_df.select('gameId','gameDuration','t1_champ1id','t1_champ2id','t1_champ3id','t1_champ4id','t1_champ5id',\
                                't2_champ1id','t2_champ2id','t2_champ3id','t2_champ4id','t2_champ5id')

champ_MLP_df = sel_champ.join(ban_champs_df, on='gameId')
champ_MLP_df.display()

gameId,gameDuration,t1_champ1id,t1_champ2id,t1_champ3id,t1_champ4id,t1_champ5id,t2_champ1id,t2_champ2id,t2_champ3id,t2_champ4id,t2_champ5id,t1_ban1,t1_ban2,t1_ban3,t1_ban4,t1_ban5,t2_ban1,t2_ban2,t2_ban3,t2_ban4,t2_ban5
3326118772,2475,Sivir,Fiora,Janna,Warwick,Orianna,Twitch,TahmKench,Kayn,Nasus,Lux,Annie,Illaoi,Thresh,Chogath,Yasuo,Malzahar,Nami,Jax,JarvanIV,Caitlyn
3323943181,1212,Zed,Malphite,Xayah,Ornn,MonkeyKing,Twitch,Nasus,Corki,Draven,Braum,Annie,Zac,Chogath,Janna,Gnar,Maokai,Gnar,Janna,Vayne,Brand
3329923107,1224,Riven,Kayn,Ivern,Pantheon,Jayce,Thresh,Varus,Syndra,Evelynn,Trundle,Annie,Darius,Malzahar,Maokai,Fiora,Janna,Irelia,Gragas,Kassadin,LeeSin
3262924776,2598,Poppy,Ezreal,Thresh,TwistedFate,Kayn,Jax,TahmKench,Tristana,Yasuo,Rengar,Annie,Talon,Caitlyn,Blitzcrank,Tryndamere,Riven,Fizz,Draven,Blitzcrank,Malzahar
3322553581,2307,Sion,TwistedFate,TahmKench,Twitch,Khazix,Amumu,Tristana,Shen,Lux,Ornn,Annie,Jinx,Blitzcrank,Darius,Galio,Yasuo,Zed,Fizz,Fiora,Leona
3279181405,1289,Sona,Lissandra,Kayn,Riven,Jhin,Shaco,Vayne,MonkeyKing,Velkoz,Karma,Annie,Malzahar,Tristana,Chogath,Gangplank,Khazix,Zed,Gnar,Fiora,Talon
3261010773,1812,Yasuo,Olaf,Trundle,Braum,Ashe,Amumu,Jhin,Blitzcrank,Ziggs,Ekko,Annie,Fiora,Darius,Tryndamere,Ahri,Caitlyn,Zac,MasterYi,Elise,Urgot
3230780157,2809,Khazix,Ahri,Sona,Twitch,Graves,Pantheon,Leblanc,Thresh,Caitlyn,Gragas,Annie,Zac,Draven,Syndra,Fizz,Zac,Katarina,Yasuo,Fiora,Lucian
3330336792,2164,Janna,Tristana,Darius,Lissandra,MonkeyKing,Ornn,Shyvana,Rakan,Azir,Xayah,Annie,Yasuo,MasterYi,Kayn,Lulu,Zed,Chogath,Kennen,KogMaw,Thresh
3308888113,1637,DrMundo,Janna,Morgana,Vayne,MasterYi,Tristana,Thresh,Kassadin,Trundle,Mordekaiser,Annie,Yasuo,Zac,Fiora,Darius,Lulu,Malzahar,Chogath,Cassiopeia,Twitch


In [0]:
champ_MLP_df = champ_MLP_df.filter('gameDuration >= 900')
champ_MLP_df = champ_MLP_df.drop('gameId','gameDuration')

In [0]:
from pyspark.ml import Pipeline
from pyspark.ml.feature import StringIndexer

indexers = [StringIndexer(inputCol=column, outputCol=column+"_index", stringOrderType='alphabetAsc').fit(champ_MLP_df) for column in list(champ_MLP_df.columns)]

pipeline = Pipeline(stages=indexers)
df_r = pipeline.fit(champ_MLP_df).transform(champ_MLP_df)

In [0]:
from pyspark.ml.feature import VectorAssembler
assembler = VectorAssembler(
  inputCols = [
      't1_ban1_index',
      't1_ban2_index',
      't1_ban3_index',
      't1_ban4_index',
      't1_ban5_index',
      't2_ban1_index',
      't2_ban2_index',
      't2_ban3_index',
      't2_ban4_index',
      't2_ban5_index'],
  outputCol = "features")

In [0]:
r_df_r = assembler.transform(df_r)

r_df_r.display()

t1_champ1id,t1_champ2id,t1_champ3id,t1_champ4id,t1_champ5id,t2_champ1id,t2_champ2id,t2_champ3id,t2_champ4id,t2_champ5id,t1_ban1,t1_ban2,t1_ban3,t1_ban4,t1_ban5,t2_ban1,t2_ban2,t2_ban3,t2_ban4,t2_ban5,t1_champ1id_index,t1_champ2id_index,t1_champ3id_index,t1_champ4id_index,t1_champ5id_index,t2_champ1id_index,t2_champ2id_index,t2_champ3id_index,t2_champ4id_index,t2_champ5id_index,t1_ban1_index,t1_ban2_index,t1_ban3_index,t1_ban4_index,t1_ban5_index,t2_ban1_index,t2_ban2_index,t2_ban3_index,t2_ban4_index,t2_ban5_index,features
Sivir,Fiora,Janna,Warwick,Orianna,Twitch,TahmKench,Kayn,Nasus,Lux,Annie,Illaoi,Thresh,Chogath,Yasuo,Malzahar,Nami,Jax,JarvanIV,Caitlyn,100.0,28.0,41.0,127.0,81.0,116.0,106.0,53.0,75.0,65.0,6.0,38.0,111.0,17.0,131.0,67.0,74.0,43.0,42.0,14.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 38.0, 111.0, 17.0, 131.0, 67.0, 74.0, 43.0, 42.0, 14.0))"
Zed,Malphite,Xayah,Ornn,MonkeyKing,Twitch,Nasus,Corki,Draven,Braum,Annie,Zac,Chogath,Janna,Gnar,Maokai,Gnar,Janna,Vayne,Brand,134.0,66.0,128.0,82.0,71.0,116.0,75.0,18.0,22.0,13.0,6.0,133.0,17.0,41.0,33.0,68.0,33.0,41.0,120.0,12.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 133.0, 17.0, 41.0, 33.0, 68.0, 33.0, 41.0, 120.0, 12.0))"
Riven,Kayn,Ivern,Pantheon,Jayce,Thresh,Varus,Syndra,Evelynn,Trundle,Annie,Darius,Malzahar,Maokai,Fiora,Janna,Irelia,Gragas,Kassadin,LeeSin,91.0,53.0,40.0,83.0,44.0,111.0,119.0,105.0,25.0,113.0,6.0,19.0,67.0,68.0,28.0,41.0,39.0,34.0,50.0,60.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 19.0, 67.0, 68.0, 28.0, 41.0, 39.0, 34.0, 50.0, 60.0))"
Poppy,Ezreal,Thresh,TwistedFate,Kayn,Jax,TahmKench,Tristana,Yasuo,Rengar,Annie,Talon,Caitlyn,Blitzcrank,Tryndamere,Riven,Fizz,Draven,Blitzcrank,Malzahar,84.0,26.0,111.0,115.0,53.0,43.0,106.0,112.0,131.0,90.0,6.0,108.0,14.0,11.0,114.0,91.0,29.0,22.0,11.0,67.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 108.0, 14.0, 11.0, 114.0, 91.0, 29.0, 22.0, 11.0, 67.0))"
Sion,TwistedFate,TahmKench,Twitch,Khazix,Amumu,Tristana,Shen,Lux,Ornn,Annie,Jinx,Blitzcrank,Darius,Galio,Yasuo,Zed,Fizz,Fiora,Leona,99.0,115.0,106.0,116.0,55.0,4.0,112.0,96.0,65.0,82.0,6.0,46.0,11.0,19.0,30.0,131.0,134.0,29.0,28.0,61.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 46.0, 11.0, 19.0, 30.0, 131.0, 134.0, 29.0, 28.0, 61.0))"
Sona,Lissandra,Kayn,Riven,Jhin,Shaco,Vayne,MonkeyKing,Velkoz,Karma,Annie,Malzahar,Tristana,Chogath,Gangplank,Khazix,Zed,Gnar,Fiora,Talon,102.0,62.0,53.0,91.0,45.0,95.0,120.0,71.0,122.0,48.0,6.0,67.0,112.0,17.0,31.0,55.0,134.0,33.0,28.0,108.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 67.0, 112.0, 17.0, 31.0, 55.0, 134.0, 33.0, 28.0, 108.0))"
Yasuo,Olaf,Trundle,Braum,Ashe,Amumu,Jhin,Blitzcrank,Ziggs,Ekko,Annie,Fiora,Darius,Tryndamere,Ahri,Caitlyn,Zac,MasterYi,Elise,Urgot,131.0,80.0,113.0,13.0,7.0,4.0,45.0,11.0,135.0,23.0,6.0,28.0,19.0,114.0,1.0,14.0,133.0,69.0,24.0,118.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 28.0, 19.0, 114.0, 1.0, 14.0, 133.0, 69.0, 24.0, 118.0))"
Khazix,Ahri,Sona,Twitch,Graves,Pantheon,Leblanc,Thresh,Caitlyn,Gragas,Annie,Zac,Draven,Syndra,Fizz,Zac,Katarina,Yasuo,Fiora,Lucian,55.0,1.0,102.0,116.0,35.0,83.0,59.0,111.0,14.0,34.0,6.0,133.0,22.0,105.0,29.0,133.0,51.0,131.0,28.0,63.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 133.0, 22.0, 105.0, 29.0, 133.0, 51.0, 131.0, 28.0, 63.0))"
Janna,Tristana,Darius,Lissandra,MonkeyKing,Ornn,Shyvana,Rakan,Azir,Xayah,Annie,Yasuo,MasterYi,Kayn,Lulu,Zed,Chogath,Kennen,KogMaw,Thresh,41.0,112.0,19.0,62.0,71.0,82.0,97.0,86.0,9.0,128.0,6.0,131.0,69.0,53.0,64.0,134.0,17.0,54.0,58.0,111.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 131.0, 69.0, 53.0, 64.0, 134.0, 17.0, 54.0, 58.0, 111.0))"
DrMundo,Janna,Morgana,Vayne,MasterYi,Tristana,Thresh,Kassadin,Trundle,Mordekaiser,Annie,Yasuo,Zac,Fiora,Darius,Lulu,Malzahar,Chogath,Cassiopeia,Twitch,21.0,41.0,73.0,120.0,69.0,112.0,111.0,50.0,113.0,72.0,6.0,131.0,133.0,28.0,19.0,64.0,67.0,17.0,16.0,116.0,"Map(vectorType -> dense, length -> 10, values -> List(6.0, 131.0, 133.0, 28.0, 19.0, 64.0, 67.0, 17.0, 16.0, 116.0))"


In [0]:
r_df_r = r_df_r.select('features','t1_champ1id_index')

In [0]:
splits = r_df_r.randomSplit([0.7,0.3])
train_df = splits[0]
test_df = splits[1]
train_df.count(), test_df.count(), r_df_r.count()

Out[280]: (33216, 14450, 47666)

In [0]:
from pyspark.ml.classification import MultilayerPerceptronClassifier

layers = [10, 5, 6, 138]

mlp = MultilayerPerceptronClassifier(labelCol="t1_champ1id_index",featuresCol="features",layers=layers,maxIter=10,seed=5)

mlp_model = mlp.fit(train_df)

In [0]:
pred_df = mlp_model.transform(test_df)
pred_df.show()

+--------------------+-----------------+--------------------+--------------------+----------+
|            features|t1_champ1id_index|       rawPrediction|         probability|prediction|
+--------------------+-----------------+--------------------+--------------------+----------+
|[0.0,19.0,28.0,8....|             90.0|[-1.0183673191636...|[0.00211613850692...|     112.0|
|[0.0,132.0,116.0,...|              1.0|[-1.0183673191649...|[0.00211613850691...|     112.0|
|[1.0,17.0,26.0,53...|             29.0|[-1.0183936599807...|[0.00211608100374...|     112.0|
|[1.0,19.0,29.0,10...|             97.0|[-1.0183673191649...|[0.00211613850691...|     112.0|
|[1.0,19.0,41.0,11...|             94.0|[-1.0183673133354...|[0.00211613855486...|     112.0|
|[1.0,43.0,41.0,64...|             69.0|[-1.0183673191649...|[0.00211613850691...|     112.0|
|[1.0,51.0,29.0,13...|             69.0|[-1.0183673191649...|[0.00211613850691...|     112.0|
|[1.0,64.0,12.0,22...|             28.0|[-1.0183673053446...

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

evaluator = MulticlassClassificationEvaluator(labelCol = 't1_champ1id_index', predictionCol = 'prediction', metricName = 'accuracy')
mlpacc = evaluator.evaluate(pred_df)
mlpacc

Out[283]: 0.02718970527189705

In [0]:
from pyspark.ml.tuning import ParamGridBuilder
from pyspark.ml.tuning import TrainValidationSplit

clf = MultilayerPerceptronClassifier(labelCol='t1_champ1id_index',layers=layers)
pipeline2 = Pipeline(stages=[clf])
x1 = 'stepSize'
x2 = 'maxIter'
paramGrid = ParamGridBuilder() \
    .addGrid(getattr(clf,x1), [0.1, 0.3]) \
    .addGrid(getattr(clf,x2),[1,5,10])\
    .build()
evaluator = MulticlassClassificationEvaluator(labelCol='t1_champ1id_index', predictionCol='prediction', metricName='accuracy')

crossval = TrainValidationSplit(estimator=pipeline2,estimatorParamMaps=paramGrid,evaluator=evaluator,trainRatio=0.7)

cvModel = crossval.fit(train_df)



In [0]:
list(zip(cvModel.validationMetrics, cvModel.getEstimatorParamMaps()))

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-267825188256622>[0m in [0;36m<module>[0;34m[0m
[0;32m----> 1[0;31m [0mlist[0m[0;34m([0m[0mzip[0m[0;34m([0m[0mcvModel[0m[0;34m.[0m[0mvalidationMetrics[0m[0;34m,[0m [0mcvModel[0m[0;34m.[0m[0mgetEstimatorParamMaps[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31mNameError[0m: name 'cvModel' is not defined

##Decision Tree Classifier

In [0]:
dtc_df = game_df.select('winner','firstBlood','firstTower','firstInhibitor','firstBaron','firstDragon','firstRiftHerald',\
                        't1_towerKills','t1_inhibitorKills','t1_baronKills','t1_dragonKills',\
                        't2_towerKills','t2_inhibitorKills', 't2_baronKills', 't2_dragonKills')


In [0]:
winner = game_df.select('winner')

In [0]:
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.feature import StringIndexer, VectorIndexer

dtc_assembler = VectorAssembler(
    inputCols = [
      'firstBlood',
      'firstTower',
      'firstInhibitor',
      'firstDragon',
      'firstRiftHerald',
      't1_towerKills',
      't1_inhibitorKills',
      't1_baronKills',
      't1_dragonKills',
      't2_towerKills',
      't2_inhibitorKills', 
      't2_baronKills', 
      't2_dragonKills'],
  outputCol = "dtc_features")

dtc_assembler_df = dtc_assembler.transform(dtc_df)

In [0]:
(dtc_trainData, dtc_testData) = dtc_assembler_df.randomSplit([0.7, 0.3])

In [0]:
dtc = DecisionTreeClassifier(labelCol="winner", featuresCol="dtc_features")

In [0]:
dtc_model = dtc.fit(dtc_trainData)

In [0]:
dtc_pred_df = dtc_model.transform(dtc_testData)
dtc_pred_df.display()

winner,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,t1_towerKills,t1_inhibitorKills,t1_baronKills,t1_dragonKills,t2_towerKills,t2_inhibitorKills,t2_baronKills,t2_dragonKills,dtc_features,rawPrediction,probability,prediction
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Map(vectorType -> sparse, length -> 13, indices -> List(), values -> List())","Map(vectorType -> dense, length -> 3, values -> List(0.0, 361.0, 262.0))","Map(vectorType -> dense, length -> 3, values -> List(0.0, 0.579454253611557, 0.420545746388443))",1.0


In [0]:
dtc_evaluator = MulticlassClassificationEvaluator(labelCol = 'winner', predictionCol = 'prediction', metricName = 'accuracy')
dtcacc = dtc_evaluator.evaluate(dtc_pred_df)
dtcacc

Out[294]: 0.964854581801767

In [0]:
from pyspark.ml.linalg import Vectors
a= np.array([[1,1,2,1,1,1,10,2,1,4,7,2,1,1]])
x1=Vectors.dense(a)
c=dtc_model.predict(x1)
print("winner is :" , c)

winner is : 2.0


In [0]:
b = np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
x2=Vectors.dense(b)
c2=dtc_model.predict(x2)
print("winner is :" , c2)

winner is : 1.0
