In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import os
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import pandas as pd
import time

# run web driver headlessly
os.environ['MOZ_HEADLESS'] = '1'
binary = FirefoxBinary('/usr/bin/firefox') 

# considers percentages also numeric
def numeric(s):
    try:
        float(s.strip("%\n "))
        return True
    except ValueError:
        pass
 
    try:
        import unicodedata
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
 
    return False

In [2]:
# web scrape basic data into headerList and rowList
driver = webdriver.Firefox(firefox_binary=binary)
driver.get("https://octane.gg/stats/players/rlcs-career/#main")
delay = 2 # seconds
try:
    time.sleep(delay)
    headers = driver.find_elements_by_css_selector("section#\\#main table.vuetable > thead > tr:nth-child(1) th span")
    rows = driver.find_elements_by_css_selector("section#\\#main .vuetable-body tr")
    headerList = list(map(lambda x: x.text, headers))
    rowList = []
    for i in range(len(rows)):
        data = driver.find_elements_by_css_selector("section#\\#main .vuetable-body tr:nth-child(" + str(i+1) + ") > td")
        rowList.append(list(map(lambda x: float(x.text.strip("%\n ")) if len(x.text) > 0 and numeric(x.text) else x.text, data)))
except TimeoutException:
    print ("Loading took too much time!")
driver.quit()

playerDF = pd.DataFrame(columns=headerList, data=rowList)

In [3]:
# web scrape basic data into headerList and rowList
driver = webdriver.Firefox(firefox_binary=binary)
driver.get("https://octane.gg/stats/players/rlcs-career/#advanced")
delay = 2 # seconds
try:
    time.sleep(delay)
    headers = driver.find_elements_by_css_selector("section#\\#advanced table.vuetable > thead > tr:nth-child(1) th span")
    rows = driver.find_elements_by_css_selector("section#\\#advanced .vuetable-body tr")
    headerList = list(map(lambda x: x.text, headers))
    rowList = []
    for i in range(len(rows)):
        data = driver.find_elements_by_css_selector("section#\\#advanced .vuetable-body tr:nth-child(" + str(i+1) + ") > td")
        rowList.append(list(map(lambda x: float(x.text.strip("%\n ")) if len(x.text) > 0 and numeric(x.text) else x.text, data)))
except TimeoutException:
    print ("Loading took too much time!")
driver.quit()

# write playerVector to csv
playerDFAdv = pd.DataFrame(columns=headerList, data=rowList)
playerMerged = playerDF.merge(playerDFAdv, on='Player', suffixes=('', '_x'))
playerVector = playerMerged[playerDF['GP'] > 50].drop(columns=['','_x','GP_x', 'W%_x','Rating'])
playerVector.to_csv("rlstats.csv", sep=',')

In [4]:
# load playerVector from csv, optional
playerVector = pd.read_csv("rlstats.csv", sep=',')

In [4]:
from sklearn import preprocessing

scalar = preprocessing.StandardScaler().fit(playerVector.drop(columns=['Player']))
scaled_train = scalar.transform(playerVector.drop(columns=['Player']))
train_labels = playerVector['Player']


In [5]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# find best # clusters for future kmeans classification
kmeans = [KMeans(n_clusters=i) for i in range(1,10)]
score = [kmeans[i].fit(scaled_train).score(scaled_train) for i in range(len(kmeans))]
plt.plot(range(1,10),score, 'bx-', label='Pro RL Player-Type Clusters')
plt.xlabel('Number of Player Types')
plt.ylabel('Variance')
plt.title('Number of Player Types vs Variance')
plt.legend(loc=4)
plt.show()

<Figure size 640x480 with 1 Axes>

In [6]:
from sklearn.svm import SVC

# determine natural clusters of each player
kmeans = KMeans(n_clusters = 4, random_state=1000)
classes = [0,1,2,3]
cluster_builder = kmeans.fit(scaled_train, classes)
labels = cluster_builder.labels_

# build svc classifier
classifier = SVC(gamma='auto',kernel='linear', probability=True)
classifier.fit(scaled_train, labels)

# determine most important features for each of the 3 classes
feature_weights = classifier.coef_
headers = playerVector.drop(columns=['Player']).columns.values
#type0_pillars = sorted(range(len(feature_weights[0])), key=lambda i: int(feature_weights[0][i])**2)[-5:]
#type1_pillars = sorted(range(len(feature_weights[1])), key=lambda i: int(feature_weights[1][i])**2)[-5:]
##type2_pillars = sorted(range(len(feature_weights[2])), key=lambda i: int(feature_weights[2][i])**2)[-5:]
#type2_pillars = sorted(range(len(feature_weights[3])), key=lambda i: int(feature_weights[3][i])**2)[-5:]

print(list(playerVector.drop(columns=['Player'])))
print(feature_weights[0])
print(feature_weights[1])
print(feature_weights[2])
print(feature_weights[3])
averages = list(map(lambda x: (feature_weights[0][x] + feature_weights[1][x] + 
                               feature_weights[2][x] + feature_weights[3][x])/4
                   , range(len(feature_weights[0]))))
distances_from_average0 = list(map(lambda x : (feature_weights[0][x] - averages[0])**2, range(len(feature_weights[0]))))
distances_from_average1 = list(map(lambda x : (feature_weights[1][x] - averages[0])**2, range(len(feature_weights[1]))))
distances_from_average2 = list(map(lambda x : (feature_weights[2][x] - averages[0])**2, range(len(feature_weights[2]))))
distances_from_average3 = list(map(lambda x : (feature_weights[3][x] - averages[0])**2, range(len(feature_weights[3]))))

standouts0 =  sorted(range(len(distances_from_average0)), key=lambda i: distances_from_average0)[-5:]
standouts1 =  sorted(range(len(distances_from_average1)), key=lambda i: distances_from_average1)[-5:]
standouts2 =  sorted(range(len(distances_from_average2)), key=lambda i: distances_from_average2)[-5:]
standouts3 =  sorted(range(len(distances_from_average3)), key=lambda i: distances_from_average3)[-5:]

print('------Type 0 (Offensive Support)------')
for x in standouts0:
    print(headers[x] + " (" + str(feature_weights[0][x]) + ")")
print('------Type 1 (Support)------')
for x in standouts2:
    print(headers[x] + " (" + str(feature_weights[1][x]) + ")")
print('------Type 2 (Striker)------')
for x in standouts3:
    print(headers[x] + " (" + str(feature_weights[2][x]) + ")")
print('------Type 3 (Midfield)------')
for x in standouts3:
    print(headers[x] + " (" + str(feature_weights[3][x]) + ")")

print('\nplayer types')
print('Torment-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'Torment'].drop(columns=['Player'])))[0])
print('Gimmick-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'Gimmick'].drop(columns=['Player'])))[0])
print('ViolentPanda-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'ViolentPanda'].drop(columns=['Player'])))[0])
print('Kaydop-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'Kaydop'].drop(columns=['Player'])))[0])
print('GarrettG-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'GarrettG'].drop(columns=['Player'])))[0])
print('Kronovi-')
print(classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == 'Kronovi'].drop(columns=['Player'])))[0])

['GP', 'W%', 'SCPG', 'GPG', 'APG', 'SAPG', 'SHPG', 'ASP', 'A/G', 'SH%', 'GP%', 'PPG']
[-0.17495856  0.47934227 -0.50295979 -0.49624317  0.54479371 -0.58040366
 -0.37347541  0.95861382  0.90780819 -0.22477715 -0.58635191 -0.06062214]
[-0.34865506 -0.51880484 -0.25095553 -0.25314954 -0.0588246  -0.18852571
 -0.29708381  0.22230844  0.15054168 -0.05354553 -0.13976996 -0.21077923]
[-1.33263954 -0.86123387 -1.26835735  0.04473459 -0.32115515 -0.78281905
 -0.07479508  0.82252458 -0.5138237   0.1686762  -0.14338688  0.00942105]
[-0.55460785 -0.51382518 -0.02169916 -0.25689838 -0.34156613  0.11228785
 -0.29368911  0.20634938 -0.07813907 -0.06864267  0.13912714 -0.37767018]
------Type 0 (Offensive Support)------
ASP (0.9586138212009498)
A/G (0.907808194410298)
SH% (-0.22477715186561065)
GP% (-0.5863519144539572)
PPG (-0.06062214332417526)
------Type 1 (Support)------
ASP (0.2223084388111423)
A/G (0.1505416762110403)
SH% (-0.053545528562539246)
GP% (-0.13976995861661934)
PPG (-0.2107792255940907

In [12]:
# create csv of all players
file = ""
for index, pl in playerVector.iterrows():
    file += pl['Player'] + ","
    file += ','.join(str(x) for x in classifier.predict_proba(scalar.transform(playerVector[playerVector['Player'] == pl['Player']].drop(columns=['Player'])))[0])
    file += '\n'
print(file)

Gambit,0.0027884344609599592,0.003972142751778639,0.9918553657317513,0.0013840570555101674
Gimmick,0.004994848488852249,0.04935089602426487,0.9307481563212014,0.014906099165681303
Drippay,0.0010939189726929283,0.016184977241368352,0.9781686716902939,0.004552432095644761
SquishyMuffinz,0.014190145218907784,0.04493416265911279,0.8179384993511646,0.12293719277081466
DarkFire,0.06969209660315843,0.13104521293459126,0.7611181748386687,0.03814451562358155
kuxir97,0.0021221952335992583,0.005871802820974933,0.9577522799697897,0.03425372197563598
JKnaps,0.007788607096624872,0.15413274942547062,0.7543608033084306,0.0837178401694739
Express,0.04045454398954623,0.086494930809858,0.7306148103661345,0.14243571483446124
GarrettG,0.0018019758045135148,0.03746189393858534,0.8852280910400262,0.07550803921687485
M1k3Rules,0.08119345471216179,0.0032974669884583857,0.89326946382405,0.022239614475330054
JSTN,0.012657191727966937,0.018706405691744993,0.2231793411880114,0.7454570613922769
CJCJ,0.0487347918242