In [1]:
import pandas
from google.colab import drive
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import random
from tabulate import tabulate
drive.mount('/content/drive')
players = pandas.read_csv('/content/drive/MyDrive/EECS4412/wc_players.csv')

# focus on 'k', 'd', 'a'
Old = players[['k','d','a']]

selectcopy = Old.copy(deep=True)
filter = selectcopy[(np.abs(stats.zscore(selectcopy)) < 3).all(axis=1)] #Drop the row with outliers

filter = filter.dropna().reset_index(drop=True)#Drop all the instances with missing values & reset the indeces
filter = filter.drop(labels = range(1000,len(filter)),axis = 0) #limit the row to 1000 rows

filter.to_csv('T1Old.csv')


Mounted at /content/drive


In [2]:
from sklearn.preprocessing import KBinsDiscretizer
est_k = KBinsDiscretizer(n_bins=4, encode='ordinal')
# the bin with equal frequency
est_k.fit(filter[['k']])
print("the edge of bin for 'k':",est_k.bin_edges_[0])
discretizer_k = est_k.transform(filter[['k']])
discretizer_k=pd.DataFrame(discretizer_k.astype(int))

the edge of bin for 'k': [0. 1. 2. 4. 9.]


In [3]:
est_d = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
# the bin with equal Equal Interval Approach
est_d.fit(filter[['d']])
print("the edge of bin for 'd':",est_d.bin_edges_[0])
discretizer_d = est_d.transform(filter[['d']])
discretizer_d=pd.DataFrame(discretizer_d.astype(int))

the edge of bin for 'd': [0.         2.66666667 5.33333333 8.        ]


In [4]:
est_a = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='kmeans')
# the bin with equal kmeans Approach
est_a.fit(filter[['a']])
print("the edge of bin for 'a':",est_a.bin_edges_[0])
discretizer_a = est_a.transform(filter[['a']])
discretizer_a=pd.DataFrame(discretizer_a.astype(int))

the edge of bin for 'a': [ 0.          5.22137476 10.21904624 17.        ]


In [5]:
#combine the discretized result together
discretized = pd.concat([discretizer_k,discretizer_d, discretizer_a],axis=1)
discretized.columns = filter.columns

# make a bit represent of the result
discretizations = ['Least KILL','Less KILL','More KILL','Most KILL','Least DEATH',
                   'Medium DEATH','Most DEATH','Least ASSIST','Medium ASSIST','Most ASSIST']
zero_data = np.zeros(shape=(1000,10))
result = pd.DataFrame(zero_data.astype(int), columns=discretizations)

tmp = pd.concat([result,discretized],axis=1)

# transform into bit form
def transform_bit(x, columns):
  x[columns[x['k']]] = 1
  x[columns[x['d']+4]] = 1
  x[columns[x['a']+7]] = 1

tmp.apply(lambda x:transform_bit(x, discretizations), axis=1)
result = tmp[discretizations]

In [6]:
result.to_csv("T1Disc.csv")
result.head(10)

Unnamed: 0,Least KILL,Less KILL,More KILL,Most KILL,Least DEATH,Medium DEATH,Most DEATH,Least ASSIST,Medium ASSIST,Most ASSIST
0,1,0,0,0,0,1,0,1,0,0
1,0,1,0,0,0,1,0,1,0,0
2,0,1,0,0,0,1,0,0,1,0
3,0,0,1,0,0,1,0,1,0,0
4,0,0,1,0,0,1,0,1,0,0
5,0,1,0,0,1,0,0,0,0,1
6,0,0,0,1,1,0,0,0,1,0
7,0,0,1,0,1,0,0,0,1,0
8,0,0,1,0,0,1,0,1,0,0
9,0,1,0,0,0,1,0,0,1,0


**Itemsets**

In [7]:
from mlxtend.frequent_patterns import apriori,association_rules
frequent_itemsets=apriori(result, min_support=0.01, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
print(frequent_itemsets)

    support                                  itemsets  length
0     0.207                              (Least KILL)       1
1     0.215                               (Less KILL)       1
2     0.297                               (More KILL)       1
3     0.281                               (Most KILL)       1
4     0.518                             (Least DEATH)       1
..      ...                                       ...     ...
60    0.010    (More KILL, Medium ASSIST, Most DEATH)       3
61    0.086    (Least ASSIST, Most KILL, Least DEATH)       3
62    0.081   (Medium ASSIST, Most KILL, Least DEATH)       3
63    0.045   (Least ASSIST, Medium DEATH, Most KILL)       3
64    0.041  (Medium ASSIST, Medium DEATH, Most KILL)       3

[65 rows x 3 columns]


In [8]:
most_frequent_itemsets = frequent_itemsets[ (frequent_itemsets['length'] >= 2) ].sort_values(by=['support'],ascending=False).head(10)
most_frequent_itemsets

Unnamed: 0,support,itemsets,length
34,0.271,"(Least ASSIST, Least DEATH)",2
37,0.254,"(Least ASSIST, Medium DEATH)",2
35,0.197,"(Medium ASSIST, Least DEATH)",2
28,0.174,"(Most KILL, Least DEATH)",2
25,0.157,"(More KILL, Least ASSIST)",2
22,0.156,"(More KILL, Least DEATH)",2
31,0.136,"(Least ASSIST, Most KILL)",2
13,0.131,"(Least ASSIST, Least KILL)",2
19,0.13,"(Least ASSIST, Less KILL)",2
32,0.126,"(Medium ASSIST, Most KILL)",2


Top 5

In [86]:
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.001)
conf_top5 = rules[(rules['support'] >= 0.1259)].sort_values(by=['confidence'],ascending=False).head(5)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275


bottom 5

In [10]:
conf_bot5 = rules[(rules['support'] >= 0.1259)].sort_values(by=['confidence'],ascending=True).head(5)
conf_bot5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
18,(Least ASSIST),(Less KILL),0.554,0.215,0.13,0.234657,1.091428,0.01089,1.025684
6,(Least ASSIST),(Least KILL),0.554,0.207,0.131,0.236462,1.142329,0.016322,1.038586
42,(Least ASSIST),(Most KILL),0.554,0.281,0.136,0.245487,0.873621,-0.019674,0.952933
31,(Least ASSIST),(More KILL),0.554,0.297,0.157,0.283394,0.954187,-0.007538,0.981013
25,(Least DEATH),(More KILL),0.518,0.297,0.156,0.301158,1.014001,0.002154,1.00595


In [69]:
top5 = [['Least KILL','Least ASSIST'],['Most KILL','Least DEATH'],['Less KILL','Least ASSIST'],['Medium DEATH','Least ASSIST'],['Medium ASSIST','Least DEATH']]

f11 = {}
f10 = {}
f01 = {}
f00 = {}

for i in range(5):
  f11[i] = 0
  f10[i] = 0
  f01[i] = 0
  f00[i] = 0

def interest(row,XY,XNY,NXY,NXNY, toplist):
  for t in range(5):
    x = row[toplist[t][0]].item()
    y = row[toplist[t][1]].item()
    if x & y:
      XY[t] += 1
    if x and (not y):
      XNY[t] += 1
    if (not x) and y:
      NXY[t] += 1
    if (not x) and (not y):
      NXNY[t] += 1

result.apply(lambda x : interest(x, f11,f10,f01,f00, top5), axis=1)

print(f11)
print(f10)
print(f01)
print(f00)

{0: 131, 1: 174, 2: 130, 3: 254, 4: 197}
{0: 76, 1: 107, 2: 85, 3: 168, 4: 149}
{0: 423, 1: 344, 2: 424, 3: 300, 4: 321}
{0: 370, 1: 375, 2: 361, 3: 278, 4: 333}


In [100]:
conf_top5[['f11', 'f10', 'f01', 'f00']] = [f11.values(), f10.values(), f01.values(), f00.values()]
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333


In [103]:
def LIFT(row):
  pxy = row[['f11']].item()
  px = row[['antecedent support']].item()
  py = row[['consequent support']].item()
  return (pxy)/1000/px*py
conf_top5['LIFT'] =conf_top5.apply(LIFT, axis=1)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00,LIFT
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370,0.350599
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375,0.320754
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361,0.334977
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278,0.33345
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333,0.294931


In [107]:
import math
def CORRELATION(row):
  f11 = row[['f11']].item()
  f1p = row[['f11']].item()+row[['f10']].item()
  fp1 = row[['f11']].item()+row[['f01']].item()
  f0p = row[['f00']].item()+row[['f01']].item()
  fp0 = row[['f00']].item()+row[['f10']].item()
  return (1000*f11-f1p*fp1)/(math.sqrt(f1p*fp1*f0p*fp0))
conf_top5['CORRELATION'] =conf_top5.apply(CORRELATION, axis=1)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00,LIFT,CORRELATION
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370,0.350599,0.081046
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375,0.320754,0.126635
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361,0.334977,0.053328
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278,0.33345,0.082332
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333,0.294931,0.074769


In [108]:
def ODD_RATIO(row):
  f11 = row[['f11']].item()
  f00 = row[['f00']].item()
  f10 = row[['f10']].item()
  f01 = row[['f01']].item()
  return (f11*f00)/(f10*f01)
conf_top5['ODDS RATIO'] =conf_top5.apply(ODD_RATIO, axis=1)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00,LIFT,CORRELATION,ODDS RATIO
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370,0.350599,0.081046,1.507714
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375,0.320754,0.126635,1.772712
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361,0.334977,0.053328,1.302164
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278,0.33345,0.082332,1.401032
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333,0.294931,0.074769,1.371574


In [109]:
def INTEREST(row):
  f11 = row[['f11']].item()
  f1p = row[['f11']].item()+row[['f10']].item()
  fp1 = row[['f11']].item()+row[['f01']].item()
  return (f11*1000)/(f1p*fp1)
conf_top5['INTEREST'] =conf_top5.apply(INTEREST, axis=1)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00,LIFT,CORRELATION,ODDS RATIO,INTEREST
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370,0.350599,0.081046,1.507714,1.142329
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375,0.320754,0.126635,1.772712,1.1954
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361,0.334977,0.053328,1.302164,1.091428
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278,0.33345,0.082332,1.401032,1.086454
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333,0.294931,0.074769,1.371574,1.099159


In [110]:
def COSINE(row):
  f11 = row[['f11']].item()
  f1p = row[['f11']].item()+row[['f10']].item()
  fp1 = row[['f11']].item()+row[['f01']].item()
  return f11/math.sqrt(f1p*fp1)
conf_top5['COSINE'] =conf_top5.apply(COSINE, axis=1)
conf_top5

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,f11,f10,f01,f00,LIFT,CORRELATION,ODDS RATIO,INTEREST,COSINE
7,(Least KILL),(Least ASSIST),0.207,0.554,0.131,0.63285,1.142329,0.016322,1.214763,131,76,423,370,0.350599,0.081046,1.507714,1.142329,0.38684
36,(Most KILL),(Least DEATH),0.281,0.518,0.174,0.619217,1.1954,0.028442,1.265813,174,107,344,375,0.320754,0.126635,1.772712,1.1954,0.45607
19,(Less KILL),(Least ASSIST),0.215,0.554,0.13,0.604651,1.091428,0.01089,1.128118,130,85,424,361,0.334977,0.053328,1.302164,1.091428,0.376677
55,(Medium DEATH),(Least ASSIST),0.422,0.554,0.254,0.601896,1.086454,0.020212,1.12031,254,168,300,278,0.33345,0.082332,1.401032,1.086454,0.525318
50,(Medium ASSIST),(Least DEATH),0.346,0.518,0.197,0.569364,1.099159,0.017772,1.119275,197,149,321,333,0.294931,0.074769,1.371574,1.099159,0.465332


In [112]:
conf_top5[['antecedents','consequents','LIFT','CORRELATION','ODDS RATIO','INTEREST','COSINE']]

Unnamed: 0,antecedents,consequents,LIFT,CORRELATION,ODDS RATIO,INTEREST,COSINE
7,(Least KILL),(Least ASSIST),0.350599,0.081046,1.507714,1.142329,0.38684
36,(Most KILL),(Least DEATH),0.320754,0.126635,1.772712,1.1954,0.45607
19,(Less KILL),(Least ASSIST),0.334977,0.053328,1.302164,1.091428,0.376677
55,(Medium DEATH),(Least ASSIST),0.33345,0.082332,1.401032,1.086454,0.525318
50,(Medium ASSIST),(Least DEATH),0.294931,0.074769,1.371574,1.099159,0.465332
