In [93]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules

In [94]:
username = 'Mr-Barros'
df = pd.read_csv('../../dados/base/chess_games_chesscom.csv')
df = df[df['player'] == username]

print(f'{username} games: {df.shape}')
print(df.head())

Mr-Barros games: (2136, 35)
                                           url  \
0  https://www.chess.com/game/live/13542785939   
1   https://www.chess.com/game/daily/330957418   
2  https://www.chess.com/game/live/13557082425   
3  https://www.chess.com/game/live/13557721435   
4  https://www.chess.com/game/live/13558396887   

                                                 pgn time_control  \
0  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
1  [Event "Let's Play"]\n[Site "Chess.com"]\n[Dat...    1/1209600   
2  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
3  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
4  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   

              end_time rated  \
0  2021-04-30 17:44:18  True   
1  2021-04-30 17:50:33  True   
2  2021-04-30 21:37:31  True   
3  2021-04-30 21:52:26  True   
4  2021-04-30 22:04:11  True   

                                          accuracies  \
0      

## Falta discretizar winrate_with_opening.

In [95]:
for column in ['time_control', 'time_class', 'rated', 'player_pieces', 'winner', 'win_method', 'opening_eval', 'midgame_eval']:
    print(f'Unique values of {column}: {df[column].unique()}')

df.isna().sum()

Unique values of time_control: ['600' '1/1209600' '1800' '60' '3600' '180' '300' '60+1' '180+2' '7200'
 '1/0']
Unique values of time_class: ['rapid' 'daily' 'bullet' 'blitz']
Unique values of rated: [True False]
Unique values of player_pieces: ['white' 'black']
Unique values of winner: ['white' 'black' 'draw']
Unique values of win_method: ['checkmated' 'resigned' 'timeout' 'agreed' 'abandoned'
 'timevsinsufficient' 'stalemate' 'insufficient' 'repetition']
Unique values of opening_eval: [ 2.  0. -1. -3.  3.  1. -4. -2.  4.]
Unique values of midgame_eval: [-4.  0.  3. -1. -3.  1.  4.  2. -2.]


url                        0
pgn                        0
time_control               0
end_time                   0
rated                      0
accuracies              1992
tcn                        0
uuid                       0
initial_setup           2135
fen                        0
time_class                 0
rules                      0
eco                        0
start_time              2134
player                     0
player_rating              0
opponent                   0
opponent_rating            0
player_pieces              0
winner                     0
win_method                 0
move_list                  0
move_evals                 0
material_count             0
winrate_with_opening       0
opening_eval               0
midgame_eval               0
Brilliant                  0
Best                       0
Excellent                  0
Good                       0
Inaccuracy                 0
Mistake                    0
Blunder                    0
Missed Mate   

In [96]:
df = df.drop([
            df.columns[0], 
            'url', 
            'accuracies', 
            'tcn', 
            'uuid', 
            'initial_setup', 
            'fen', 
            'start_time', 
            'move_list',
            'move_evals',
            'material_count',
            ], axis=1)

# We only want to analyse normal chess games
df = df[df['rules'] == 'chess']

print(f'Unique value of win_method {df["win_method"].unique()}')

print(df.head())

Unique value of win_method ['checkmated' 'resigned' 'timeout' 'agreed' 'abandoned'
 'timevsinsufficient' 'stalemate' 'insufficient' 'repetition']
                                                 pgn time_control  \
0  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
1  [Event "Let's Play"]\n[Site "Chess.com"]\n[Dat...    1/1209600   
2  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
3  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
4  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   

              end_time rated time_class  rules             eco     player  \
0  2021-04-30 17:44:18  True      rapid  chess  Englund Gambit  Mr-Barros   
1  2021-04-30 17:50:33  True      daily  chess      Kings Pawn  Mr-Barros   
2  2021-04-30 21:37:31  True      rapid  chess     Center Game  Mr-Barros   
3  2021-04-30 21:52:26  True      rapid  chess     Center Game  Mr-Barros   
4  2021-04-30 22:04:11  True      rapid  chess     Que

In [97]:
df.isna().sum()

pgn                     0
time_control            0
end_time                0
rated                   0
time_class              0
rules                   0
eco                     0
player                  0
player_rating           0
opponent                0
opponent_rating         0
player_pieces           0
winner                  0
win_method              0
winrate_with_opening    0
opening_eval            0
midgame_eval            0
Brilliant               0
Best                    0
Excellent               0
Good                    0
Inaccuracy              0
Mistake                 0
Blunder                 0
Missed Mate             0
dtype: int64

In [98]:
# Add the 'player_won' column
df['player_won'] = df['player_pieces'] == df['winner']

# Display the unique values for the new 'player_won' column
print(f'Unique values of player_won: {df["player_won"].unique()}')


Unique values of player_won: [ True False]


In [99]:
# makes it so the value is in reference to the player advantage
df.loc[df['player_pieces'] == 'black', 'opening_eval'] = (-1)*df.loc[df['player_pieces'] == 'black', 'opening_eval']
df.loc[df['player_pieces'] == 'black', 'midgame_eval'] = (-1)*df.loc[df['player_pieces'] == 'black', 'midgame_eval']

In [100]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [101]:
print(df.columns.tolist())

['pgn', 'time_control', 'end_time', 'rated', 'time_class', 'rules', 'eco', 'player', 'player_rating', 'opponent', 'opponent_rating', 'player_pieces', 'winner', 'win_method', 'winrate_with_opening', 'opening_eval', 'midgame_eval', 'Brilliant', 'Best', 'Excellent', 'Good', 'Inaccuracy', 'Mistake', 'Blunder', 'Missed Mate', 'player_won']


In [102]:
df.dtypes

pgn                      object
time_control             object
end_time                 object
rated                    object
time_class               object
rules                    object
eco                      object
player                   object
player_rating           float64
opponent                 object
opponent_rating         float64
player_pieces            object
winner                   object
win_method               object
winrate_with_opening     object
opening_eval            float64
midgame_eval            float64
Brilliant               float64
Best                    float64
Excellent               float64
Good                    float64
Inaccuracy              float64
Mistake                 float64
Blunder                 float64
Missed Mate             float64
player_won                 bool
dtype: object

In [103]:
numerical_df = df[[
        'player_rating',
        'opponent_rating',
        'opening_eval',
        'midgame_eval',
        'player_won'
        ]]

numerical_df.corr()

Unnamed: 0,player_rating,opponent_rating,opening_eval,midgame_eval,player_won
player_rating,1.0,0.882727,-0.009048,-0.015175,0.015048
opponent_rating,0.882727,1.0,-0.065055,-0.073345,-0.107266
opening_eval,-0.009048,-0.065055,1.0,0.362238,0.364575
midgame_eval,-0.015175,-0.073345,0.362238,1.0,0.488906
player_won,0.015048,-0.107266,0.364575,0.488906,1.0


In [104]:

# Create a DataFrame with categorical columns of interest
data = df[['time_class', 'eco', 'player_pieces', 'player_won', 'opening_eval', 'midgame_eval','Missed Mate','Blunder','win_method']]

# Convert categorical variables to a one-hot encoded format
data_encoded = pd.get_dummies(data, columns=['time_class', 'eco', 'player_pieces', 'player_won', 'opening_eval', 'midgame_eval','Missed Mate','Blunder','win_method'])

# Perform Apriori to find frequent itemsets
frequent_itemsets = apriori(data_encoded, min_support=0.06, use_colnames=True)

n_items = frequent_itemsets.count()
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.544007,(time_class_blitz)
1,0.404963,(time_class_rapid)
2,0.090824,(eco_Englund Gambit)
3,0.070225,(eco_Italian Game)
4,0.08427,(eco_Kings Pawn)


In [162]:
# Generate association rules with a minimum confidence threshold

conf_min = 0.5

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=conf_min, num_itemsets=n_items)

rules_filtered = rules.sort_values(by="confidence", ascending=False)

display(rules_filtered)

#bad rules 😡

#type_of_victory -> result
#eco <-> color_piece
#Missed_Mate 0.0 <-> {}

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
997,"(midgame_eval_-3.0, win_method_resigned, Misse...",(player_won_False),0.063202,0.494382,0.063202,1.0,2.022727,1.0,0.031956,inf,0.539730,0.127841,1.000000,0.563920
790,"(win_method_checkmated, midgame_eval_-4.0, Mis...",(player_pieces_white),0.080524,0.501873,0.080524,1.0,1.992537,1.0,0.040111,inf,0.541752,0.160448,1.000000,0.580224
829,"(midgame_eval_-3.0, win_method_resigned, Blund...",(player_won_False),0.063670,0.494382,0.063670,1.0,2.022727,1.0,0.032193,inf,0.540000,0.128788,1.000000,0.564394
753,"(win_method_checkmated, midgame_eval_-4.0, pla...",(player_pieces_white),0.073034,0.501873,0.073034,1.0,1.992537,1.0,0.036380,inf,0.537374,0.145522,1.000000,0.572761
857,"(win_method_checkmated, Blunder_0.0, Missed Ma...",(player_won_True),0.063670,0.505618,0.063670,1.0,1.977778,1.0,0.031478,inf,0.528000,0.125926,1.000000,0.562963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
928,"(win_method_resigned, Missed Mate_0.0, Blunder...",(time_class_blitz),0.126404,0.544007,0.063202,0.5,0.919105,1.0,-0.005563,0.911985,-0.091529,0.104086,-0.096509,0.308090
618,"(Missed Mate_0.0, player_won_True, time_class_...",(player_pieces_black),0.168539,0.498127,0.084270,0.5,1.003759,1.0,0.000316,1.003745,0.004505,0.144695,0.003731,0.334586
325,"(win_method_checkmated, player_won_True)",(player_pieces_white),0.182584,0.501873,0.091292,0.5,0.996269,1.0,-0.000342,0.996255,-0.004561,0.153907,-0.003759,0.340951
630,"(Missed Mate_0.0, player_won_True, time_class_...",(player_pieces_white),0.168539,0.501873,0.084270,0.5,0.996269,1.0,-0.000316,0.996255,-0.004484,0.143770,-0.003759,0.333955


In [160]:
rules_filtered1 = rules[rules['antecedents'].apply(lambda x: any(item.startswith('eco') for item in x)) & rules['consequents'].apply(lambda x: any(item.startswith('player_pieces') for item in x))]

rules_filtered2 = rules[rules['antecedents'].apply(lambda x: any(item.startswith('player_pieces') for item in x)) & rules['consequents'].apply(lambda x: any(item.startswith('eco') for item in x))]

rules_filtered3 = rules[rules['antecedents'].apply(lambda x: 'Missed Mate_0.0' in x) | rules['consequents'].apply(lambda x: 'Missed Mate_0.0' in x)]

rules_filtered4 = rules[rules['consequents'].apply(lambda x: any(item.startswith('player_won') for item in x)) & rules['antecedents'].apply(lambda x: any(item.startswith('win_method') for item in x))]

rules_filtered5 = rules[rules['antecedents'].apply(lambda x: any(item.startswith('player_won') for item in x)) & rules['consequents'].apply(lambda x: any(item.startswith('win_method') for item in x))]

rules_filtered_indexes = pd.concat([
    rules_filtered1,
    rules_filtered2,
    rules_filtered3,
    rules_filtered4,
    rules_filtered5
]).index

rules_filtered = rules.drop(rules_filtered_indexes)
rules_filtered

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(player_pieces_black),(time_class_blitz),0.498127,0.544007,0.266386,0.534774,0.983028,1.0,-0.004599,0.980154,-0.033258,0.343392,-0.020248,0.512224
1,(player_pieces_white),(time_class_blitz),0.501873,0.544007,0.277622,0.553172,1.016846,1.0,0.004599,1.020509,0.033258,0.361365,0.020097,0.531749
2,(time_class_blitz),(player_pieces_white),0.544007,0.501873,0.277622,0.510327,1.016846,1.0,0.004599,1.017265,0.036331,0.361365,0.016972,0.531749
3,(player_won_False),(time_class_blitz),0.494382,0.544007,0.274813,0.555871,1.021808,1.0,0.005865,1.026712,0.042211,0.359902,0.026017,0.530517
4,(time_class_blitz),(player_won_False),0.544007,0.494382,0.274813,0.505164,1.021808,1.0,0.005865,1.021788,0.046805,0.359902,0.021323,0.530517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
827,"(player_won_False, win_method_resigned, Blunde...",(midgame_eval_-3.0),0.105805,0.180243,0.063670,0.601770,3.338651,1.0,0.044600,2.058500,0.783362,0.286316,0.514209,0.477508
830,"(midgame_eval_-3.0, Blunder_1.0)","(player_won_False, win_method_resigned)",0.068352,0.310393,0.063670,0.931507,3.001054,1.0,0.042454,10.068258,0.715704,0.202080,0.900678,0.568318
852,"(win_method_resigned, midgame_eval_3.0, player...",(Blunder_0.0),0.154494,0.345506,0.094569,0.612121,1.771668,1.0,0.041191,1.687368,0.515148,0.233256,0.407361,0.442917
853,"(win_method_resigned, Blunder_0.0, player_won_...",(midgame_eval_3.0),0.137172,0.226592,0.094569,0.689420,3.042563,1.0,0.063487,2.490205,0.778058,0.351304,0.598427,0.553388


In [169]:
#rules that have player_won_True as a consequence
rules_player_won = rules


rules_player_won = rules_player_won.drop(rules_filtered3.index)

rules_player_won = rules[rules['consequents'].apply(lambda x: all('player_won_True' in item for item in x) or all('player_won_False' in item for item in x)) & rules['antecedents'].apply(lambda x: all(not item.startswith('win_method') for item in x))]

rules_player_won

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
4,(time_class_blitz),(player_won_False),0.544007,0.494382,0.274813,0.505164,1.021808,1.0,0.005865,1.021788,0.046805,0.359902,0.021323,0.530517
26,(time_class_rapid),(player_won_True),0.404963,0.505618,0.208801,0.515607,1.019756,1.0,0.004045,1.020622,0.032558,0.297532,0.020205,0.464285
35,(player_pieces_black),(player_won_False),0.498127,0.494382,0.251404,0.504699,1.020869,1.0,0.005139,1.020830,0.040732,0.339229,0.020405,0.506611
47,(player_pieces_white),(player_won_True),0.501873,0.505618,0.258895,0.515858,1.020253,1.0,0.005139,1.021151,0.039851,0.345841,0.020713,0.513948
60,(opening_eval_-3.0),(player_won_False),0.073034,0.494382,0.061798,0.846154,1.711538,1.0,0.025691,3.286517,0.448485,0.122222,0.695726,0.485577
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,"(midgame_eval_3.0, Blunder_0.0, player_pieces_...",(player_won_True),0.066011,0.505618,0.061330,0.929078,1.837510,1.0,0.027953,6.970787,0.487999,0.120183,0.856544,0.525187
776,"(Missed Mate_0.0, Blunder_0.0, player_pieces_w...",(player_won_True),0.158240,0.505618,0.115637,0.730769,1.445299,1.0,0.035628,1.836276,0.366021,0.210931,0.455420,0.479736
817,"(midgame_eval_-3.0, Missed Mate_0.0, Blunder_1.0)",(player_won_False),0.067884,0.494382,0.066479,0.979310,1.980878,1.0,0.032919,24.438202,0.531236,0.134089,0.959080,0.556890
836,"(midgame_eval_3.0, Blunder_0.0, Missed Mate_0.0)",(player_won_True),0.108146,0.505618,0.103933,0.961039,1.900722,1.0,0.049252,12.689139,0.531347,0.203857,0.921192,0.583297


In [172]:
#T-LIFT

T_lift = rules_filtered["lift"].sum()

#T-LIFT_Result

T_lift_R = rules_player_won["lift"].sum()

print(f"T-Lift: {T_lift}; T-Lift_Result {T_lift_R}")

T-Lift: 1570.9419716075558; T-Lift_Result 135.74874264869638
