In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules

In [13]:
username = 'Mr-Barros'
df = pd.read_csv('../../dados/base/chess_games_chesscom.csv')
df = df[df['player'] == username]

print(f'{username} games: {df.shape}')
print(df.head())

Mr-Barros games: (2149, 27)
                                           url  \
0  https://www.chess.com/game/live/13542785939   
1   https://www.chess.com/game/daily/330957418   
2  https://www.chess.com/game/live/13557082425   
3  https://www.chess.com/game/live/13557721435   
4  https://www.chess.com/game/live/13558396887   

                                                 pgn time_control  \
0  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
1  [Event "Let's Play"]\n[Site "Chess.com"]\n[Dat...    1/1209600   
2  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
3  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
4  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   

              end_time  rated  \
0  2021-04-30 17:44:18   True   
1  2021-04-30 17:50:33   True   
2  2021-04-30 21:37:31   True   
3  2021-04-30 21:52:26   True   
4  2021-04-30 22:04:11   True   

                                          accuracies  \
0

## Falta discretizar winrate_with_opening.

In [14]:
for column in ['time_control', 'time_class', 'rated', 'player_pieces', 'winner', 'win_method', 'opening_eval', 'midgame_eval']:
    print(f'Unique values of {column}: {df[column].unique()}')

df.isna().sum()

Unique values of time_control: ['600' '1/1209600' '1800' '60' '3600' '180' '300' '60+1' '180+2' '7200'
 '1/0']
Unique values of time_class: ['rapid' 'daily' 'bullet' 'blitz']
Unique values of rated: [ True False]
Unique values of player_pieces: ['white' 'black']
Unique values of winner: ['white' 'black' 'draw']
Unique values of win_method: ['checkmated' 'resigned' 'timeout' 'agreed' 'abandoned'
 'timevsinsufficient' 'stalemate' 'threecheck' 'insufficient' 'repetition'
 'bughousepartnerlose']
Unique values of opening_eval: [ 2.  0. -1. -3.  3.  1. -4. -2.  4. nan]
Unique values of midgame_eval: [-4.  0.  3. -1. -3.  1.  4.  2. -2. nan]


url                        0
pgn                        4
time_control               0
end_time                   0
rated                      0
accuracies              2005
tcn                        5
uuid                       0
initial_setup           2145
fen                        0
time_class                 0
rules                      0
eco                        0
start_time              2147
player                     0
player_rating              0
opponent                   0
opponent_rating            0
player_pieces              0
winner                     0
win_method                 0
move_list                  5
move_evals                 5
material_count             8
winrate_with_opening       0
opening_eval               8
midgame_eval               8
dtype: int64

In [15]:
df = df.drop([
            df.columns[0], 
            'url', 
            'accuracies', 
            'tcn', 
            'uuid', 
            'initial_setup', 
            'fen', 
            'start_time', 
            'move_list',
            'move_evals',
            'material_count',
            ], axis=1)

# We only want to analyse normal chess games
df = df[df['rules'] == 'chess']

print(f'Unique value of win_method {df["win_method"].unique()}')

print(df.head())

Unique value of win_method ['checkmated' 'resigned' 'timeout' 'agreed' 'abandoned'
 'timevsinsufficient' 'stalemate' 'insufficient' 'repetition']
                                                 pgn time_control  \
0  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
1  [Event "Let's Play"]\n[Site "Chess.com"]\n[Dat...    1/1209600   
2  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
3  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   
4  [Event "Live Chess"]\n[Site "Chess.com"]\n[Dat...          600   

              end_time  rated time_class  rules             eco     player  \
0  2021-04-30 17:44:18   True      rapid  chess  Englund Gambit  Mr-Barros   
1  2021-04-30 17:50:33   True      daily  chess      Kings Pawn  Mr-Barros   
2  2021-04-30 21:37:31   True      rapid  chess     Center Game  Mr-Barros   
3  2021-04-30 21:52:26   True      rapid  chess     Center Game  Mr-Barros   
4  2021-04-30 22:04:11   True      rapid  chess  

In [16]:
df.isna().sum()

pgn                     0
time_control            0
end_time                0
rated                   0
time_class              0
rules                   0
eco                     0
player                  0
player_rating           0
opponent                0
opponent_rating         0
player_pieces           0
winner                  0
win_method              0
winrate_with_opening    0
opening_eval            4
midgame_eval            4
dtype: int64

In [17]:
# Add the 'result' column
def determine_result(row):
    if row['winner'] == 'draw':
        return 'draw'
    elif row['winner'] == row['player_pieces']:
        return 'win'
    else:
        return 'loss'

df['result'] = df.apply(determine_result, axis=1)

# Display the unique values for the new 'result' column
print(f'Unique values of player_won: {df["result"].unique()}')


Unique values of player_won: ['win' 'loss' 'draw']


In [18]:
# makes it so the value is in reference to the player advantage
df.loc[df['player_pieces'] == 'black', 'opening_eval'] = (-1)*df.loc[df['player_pieces'] == 'black', 'opening_eval']
df.loc[df['player_pieces'] == 'black', 'midgame_eval'] = (-1)*df.loc[df['player_pieces'] == 'black', 'midgame_eval']

In [19]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [20]:
print(df.columns.tolist())

['pgn', 'time_control', 'end_time', 'rated', 'time_class', 'rules', 'eco', 'player', 'player_rating', 'opponent', 'opponent_rating', 'player_pieces', 'winner', 'win_method', 'winrate_with_opening', 'opening_eval', 'midgame_eval', 'result']


In [21]:
df.dtypes

pgn                      object
time_control             object
end_time                 object
rated                      bool
time_class               object
rules                    object
eco                      object
player                   object
player_rating             int64
opponent                 object
opponent_rating           int64
player_pieces            object
winner                   object
win_method               object
winrate_with_opening    float64
opening_eval            float64
midgame_eval            float64
result                   object
dtype: object

In [22]:
numerical_df = df[[
        'player_rating',
        'opponent_rating',
        'opening_eval',
        'midgame_eval',
        'result'
        ]]

numerical_df.corr()

ValueError: could not convert string to float: 'win'

In [None]:

# Create a DataFrame with categorical columns of interest
data = df[['time_class', 'eco', 'player_pieces', 'win_method', 'player_won', 'opening_eval', 'midgame_eval']]

# Convert categorical variables to a one-hot encoded format
data_encoded = pd.get_dummies(data, columns=['time_class', 'eco', 'player_pieces', 'win_method', 'player_won', 'opening_eval', 'midgame_eval'])

# Perform Apriori to find frequent itemsets
frequent_itemsets = apriori(data_encoded, min_support=0.06, use_colnames=True)

frequent_itemsets.head()

In [None]:
# Generate association rules with a minimum confidence threshold
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

rules_sorted = rules.sort_values(by="confidence", ascending=False)

rules_sorted.head()

A regra com a maior confiança, "Se time_class_blitz e win_method_checkmated são utilizados, então player_won_True", apresenta uma confiança de 71%. Isso significa que, aproximadamente 71% das vezes em que o jogador utiliza time_class_blitz e vence por checkmate, ele efetivamente ganha a partida. O lift de 1.40 indica que essa combinação de eventos ocorre em uma probabilidade 40% maior do que se esses fatores ocorressem isoladamente.

In [None]:
#regras com confiança superiore a 55%

rules_55 = rules[rules['confidence'] > 0.55]
rules_55 = rules_55[rules_55['support'] > 0.12]

# rules_55 = rules_55[rules_55['consequents'].apply(lambda x: 'player_won_True' in x)]

rules_55_sorted = rules_55.sort_values(by='confidence', ascending=False)

rules_55_sorted

In [None]:
# Scatter plot using support (x), confidence (y), and lift (color scale)

plt.figure(figsize=(10,6))
scatter = plt.scatter(rules_55['support'], rules_55['confidence'], c=rules_55['lift'], cmap='viridis', s=100, edgecolor='k', alpha=0.7)
plt.title('Grafico regras de associação com confiança acima de 55%', fontsize=15)
plt.xlabel('Suporte', fontsize=12)
plt.ylabel('Confiança', fontsize=12)
colorbar = plt.colorbar(scatter)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
#rules that have player_won_True as a consequence

rules_player_won_True = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.50)

player_won_True_rules = rules_player_won_True[rules_player_won_True['consequents'].apply(lambda x: 'player_won_True' in x)]

player_won_True_rules_sorted = player_won_True_rules.sort_values(by='confidence', ascending=False)

player_won_True_rules_sorted.head()

In [None]:
plt.figure(figsize=(10, 6))
scatter = plt.scatter( player_won_True_rules_sorted['support'], player_won_True_rules_sorted['confidence'], c=player_won_True_rules_sorted['lift'], cmap='viridis', s=100, edgecolor='k', alpha=0.7)
plt.title('Regras de associação jogador ganha', fontsize=15)
plt.xlabel('Suporte', fontsize=12)
plt.ylabel('Confiança', fontsize=12)
colorbar = plt.colorbar(scatter)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
player_won_True_rules_sorted.plot(kind='bar', x='antecedents', y='confidence', figsize=(10,6), color='skyblue')
plt.title("Confidence of player_won_True")
plt.xlabel("Antecedents")
plt.ylabel("Confidence")
plt.xticks(rotation=90)
plt.show()

In [None]:
#rules that have player_won_False as a consequence

rules_player_won_False = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.50)

player_won_False_rules = rules_player_won_False[rules_player_won_False['consequents'].apply(lambda x: 'player_won_False' in x)]

player_won_False_rules_sorted = player_won_False_rules.sort_values(by='confidence', ascending=False)

player_won_False_rules_sorted.head()

In [None]:
plt.figure(figsize=(10, 6))
scatter = plt.scatter( player_won_False_rules_sorted['support'], player_won_False_rules_sorted['confidence'], c=player_won_False_rules_sorted['lift'], cmap='viridis', s=100, edgecolor='k', alpha=0.7)
plt.title('Regras de associação jogador perde', fontsize=15)
plt.xlabel('Suporte', fontsize=12)
plt.ylabel('Confiança', fontsize=12)
colorbar = plt.colorbar(scatter)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
player_won_False_rules_sorted.plot(kind='bar', x='antecedents', y='confidence', figsize=(10,6), color='skyblue')
plt.title("Confidence of player_won_False")
plt.xlabel("Antecedents")
plt.ylabel("Confidence")
plt.xticks(rotation=90)
plt.show()