In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load every recorded game and keep only the rows that belong to `username`.
# The CSV path is relative, so it is resolved against the kernel's working directory.
username = 'Mr-Barros'
df = (
    pd.read_csv('../../dados/base/chess_games_chesscom.csv')
    .loc[lambda games: games['player'] == username]
)

# Quick look at how many games were kept and what the rows look like.
print(f'{username} games: {df.shape}')
print(df.head())

FileNotFoundError: [Errno 2] No such file or directory: '../../dados/base/chess_games_chesscom.csv'

## TODO: falta discretizar `winrate_with_opening`.

In [None]:
# Columns whose distinct values are worth eyeballing before cleaning.
columns_to_inspect = [
    'time_control',
    'time_class',
    'rated',
    'player_pieces',
    'winner',
    'win_method',
    'opening_eval',
    'midgame_eval',
]
for column in columns_to_inspect:
    print(f'Unique values of {column}: {df[column].unique()}')

# Number of missing values per column (shown as the cell output).
df.isna().sum()

In [None]:
# Columns that are not used in the analysis below.
# NOTE(review): the first column is dropped by position — presumably a leftover
# index column from the CSV export; confirm before changing the input file.
columns_to_drop = [
    df.columns[0],
    'url',
    'accuracies',
    'tcn',
    'uuid',
    'initial_setup',
    'fen',
    'start_time',
    'move_list',
    'move_evals',
    'material_count',
]
df = df.drop(columns=columns_to_drop)

# Only standard chess games are analysed; rows with any other `rules` value are discarded.
is_standard_chess = df['rules'] == 'chess'
df = df[is_standard_chess]

print(f'Unique value of win_method {df["win_method"].unique()}')

print(df.head())

In [None]:
# Count the missing values in each column of the current `df`.
df.isna().sum()

In [None]:
# Add the boolean 'player_won' column: True when the winning colour equals the
# colour the player had, False for every other row.
# NOTE(review): rows whose `winner` matches neither colour (presumably draws)
# also end up as False — confirm this is the intended treatment.
#
# `assign` returns a new frame instead of writing into `df` in place; `df` was
# produced by boolean filtering, where in-place column assignment can raise
# pandas' SettingWithCopyWarning.
df = df.assign(player_won=df['player_pieces'] == df['winner'])

# Display the unique values for the new 'player_won' column
print(f'Unique values of player_won: {df["player_won"].unique()}')


In [None]:
# Re-express both evaluations relative to the player: rows where the player had
# the black pieces are multiplied by -1, all other rows are left unchanged.
# NOTE(review): this presumes the stored evaluations are from White's point of
# view — confirm against the data source.
# NOTE: this cell is not idempotent; running it twice flips the signs back.
#
# The sign vector is computed once and the columns are replaced through
# `assign`, which builds a new frame rather than writing into the filtered
# `df` via `.loc` (a pattern that can trigger SettingWithCopyWarning).
player_sign = np.where(df['player_pieces'] == 'black', -1, 1)
df = df.assign(
    opening_eval=df['opening_eval'] * player_sign,
    midgame_eval=df['midgame_eval'] * player_sign,
)

In [None]:
# Sanity check: print the Python type of `df`.
print(type(df))

In [None]:
# List the column names currently present in `df`.
print(df.columns.tolist())

In [None]:
# Show the dtype of every column in `df`.
df.dtypes

In [None]:
# Pairwise correlation between the ratings, both evaluations and the game outcome.
correlation_columns = [
    'player_rating',
    'opponent_rating',
    'opening_eval',
    'midgame_eval',
    'player_won',
]
numerical_df = df[correlation_columns]

numerical_df.corr()

In [None]:

# Columns that become items for the association-rule mining. The list is
# defined once so the selection and the encoding below cannot drift apart.
# NOTE(review): opening_eval and midgame_eval get one dummy column per distinct
# value, so this presumably expects them to be discretized already — confirm.
item_columns = [
    'time_class',
    'eco',
    'player_pieces',
    'win_method',
    'player_won',
    'opening_eval',
    'midgame_eval',
]
data = df[item_columns]

# One-hot encode every item column. `dtype=bool` makes the result boolean on
# every pandas version, which is the input format mlxtend's apriori expects.
data_encoded = pd.get_dummies(data, columns=item_columns, dtype=bool)

# Frequent itemsets: combinations of items present in at least 6% of the games.
frequent_itemsets = apriori(data_encoded, min_support=0.06, use_colnames=True)

frequent_itemsets.head()

In [None]:
# Turn the frequent itemsets into association rules, using confidence as the
# selection metric with a minimum threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)

# Most confident rules first.
rules_sorted = rules.sort_values("confidence", ascending=False)

rules_sorted.head()

A regra com a maior confiança, "se time_class_blitz e win_method_checkmated, então player_won_True", apresenta uma confiança de 71%. Isso significa que, em aproximadamente 71% das partidas blitz que terminaram em xeque-mate, o jogador foi o vencedor. O lift de 1.40 indica que, nessas partidas, a vitória do jogador é 40% mais provável do que no conjunto de todas as partidas, isto é, do que seria esperado se o antecedente e o consequente fossem independentes.

In [None]:
# Keep only the rules with confidence above 55% and support above 12%.
is_confident = rules['confidence'] > 0.55
is_frequent = rules['support'] > 0.12
rules_55 = rules[is_confident & is_frequent]

# Most confident rules first; the whole filtered table is displayed.
rules_55_sorted = rules_55.sort_values('confidence', ascending=False)

rules_55_sorted

In [None]:
# Scatter plot of the filtered rules: support on x, confidence on y, lift as colour.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    rules_55['support'],
    rules_55['confidence'],
    c=rules_55['lift'],
    cmap='viridis',
    s=100,
    edgecolor='k',
    alpha=0.7,
)
ax.set_title('Grafico regras de associação com confiança acima de 55%', fontsize=15)
ax.set_xlabel('Suporte', fontsize=12)
ax.set_ylabel('Confiança', fontsize=12)

# The colour bar maps marker colour to the rule's lift.
colorbar = fig.colorbar(scatter, ax=ax)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
# Rules whose consequent contains 'player_won_True', i.e. rules that predict a win.
rules_player_won_True = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.50,
)

# Each consequent is a collection of item names; keep rows where the win item is among them.
predicts_win = rules_player_won_True['consequents'].apply(
    lambda items: 'player_won_True' in items
)
player_won_True_rules = rules_player_won_True[predicts_win]

# Most confident rules first.
player_won_True_rules_sorted = player_won_True_rules.sort_values('confidence', ascending=False)

player_won_True_rules_sorted.head()

In [None]:
# Scatter plot of the win-predicting rules: support on x, confidence on y, lift as colour.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    player_won_True_rules_sorted['support'],
    player_won_True_rules_sorted['confidence'],
    c=player_won_True_rules_sorted['lift'],
    cmap='viridis',
    s=100,
    edgecolor='k',
    alpha=0.7,
)
ax.set_title('Regras de associação jogador ganha', fontsize=15)
ax.set_xlabel('Suporte', fontsize=12)
ax.set_ylabel('Confiança', fontsize=12)

# The colour bar maps marker colour to the rule's lift.
colorbar = fig.colorbar(scatter, ax=ax)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
# Bar chart with the confidence of every win-predicting rule, one bar per rule.
# The antecedents are collections of item names; they are rendered as a sorted,
# comma-separated string so the tick labels are readable instead of showing the
# raw 'frozenset({...})' representation. `assign` works on a copy, so
# `player_won_True_rules_sorted` itself is left untouched.
win_rules_for_plot = player_won_True_rules_sorted.assign(
    antecedents=player_won_True_rules_sorted['antecedents'].map(
        lambda items: ', '.join(sorted(map(str, items)))
    )
)
win_rules_for_plot.plot(kind='bar', x='antecedents', y='confidence', figsize=(10,6), color='skyblue')
plt.title("Confidence of player_won_True")
plt.xlabel("Antecedents")
plt.ylabel("Confidence")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Rules whose consequent contains 'player_won_False', i.e. rules that predict a non-win.
rules_player_won_False = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.50,
)

# Each consequent is a collection of item names; keep rows where the non-win item is among them.
predicts_no_win = rules_player_won_False['consequents'].apply(
    lambda items: 'player_won_False' in items
)
player_won_False_rules = rules_player_won_False[predicts_no_win]

# Most confident rules first.
player_won_False_rules_sorted = player_won_False_rules.sort_values('confidence', ascending=False)

player_won_False_rules_sorted.head()

In [None]:
# Scatter plot of the rules with 'player_won_False' as consequent:
# support on x, confidence on y, lift as colour.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    player_won_False_rules_sorted['support'],
    player_won_False_rules_sorted['confidence'],
    c=player_won_False_rules_sorted['lift'],
    cmap='viridis',
    s=100,
    edgecolor='k',
    alpha=0.7,
)
ax.set_title('Regras de associação jogador perde', fontsize=15)
ax.set_xlabel('Suporte', fontsize=12)
ax.set_ylabel('Confiança', fontsize=12)

# The colour bar maps marker colour to the rule's lift.
colorbar = fig.colorbar(scatter, ax=ax)
colorbar.set_label('Elevação', fontsize=12)
plt.show()

In [None]:
# Bar chart with the confidence of every rule that has 'player_won_False' as
# consequent, one bar per rule.
# The antecedents are collections of item names; they are rendered as a sorted,
# comma-separated string so the tick labels are readable instead of showing the
# raw 'frozenset({...})' representation. `assign` works on a copy, so
# `player_won_False_rules_sorted` itself is left untouched.
loss_rules_for_plot = player_won_False_rules_sorted.assign(
    antecedents=player_won_False_rules_sorted['antecedents'].map(
        lambda items: ', '.join(sorted(map(str, items)))
    )
)
loss_rules_for_plot.plot(kind='bar', x='antecedents', y='confidence', figsize=(10,6), color='skyblue')
plt.title("Confidence of player_won_False")
plt.xlabel("Antecedents")
plt.ylabel("Confidence")
plt.xticks(rotation=90)
plt.show()