# Resultados

In [125]:
# pytorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# for visualizing the results
import numpy as np
import matplotlib.pyplot as plt

# for reading input data
import pandas as pd

# for parsing the FEN of chess positions
import re

# Measuring time in seconds:
from timeit import default_timer as timer
from datetime import timedelta
import chess

#for plotting graphs
import plotly.express as px

In [126]:
# Recorded outcome of every training run, one tuple per run.
# Tuple layout (as consumed by the reshaping cell below):
#   (data label, batch size, epochs, AdamW MSE, SGD MSE)
results = [
    # --- data label '100k' ---
    ('100k', 10, 10, 0.022416312247514725, 0.02053998038172722),
    ('100k', 10, 20, 0.02313656359910965, 0.02055313251912594),
    ('100k', 10, 30, 0.02109093964099884, 0.020554840564727783),
    ('100k', 20, 10, 0.012965253554284573, 0.012098654173314571),
    ('100k', 20, 20, 0.013631502166390419, 0.012116038240492344),
    ('100k', 20, 30, 0.01310361921787262, 0.012120513245463371),
    ('100k', 30, 10, 0.022319447249174118, 0.0204627588391304),
    ('100k', 30, 20, 0.02279026061296463, 0.020515641197562218),
    ('100k', 30, 30, 0.022309035062789917, 0.020543061196804047),
    # --- data label '200k' ---
    ('200k', 10, 10, 0.02984640561044216, 0.03014620952308178),
    ('200k', 10, 20, 0.029988357797265053, 0.030148625373840332),
    ('200k', 10, 30, 0.02992214821279049, 0.03014753758907318),
    ('200k', 20, 10, 0.020010216161608696, 0.020151956006884575),
    ('200k', 20, 20, 0.020113280043005943, 0.020157787948846817),
    ('200k', 20, 30, 0.02029838040471077, 0.02015789784491062),
    ('200k', 30, 10, 0.020582381635904312, 0.020142000168561935),
    ('200k', 30, 20, 0.020018387585878372, 0.02015572413802147),
    ('200k', 30, 30, 0.019972316920757294, 0.020156633108854294),
    # --- data label '300k' ---
    ('300k', 10, 10, 0.014279586263000965, 0.013515775091946125),
    ('300k', 10, 20, 0.012130709365010262, 0.013517911545932293),
    ('300k', 10, 30, 0.013071983121335506, 0.013520436361432076),
    ('300k', 20, 10, 0.009385841898620129, 0.010141642764210701),
    ('300k', 20, 20, 0.008993926458060741, 0.010144157335162163),
    ('300k', 20, 30, 0.00976440031081438, 0.010143240913748741),
    ('300k', 30, 10, 0.010881094262003899, 0.0111979516223073),
    ('300k', 30, 20, 0.010787458159029484, 0.011198915541172028),
    ('300k', 30, 30, 0.010907788760960102, 0.011199025437235832),
]

In [127]:
# Reshape the wide result tuples into long form: each run contributes two
# rows, first its AdamW MSE (tuple position 3), then its SGD MSE (position 4).
values = pd.DataFrame(
    [
        [run[0], run[1], run[2], run[mse_position], optimizer_name]
        for run in results
        for optimizer_name, mse_position in (("AdamW", 3), ("SGD", 4))
    ],
    columns=["Data", "Batch Size", "Epochs", "MSE", "Otimizer"],
)

In [128]:
# Grid of line charts: one row of panels per "Data" value and one column per
# "Batch Size"; each panel plots MSE against Epochs with one line per optimizer.
# Alternative view: use x='Batch Size' with facet_col="Epochs" to compare
# across batch sizes instead.
fig1 = px.line(
    values,
    x='Epochs',
    y='MSE',
    color='Otimizer',
    markers=True,
    facet_col="Batch Size",
    facet_row="Data",
    title="Comparação dos Resultados",
)

fig1.show()


## Média dos erros médios por conjuntos de dados

In [130]:
# Mean MSE for every (optimizer, data label) pair.
# The mean is taken over however many runs exist for the pair, so the
# averages stay correct if runs are added to or removed from `results`
# (the previous version divided a running sum by a hard-coded 9).
mean_mse = values.groupby(["Otimizer", "Data"])["MSE"].mean()

# Data labels in order of first appearance in `values`; derived from the
# data instead of being hard-coded so new dataset sizes are picked up.
dataset_labels = list(values["Data"].unique())

# Per-optimizer lists of mean MSE, aligned with `dataset_labels`.
adamW_errors = [float(mean_mse.loc[("AdamW", label)]) for label in dataset_labels]
sgd_errors = [float(mean_mse.loc[("SGD", label)]) for label in dataset_labels]

# Rows: data labels; columns: optimizers. This wide layout lets plotly
# draw one line per optimizer.
df_errors = pd.DataFrame(
    [adamW_errors, sgd_errors],
    columns=dataset_labels,
    index=['AdamW', 'SGD'],
).T

fig2 = px.line(df_errors, markers=True, title='Média do MSEs por conjuntos de dados')
fig2.show()


## Conclusões

- A performance dos dois otimizadores está bem parecida;
- A média dos MSEs é menor para SGD com 100k de dados;
- A média dos MSEs é menor para AdamW com 300k de dados.

# Referências

1. https://www.kaggle.com/ronakbadhe/chess-evaluation-prediction
2. https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation
3. http://starship-knowledge.com/wp-content/uploads/2020/10/Perceptrons-1024x724.jpeg

# Contribution

* Representation of castling rights, en passant, active color, halfmoves and fullmoves in an 8x8 grid
* Neural Network Architecture
* Testing AdamW vs SGD