In [1]:
import torch
import torch.nn as nn 
import torch.optim as optim 
import numpy as np
import json
from torch.utils.data import DataLoader 
from data_manager import load_data, get_data_shapes, get_num_indexes
%load_ext autoreload
%autoreload 2

In [2]:
# Load the inputs X, the targets y and the number of games via data_manager.load_data().
X, y, num_games = load_data()

Przetwarzanie plików PGN:   0%|          | 0/24 [00:00<?, ?it/s]

In [3]:
# Report how many games load_data() returned (the label text is Polish for "Number of games").
print("Ilość gier:", num_games)

Ilość gier: 10327


In [4]:
# Sanity-check the arrays: the returned summary (shapes, dtypes, value ranges,
# number of distinct labels) is displayed as the cell output.
get_data_shapes(X,y)

{'X_shape': (848412, 8, 8, 13),
 'y_shape': (848412,),
 'X_dtype': dtype('float32'),
 'y_dtype': dtype('int64'),
 'X_min': np.float32(0.0),
 'X_max': np.float32(1.0),
 'y_min': np.int64(1),
 'y_max': np.int64(4094),
 'y_unique': 1792}

In [5]:
from dataset import ChessDataset
from model import Model

# Re-index the raw labels: `unique_labels` holds the sorted distinct values of
# `y`, and `y_mapped[i]` is the position of `y[i]` inside `unique_labels`.
unique_labels, y_mapped = np.unique(y, return_inverse=True)

# Class index -> original label, converted to plain Python ints so that the
# mapping is JSON-serialisable.
int_to_move = dict(enumerate(unique_labels.tolist()))

# Persist the mapping next to the saved models. JSON object keys are always
# strings, so a reader has to convert the keys back to int after loading.
with open("../models/move_mapping.json", "w") as mapping_file:
    json.dump(int_to_move, mapping_file)


In [6]:
# Wrap the arrays (with the re-indexed labels) in the project's dataset class
# and serve them in shuffled mini-batches of 64.
dataset = ChessDataset(X, y_mapped)

loader = DataLoader(dataset, shuffle=True, batch_size=64)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

# Seed torch's RNG before the model is built so the initial weights are repeatable.
torch.manual_seed(42)

# One output unit per distinct label.
model = Model(num_of_indexes=len(unique_labels))
model = model.to(device)
model

Using device: cuda


Model(
  (conv1): Conv2d(13, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=1792, bias=True)
  (relu): ReLU()
)

In [7]:
import csv

optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

num_epochs = 150
model_type = "With BatchNorm2d(64) and 128"

# Free-text description of the run; written as a '#'-prefixed first line of the
# CSV log, so anything reading the file has to skip that line.
description = f"Epochs: {num_epochs}, Lr:{optimizer.param_groups[0]['lr']}, Model: {model_type}"

with open("../charts/cn2_bn2.csv", "w", newline='') as csvfile:
    csvfile.write(f"# {description}\n")
    writer = csv.writer(csvfile)
    writer.writerow(["epoch", "loss"])

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        samples_seen = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)

            # Compute the loss and back-propagate
            loss = criterion(outputs, labels)
            loss.backward()

            # Clip the total gradient norm to 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            # Update the weights
            optimizer.step()

            # `criterion` returns the mean over the batch. Weight it by the
            # batch size so the (smaller) final batch does not count as much
            # as a full one in the epoch average.
            n_in_batch = labels.size(0)
            running_loss += loss.item() * n_in_batch
            samples_seen += n_in_batch

        # Per-sample mean loss for the epoch
        avg_loss = running_loss / samples_seen
        # Append to the loss history and push it to disk right away, so the
        # log of finished epochs survives a killed kernel.
        writer.writerow([epoch + 1, avg_loss])
        csvfile.flush()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}')

Epoch 1/150, Loss: 3.7749
Epoch 2/150, Loss: 2.6510
Epoch 3/150, Loss: 2.1884
Epoch 4/150, Loss: 1.8301
Epoch 5/150, Loss: 1.5282
Epoch 6/150, Loss: 1.2755
Epoch 7/150, Loss: 1.0689
Epoch 8/150, Loss: 0.9042
Epoch 9/150, Loss: 0.7813
Epoch 10/150, Loss: 0.6873
Epoch 11/150, Loss: 0.6202
Epoch 12/150, Loss: 0.5704
Epoch 13/150, Loss: 0.5348
Epoch 14/150, Loss: 0.5067
Epoch 15/150, Loss: 0.4840
Epoch 16/150, Loss: 0.4649
Epoch 17/150, Loss: 0.4482
Epoch 18/150, Loss: 0.4350
Epoch 19/150, Loss: 0.4231
Epoch 20/150, Loss: 0.4128
Epoch 21/150, Loss: 0.4031
Epoch 22/150, Loss: 0.3940
Epoch 23/150, Loss: 0.3863
Epoch 24/150, Loss: 0.3806
Epoch 25/150, Loss: 0.3732
Epoch 26/150, Loss: 0.3668
Epoch 27/150, Loss: 0.3602
Epoch 28/150, Loss: 0.3565
Epoch 29/150, Loss: 0.3493
Epoch 30/150, Loss: 0.3452
Epoch 31/150, Loss: 0.3404
Epoch 32/150, Loss: 0.3365
Epoch 33/150, Loss: 0.3315
Epoch 34/150, Loss: 0.3271
Epoch 35/150, Loss: 0.3229
Epoch 36/150, Loss: 0.3207
Epoch 37/150, Loss: 0.3156
Epoch 38/1

KeyboardInterrupt: 

<h3>Default:</h3>

```python
class Model(nn.Module):
    """Baseline ("Default") network: two 3x3 convolutions and two linear layers.

    The first convolution takes 13 input channels, and `fc1` is sized for
    128 channels on an 8x8 grid, so `forward` needs input of shape
    (batch, 13, 8, 8). The output is (batch, num_of_indexes) raw scores;
    no softmax is applied.
    """

    def __init__(self, num_of_indexes):
        """Create the layers and initialise their weights.

        Args:
            num_of_indexes: Width of the output layer (number of classes).
        """
        super().__init__()
        # kernel_size=3 with padding=1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=13, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=8 * 8 * 128, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_of_indexes)
        self.relu = nn.ReLU()

        # Kaiming-uniform for the conv weights, Xavier-uniform for the
        # linear weights; biases keep PyTorch's default initialisation.
        for conv in (self.conv1, self.conv2):
            nn.init.kaiming_uniform_(conv.weight, nonlinearity='relu')
        for linear in (self.fc1, self.fc2):
            nn.init.xavier_uniform_(linear.weight)

    def forward(self, x):
        """Return the raw class scores for a batch `x`."""
        for conv in (self.conv1, self.conv2):
            x = self.relu(conv(x))
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.fc2(hidden)
```

<table>
  <tr>
    <th style="text-align:center">Model</th>
    <th>Epochs</th>
    <th>Learning rate</th>
    <th>Loss</th>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">Divergence</td>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">0.3485</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">0.4564</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">0.2547</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0001</td>
    <td align="center">0.2142</td>
  </tr>
</table>



In [8]:
# Save only the model's parameters (its state_dict), not the module object itself.
torch.save(model.state_dict(), "../models/CN2_BN2.pth")

<table>
  <tr>
    <th style="text-align:center">Model</th>
    <th>Epochs</th>
    <th>Learning rate</th>
    <th>Chart file</th>
    <th>Model file</th>
    <th>Loss</th>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">X</td>
    <td align="center">X</td>
    <td align="center">Divergence</td>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">default</td>
    <td align="center">DEFAULT</td>
    <td align="center">0.3485</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">bn2_hlr</td>
    <td align="center">BN2_HLR</td>
    <td align="center">0.4564</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">bn2</td>
    <td align="center">BN2</td>
    <td align="center">0.2547</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0001</td>
    <td align="center">cn2_bn2</td>
    <td align="center">CN2_BN2</td>
    <td align="center">0.2142</td>
  </tr>
</table>

