In [None]:
import torch
import torch.nn as nn 
import torch.optim as optim 
import numpy as np
import json
from torch.utils.data import DataLoader, random_split
from data_manager import load_data, get_data_shapes
%load_ext autoreload
%autoreload 2

In [None]:
# load_data() comes from the local data_manager module and returns three values.
# NOTE(review): presumably X = encoded positions, y = move labels and
# num_games = number of source games - confirm in data_manager.
X, y, num_games = load_data()

In [None]:
# Show the num_games value returned by load_data(); the label is Polish for "Number of games:".
print("Ilość gier:", num_games)

In [None]:
# Hand both arrays to data_manager.get_data_shapes as a sanity check.
# NOTE(review): presumably it reports the shapes of X and y - confirm in the helper.
get_data_shapes(X,y)

In [None]:
from dataset import ChessDataset
from model import Model

# Re-index the raw labels as contiguous class ids: `unique_labels` holds the
# sorted distinct values of `y`, and `y_mapped` holds, for every entry of `y`,
# the position of its value inside `unique_labels`.
unique_labels, y_mapped = np.unique(y, return_inverse=True)

# Reverse lookup (class id -> original label). Labels are cast to plain
# Python ints because numpy integer scalars are not JSON serialisable.
int_to_move = dict(enumerate(map(int, unique_labels)))

# Persist the lookup next to the model weights. JSON object keys are always
# strings, so the class ids come back as `str` when this file is reloaded.
with open("../../models/policy_network/move_mapping.json", "w") as mapping_file:
    json.dump(int_to_move, mapping_file)

In [None]:
# Wrap the inputs and the re-indexed labels in the project's Dataset class.
dataset = ChessDataset(X, y_mapped)

# 80/20 train/validation split; the validation part takes whatever is left
# after truncating the training size to an integer.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# random_split shuffles with the global RNG unless it is given a generator.
# The global seed is only set further down (just before the model is built),
# so without a dedicated, seeded generator the split would be different after
# every kernel restart and validation losses would not be comparable between
# runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

# Seed the global RNG right before constructing the model so that the weight
# initialisation is reproducible.
torch.manual_seed(42)

# One output unit per distinct label found in the data.
model = Model(num_of_indexes=len(unique_labels)).to(device)
model

In [None]:
import csv

optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
# Halve the learning rate when the monitored value (the validation loss passed
# to scheduler.step below) has stopped improving for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.5, patience=5
)

num_epochs = 100
# Used as the file stem of the CSV log written below.
model_name = "cn2_bn2_rlrop"
model_type = "Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128, ReduceLROnPlateau"

# Free-text header for the CSV log. The learning rate recorded here is the
# initial one; the scheduler may lower it during training.
description = f"Epochs: {num_epochs}, Lr:{optimizer.param_groups[0]['lr']}, Model: {model_type}"

with open(f"../../charts/policy_network/{model_name}.csv", "w", newline='') as csvfile:
    # First line is a '#'-prefixed comment, followed by the column header row.
    csvfile.write(f"# {description}\n")
    writer = csv.writer(csvfile)
    writer.writerow(["epoch", "train_loss", "val_loss"])

    for epoch in range(num_epochs):
        # ----- Training -----
        model.train()
        running_loss = 0.0
        train_samples = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            # Rescale the gradients so their combined norm does not exceed 1.0.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # criterion returns the mean loss over the batch. Weight it by the
            # batch size so a shorter final batch does not count as much as a
            # full one in the epoch average.
            n_batch = labels.size(0)
            running_loss += loss.item() * n_batch
            train_samples += n_batch
        avg_train_loss = running_loss / train_samples

        # ----- Validation -----
        model.eval()
        val_loss = 0.0
        val_samples = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                n_batch = labels.size(0)
                val_loss += loss.item() * n_batch
                val_samples += n_batch
        avg_val_loss = val_loss / val_samples

        # ----- Scheduler -----
        scheduler.step(avg_val_loss)

        # ----- Logging -----
        writer.writerow([epoch + 1, avg_train_loss, avg_val_loss])
        # Flush after every epoch so the log survives a kernel crash and can
        # be inspected while the run is still in progress.
        csvfile.flush()
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")


<h3>Default:</h3>

```python
class Model(nn.Module):
    """Baseline ("Default") network: two 3x3 convolutions followed by two linear layers.

    The first convolution takes 13 input channels, and the first linear layer
    expects 8 * 8 * 128 flattened features, which corresponds to 128 channels
    on an 8x8 grid.
    NOTE(review): presumably the input is a 13-plane encoding of a chess
    board - confirm against the dataset code.
    """
    def __init__(self, num_of_indexes):
        """Create the layers and initialise their weights.

        Args:
            num_of_indexes: Number of output units of the final linear layer.
        """
        super().__init__()
        # A 3x3 kernel with padding=1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(13, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(8 * 8 * 128, 256)
        self.fc2 = nn.Linear(256, num_of_indexes)
        self.relu = nn.ReLU()

        # Kaiming init for the ReLU-activated conv weights, Xavier init for
        # the linear weights; biases are not touched here.
        nn.init.kaiming_uniform_(self.conv1.weight, nonlinearity='relu')
        nn.init.kaiming_uniform_(self.conv2.weight, nonlinearity='relu')
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Run the network and return the raw output of the last linear layer.

        No softmax is applied; the result has `num_of_indexes` values per sample.
        """
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x
```

<table>
  <tr>
    <th style="text-align:center">Model</th>
    <th>Epochs</th>
    <th>Learning rate</th>
    <th>Loss</th>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">Divergence</td>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">0.3485</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">0.4564</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">0.2547</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0001</td>
    <td align="center">0.2142</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0002</td>
    <td align="center">0.2094</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">RLROP</td>
    <td align="center">0.1352</td>
  </tr>
</table>



In [None]:
# Store only the learned parameters (state_dict); the file is named after the
# upper-cased experiment name.
weights_path = "../../models/policy_network/{}.pth".format(model_name.upper())
torch.save(model.state_dict(), weights_path)

<table>
  <tr>
    <th style="text-align:center">Model</th>
    <th>Epochs</th>
    <th>Learning rate</th>
    <th>Graph</th>
    <th>Saved model</th>
    <th>Loss</th>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">X</td>
    <td align="center">X</td>
    <td align="center">Divergence</td>
  </tr>
  <tr>
    <td style="text-align:left">Default</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">default</td>
    <td align="center">DEFAULT</td>
    <td align="center">0.3485</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.001</td>
    <td align="center">bn2_hlr</td>
    <td align="center">BN2_HLR</td>
    <td align="center">0.4564</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 2x BatchNorm2d(64) and 128</td>
    <td align="center">150</td>
    <td align="center">0.0001</td>
    <td align="center">bn2</td>
    <td align="center">BN2</td>
    <td align="center">0.2547</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 2x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0001</td>
    <td align="center">cn2_bn2</td>
    <td align="center">CN2_BN2</td>
    <td align="center">0.2142</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">0.0002</td>
    <td align="center">cn2_bn2_lr2</td>
    <td align="center">CN2_BN2_LR2</td>
    <td align="center">0.2094</td>
  </tr>
  <tr>
    <td style="text-align:left">Default + 1x Conv2d(128,256) 3x BatchNorm2d(64) and 128</td>
    <td align="center">100</td>
    <td align="center">RLROP</td>
    <td align="center">cn2_bn2_rlrop</td>
    <td align="center">CN2_BN2_RLROP</td>
    <td align="center">0.1352</td>
  </tr>
</table>

