In [58]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import importlib
import training as training
importlib.reload(training);


In [59]:
#
# Hyperparameters
#

# --- Q-learning ---
gamma = 0.9  # discount factor; should probably stay constant at 0.9
epsilon = 0.5  # exploration parameter handed to training.train

# --- Optimizer ---
learning_rate = 0.1  # initial learning rate for a freshly created optimizer
actual_learning_rate = 0.1  # learning rate applied when resetting an optimizer loaded from file

# --- Validation ---
validation_games = 5_000
# Share of random opponent moves during validation
# (presumably a fraction in [0, 1] rather than a percentage -- TODO confirm).
omega = 1


In [42]:
class Connect4CnnV1(nn.Module):
    """Three-layer convolutional network that emits 7 values per sample.

    Layout: Conv(3->64) -> ReLU -> Conv(64->128) -> ReLU -> Conv(128->128)
    -> ReLU -> Linear(128*6*7 -> 7). Every convolution uses a 3x3 kernel with
    padding 1, so the spatial size of the input is kept unchanged.

    Input is presumably a batch of shape (N, 3, 6, 7) -- TODO confirm against
    the ``training`` module; the flattening step only requires that the total
    element count is a multiple of ``feature_size``.
    """

    def __init__(self):
        super().__init__()

        # Channels produced by the last convolution and the length of the
        # flattened feature map consumed by the linear layer.
        self.feature_count = 128
        self.feature_size = 6 * 7 * self.feature_count

        # All convolutions share the same kernel geometry.
        conv_options = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(3, 64, **conv_options)
        self.conv2 = nn.Conv2d(64, 128, **conv_options)
        self.conv3 = nn.Conv2d(128, self.feature_count, **conv_options)
        self.out = nn.Linear(self.feature_size, 7)

    def forward(self, x):
        """Run the convolution stack, flatten, and project to 7 outputs."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(conv(x))
        # -1 lets the batch dimension be derived from the element count.
        flattened = x.view(-1, self.feature_size)
        return self.out(flattened)


260871

In [60]:
class Connect4Cnn(nn.Module):
    """Two-layer convolutional network with channel dropout, 7 outputs per sample.

    Layout: Conv(3->64) -> ReLU -> Dropout2d(0.25) -> Conv(64->128) -> ReLU
    -> Dropout2d(0.25) -> Linear(128*6*7 -> 7). Both convolutions use a 3x3
    kernel with padding 1, so the spatial size of the input is kept unchanged.

    Input is presumably a batch of shape (N, 3, 6, 7) -- TODO confirm against
    the ``training`` module.
    """

    def __init__(self):
        super().__init__()

        # Channels produced by the last convolution and the length of the
        # flattened feature map consumed by the linear layer.
        self.feature_count = 128
        self.feature_size = 6 * 7 * self.feature_count

        # First stage: 3 -> 64 channels.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout2d(p=0.25)

        # Second stage: 64 -> 128 channels.
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout2d(p=0.25)

        # Output head.
        self.out = nn.Linear(self.feature_size, 7)

    def forward(self, x):
        """Apply both conv stages, flatten, and project to 7 outputs."""
        stage1 = self.dropout1(self.relu1(self.conv1(x)))
        stage2 = self.dropout2(self.relu2(self.conv2(stage1)))
        # -1 lets the batch dimension be derived from the element count.
        return self.out(stage2.view(-1, self.feature_size))


In [61]:

#
# Build a fresh network together with its SGD optimizer
#
games = 0  # game counter starts at zero for a new model
model = Connect4Cnn()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Cell output: number of trainable parameters of the model.
sum(
    weights.numel()
    for weights in model.parameters()
    if weights.requires_grad
)

113287

In [40]:
#
# Restore model and optimizer state from a saved checkpoint
#
games = 300_000  # selects the checkpoint named 'connect4-300000'
training.loadCheckpoint(
    model,
    optimizer,
    f'connect4-{games}',
    actual_learning_rate,
)

In [324]:
# Overwrite the learning rate of every parameter group of the optimizer.
for param_group in optimizer.param_groups:
    param_group['lr'] = actual_learning_rate

In [None]:
#
# TRAINING
#
gamesToGo = 300_000
training.train(
    model,
    optimizer,
    gamesToGo,
    epsilon,
    omega,
    gameOffset=games,
    gamma=gamma,
)
# Advance the running game counter; re-running this cell continues from here.
games = games + gamesToGo

### Connect4 CNN training 
#### Layers: C64-RL-D0.25-C128-RL-D0.25
optimizer: SGD initial lr: 0.1  
validation 10000 games against normalized softmax moves and &omega; random moves  
exploration via normalized softmax and &epsilon;  
loss over the last 50.000 games

| Games     | lr        | &epsilon; | Loss                  | &omega;   |Cross      | Circle    | Remarks
| :-------: | :-:       | :---:     | :-----------:         | :----:    |:----:     | :-----:   | :-------:
|           | 0.1       | 0.5       |                       | 1         | 79.52%    | 70.14%    |


#### Layers: C64-C128-C128

optimizer: SGD initial lr: 0.1  
validation 10000 games against normalized softmax moves and &omega; random moves  
exploration via normalized softmax and &epsilon;  
loss over the last 50.000 games

| Games     | lr        | &epsilon; | Loss                  | &omega;   |Cross      | Circle    | Remarks
| :-------: | :-:       | :---:     | :-----------:         | :----:    |:----:     | :-----:   | :-------:
| 300.000   | 0.1       | 0.001     |                       | 0.5       | 97.36%    | 94.48%



### Archive until 300.000
#### Layers: C64-C128-C128
optimizer: SGD initial lr: 0.1  
validation 10000 games against random moves and &alpha; model moves  
exploration via softmax and &epsilon;

| Games     | lr    | &epsilon; | Loss                  | &alpha;   |Cross      | Circle     | Remarks
| :-------: | :-:   | :---:     | :-----------:         | :----:    |:----:     | :-----:    | :-------:
| 0         | 0.1   | 0.001     | -                     | 0         | 72.16%    | 60.04%     | 
| 50000     |       |           | 0.00858330437914119   |           | 99.76%    | 99.38%     |
| 100000    |       |           |  0.005832810262491694 |           | 99.96%    | 99.50%
| 150000    |       |           | 0.004442700849651191  |           | 99.98%    | 99.82%
| 200000    |       |           | 0.0038484052923500713 |           | 99.96%    | 99.96%
| 250000    |       |           | 0.0034724944925486854 |           | 100.00%   | 99.98% 
| 300000    |       |           | 0.0033271235034726485 |           | 100.00%   | 100.00%
|           |       |           |                       | 0.5       | 97.36%    | 94.48%

