# ReesSaver Discriminator Agent

## Notes:
- Roughly 90% of our boards are unique
- Every time you call generate_data it gets new games/games in a different order?

In [1]:
import chess

import chess.svg
import cv2
from IPython.display import display, SVG

import numpy as np
import random
from tqdm import tqdm
from importlib import reload
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import utils
reload(utils)
import utils


from sklearn.model_selection import train_test_split

  _C._set_default_tensor_type(t)


In [2]:
if torch.cuda.is_available():
    # Set default tensor type to CUDA tensors
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    
else:
    
    torch.set_default_tensor_type(torch.FloatTensor)
 
print(torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

made_loader = False

True


In [3]:
def clear_cuda():
    
    input()
    
    for obj in gc.get_objects():
        if torch.is_tensor(obj):
            if obj.is_cuda:
                print(type(obj), obj.size(), obj.device)
                del obj
    torch.cuda.empty_cache()
    gc.collect()

In [4]:
clear_cuda()

<class 'torch.Tensor'> torch.Size([64]) cuda:0




In [5]:
from utils.Datasets import *

import utils.Dataloading
reload(utils.Dataloading)
from utils.Dataloading import *
from utils.Game_playing import *

import utils.Playing_agents
reload(utils.Playing_agents)
from utils.Playing_agents import *

from utils.CSV_data import *
from utils.Puzzles import *

In [6]:
class MLPv2_1(nn.Module):

    def __init__(self):

        super().__init__()
        self.conv1 = nn.Conv2d(14, 64, 3, 1, padding=1, padding_mode = 'zeros')

        self.layers = nn.ModuleList()

        self.depth = 6

        for _ in range(self.depth):
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))

        self.linear = nn.Linear(4096, 128)

    def forward(self, x):

        x = self.conv1(x)

        for i in range(self.depth):
            j = i*4
            ph = x.clone()
            ph = self.layers[j](ph)
            ph = self.layers[j+1](ph)
            ph = F.relu(ph)
            ph = self.layers[j+2](ph)
            ph = self.layers[j+3](ph)

            x = x + ph
            x = F.relu(x)


        x = torch.flatten(x, start_dim=1)

        x = self.linear(x)

        minn, ila = x[:,:64], x[:,64:]

        return minn, ila

In [7]:
RDv2 = torch.load("Models/RDv2.3 CB.pt", map_location= device)

In [8]:
boards, meta, elo, moves, _, _, fens = generate_data("./Data/GAN_human_data.pgn", N = 40_000)
elo = [int(x) for x in elo]

0it [00:00, ?it/s]
100%|██████████| 40000/40000 [22:30<00:00, 29.62it/s]  


In [13]:
class generator_1(nn.Module):

    def __init__(self, conv_depth, hidden_size, hidden_depth):

        super().__init__()
        self.conv1 = nn.Conv2d(14, 64, 3, 1, padding=1, padding_mode = 'zeros')
        
        self.conv_layers = nn.ModuleList()
        self.hidden_layers = nn.ModuleList()
        
        self.conv_depth = conv_depth
        self.hidden_depth = hidden_depth
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(0.2)
        
        for _ in range(self.conv_depth):
            self.conv_layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.conv_layers.append(nn.BatchNorm2d(64))
            self.conv_layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.conv_layers.append(nn.BatchNorm2d(64))

        self.hidden_layers.append(nn.Linear(4096, hidden_size))
        
        for _ in range(self.hidden_depth - 2):

            self.hidden_layers.append(nn.Linear(hidden_size, hidden_size))

        self.hidden_layers.append(nn.Linear(hidden_size, 128))

    def forward(self, x):

        x = self.conv1(x)
        
        for i in range(self.conv_depth):
            j = i*4
            ph = x.clone()
            ph = self.conv_layers[j](ph)
            ph = self.conv_layers[j+1](ph)
            ph = F.relu(ph)
            ph = self.conv_layers[j+2](ph)
            ph = self.conv_layers[j+3](ph)
            
            x = x + ph
            x = F.relu(x)   
        
        x = torch.flatten(x, start_dim=1)
        
        for i in range(self.hidden_depth - 1):
            
            x = self.hidden_layers[i](x)
            x = F.leaky_relu(x)
            x = self.dropout(x)

        x = self.hidden_layers[-1](x)
        minn, ila = x[:,:64], x[:,64:]

        minn = F.softmax(minn, dim=1)
        ila = F.softmax(ila, dim=1)

        return torch.cat([minn, ila], dim=1)

In [18]:
class discriminator_1(nn.Module):

    def __init__(self, conv_depth, hidden_size, hidden_depth):

        super().__init__()
        self.conv1 = nn.Conv2d(14, 64, 3, 1, padding=1, padding_mode = 'zeros')

        self.conv_layers = nn.ModuleList()
        self.hidden_layers = nn.ModuleList()

        self.conv_depth = conv_depth
        self.hidden_depth = hidden_depth
        self.hidden_size = hidden_size
        self.alpha = 0.5
        self.dropout = nn.Dropout(0.2)

        for _ in range(self.conv_depth):
            self.conv_layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.conv_layers.append(nn.BatchNorm2d(64))
            self.conv_layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.conv_layers.append(nn.BatchNorm2d(64))

        self.board_linear = nn.Linear(4096, int(hidden_size*self.alpha))
        self.move_linear = nn.Linear(128, int(hidden_size*(1-self.alpha)))

        for _ in range(self.hidden_depth):

            self.hidden_layers.append(nn.Linear(hidden_size, hidden_size))

        self.hidden_layers.append(nn.Linear(hidden_size, 1))
    

    def forward(self, board, move):

        x = self.conv1(board)
        
        for i in range(self.conv_depth):
            j = i*4
            ph = x.clone()
            ph = self.conv_layers[j](ph)
            ph = self.conv_layers[j+1](ph)
            ph = F.relu(ph)
            ph = self.conv_layers[j+2](ph)
            ph = self.conv_layers[j+3](ph)
            
            x = x + ph
            x = F.relu(x)
                  
                  
        x = torch.flatten(x, start_dim=1)
        
        x = self.board_linear(x)
        x = F.leaky_relu(x)
        x = self.dropout(x)
        
        move = self.move_linear(move)
        move = F.leaky_relu(move)
        x = self.dropout(x)
        
        x = torch.cat((x, move), dim=1)

        for i in range(self.hidden_depth):
                
            x = self.hidden_layers[i](x)
            x = F.leaky_relu(x)
            x = self.dropout(x)

        x = self.hidden_layers[-1](x)
        x = F.sigmoid(x)
        
        return x

In [44]:
class GAN_1(nn.Module):
    #AI: 0, Human: 1
    def __init__(self, g_conv_depth, g_hidden_size, g_hidden_depth, d_conv_depth, d_hidden_size, d_hidden_depth, lr):
        
        super().__init__()
        
        print(device)
        
        self.generator = generator_1(g_conv_depth, g_hidden_size, g_hidden_depth)
        self.discriminator = discriminator_1(d_conv_depth, d_hidden_size, d_hidden_depth)
        
        self.logs = {"g_acc": [0], "d_acc_r": [0], "d_acc_f": [0], 
                     "g_loss": [0], "d_loss": [0],"d_dist_f": [0], "d_dist_r": [0], 
                     "cur_g_loss": 0, "cur_d_loss": 0}
        
        self.made_loader = False
        
        self.configure_optimizers(lr)
        
    def forward(self, x):
        return self.generator(x)
    
    def adversarial_loss(self, y_hat, y):
        return F.binary_cross_entropy(y_hat, y)
    
    def train_step(self, train_boards, real_moves, train_generator):
        
        #train generator
        if train_generator:

            self.opt_g.zero_grad()
            
            fake_moves = self(train_boards)
                
            y_hat = self.discriminator(train_boards, fake_moves)
            
            y = torch.ones(real_moves.size(0), 1).to(device)
            
            g_loss = self.adversarial_loss(y_hat, y)
            self.logs["cur_g_loss"] += g_loss.item()
            
            g_loss.backward()

            self.opt_g.step()
              
            
        else:

            self.opt_d.zero_grad()
            
            y_hat_real = self.discriminator(train_boards, real_moves)
            y_real = torch.ones(real_moves.size(0), 1).to(device)
            
            d_real_loss = self.adversarial_loss(y_hat_real, y_real)
            
            y_hat_fake = self.discriminator(train_boards, self(train_boards).detach())
            y_fake = torch.zeros(real_moves.size(0), 1).to(device)
            
            d_fake_loss = self.adversarial_loss(y_hat_fake, y_fake)
            
            d_loss = d_real_loss + d_fake_loss
            self.logs["cur_d_loss"] += d_loss.item()
            
            d_loss.backward()

            self.opt_d.step()
            
    
    def configure_optimizers(self, lr):
        self.lr = lr
        self.opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, weight_decay=0.0001)
        self.opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr,weight_decay=0.0001)
    
    def on_epoch_end(self, epoch, G, val_data=None):
        
        self.logs["g_loss"].append(self.logs["cur_g_loss"] / G)
        self.logs["d_loss"].append(self.logs["cur_d_loss"] / G)
        
        print(f'Epoch {epoch+1} with g_loss: {self.logs["cur_g_loss"] / G} and d_loss: {self.logs["cur_d_loss"] / G}')
        
        self.logs["cur_g_loss"] = 0
        self.logs["cur_d_loss"] = 0
        
        if epoch % 1 == 0:
            
            if val_data is not None:
                
                val_boards = val_data.bitboards
                real_moves = val_data.moves
                
                #fake_moves_ind = torch.argmax(fake_moves, dim=1)
                values = np.zeros(5)
                
                for i in range(0,len(val_boards), 100):
                
                    with torch.no_grad():
                        
                        curr_boards, curr_moves = val_boards[i:i+100], real_moves[i:i+100]
    
                        fake_moves = self(curr_boards)
                        
                        f_pred = self.discriminator(curr_boards, fake_moves)
                        r_pred = self.discriminator(curr_boards, curr_moves) 
                        
                        values[0] += torch.mean(torch.round(f_pred) == 0, dtype=torch.float).item() #d_acc_f
                        values[1] += torch.mean(torch.round(r_pred) == 1, dtype=torch.float).item() #d_acc_r
                        
                        values[2] += torch.mean(torch.abs(f_pred)) #d_dist_f
                        values[3] += torch.mean(torch.abs(1 - r_pred)) #d_dist_r
                        
                        values[4] = torch.mean((curr_moves == torch.round(fake_moves)).all(dim=1), dtype=torch.float).item() # g_acc
                
                n = int(len(val_boards) // 100)
                d_acc_f, d_acc_r = values[0] / n, values[1] / n
                d_dist_f, d_dist_r = values[2] / n, values[3] / n
                g_acc = values[4] / n
                
                print(f'Epoch: {epoch+1}, {g_acc=}, {d_acc_f=}, {d_acc_r=}')
                print(f"{d_dist_f=}, {d_dist_r=}")
                
                self.logs["d_acc_f"].append(d_acc_f)
                self.logs["d_acc_r"].append(d_acc_r)
                self.logs["d_dist_f"].append(d_dist_f)
                self.logs["d_dist_r"].append(d_dist_r)
                self.logs["g_acc"].append(g_acc)
                
                
            if epoch % 5 == 0:
            
                torch.save(self.generator, f"generator {epoch}.pt")
                torch.save(self.discriminator, f"discriminator {epoch}.pt")
            
    def create_dataloader(self, boards, meta, moves, B, N, N_val):

        if self.made_loader:

            clear_cuda()
            
        loader = DataLoader(GANData(boards[:N], meta[:N], moves[:N]), batch_size = B, shuffle = True, generator=torch.Generator(device=device))
        val_loader = GANData(boards[N:N+N_val], meta[N:N+N_val], moves[N:N+N_val])
        
        self.made_loader = True
        
        return loader, val_loader
        

        

In [36]:
class GANData(Dataset):

    def __init__(self, bitboards, white_turn, moves):

        self.bitboards = torch.tensor(bitboards, dtype = torch.float).to(device)

        self.moves = torch.zeros((self.bitboards.size(dim=0), 128), dtype = torch.float).to(device)

        for ind, move in tqdm(enumerate(moves), total=len(moves)):

            minn = move.from_square
            ila = move.to_square
        
            if not white_turn[ind]:
                minn = (63 - minn) // 8 * 8 + minn % 8
                ila = (63 - ila) // 8 * 8 + ila % 8

            self.moves[ind,minn] = 1
            self.moves[ind, ila + 64] = 1


    def __len__(self):

        return self.moves.size(dim=0)


    def __getitem__(self, idx):

        return self.bitboards[idx], self.moves[idx]
    

In [40]:
clear_cuda()

<class 'torch.Tensor'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 14, 3, 3]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 4096]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([128]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.Tensor'> torch.Size([64]) cuda:0
<class 'torch.Tensor'> torch.Size([64]) cuda:0
<class 'torch.Tensor'> torch.Size([]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.nn.parameter.Parameter'> torch.Size([64]) cuda:0
<class 'torch.Tensor

In [45]:
del RSv1

In [39]:
del loader

NameError: name 'loader' is not defined

In [46]:
RSv1 = GAN_1(g_conv_depth=6, g_hidden_size=1024, g_hidden_depth=3, 
             d_conv_depth=6, d_hidden_size=2048, d_hidden_depth=2,
             lr=0.001).to(device)

cuda


In [42]:
loader, val_data = RSv1.create_dataloader(boards, meta, moves, B = 512, N=2_300_000, N_val=5_000)
G = len(loader)

100%|██████████| 2300000/2300000 [01:58<00:00, 19377.67it/s]
100%|██████████| 5000/5000 [00:00<00:00, 18257.22it/s]


In [80]:
del RSv1.discriminator

RSv1.discriminator = discriminator_1(conv_depth=4, hidden_size=512, hidden_depth=3)
RSv1.configure_optimizers(0.001)

In [84]:
del RSv1.generator

RSv1.generator = generator_1(conv_depth=6, hidden_size=1024, hidden_depth=2)
RSv1.configure_optimizers(0.001)

In [23]:
train_discriminator = False
train_all = True

In [47]:
for epoch in range(0,50):

    reps = 0

    if train_all or train_discriminator:

        while RSv1.logs['d_acc_f'][-1] < 0.5:
            reps += 1
            if reps > 5:
                train_all = False
                train_discriminator = False
                break
            for bitboards, mvs in tqdm(loader):

                RSv1.train_step(bitboards, mvs, train_generator=False)


            RSv1.on_epoch_end(epoch, G, val_data)

    reps = 0
    if train_all or not train_discriminator:

        while RSv1.logs['d_acc_f'][-1] > 0.5:
            reps += 1
            if reps > 12:
                train_all = False
                train_discriminator = True
                break

            i=0
            for bitboards, mvs in tqdm(loader):

                if i > G // 8:
                    break

                RSv1.train_step(bitboards, mvs, train_generator=True)
                i += 1

            RSv1.on_epoch_end(epoch, G, val_data)

100%|██████████| 4493/4493 [04:03<00:00, 18.43it/s]


Epoch 1 with g_loss: 0.0 and d_loss: 19.3878704660583
Epoch: 1, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.00020007889717817305, d_dist_r=0.00016678428277373315


 13%|█▎        | 562/4493 [00:25<03:00, 21.83it/s]


Epoch 1 with g_loss: 0.002035154043810891 and d_loss: 0.0
Epoch: 1, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999819946289062, d_dist_r=0.00018417883664369582


100%|██████████| 4493/4493 [04:03<00:00, 18.44it/s]


Epoch 2 with g_loss: 0.0 and d_loss: 0.009300406484175612
Epoch: 2, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=6.7323325201869e-05, d_dist_r=2.0267809741199017e-05


 13%|█▎        | 562/4493 [00:24<02:54, 22.54it/s]


Epoch 2 with g_loss: 0.006101260051949374 and d_loss: 0.0
Epoch: 2, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999911499023437, d_dist_r=2.624332904815674e-05


100%|██████████| 4493/4493 [04:05<00:00, 18.26it/s]


Epoch 3 with g_loss: 0.0 and d_loss: 0.021861865038342775
Epoch: 3, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=9.222232736647129e-05, d_dist_r=5.693684332072735e-05


 13%|█▎        | 562/4493 [00:24<02:52, 22.77it/s]


Epoch 3 with g_loss: 0.011708084688143766 and d_loss: 0.0
Epoch: 3, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=1.0, d_dist_r=5.575772374868393e-05


100%|██████████| 4493/4493 [04:03<00:00, 18.45it/s]


Epoch 4 with g_loss: 0.0 and d_loss: 0.09424660241999815
Epoch: 4, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=4.404156468808651e-05, d_dist_r=5.184238310903311e-05


 13%|█▎        | 562/4493 [00:24<02:53, 22.62it/s]


Epoch 4 with g_loss: 0.002763847181938432 and d_loss: 0.0
Epoch: 4, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9992449951171875, d_dist_r=2.910651033744216e-05


100%|██████████| 4493/4493 [04:02<00:00, 18.50it/s]


Epoch 5 with g_loss: 0.0 and d_loss: 0.005131404260858876
Epoch: 5, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.0001253604982048273, d_dist_r=5.77547587454319e-05


 13%|█▎        | 562/4493 [00:25<02:58, 21.98it/s]


Epoch 5 with g_loss: 1.2333246428903375 and d_loss: 0.0
Epoch: 5, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.00013303354382514953, d_dist_r=5.263927858322859e-05


 13%|█▎        | 562/4493 [00:24<02:53, 22.68it/s]


Epoch 5 with g_loss: 0.5458175546791728 and d_loss: 0.0
Epoch: 5, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999922943115235, d_dist_r=5.748895462602377e-05


100%|██████████| 4493/4493 [04:02<00:00, 18.50it/s]


Epoch 6 with g_loss: 0.0 and d_loss: 0.009643596516829734
Epoch: 6, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.0002390151284635067, d_dist_r=3.424680093303323e-05


 13%|█▎        | 562/4493 [00:24<02:53, 22.64it/s]


Epoch 6 with g_loss: 0.6137692446953307 and d_loss: 0.0
Epoch: 6, g_acc=0.0, d_acc_f=0.0035999999195337295, d_acc_r=1.0
d_dist_f=0.9722591400146484, d_dist_r=4.775002598762512e-05


100%|██████████| 4493/4493 [04:02<00:00, 18.49it/s]


Epoch 7 with g_loss: 0.0 and d_loss: 0.006518438749852626
Epoch: 7, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.00043078683316707613, d_dist_r=6.479330360889435e-05


 13%|█▎        | 562/4493 [00:24<02:53, 22.65it/s]


Epoch 7 with g_loss: 0.9453789774748009 and d_loss: 0.0
Epoch: 7, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999664306640625, d_dist_r=8.390598930418491e-05


100%|██████████| 4493/4493 [04:03<00:00, 18.47it/s]


Epoch 8 with g_loss: 0.0 and d_loss: 0.018617162718208433
Epoch: 8, g_acc=0.0, d_acc_f=0.9983999919891358, d_acc_r=0.9997999989986419
d_dist_f=0.001981533020734787, d_dist_r=0.0006544488668441772


 13%|█▎        | 562/4493 [00:24<02:53, 22.67it/s]


Epoch 8 with g_loss: 0.007508300145539877 and d_loss: 0.0
Epoch: 8, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999241638183594, d_dist_r=0.0005325081571936608


100%|██████████| 4493/4493 [04:02<00:00, 18.53it/s]


Epoch 9 with g_loss: 0.0 and d_loss: 0.0047573928193769535
Epoch: 9, g_acc=0.0, d_acc_f=1.0, d_acc_r=1.0
d_dist_f=0.0001377199962735176, d_dist_r=1.111453864723444e-05


 13%|█▎        | 562/4493 [00:24<02:53, 22.69it/s]


Epoch 9 with g_loss: 0.0021539088894441186 and d_loss: 0.0
Epoch: 9, g_acc=0.0, d_acc_f=0.0, d_acc_r=1.0
d_dist_f=0.9999976348876953, d_dist_r=1.1465431889519095e-05


100%|██████████| 4493/4493 [04:02<00:00, 18.50it/s]


Epoch 10 with g_loss: 0.0 and d_loss: 0.015260289510615227
Epoch: 10, g_acc=0.0, d_acc_f=0.9997999989986419, d_acc_r=0.9991999959945679
d_dist_f=0.0014577309787273407, d_dist_r=0.0008731110394001007


 13%|█▎        | 562/4493 [00:24<02:52, 22.75it/s]


Epoch 10 with g_loss: 0.7457047414068599 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9671999776363372, d_acc_r=0.9991999959945679
d_dist_f=0.04561284065246582, d_dist_r=0.00096535824239254


 13%|█▎        | 562/4493 [00:24<02:52, 22.78it/s]


Epoch 10 with g_loss: 0.745416375700838 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9679999804496765, d_acc_r=0.9991999959945679
d_dist_f=0.04470839500427246, d_dist_r=0.0009392369538545609


 13%|█▎        | 562/4493 [00:24<02:52, 22.78it/s]


Epoch 10 with g_loss: 0.7444948967543737 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9697999823093414, d_acc_r=0.9991999959945679
d_dist_f=0.044068069458007814, d_dist_r=0.0009893162548542023


 13%|█▎        | 562/4493 [00:25<02:58, 21.99it/s]


Epoch 10 with g_loss: 0.7459633431560925 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9681999790668487, d_acc_r=0.9991999959945679
d_dist_f=0.045111165046691895, d_dist_r=0.0008864986896514892


 13%|█▎        | 562/4493 [00:24<02:53, 22.70it/s]


Epoch 10 with g_loss: 0.7443548172161782 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.966799978017807, d_acc_r=0.9991999959945679
d_dist_f=0.044476103782653806, d_dist_r=0.0009320415556430817


 13%|█▎        | 562/4493 [00:24<02:53, 22.68it/s]


Epoch 10 with g_loss: 0.7453479233017324 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9665999805927277, d_acc_r=0.9991999959945679
d_dist_f=0.04498264312744141, d_dist_r=0.0009537439048290252


 13%|█▎        | 562/4493 [00:24<02:52, 22.73it/s]


Epoch 10 with g_loss: 0.7447707871140549 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9689999830722809, d_acc_r=0.9991999959945679
d_dist_f=0.044491057395935056, d_dist_r=0.0009810756891965866


 13%|█▎        | 562/4493 [00:24<02:53, 22.70it/s]


Epoch 10 with g_loss: 0.7445105138346105 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9675999820232392, d_acc_r=0.9991999959945679
d_dist_f=0.04417236328125, d_dist_r=0.0008773046731948852


 13%|█▎        | 562/4493 [00:24<02:52, 22.75it/s]


Epoch 10 with g_loss: 0.7450508544950742 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9683999824523926, d_acc_r=0.9991999959945679
d_dist_f=0.04453611850738525, d_dist_r=0.0009484696388244629


 13%|█▎        | 562/4493 [00:24<02:53, 22.69it/s]


Epoch 10 with g_loss: 0.7456486008094568 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9675999820232392, d_acc_r=0.9991999959945679
d_dist_f=0.0456885290145874, d_dist_r=0.000973176509141922


 13%|█▎        | 562/4493 [00:25<02:58, 22.05it/s]


Epoch 10 with g_loss: 0.7453303635319384 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9671999788284302, d_acc_r=0.9991999959945679
d_dist_f=0.04417887687683106, d_dist_r=0.0009392216801643371


 13%|█▎        | 562/4493 [00:24<02:52, 22.74it/s]


Epoch 10 with g_loss: 0.745216775253571 and d_loss: 0.0
Epoch: 10, g_acc=0.0, d_acc_f=0.9683999800682068, d_acc_r=0.9991999959945679
d_dist_f=0.0448710298538208, d_dist_r=0.0010053814947605132


In [135]:
val_boards = val_data.bitboards
real_moves = val_data.moves

minn, ila = RDv2(val_boards)
fake_moves = torch.cat([F.softmax(minn, dim=1), F.softmax(ila,dim=1)], dim=1)
torch.mean((real_moves == torch.round(fake_moves)).all(dim=1), dtype=torch.float).item()

0.2510000169277191

In [49]:
old_generator = torch.load("generator 5.pt")
torch.mean(RSv1.discriminator(val_data.bitboards[:1000], old_generator(val_data.bitboards[:1000])))

tensor(1.0000, grad_fn=<MeanBackward0>)

In [137]:
fake_moves = RSv1(val_boards)
print(fake_moves[0])

#fake_moves_ind = torch.argmax(fake_moves, dim=1)
print(torch.round(fake_moves[0]))

g_acc = torch.mean((real_moves == torch.round(fake_moves)).all(dim=1), dtype=torch.float).item()

tensor([1.7404e-42, 0.0000e+00, 2.2697e-41, 2.9514e-19, 2.4398e-16, 4.8634e-12,
        9.2207e-27, 3.0531e-33, 0.0000e+00, 0.0000e+00, 2.2381e-24, 2.3693e-12,
        1.0543e-04, 2.4633e-19, 4.6248e-19, 9.9989e-01, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.0044e-09, 2.2264e-06, 1.8982e-10, 8.1555e-16, 6.4270e-11,
        1.2278e-31, 0.0000e+00, 1.0315e-31, 1.0168e-20, 5.1734e-15, 8.6261e-07,
        3.3822e-08, 5.3728e-09, 0.0000e+00, 0.0000e+00, 8.4442e-37, 1.1300e-11,
        1.6480e-18, 1.1017e-12, 9.6544e-10, 5.9234e-10, 0.0000e+00, 0.0000e+00,
        3.2209e-39, 1.5248e-36, 2.6022e-42, 5.4401e-41, 0.0000e+00, 4.2039e-45,
        0.0000e+00, 0.0000e+00, 7.8557e-42, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+

In [141]:
test_against(lambda x: network_agent_prob_conv(x, RDv2), lambda x: network_agent_prob_conv(x, RSv1.generator), N=100)

100%|██████████| 50/50 [00:28<00:00,  1.77it/s]
100%|██████████| 50/50 [00:27<00:00,  1.80it/s]


(100, 0, 0, 1.0)

In [15]:
torch.cuda.memory_allocated() 

7274496