In [5]:
import import_ipynb
import chess
import math
import torch
import torch.nn as nn
import torch.optim as optim
import dset
import net
import autoencoder
import bitboards

c_const = 2
samplingRate = 0.4
seed = 42
mse = nn.MSELoss()

def cross_entropy(y_hat, y):
    y1, y2 = y[0], y[1]
    y_hat1 = (torch.clamp(y_hat[0], 1e-9, 1 - 1e-9))
    y_hat2 = (torch.clamp(y_hat[1], 1e-9, 1 - 1e-9))
    
    return -1/2 * ((y1 * torch.log(y_hat1)).sum(dim=1).mean() + (y2 * torch.log(y_hat2)).sum(dim=1).mean())

def train_mcts(batch, dataset_size, encoder, nnet, optimizer, reinf, *args):
    dataset = dset.SearchDataset(dataset_size, dset.Encode(encoder), reinf, *args)
    pick = math.floor(samplingRate*len(dataset))
    subset = torch.utils.data.random_split(dataset, [pick, len(dataset) - pick], generator=torch.Generator().manual_seed(seed))
    
    DataLoader = torch.utils.data.DataLoader(subset[0], batch_size=batch, shuffle=True, drop_last=True)
    
    noBatch = 0
    for embedding, value, policy in DataLoader:
        value_hat, policy_hat = nnet(embedding.view(embedding.shape[0],1, 256))

        mse_value = mse(value_hat, value)
        cross_entropy_value = cross_entropy(policy_hat, policy)
        loss = c_const * mse_value + cross_entropy_value
        print(f"Loss ({noBatch}): ", loss, mse_value, cross_entropy_value, end='\n')

        loss.backward()
        optimizer.step()
        noBatch += 1
        
    torch.save(nnet.state_dict(), "nnet_mcts.pt")
                
def train_alpha_beta(batch, dataset_size, encoder, nnet, optimizer, reinf, *args):
    dataset = dset.SearchDataset(dataset_size, dset.Encode(encoder), reinf, *args)
    pick = math.floor(samplingRate*len(dataset))
    subset = torch.utils.data.random_split(dataset, [pick, len(dataset) - pick], generator=torch.Generator().manual_seed(seed))
    
    DataLoader = torch.utils.data.DataLoader(subset[0], batch_size=batch, shuffle=True, drop_last=True)
    
    noBatch = 0
    for embedding, value in DataLoader:
        value_hat = nnet(embedding.view(embedding.shape[0],1, 256))

        mse_value = mse(value_hat, value)
        print(f"Loss ({noBatch}): ", mse_value, end='\n')

        mse_loss.backward()
        optimizer.step()
        noBatch += 1
        
    torch.save(nnet.state_dict(), "nnet_alpha_beta.pt")

In [6]:
BATCH = 10
DATASET_SIZE = 200
ARGS = (chess.Board(), net.Net().cuda(), autoencoder.autoencoder().cuda(), dset.SearchType.MCTS, 50)
encoder = autoencoder.autoencoder().cuda()
nnet = net.Net().cuda()
optimizer = optim.Adam(nnet.parameters(), weight_decay=0.01)

train_mcts(BATCH, DATASET_SIZE, encoder, nnet, optimizer, dset.ReinforcementType.MC, *ARGS)

  return F.mse_loss(input, target, reduction=self.reduction)


Loss (0):  tensor(0.6215, device='cuda:0', grad_fn=<AddBackward0>) tensor(0.2460, device='cuda:0', grad_fn=<MseLossBackward>) tensor(0.1295, device='cuda:0', grad_fn=<MulBackward0>)
Loss (1):  tensor(0.5593, device='cuda:0', grad_fn=<AddBackward0>) tensor(0.2147, device='cuda:0', grad_fn=<MseLossBackward>) tensor(0.1299, device='cuda:0', grad_fn=<MulBackward0>)
Loss (2):  tensor(0.4795, device='cuda:0', grad_fn=<AddBackward0>) tensor(0.1750, device='cuda:0', grad_fn=<MseLossBackward>) tensor(0.1295, device='cuda:0', grad_fn=<MulBackward0>)
Loss (3):  tensor(0.3255, device='cuda:0', grad_fn=<AddBackward0>) tensor(0.0984, device='cuda:0', grad_fn=<MseLossBackward>) tensor(0.1287, device='cuda:0', grad_fn=<MulBackward0>)
Loss (4):  tensor(0.1561, device='cuda:0', grad_fn=<AddBackward0>) tensor(0.0152, device='cuda:0', grad_fn=<MseLossBackward>) tensor(0.1256, device='cuda:0', grad_fn=<MulBackward0>)
Loss (5):  tensor(0.1253, device='cuda:0', grad_fn=<AddBackward0>) tensor(8.9362e-05, devi