In [5]:
import numpy as np
import random

import tqdm
import torch
import torch.utils
from torch import optim
from torch import nn
from torch.nn import functional as F

import gobanana as gb

In [6]:
class NaiveGenerator(gb.nn.Generator):
    def __init__(self, board_shape, num_metrics, hidden_dim):
        super().__init__(board_shape, num_metrics)
        board_size = board_shape[0] * board_shape[1]
        input_size = board_size + num_metrics
        self.fc1 = nn.Linear(input_size, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.dp1 = nn.Dropout(0.5)
        
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.dp2 = nn.Dropout(0.5)
        
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.bn3 = nn.BatchNorm1d(hidden_dim)
        self.dp3 = nn.Dropout(0.5)
        self.fc4 = nn.Linear(hidden_dim, board_size * 3)

    def forward(self, noise: torch.Tensor, metrics: torch.Tensor):
        x = torch.cat([noise.flatten(start_dim=1), metrics.flatten(start_dim=1)], dim=1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.bn1(x)
        x = self.dp1(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        x = self.bn2(x)
        x = self.dp2(x)
        
        x = self.fc3(x)
        x = F.relu(x)
        x = self.bn3(x)
        x = self.dp3(x)
        
        x = self.fc4(x)
        x = F.relu(x)
        x = x.reshape(-1, *self.board_shape, 3)
        x = F.softmax(x, dim=-1)
        return x

class NaiveEvaluator(gb.nn.Evaluator):
    def __init__(self, board_shape, num_metrics, hidden_dim):
        super().__init__(board_shape, num_metrics)
        #board_size = board_shape[0] * board_shape[1]
        #self.embed = nn.Embedding(3, 1)
        self.fc1 = nn.Linear(board_shape[0] * board_shape[1] * 3, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, hidden_dim)
        self.fc6 = nn.Linear(hidden_dim, num_metrics)
        
    def forward(self, one_hot_boards):
        x = one_hot_boards
        x = F.leaky_relu(self.fc1(x.flatten(start_dim=1)))
        x = F.leaky_relu(self.fc2(x))
        x = F.leaky_relu(self.fc3(x))
        x = F.leaky_relu(self.fc4(x))
        x = F.leaky_relu(self.fc5(x))
        x = self.fc6(x)
        return x
    

In [7]:
def batch_generate_boards(generator, board_shape, metrics, batch_size=32):
    generator.eval()
    with torch.no_grad():
        noise = torch.rand(batch_size, *board_shape)
        metrics = torch.tensor(metrics).float()
        assert batch_size == metrics.shape[0]
        generator_out = generator(noise, metrics)
        board_matrices = generator_out.argmax(-1).numpy()
    return [gb.game.Board(mat) for mat in board_matrices]

def count_bananas(board: gb.game.Board):
    return np.sum(board.mat == board.BANANA).item()

def build_evaluator_training_samples(boards):
    training_samples = []
    for board in boards:
        actual_num_bananas = count_bananas(board)
        board_one_hot_tensor = torch.tensor(board.mat).long()
        metrics_tensor = torch.tensor([actual_num_bananas]).float()
        sample = (board_one_hot_tensor, metrics_tensor)
        training_samples.append(sample)
    return training_samples

def train_evaluator(evaluator, train_loader, epochs=1):
    print("Training Evaluator ...")
    optimizer = optim.Adam(evaluator.parameters())
    loss_function = nn.MSELoss()
    evaluator.train()
    losses = []
    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            predicted_metrics = evaluator(x)
            loss = loss_function(y, predicted_metrics)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
    print('loss:', np.mean(losses))
        
            
def train_generator(generator, evaluator, train_loader, epochs=1):
    print("Training Generator  ...")
    # should I include both parameters here and freeze the other one?
    optimizer = optim.Adam(generator.parameters(), lr=0.0001)
    #optimizer = optim.SGD(generator.parameters(), lr=0.0001)
    loss_function = nn.MSELoss()
    
    generator.train()
    losses = []
    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            boards = generator(x, y)
            # argmax maybe the culprit of the generator not training.
            predicted_metrics = evaluator(boards)#boards.argmax(-1))
            loss = loss_function(y, predicted_metrics)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            print('loss:', np.mean(losses))
    

def get_one_metrics():
    return [1]

def get_random_metrics():
    desired_num_bananas = random.randint(0, 9)
    return [desired_num_bananas]

def get_random_metrics_batch(batch_size):
    return [get_random_metrics() for _ in range(batch_size)]

In [16]:
def train_model(generator, evaluator, train_loader, optimizer_g, optimizer_e, epochs=1, train_e=True):
    print("Training Model  ...")
    # should I include both parameters here and freeze the other one?
    mse_criterion = nn.MSELoss()
    
    for _ in range(epochs):
        generator_losses = []
        evaluator_losses = []
        
        evaluator.train()
        generator.train()
        for x, y in train_loader:
            if train_e:
                ###############################################
                # Update E
                ##############################################
                # Forward pass generator , create a set of boards
                generator.eval()
                evaluator.train()
                boards = generator(x, y)
                
                # Forward pass evaluator
                predicted_metrics = evaluator(boards)
                
                # Compute True Metrics
                binarized_boards = torch.argmax(boards, dim=3, keepdim=False).data.numpy()
                true_metrics = torch.from_numpy(np.sum(binarized_boards == 2, axis = (1,2)).reshape(boards.size()[0],1)).float()

                # Calculate E's loss
                optimizer_e.zero_grad()
                err_e = mse_criterion(predicted_metrics, true_metrics)
                evaluator_losses.append(err_e.item())
                
                # Calculate the gradients for E
                err_e.backward()
 
                # Update E
                optimizer_e.step()
                print('E loss: ', np.mean(evaluator_losses))
            
            ###############################################
            # Update G
            ##############################################
                    
            # train genertor, and set evalutor to evalate only
            generator.train()
            evaluator.eval()
            
            # pass output directly from G to E
            boards = generator(x, y)
            predicted_metrics = evaluator(boards)
            
            # Calculate G's MSE loss
            optimizer_g.zero_grad() 
            
            # we may need to build categorical distrubtion per each tile
            # for now, let us use all cells in the tensor to build the categorical distrubition
            # I don't think this should cause a problem
            # flatten = boards.flatten(start_dim = 1)
            # entropy = categorical.Categorical(flatten).entropy()

            err_g = mse_criterion(y, predicted_metrics)
            
            
            err_g.backward()
            optimizer_g.step()
            generator_losses.append(err_g.item())
            
            #print(generator.fc4.weight.grad)
            #print(generator.fc3.weight.grad)
            #print(generator.fc2.weight.grad)
            #print(generator.fc1.weight.grad)
                  
            print('G loss:', np.mean(generator_losses))

**Train A model that can output a level with ONE Banana

In [8]:
board_shape = (3,3)
generator = NaiveGenerator(board_shape, 1, 256)
evaluator = NaiveEvaluator(board_shape, 1, 256)
batch_size = 512
iterations = 2000

for i in tqdm.notebook.tqdm(list(range(iterations))):
    generator_training_samples = []
    
    for _ in range(batch_size):
        noise = torch.randn(1, *board_shape)
        #metrics = torch.tensor(get_random_metrics()).float()
        metrics = torch.tensor(get_one_metrics()).float()
        generator_training_samples.append((noise, metrics))
        
    generator_train_loader = torch.utils.data.DataLoader(
        generator_training_samples,
        batch_size=batch_size
    )
    

    optimizer_g = optim.SGD(generator.parameters(),lr=0.0001, momentum=0.9)
    optimizer_e = optim.SGD(evaluator.parameters(), lr=0.001, momentum=0.9)
    
    # for some reason adam does not work as good as SGD
    #optimizer_g = optim.Adam(generator.parameters(),lr=0.0001)
    #optimizer_e = optim.Adam(evaluator.parameters(), lr=0.0001)
    
    train_eval = True
    train_model(generator, evaluator, generator_train_loader, optimizer_g, optimizer_e, epochs=5, train_e=train_eval)
  
    

HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))

Training Model  ...
E loss:  17.462127685546875
G loss: 1.0908936262130737
E loss:  14.101381301879883
G loss: 1.047071099281311
E loss:  9.459564208984375
G loss: 0.9912164211273193
E loss:  9.147956848144531
G loss: 0.926108717918396
E loss:  9.501867294311523
G loss: 0.8533459901809692
Training Model  ...
E loss:  9.214071273803711
G loss: 0.8376435041427612
E loss:  9.186407089233398
G loss: 0.8084427118301392
E loss:  9.009054183959961
G loss: 0.7678957581520081
E loss:  8.63427448272705
G loss: 0.7182407379150391
E loss:  8.491325378417969
G loss: 0.6615898609161377
Training Model  ...
E loss:  7.685627460479736
G loss: 0.6488833427429199
E loss:  7.507389545440674
G loss: 0.6254515051841736
E loss:  7.5054731369018555
G loss: 0.5926573276519775
E loss:  7.416515827178955
G loss: 0.5524371862411499
E loss:  7.345907211303711
G loss: 0.506517767906189
Training Model  ...
E loss:  7.441073417663574
G loss: 0.4956045150756836
E loss:  7.345254898071289
G loss: 0.4752200245857239
E l

In [15]:
# test generator 
generator.eval()
for _ in range(64):
    metrics = get_one_metrics()
    print("Desired metrics: ", metrics)
    metrics = torch.tensor(metrics).float().reshape(1, -1)
    noise = torch.randn(1, 9)
    print('Input Noise: \n', noise)
    board = generator(noise, metrics)
    print('Board Tensor: \n',board)
    print('Board: \n',board.argmax(-1))
    print()

Desired metrics:  [1]
Input Noise: 
 tensor([[-0.4865,  1.4574, -1.2855, -1.4279, -0.6453,  0.1917,  1.5447, -0.2996,
          1.5044]])
Board Tensor: 
 tensor([[[[0.6956, 0.1781, 0.1263],
          [0.7969, 0.0967, 0.1064],
          [0.4287, 0.2857, 0.2857]],

         [[0.8821, 0.0589, 0.0590],
          [0.3333, 0.3333, 0.3333],
          [0.2573, 0.5395, 0.2032]],

         [[0.2304, 0.5392, 0.2304],
          [0.2144, 0.5341, 0.2515],
          [0.3333, 0.3333, 0.3333]]]], grad_fn=<SoftmaxBackward>)
Board: 
 tensor([[[0, 0, 0],
         [0, 2, 1],
         [1, 1, 2]]])

Desired metrics:  [1]
Input Noise: 
 tensor([[ 0.3884,  0.6235, -1.7281, -0.6157, -0.2992,  1.3378,  0.4642,  0.3869,
          1.5130]])
Board Tensor: 
 tensor([[[[0.5210, 0.2471, 0.2318],
          [0.2645, 0.4923, 0.2431],
          [0.3324, 0.3353, 0.3324]],

         [[0.9581, 0.0202, 0.0217],
          [0.4722, 0.2639, 0.2639],
          [0.3686, 0.3041, 0.3273]],

         [[0.1647, 0.6636, 0.1717],
      