In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from parse_data import get_data, get_modified_values, get_binary_values, make_data_scalar
import numpy as np
import random
from data_gen import Datagen
from recognition import Recognition
from generator import Generator
from evaluation import evaluate_model, bin_plot


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: ", device)

import torch
print(torch.__version__)
 

Using device:  cuda
1.12.0+cu116


In [2]:
gen = Datagen(device)

In [3]:
import random
import copy
# Hyperparameters
sequence_length = [2*i for i in range(4,16)] # 2-20 increments of two
hidden_layers = [1,2]*10 # 1 and 2
hidden_1 = [2**i for i in range(5,10)] # 2^4 to 2^9
hidden_2 =[2**i for i in range(5,10)] # 2^2 to 2^5
variance = [0.001, 0.01, 0.005, 0.05]
lr = [0.001, 0.01, 0.1, 0.005] # stop at 0.005
data_probability = [i/5 for i in range(1,6)]
noise_in_model = [True, False]
epochs = 100
optimizer = [optim.Adam, optim.SGD]
print(hidden_layers)

options = []

for seq_len in sequence_length:
    for layers in hidden_layers:
        for h1 in hidden_1:
            for h2 in hidden_2:
                for l in lr:
                    for v in variance:
                        for p in data_probability:
                            for n in noise_in_model:
                                entry = {}
                                entry["seq_len"] = seq_len
                                entry["layers"] = layers
                                entry["latent"] = h1
                                entry["hidden"] = h2
                                entry["l"] = l
                                entry["variance"] = v
                                entry["data_prob"] = p
                                entry["noise_model"] = n
                                options.append(entry)
                
                                         
random.shuffle(options)    


[1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]


In [4]:
import copy

import torch.utils.data as data
from itertools import chain
torch.autograd.set_detect_anomaly(True)
def loss(x, x_hat, mean, R, device=None, seq_len=1):


    bce = nn.MSELoss( reduction='sum').to(device)
    l = bce(x, x_hat)
    amount = len(mean)
    for m, r in zip(mean, R):
        C = r @ r.transpose(-2,-1)
        l += 0.5 * torch.sum(m.pow(2).sum(-1) 
                             + C.diagonal(dim1=-2,dim2=-1).sum(-1)
                            - 2*C.diagonal(dim1=-2,dim2=-1).log().sum(-1) -1)
        a = m.pow(2).sum(-1) + C.diagonal(dim1=-2,dim2=-1).sum(-1) - 2*C.diagonal(dim1=-2,dim2=-1).log().sum(-1)
        
    return l    

best_model = None
best_score = 10000000000000000
batch_size = 50
best_history= [0,0,0,0,0,0]
for entry in options:
    
    x_d, y_d = gen.get_generated_data(entry["seq_len"], entry["variance"], entry["data_prob"])
    x_t, y_t = gen.get_true_data(entry["seq_len"])
    x_val, y_val = gen.get_test_data(entry["seq_len"])


    model_g = Generator(hidden_size=entry["hidden"],
                        latent_dim=entry["latent"],
                        output_dim=y_d[0].size()[0],
                        layers=entry["layers"],
                        seq_len=batch_size,
                        device=device)
    model_r = Recognition(input_dim=x_d[0].size()[1],
                          latent_dim=entry["latent"],
                          layers=entry["layers"],
                          device=device)

    loader = data.DataLoader(data.TensorDataset(x_d, y_d), batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(chain(model_r.parameters(), model_g.parameters()), lr=0.01)
    #optimizer = optim.Adam(model_r.parameters())
    history = []
    bce = nn.BCELoss().to(device)
    for e in range(epochs):
        model_g.train()
        model_r.train()


        for x, y in loader:

            x.to(device)
            y.to(device)
            if x.size()[0] < batch_size:
                continue
            if random.random() < 0.5:
                continue
            model_g.make_internal_state(batch_size)
            rec = model_r(x)
            model_g.set_xi(rec[-1])
            b = model_g()
            l = loss(x, b, rec[0], rec[1], device, entry["seq_len"])
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
                        

        
        if e % 10 != 0:
            continue
        
        count = 0
        sum_loss = [0, 0]
        for j in range(2):
            for x, y in loader:
                if x.size()[0] < batch_size:
                    continue
                model_g.eval()
                model_g.make_internal_state(batch_size)
                model_g.make_xi()
                with torch.no_grad():
                    model_g.make_internal_state(batch_size)
                    rec = model_r(x)
                    model_g.set_xi(rec[-1])
                    b = model_g()
                    l = loss(x, b, rec[0], rec[1], device, entry["seq_len"])
                    res = []
                    
                    l = bce(b, x).cpu()
                    sum_loss[j] += l.item()
                    count += 1
                    
        
        
        sum_loss[0] /= count
     
        
        history.append([e, sum_loss[0], sum_loss[1]])
        print(history[-1])

        if len(history) > 15:
            #if no real improvements are being done stop the training. 
            # but keep doing the training if the results without correctly feeding values get better
            if abs(history[-15][1] - history[-1][1]) < 0.0001:
                break
    
    
    if history[-1][1] < best_score:
        print("New best model:\nNew loss: ", history[-1], "\nOld loss:", best_history[-1], "\nHistory:" , history[-10:])
        best_model = model_g
        best_history = history
        best_score = history[-1][1]
        best_config = entry
        evaluate_model(best_model, x_t, y_t,x_val,y_val, copy.deepcopy(entry))
    else:
        evaluate_model(model_g, x_t, y_t,x_val,y_val, copy.deepcopy(entry))
        print("Old model still stands:\nCurrent loss: ", history[-1], "\nBest loss:", best_history[-1])
    

[0, 0.3088997579088398, 94.46990215778351]
[10, 0.19448667219261717, 59.407810270786285]
[20, 0.19143690353904674, 58.58940780162811]


KeyboardInterrupt: 

In [None]:
torch.zeros(1,2,3)