In [1]:
# Parameters
y_encoding = "onehot"
n_contributions = 21
n_punishments = 31
n_cross_val = 8
fraction_training = 1.0
data = "../data/pilot_random1_player_round_slim.csv"
output_path = "../data/dev"
labels = {}
model_args = {"n_layers": 2, "hidden_size": 40}
optimizer_args = {"lr": 0.01, "weight_decay": 1e-04}
train_args = {"epochs": 1000, "batch_size": 40, "clamp_grad": 1, "eval_period": 10}
device = "cpu"


In [2]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import torch as th
from aimanager.model.data import create_syn_data, create_torch_data, get_cross_validations
from aimanager.model.artificial_humans import ArtificialHuman
from aimanager.model.evaluation import Evaluator

output_path = os.path.join(output_path, 'data')

In [3]:
df = pd.read_csv(data)
# df.head()

In [4]:
df = pd.read_csv(data)


data = create_torch_data(df)
syn_data = create_syn_data(n_contribution=21, n_punishment=31)

In [5]:
th_device = th.device(device)

metrics = []
confusion_matrix = []
syn_pred = []
ev = Evaluator()

for i, (train_data, test_data) in enumerate(get_cross_validations(data, n_cross_val)):
    model = ArtificialHuman(
        y_encoding=y_encoding, n_contributions=n_contributions, n_punishments=n_punishments, **model_args).to(th_device)

    train_data = {
        **model.encode_x(**train_data),
        **model.encode_y(**train_data),
        **train_data
    }
    train_data = {
        k: v.to(device)
        for k, v in train_data.items()
    }

    test_data = {
        **model.encode_x(**test_data),
        **model.encode_y(**test_data),
        **test_data
    }
    test_data = {
        k: v.to(device)
        for k, v in test_data.items()
    }

    syn_data_ = {
        **model.encode_x(**syn_data),
    }
    syn_data_ = {
        k: v.to(device)
        for k, v in syn_data_.items()
    }
    ev.set_data(test=test_data, train=train_data, syn=syn_data_)

    loss_fn = model.get_lossfn()

    optimizer = th.optim.Adam(model.parameters(), **optimizer_args)
    sum_loss = 0
    n_steps = 0
    batch_size = train_args['batch_size']



    for e in range(train_args['epochs']):
        ev.set_labels(cv_split=i, epoch=e)
        model.train()
        for start_idx in range(0, train_data['ah_y_enc'].shape[0], batch_size):
            perm = th.randperm(train_data['ah_y_enc'].size(0))
            idx = perm[:batch_size]
            batch_data = {
                 k: v[idx]
                for k, v in train_data.items()
            }

            optimizer.zero_grad()

            py = model(**batch_data).flatten(end_dim=-2)
            y_true = batch_data['ah_y_enc'].flatten(end_dim=-2)
            mask = batch_data['valid'].flatten()

            loss = loss_fn(py, y_true)

            loss = (loss * mask).sum() / mask.sum()

            loss.backward()

            if train_args['clamp_grad']:
                for param in model.parameters():
                    param.grad.data.clamp_(-train_args['clamp_grad'], train_args['clamp_grad'])
            optimizer.step()
            sum_loss += loss.item()
            n_steps +=1
        
        if e % train_args['eval_period'] == 0:
            avg_loss = sum_loss/n_steps
            print(f'CV {i} | Epoch {e} | Loss {avg_loss}')
            ev.add_loss(avg_loss)
            ev.eval_set(model, 'train')
            ev.eval_set(model, 'test')
            sum_loss = 0
            n_steps = 0

    ev.eval_sync(model)

ev.save(output_path, labels)
model_path = os.path.join(output_path, 'model.pt')
model.save(model_path)


CV 0 | Epoch 0 | Loss 2.9807808995246887
CV 0 | Epoch 10 | Loss 2.20641508102417
CV 0 | Epoch 20 | Loss 1.861360639333725
CV 0 | Epoch 30 | Loss 1.7908408343791962
CV 0 | Epoch 40 | Loss 1.772750225663185
CV 0 | Epoch 50 | Loss 1.730632171034813
CV 0 | Epoch 60 | Loss 1.7414883226156235
CV 0 | Epoch 70 | Loss 1.6942470848560334
CV 0 | Epoch 80 | Loss 1.7181887030601501
CV 0 | Epoch 90 | Loss 1.7229386180639268
CV 0 | Epoch 100 | Loss 1.6965611070394515
CV 0 | Epoch 110 | Loss 1.673082920908928
CV 0 | Epoch 120 | Loss 1.6635938346385957
CV 0 | Epoch 130 | Loss 1.6608507454395294
CV 0 | Epoch 140 | Loss 1.675378292798996
CV 0 | Epoch 150 | Loss 1.6710749328136445
CV 0 | Epoch 160 | Loss 1.6561614990234375
CV 0 | Epoch 170 | Loss 1.6468347996473311
CV 0 | Epoch 180 | Loss 1.6297781199216843
CV 0 | Epoch 190 | Loss 1.634636977314949
CV 0 | Epoch 200 | Loss 1.6269136548042298
CV 0 | Epoch 210 | Loss 1.6264343947172164
CV 0 | Epoch 220 | Loss 1.6413551717996597
CV 0 | Epoch 230 | Loss 1.6145

In [6]:
test_data['ah_y_enc'].size(0)

16