In [39]:
import numpy as np
import torch as t
import torch.nn.functional as F
import os
import os.path
import random

from tqdm.autonotebook import tqdm
from time import gmtime, strftime, time
from matplotlib import pyplot as plt
from torch import nn
from torch.optim import Adam
# from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from sklearn.metrics import f1_score, accuracy_score, classification_report
# from tqdm.autonotebook import tqdm

## With WandB

In [2]:
def seed_everything(seed=73):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    t.manual_seed(seed)
    t.cuda.manual_seed(seed)
    t.cuda.manual_seed_all(seed)
    # some cudnn methods can be random even after fixing the seed unless you tell it to be deterministic
    t.backends.cudnn.deterministic = True

seed_everything(1234)

In [14]:
%env WANDB_NOTEBOOK_NAME='w2v'

env: WANDB_NOTEBOOK_NAME='w2v'


In [3]:
import wandb

wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33melliel[0m (use `wandb login --relogin` to force relogin)


True

In [4]:
sweep_config = {
    'method': 'random', #grid, random, bayesian
    'metric': {
      #'name': 'auc_score',
      'name': 'f1_score',
      'goal': 'maximize'   
    },
    'parameters': {

        'learning_rate': {
            'values': [5e-5, 3e-5]
        },
        'batch_size': {
            'values': [32, 64]
        },
        'epochs':{'value': 10}
        #'dropout':{
         #   'values': [0.3, 0.4, 0.5]
        #},
        #'tokenizer_max_len': {'value': 40},
    }
}

sweep_defaults = {
    'learning_rate': 3e-5,
    'batch_size': 64,
    'epochs': 10
    #'dropout': 0.3,
    #'tokenizer_max_len': 40
}

#wandb.init(project="w2v_aibo")
sweep_id = wandb.sweep(sweep_config, project="w2v_aibo", entity="elliel")

Create sweep with ID: ksqmd2i3
Sweep URL: https://wandb.ai/elliel/w2v_aibo/sweeps/ksqmd2i3


In [43]:
def log_metrics(preds, labels):
    #preds = t.from_numpy(preds)
    print("t to np", preds.dtype)
    #labels = t.from_numpy(labels)
    
    preds = np.stack(preds)
    print("preds stack", preds.dtype)
    #preds = preds.cpu().detach().numpy()
    #print("t to np", preds.dtype)
    labels = np.stack(labels)
    #labels = labels.cpu().detach().numpy()
    
    
    auc_micro_list = []
    for i in range(n_labels):
      current_pred = preds.T[i]
      current_label = labels.T[i]
      fpr_micro, tpr_micro, _ = metrics.roc_curve(current_label.T, current_pred.T)
      auc_micro = metrics.auc(fpr_micro, tpr_micro)
      auc_micro_list.append(auc_micro)
    
    return {"auc": np.array(auc_micro).mean()}
    

    #fpr_micro, tpr_micro, _ = metrics.roc_curve(labels.ravel(), preds.ravel())
    
    #auc_micro = metrics.auc(fpr_micro, tpr_micro)
    #return {"auc_micro": auc_micro}

In [None]:
def trainer(config=None):
    with wandb.init(config=config):
        config = wandb.config

        #train_dataset, valid_dataset = build_dataset(config.tokenizer_max_len)
        #train_data_loader, valid_data_loader = build_dataloader(train_dataset, valid_dataset, config.batch_size)
        #print("Length of Train Dataloader: ", len(train_data_loader))
        #print("Length of Valid Dataloader: ", len(valid_data_loader))

        device = t.device('cuda' if t.cuda.is_available() else 'cpu')

        #n_train_steps = int(len(train_dataset) / config.batch_size * 10)

        #model = ret_model(n_train_steps, config.dropout)
        #optimizer = ret_optimizer(model)
        #scheduler = ret_scheduler(optimizer, n_train_steps)
        #model.to(device)

        
        n_epochs = config.epochs

        #best_val_loss = 100
        #for epoch in tqdm(range(n_epochs)):
         #   train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler)
         #   eval_loss, preds, labels = eval_fn(valid_data_loader, model, device)
          
         #   auc_score = log_metrics(preds, labels)["auc_micro"]
         #   print("AUC score: ", auc_score)
         #   avg_train_loss, avg_val_loss = train_loss / len(train_data_loader), eval_loss / len(valid_data_loader)
         #   wandb.log({
          #      "epoch": epoch + 1,
          #      "train_loss": avg_train_loss,
          #      "val_loss": avg_val_loss,
          #      "auc_score": auc_score,
          #  })
           # print("Average Train loss: ", avg_train_loss)
           # print("Average Valid loss: ", avg_val_loss)

            #if avg_val_loss < best_val_loss:
             #   best_val_loss = avg_val_loss
              #  torch.save(model.state_dict(), "./best_model.pt")  
               # print("Model saved as current val_loss is: ", best_val_loss)  
        
        
        train_dataset = TensorDataset(t.tensor(x_train), t.tensor(y_train))
        # train_dataset = TensorDataset(torch.from_numpy(x_train_eval).float(), torch.from_numpy(y_train_eval).float())
        val_dataset = TensorDataset(t.tensor(x_val), t.tensor(y_val))
        test_dataset = TensorDataset(t.tensor(x_test), t.tensor(y_test))

        train_loader = DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True) # num_workers
        val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, shuffle=False)
        test_loader = DataLoader(dataset=test_dataset, batch_size=config.batch_size, shuffle=False)
        
        #model = MLP(6144, 3000, 1000, 7)
        model = MLP(512, 3000, 1000, 7)
        #print(model)
        model = nn.DataParallel(model)
        wandb.watch(model)

        criterion = nn.CrossEntropyLoss()

        # set up the optimizer
        optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0)

        trainer = Trainer(model, "w2v", "aibo", criterion, optimizer, train_loader, val_loader, test_loader, 
                          cuda=t.cuda.is_available(),\
                          early_stopping_patience=20)

        # go, go, go... call fit on trainer
        res = trainer.fit(100)
        wandb.watch(model)

In [None]:
class Trainer:

    def __init__(self,
                 model,  # Model to be trained.
                 model_name,
                 dataset,
                 crit,  # Loss function
                 optim=None,  # Optimizer
                 train_dl=None,  # Training data set
                 val_test_dl=None,  # Validation data set
                 test_dl=None,  # Test data set
                 cuda=True,  # Whether to use the GPU
                 early_stopping_patience=-1):   # The patience for early stopping
                 #unsqueeze_needed=True
        self._model = model
        self.model_name = model_name
        self.dataset = dataset
        self._crit = crit
        self._optim = optim
        self._train_dl = train_dl
        self._val_test_dl = val_test_dl
        self._test_dl = test_dl
        self._cuda = cuda
        self._early_stopping_patience = early_stopping_patience
        #self._unsqueeze_needed = unsqueeze_needed
        

        if cuda:
            self._model = model.cuda()
            self._crit = crit.cuda()

    #def save_checkpoint(self, epoch):
     #   t.save({'state_dict': self._model.state_dict()}, 'checkpoints/checkpoint_{:03d}.ckp'.format(epoch))

    #def restore_checkpoint(self, epoch_n):
    def restore_checkpoint(self):
        path = 'checkpoints/' + self.model_name + '_checkpoint_{}.ckp'.format(get_datetime())
        if os.path.exists(path):
            #ckp = t.load('checkpoints/checkpoint_{:03d}.ckp'.format(epoch_n), 'cuda' if self._cuda else None)
            ckp = t.load(path, 'cuda' if self._cuda else None)
            self._model.load_state_dict(ckp['state_dict'])

    def save_onnx(self, fn):
        m = self._model.cpu()
        m.eval()
        x = t.randn(1, 3, 300, 300, requires_grad=True)
        y = self._model(x)
        t.onnx.export(m,  # model being run
                      x,  # model input (or a tuple for multiple inputs)
                      fn,  # where to save the model (can be a file or file-like object)
                      export_params=True,  # store the trained parameter weights inside the model file
                      opset_version=10,  # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['input'],  # the model's input names
                      output_names=['output'],  # the model's output names
                      dynamic_axes={'input': {0: 'batch_size'},  # variable lenght axes
                                    'output': {0: 'batch_size'}})

    def train_step(self, x, y):
        # perform following steps:
        # -reset the gradients / clear the gradients of all optimized variables
        self._optim.zero_grad()
        # -propagate through the network / forward pass: compute predicted outputs by passing inputs to the model
        output = self._model.forward(x)
        # -calculate the loss
        loss = self._crit(output, y)
        # -compute gradient by backprop / backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # -update weights / perform a single optimization step (parameter update)
        self._optim.step()
        # -return the loss
        return loss, output

    def val_test_step(self, x, y):
        # predict
        # propagate through the network and calculate the loss and predictions
        pred = self._model.forward(x)
        # calculate the loss
        loss = self._crit(pred, y)
        # return the loss and the predictions
        return loss, pred

    def train_epoch(self):
        # set training mode / prepare model for training
        self._model.train()
        # iterate through the training set
        # clear lists to track next epoch
        total_loss = 0
        total_acc = 0
        for x, y in self._train_dl:
            # transfer the batch to "cuda()" -> the gpu if a gpu is given
            if self._cuda:
                x = x.cuda()
                y = y.cuda()
           # if self._unsqueeze_needed:
            #    x = x.unsqueeze(1)
            # perform a training step
            loss, pred = self.train_step(x, y)
            total_loss += loss.item()
            #total_acc += accuracy_score(y.cpu().detach().numpy(), np.hstack(pred))
            #total_acc += accuracy_score(y.cpu(), pred.cpu() > 0.5)
        # calculate the average loss for the epoch and return it
        total_loss = total_loss / len(self._train_dl)
        #total_acc = total_acc / len(self._train_dl)
        #print("Train: loss: {}, accuracy: {}".format(total_loss, total_acc))
        print("Train: loss: {}".format(total_loss))
        return total_loss

    def val_test(self, test=False):
        # set eval mode / prepare model for evaluation
        self._model.eval()
        # disable gradient computation (disable autograd engine)
        with t.no_grad():
            # iterate through the validation set
            # clear lists to track next epoch
            total_loss = 0
            total_acc = 0
            total_f1 = 0
            if test:
                dataset = self._test_dl
            else:
                dataset = self._val_test_dl
            for x, y in dataset:
                # transfer the batch to the gpu if given
                if self._cuda:
                    x = x.cuda()
                    y = y.cuda()
                #if self._unsqueeze_needed:
                 #   x = x.unsqueeze(1)

                # perform a validation step / forward pass: compute predicted outputs by passing inputs to the model
                loss, pred = self.val_test_step(x, y)       # pred.shape torch.Size([8, 5]) = bs, num_cl
                # calculate metrics for this iteration
                total_loss += loss.item()

                # deal with multilabel
                activation = t.nn.Softmax(dim=1)
                pred = activation(pred.data)
                pred = t.max(pred, 1)[1]    # choose maximum class index for the most predominant index
                # pred: tensor([4, 3, 2, 4, 0, 3, 4, 3])
                #pred = pred.cpu().detach()
                pred = pred.cpu().detach().numpy()

                # prepare to count predictions for each class
                correct_pred = {classname: 0 for classname in w2v_classes}
                total_pred = {classname: 0 for classname in w2v_classes}
                # collect the correct predictions for each class
                for label, prediction in zip(y, pred):
                    if label == prediction:
                        correct_pred[w2v_classes[label]] += 1
                    total_pred[w2v_classes[label]] += 1

                # print accuracy for each class
                #for classname, correct_count in correct_pred.items():
                 #   accuracy = 100 * float(correct_count) / total_pred[classname]
                    #print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))
                  #  print("Accuracy for class {} is: {} %".format(classname, accuracy))

                #total_acc += accuracy_score(y.cpu(), pred.cpu() > 0.5)
                total_acc += accuracy_score(y.cpu().detach().numpy(), np.hstack(pred))
                #total_f1 += f1_score(y.cpu(), pred.cpu() > 0.5, average=None)
                total_f1 += f1_score(y.cpu().detach().numpy(), np.hstack(pred), average='weighted')
                # save the predictions and the labels for each batch

            # calculate the average loss and average metrics
            total_loss = total_loss / len(dataset)
            total_acc = total_acc / len(dataset)
            total_f1 = total_f1 / len(dataset)
             
            # return the loss and print the calculated metrics
            print("Test: loss: {}, accuracy: {}%, f-score: {}".format(total_loss, total_acc * 100, total_f1))
            print("y unique", np.unique(y))
            print("pred unique", np.unique(pred))
            print(classification_report(y.cpu(), pred, target_names=labels_dict.keys()))
        t.enable_grad()
        return total_loss, total_f1

    def fit(self, n_epochs):
        # to track the training loss as the model trains
        #train_losses = []
        # to track the validation loss as the model trains
        #valid_losses = []
        # to track the average training loss per epoch as the model trains
        avg_train_losses = []
        # to track the average validation loss per epoch as the model trains
        avg_valid_losses = []
        # store results
        #res = open('./results/' + self.model_name + '_results.txt', 'w')
        #res.write(50 * '=')
        #res.write('Model \n')
        #res.write(str(self._model) + '\n')

        # load the last checkpoint with the best model
        self.restore_checkpoint()

        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=self._early_stopping_patience, verbose=True)

        for epoch in range(1, n_epochs + 1):
            # train the model
            train_loss = self.train_epoch()
            # validate the model
            valid_loss, f1_score = self.val_test()

            # calculate average loss over an epoch
            #train_loss = np.average(train_losses)
            #train_loss = train_losses / len(self._train_dl)
            #valid_loss = np.average(valid_losses)
            
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "val_loss": valid_loss,
                #"auc_score": auc_score,
                "f1_score": f1_score
            })

            avg_train_losses.append(train_loss)
            avg_valid_losses.append(valid_loss)

            """
            # print training/validation statistics
            epoch_len = len(str(n_epochs))

            print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                         f'train_loss: {train_loss:.5f} ' +
                         f'valid_loss: {valid_loss:.5f}')

            print(print_msg)
            """

            # early_stopping needs the validation loss to check if it has decreased,
            # if it has, it will make a checkpoint of the current model
            early_stopping(valid_loss, self._model, self.model_name, self.dataset)

            """
            # use the save_checkpoint function to save the model for each epoch
            save_flag = self._early_stopping_cb.step(l_dev)

            if save_flag:
                res.write(50 * '=')
                res.write('Epoch: ' + str(self.epoch) + ' Training Loss :' + str(l_train) + ' Development Loss :' + str(
                    l_dev))
                Trainer.save_checkpoint(self, self.epoch + 1, model_name)
                self.epoch_n = self.epoch + 1
            """
                

            if early_stopping.early_stop:
                print("Early stopping has been reached")
                break

            # load the last checkpoint with the best model
            #self._model.load_state_dict(t.load('checkpoint.pt'))
            #self.restore_checkpoint()
        
                #auc_score = log_metrics(preds, labels)["auc_micro"]
        #auc_score = log_metrics(pred, y)["auc_micro"]
        #print("AUC score: ", auc_score)
        #avg_train_loss, avg_val_loss = train_loss / len(train_loader), eval_loss / len(val_loader)

        #print("Average Train loss: ", avg_train_loss)
        #print("Average loss: ", total_loss)

        #if total_loss < best_val_loss:
         #   best_val_loss = total_loss
         #   t.save(model.state_dict(), "./best_model.pt")  
         #   print("Model saved as current val_loss is: ", best_val_loss)  
        
        # return model, avg_train_losses, avg_valid_losses
        #res.close()
        return avg_train_losses, avg_valid_losses
    
    def test(self):
        #avg_test_losses = []

        # load the last checkpoint with the best model
        self.restore_checkpoint()

        # initialize the early_stopping object
        #early_stopping = EarlyStopping(patience=self._early_stopping_patience, verbose=True)

        # validate the model
        test_loss = self.val_test()

        # calculate average loss over an epoch
        #train_loss = np.average(train_losses)
        #train_loss = train_losses / len(self._train_dl)
        #valid_loss = np.average(valid_losses)

        #avg_test_losses.append(test_loss)

            # early_stopping needs the validation loss to check if it has decreased,
            # if it has, it will make a checkpoint of the current model
            #early_stopping(test_loss, self._model, self.model_name, self.dataset)

            #if early_stopping.early_stop:
             #   print("Early stopping has been reached")
              #  break

        # return model, avg_train_losses, avg_valid_losses
        #res.close()
        #return avg_test_losses
        return test_loss

In [None]:
wandb.agent(sweep_id, function=trainer, count=6)

## Without WandB

In [16]:
#ft_df = np.load("data/AIBO/w2v/" + "aibo_ft.npy")
# hs_df = np.load("/Users/el/embrace/data/data_wav2vec2/icp/" + "icp_hs.npy")
#labels_df = np.load("data/AIBO/w2v/" + "aibo_labels.npy")

# labels_dict = {'ang': 1, 'exc': 3, 'fea': 2, 'fru': 4, 'hap': 3, 'neu': 0, 'oth': 6, 'sad': 4, 'sur': 5, 'xxx': 6}
labels_dict = {'A': 0, 'E': 1, 'N': 2, 'P': 3, 'R': 4}
ID_TO_CLASS = {v: k for k, v in labels_dict.items()}
w2v_classes = list(ID_TO_CLASS.keys())

#lb_df = np.array([labels_dict[letter] for letter in labels_df])

In [31]:
w2v_classes

[0, 1, 2, 3, 4]

In [17]:
BATCH_SIZE = 64
EPOCHS = 100

In [18]:
model = 'xvec'

In [19]:
PATH = "data/AIBO/wav2vec/"

x_train = np.load(PATH + model + "_aibo_ft_train.npy")
x_val = np.load(PATH + model + "_aibo_ft_val.npy")
x_test = np.load(PATH + model + "_aibo_ft_test.npy")

y_train = np.load(PATH + model + "_aibo_lb_train.npy", allow_pickle=True)
y_val = np.load(PATH + model + "_aibo_lb_val.npy", allow_pickle=True)
y_test = np.load(PATH + model + "_aibo_lb_test.npy", allow_pickle=True)

print(y_train.shape, y_val.shape, y_test.shape)       # (4696,) (935,) (1407,)
print(x_train.shape, x_val.shape, x_test.shape)       # (4696, 3072) (935, 3072) (1407, 3072)


(3139,) (84,) (102,)
(3139, 512) (84, 512) (102, 512)


In [20]:
train_dataset = TensorDataset(t.tensor(x_train), t.tensor(y_train))
# train_dataset = TensorDataset(torch.from_numpy(x_train_eval).float(), torch.from_numpy(y_train_eval).float())
val_dataset = TensorDataset(t.tensor(x_val), t.tensor(y_val))
test_dataset = TensorDataset(t.tensor(x_test), t.tensor(y_test))

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True) # num_workers
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [21]:
labels_dict.keys()

dict_keys(['A', 'E', 'N', 'P', 'R'])

In [22]:
labels_dict.values()

dict_values([0, 1, 2, 3, 4])

In [23]:
np.unique(y_val)

array([0, 1, 2, 3, 4])

In [10]:
#my_x = ft_df
#my_y = lb_df

#split_index = int(my_y.shape[0]*0.8)
#print(split_index)

#all_indexes = list(range(my_x.shape[0]))
#test_indexes = all_indexes[(split_index + 1):]
#x_test = my_x[test_indexes]
#y_test = my_y[test_indexes]

#train_indexes = all_indexes[:(split_index + 1)]
#x_train = my_x[train_indexes]
#y_train = my_y[train_indexes]

#eval_split_index = 934
#all_indexes = list(range(x_train.shape[0]))
#eval_indexes = all_indexes[(eval_split_index + 1):]
#eval_train_indexes = all_indexes[:(eval_split_index + 1)]

#x_train_eval = x_train[eval_indexes]
#y_train_eval = y_train[eval_indexes]
#x_eval = x_train[eval_train_indexes]
#y_eval = y_train[eval_train_indexes]

# x_train_eval=np.vstack(x_train_eval).astype(np.float)
# y_train_eval=np.vstack(y_train_eval).astype(np.str)

In [11]:
#print(y_train_eval.shape, y_eval.shape, y_test.shape)       # (4696,) (935,) (1407,)
#print(x_train_eval.shape, x_eval.shape, x_test.shape)       # (4696, 3072) (935, 3072) (1407, 3072)

In [24]:
def get_device():
    if t.cuda.is_available():
        return "cuda"
    else:
        return "cpu"

In [54]:
labels_dict.keys()

dict_keys(['A', 'E', 'N', 'P', 'R'])

In [34]:
class Trainer:

    def __init__(self,
                 model,  # Model to be trained.
                 model_name,
                 dataset,
                 crit,  # Loss function
                 optim=None,  # Optimizer
                 train_dl=None,  # Training data set
                 val_test_dl=None,  # Validation data set
                 test_dl=None,  # Test data set
                 cuda=True,  # Whether to use the GPU
                 early_stopping_patience=-1):   # The patience for early stopping
                 #unsqueeze_needed=True
        self._model = model
        self.model_name = model_name
        self.dataset = dataset
        self._crit = crit
        self._optim = optim
        self._train_dl = train_dl
        self._val_test_dl = val_test_dl
        self._test_dl = test_dl
        self._cuda = cuda
        self._early_stopping_patience = early_stopping_patience
        #self._unsqueeze_needed = unsqueeze_needed
        

        if cuda:
            self._model = model.cuda()
            self._crit = crit.cuda()

    #def save_checkpoint(self, epoch):
     #   t.save({'state_dict': self._model.state_dict()}, 'checkpoints/checkpoint_{:03d}.ckp'.format(epoch))

    #def restore_checkpoint(self, epoch_n):
    def restore_checkpoint(self):
        path = 'checkpoints/' + self.model_name + '_checkpoint_{}.ckp'.format(get_datetime())
        if os.path.exists(path):
            #ckp = t.load('checkpoints/checkpoint_{:03d}.ckp'.format(epoch_n), 'cuda' if self._cuda else None)
            ckp = t.load(path, 'cuda' if self._cuda else None)
            self._model.load_state_dict(ckp['state_dict'])

    def save_onnx(self, fn):
        m = self._model.cpu()
        m.eval()
        x = t.randn(1, 3, 300, 300, requires_grad=True)
        y = self._model(x)
        t.onnx.export(m,  # model being run
                      x,  # model input (or a tuple for multiple inputs)
                      fn,  # where to save the model (can be a file or file-like object)
                      export_params=True,  # store the trained parameter weights inside the model file
                      opset_version=10,  # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['input'],  # the model's input names
                      output_names=['output'],  # the model's output names
                      dynamic_axes={'input': {0: 'batch_size'},  # variable lenght axes
                                    'output': {0: 'batch_size'}})

    def train_step(self, x, y):
        # perform following steps:
        # -reset the gradients / clear the gradients of all optimized variables
        self._optim.zero_grad()
        # -propagate through the network / forward pass: compute predicted outputs by passing inputs to the model
        output = self._model.forward(x)
        # -calculate the loss
        loss = self._crit(output, y)
        # -compute gradient by backprop / backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # -update weights / perform a single optimization step (parameter update)
        self._optim.step()
        # -return the loss
        return loss, output

    def val_test_step(self, x, y):
        # predict
        # propagate through the network and calculate the loss and predictions
        pred = self._model.forward(x)
        # calculate the loss
        loss = self._crit(pred, y)
        # return the loss and the predictions
        return loss, pred

    def train_epoch(self):
        # set training mode / prepare model for training
        self._model.train()
        # iterate through the training set
        # clear lists to track next epoch
        total_loss = 0
        total_acc = 0
        for x, y in self._train_dl:
            # transfer the batch to "cuda()" -> the gpu if a gpu is given
            if self._cuda:
                x = x.cuda()
                y = y.cuda()
           # if self._unsqueeze_needed:
            #    x = x.unsqueeze(1)
            # perform a training step
            loss, pred = self.train_step(x, y)
            total_loss += loss.item()
            #total_acc += accuracy_score(y.cpu().detach().numpy(), np.hstack(pred))
            #total_acc += accuracy_score(y.cpu(), pred.cpu() > 0.5)
        # calculate the average loss for the epoch and return it
        total_loss = total_loss / len(self._train_dl)
        #total_acc = total_acc / len(self._train_dl)
        #print("Train: loss: {}, accuracy: {}".format(total_loss, total_acc))
        print("Train: loss: {}".format(total_loss))
        return total_loss

    def val_test(self, test=False):
        # set eval mode / prepare model for evaluation
        self._model.eval()
        # disable gradient computation (disable autograd engine)
        with t.no_grad():
            # iterate through the validation set
            # clear lists to track next epoch
            total_loss = 0
            total_acc = 0
            total_f1 = 0
            if test:
                dataset = self._test_dl
            else:
                dataset = self._val_test_dl
            for x, y in dataset:
                # transfer the batch to the gpu if given
                if self._cuda:
                    x = x.cuda()
                    y = y.cuda()
                #if self._unsqueeze_needed:
                 #   x = x.unsqueeze(1)

                # perform a validation step / forward pass: compute predicted outputs by passing inputs to the model
                loss, pred = self.val_test_step(x, y)       # pred.shape torch.Size([8, 5]) = bs, num_cl
                # calculate metrics for this iteration
                total_loss += loss.item()

                # deal with multilabel
                activation = t.nn.Softmax(dim=1)
                pred = activation(pred.data)
                pred = t.max(pred, 1)[1]    # choose maximum class index for the most predominant index
                # pred: tensor([4, 3, 2, 4, 0, 3, 4, 3])
                #pred = pred.cpu().detach()
                pred = pred.cpu().detach().numpy()

                # prepare to count predictions for each class
                correct_pred = {classname: 0 for classname in w2v_classes}
                total_pred = {classname: 0 for classname in w2v_classes}
                # collect the correct predictions for each class
                for label, prediction in zip(y, pred):
                    if label == prediction:
                        correct_pred[w2v_classes[label]] += 1
                    total_pred[w2v_classes[label]] += 1

                # print accuracy for each class
                #for classname, correct_count in correct_pred.items():
                 #   accuracy = 100 * float(correct_count) / total_pred[classname]
                    #print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))
                  #  print("Accuracy for class {} is: {} %".format(classname, accuracy))

                #total_acc += accuracy_score(y.cpu(), pred.cpu() > 0.5)
                total_acc += accuracy_score(y.cpu().detach().numpy(), np.hstack(pred))
                #total_f1 += f1_score(y.cpu(), pred.cpu() > 0.5, average=None)
                total_f1 += f1_score(y.cpu().detach().numpy(), np.hstack(pred), average='weighted')
                # save the predictions and the labels for each batch

            # calculate the average loss and average metrics
            total_loss = total_loss / len(dataset)
            total_acc = total_acc / len(dataset)
            total_f1 = total_f1 / len(dataset)
             
            # return the loss and print the calculated metrics
            print("Test: loss: {}, accuracy: {}%, f-score: {}".format(total_loss, total_acc * 100, total_f1))
            print("y unique", t.unique(y))
            print("pred unique", np.unique(pred))
            #print(classification_report(y.cpu(), pred, target_names=labels_dict.keys()))
            print(classification_report(y.cpu(), pred, target_names=t.unique(y)))
        t.enable_grad()
        return total_loss, total_f1

    def fit(self, n_epochs):
        # to track the training loss as the model trains
        #train_losses = []
        # to track the validation loss as the model trains
        #valid_losses = []
        # to track the average training loss per epoch as the model trains
        avg_train_losses = []
        # to track the average validation loss per epoch as the model trains
        avg_valid_losses = []
        # store results
        #res = open('./results/' + self.model_name + '_results.txt', 'w')
        #res.write(50 * '=')
        #res.write('Model \n')
        #res.write(str(self._model) + '\n')

        # load the last checkpoint with the best model
        self.restore_checkpoint()

        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=self._early_stopping_patience, verbose=True)

        for epoch in range(1, n_epochs + 1):
            # train the model
            train_loss = self.train_epoch()
            # validate the model
            valid_loss, f1_score = self.val_test()

            # calculate average loss over an epoch
            #train_loss = np.average(train_losses)
            #train_loss = train_losses / len(self._train_dl)
            #valid_loss = np.average(valid_losses)

            avg_train_losses.append(train_loss)
            avg_valid_losses.append(valid_loss)

            """
            # print training/validation statistics
            epoch_len = len(str(n_epochs))

            print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                         f'train_loss: {train_loss:.5f} ' +
                         f'valid_loss: {valid_loss:.5f}')

            print(print_msg)
            """

            # early_stopping needs the validation loss to check if it has decreased,
            # if it has, it will make a checkpoint of the current model
            early_stopping(valid_loss, self._model, self.model_name, self.dataset)

            """
            # use the save_checkpoint function to save the model for each epoch
            save_flag = self._early_stopping_cb.step(l_dev)

            if save_flag:
                res.write(50 * '=')
                res.write('Epoch: ' + str(self.epoch) + ' Training Loss :' + str(l_train) + ' Development Loss :' + str(
                    l_dev))
                Trainer.save_checkpoint(self, self.epoch + 1, model_name)
                self.epoch_n = self.epoch + 1
            """
                

            if early_stopping.early_stop:
                print("Early stopping has been reached")
                break

            # load the last checkpoint with the best model
            #self._model.load_state_dict(t.load('checkpoint.pt'))
            #self.restore_checkpoint()
        
                #auc_score = log_metrics(preds, labels)["auc_micro"]
        #auc_score = log_metrics(pred, y)["auc_micro"]
        #print("AUC score: ", auc_score)
        #avg_train_loss, avg_val_loss = train_loss / len(train_loader), eval_loss / len(val_loader)

        #print("Average Train loss: ", avg_train_loss)
        #print("Average loss: ", total_loss)

        #if total_loss < best_val_loss:
         #   best_val_loss = total_loss
         #   t.save(model.state_dict(), "./best_model.pt")  
         #   print("Model saved as current val_loss is: ", best_val_loss)  
        
        # return model, avg_train_losses, avg_valid_losses
        #res.close()
        return avg_train_losses, avg_valid_losses
    
    def test(self):
        #avg_test_losses = []

        # load the last checkpoint with the best model
        self.restore_checkpoint()

        # initialize the early_stopping object
        #early_stopping = EarlyStopping(patience=self._early_stopping_patience, verbose=True)

        # validate the model
        test_loss = self.val_test()

        # calculate average loss over an epoch
        #train_loss = np.average(train_losses)
        #train_loss = train_losses / len(self._train_dl)
        #valid_loss = np.average(valid_losses)

        #avg_test_losses.append(test_loss)

            # early_stopping needs the validation loss to check if it has decreased,
            # if it has, it will make a checkpoint of the current model
            #early_stopping(test_loss, self._model, self.model_name, self.dataset)

            #if early_stopping.early_stop:
             #   print("Early stopping has been reached")
              #  break

        # return model, avg_train_losses, avg_valid_losses
        #res.close()
        #return avg_test_losses
        return test_loss

In [26]:
class EarlyStopping:
    # https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, epoch=-1, verbose=False, delta=0, trace_func=print):
    #def __init__(self, patience=7, epoch=0, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.epoch = epoch
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.trace_func = trace_func

    def __call__(self, val_loss, model, model_name, dataset):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, model_name, dataset)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                print("EarlyStopping counter is higher than patience")
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, model_name, dataset)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, model_name, dataset):
        """
        Saves model when validation loss decreases
        """
        if not os.path.isdir('./checkpoints/'):
            os.makedirs('./checkpoints/')
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        #a.save(model.state_dict(), self.path)
        path = 'checkpoints/' + model_name + dataset + '_checkpoint_{}.ckp'.format(get_datetime())
        t.save({'state_dict': model.state_dict()}, path)
        self.val_loss_min = val_loss
        #t.save({'state_dict': self._model.state_dict()}, 'checkpoints/' + model_name + 'checkpoint.ckp')


def get_datetime():
    #return strftime("%Y-%m-%d_%H:%M:%S", gmtime())
    # storing per day to have different runs from different days
    return strftime("%Y-%m-%d", gmtime())

In [27]:
device = get_device()

In [28]:

"""
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            #nn.Linear(3072, 1000),     # icp_ft.npy
            nn.Linear(6144, 1000),
            nn.ReLU(),
            nn.Linear(1000, 7)
        )

    def forward(self, x):
        # convert tensor (128, 1, 28, 28) --> (128, 1*28*28)
        x = x.view(x.size(0), -1)
        #print(x.shape)
        x = self.layers(x)
        return x
"""


# defining model
class MLP(nn.Module):
    def __init__(self, in_dim, hidden_dim_1, hidden_dim_2, out_dim=2):
        super(MLP, self).__init__()

        self.in_dim = in_dim
        self.hidden_dim_1 = hidden_dim_1
        self.hidden_dim_2 = hidden_dim_2
        self.out_dim = out_dim

        ## 1st hidden layer
        self.linear_1 = nn.Linear(self.in_dim, self.hidden_dim_1)
        self.linear_1.weight.detach().normal_(0.0, 0.1)
        self.linear_1.bias.detach().zero_()
        self.linear_1_bn = nn.BatchNorm1d(self.hidden_dim_1, momentum=0.6)

        ## 2nd hidden layer
        self.linear_2 = nn.Linear(self.hidden_dim_1, self.hidden_dim_2)
        self.linear_2.weight.detach().normal_(0.0, 0.1)
        self.linear_2.bias.detach().zero_()
        self.linear_2_bn = nn.BatchNorm1d(self.hidden_dim_2, momentum=0.6)

        ## Out layer
        self.linear_out = nn.Linear(self.hidden_dim_2, self.out_dim)
        self.linear_out.weight.detach().normal_(0.0, 0.1)
        self.linear_out.bias.detach().zero_()

    def forward(self, x):
        out = self.linear_1(x)
        out = self.linear_1_bn(out)
        out = F.relu(out)

        out = self.linear_2(out)
        out = self.linear_2_bn(out)
        out = F.relu(out)
        out = F.dropout(out, p=0.175, training=self.training)

        out = self.linear_out(out)
        return out

In [41]:
x_train.shape

(3139, 512)

In [42]:
#model = MLP(6144, 3000, 1000, 7)
model = MLP(512, 3000, 1000, 7)
print(model)
#ummary(model, (64, 512))

criterion = nn.CrossEntropyLoss()

# set up the optimizer
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0)

trainer = Trainer(model, "w2v", "aibo", criterion, optimizer, train_loader, val_loader, test_loader, 
                  cuda=t.cuda.is_available(),\
                  early_stopping_patience=20)

# go, go, go... call fit on trainer
res = trainer.fit(100)

# plot the results
plt.plot(np.arange(len(res[0])), res[0], label='train loss')
plt.plot(np.arange(len(res[1])), res[1], label='val loss')
plt.yscale('log')
plt.legend()
plt.savefig('{}_losses_{}.png'.format("aibo", "w2v"))

MLP(
  (linear_1): Linear(in_features=512, out_features=3000, bias=True)
  (linear_1_bn): BatchNorm1d(3000, eps=1e-05, momentum=0.6, affine=True, track_running_stats=True)
  (linear_2): Linear(in_features=3000, out_features=1000, bias=True)
  (linear_2_bn): BatchNorm1d(1000, eps=1e-05, momentum=0.6, affine=True, track_running_stats=True)
  (linear_out): Linear(in_features=1000, out_features=7, bias=True)
)


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)

In [19]:
print("Starting testing")
test_res = trainer.test()

Starting testing
Test: loss: 1.8888876736164093, accuracy: 61.71875%, f-score: 0.5146918113039609
