In [1]:
import numpy as np
import pandas as pd 
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
import torch as to
import torch.nn as nn 
import torch.nn.functional as func
from torch.utils import data
from torch.utils.data import DataLoader
import random 
import time 
import torch.nn.utils.rnn as rnnutils
from sklearn.model_selection import train_test_split
import torchviz
from torchsummary import summary
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score 
from itertools import tee
import hashlib

Using TensorFlow backend.



### Info 
This notebook is a continuation of the 'baseline.ipynb' nootebook. 

### Setup 
+> Added balancing of the data-set. Balancing is done in a way that samples are added until the less frequent class isn't same as more frequent class. Samples are added squentialy, not randomly, thus ensuring that the differenece of the most frequent sample and least frequent sample in the data-set is one.

+> f1 is now macro

+> batchnormalization was added befor convolution and removed :D 

+> we are currently stopping the training if f1 is not changing (max_constant_f1 denotes the max number of epochs that the f1 is not changing)

+> additonal data was added to the results dataframe (precision and recall for each class)

+> to ease the strain on memory this code has all input data is represented as indices, thus it runs a bit slower because it has to access the input dataframe to attain input vectors

+> original dataframe is preprocesed and deleted so that memory is freed 

+> now you can specify a gpu, ouput of all results, predictions and models (CONSTANT: 'MODELS_NAME')

In [2]:
#CELL METADATA
DATASET = '/home/mbosnjak/Datasets/embedded_comments_last_100_min_20com_per_auth_w_wc_10_200_no_mbti.csv'
FOLDS = '/home/mbosnjak/Datasets/comments_last_100_min_20com_per_auth_w_wc_10_200.csv.folds.csv'
AUTHORS = '/home/mbosnjak/Datasets/author_profiles.csv'

MODELS_NAME = 'baseline_take2_twin'
OUTPUT_DIR = f'mpr_{MODELS_NAME}/'
RESULTS = OUTPUT_DIR + f'results_{MODELS_NAME}.csv'

RELEVANT_DIFFERENCE = 0.00001
NUMBER_OF_CLASSES = 2
STATE = 156
no_of_folds = 2
cuda_device_index = 1

mbti_labels_1 = ['introverted', 'intuitive']
mbti_labels_2 = ['thinking', 'perceiving']
mbti_labels = mbti_labels_2 #+ mbti_labels_1
MODELS_PREFORMANCE_COLUMNS = ['val_f1', 'val_precision_0', 'val_precision_1', 'val_precision_macro' ,\
                              'val_recall_0', 'val_recall_1', 'val_recall_macro', \
                              'test_f1', 'test_precision_0', 'test_precision_1', 'test_precision_macro',\
                              'test_recall_0', 'test_recall_1', 'test_recall_macro',\
                              'epoch']
MODELS_PARAMETER_COLUMNS = ['mbti_trait', 'fold', 'learning_rate', 'batch_size',\
                           'kernels_count', 'sentences_count' ]
MODELS_META_DATA_COLUMNS = ['models_name', 'hash_id']
to.manual_seed(STATE)

<torch._C.Generator at 0x7f0d7798c830>

In [3]:
#CELL PARAMETERS
batch_sizes = [1, 4, 16, 32]
learning_rates = [0.001, 0.01, 0.0001, 0.00001, 0.000001, 0.0000001, 0.00000001, 0.000000001, 0.0000000001]
kernel_sizes = [4, 8, 12, 16]
sentences_counts = [2, 4]
n_epochs = 30
use_GPU = True
#TODO: this is sentences per author not comments per author, quick fix is max size 200*100 (200 max sentences by comment * 100 comments)
comments_per_author = 20000
#comments_per_author = 20
cuda_device = to.device(cuda_device_index) if use_GPU else None
validation_p = .2
print_status_batch = 1000
max_constant_f1 = 3

In [4]:
def get_checkpoints_name(models_identifier):
    return OUTPUT_DIR+"save_"+MODELS_NAME+'_'+models_identifier + ".pt"

def get_predictions_file_name(models_identifier, target, fold):
    return OUTPUT_DIR + f'{target}_{fold}_{MODELS_NAME}_{models_identifier}_predictions'

def delimiter():
    print('-'*23)

In [5]:
input_df = pd.read_csv(DATASET)
#input_df = pd.read_csv(DATASET, nrows = 2000) # TODO: remove nrows
folds_df = pd.read_csv(FOLDS, usecols =['author', 'fold'])
authors_profiles_df = pd.read_csv(AUTHORS)
authors_with_mbti = list(authors_profiles_df[authors_profiles_df.introverted.notnull()]['author'])

In [6]:
#TODO: add comment delimiter -> to recognize comments end and beginning
def merger(comments):
    npad = [(0, max(sentences_counts)), (0, 0)]
    carpet = comments[:comments_per_author].as_matrix(columns=comments.columns[1:1025])
    carpet_padded = np.pad(carpet, pad_width=npad, mode='constant', constant_values=0)
    #print(f'before {carpet.shape} - after {carpet_padded.shape}')
    return to.tensor(carpet_padded)

In [7]:
#Data preparation
data_df = input_df[input_df['author'].isin(authors_with_mbti)]
del input_df
present_authors = sorted(data_df['author'].unique())
authors_indices = dict(zip(present_authors , list(range(len(present_authors)))))
input_x_df = data_df.sort_values(by=['author', 'Unnamed: 0']).groupby(['author']).apply(merger).tolist()
del data_df

  after removing the cwd from sys.path.


In [8]:
results = pd.DataFrame(data=None, columns=MODELS_META_DATA_COLUMNS+MODELS_PREFORMANCE_COLUMNS+MODELS_PARAMETER_COLUMNS)
for name in MODELS_PREFORMANCE_COLUMNS:
    results = results.astype({name: 'float64'})

In [9]:
def set_hash(index):
    global results
    identifier = hashlib.md5(''.join([str(x) for x in results[MODELS_PARAMETER_COLUMNS].iloc[index]]).encode('utf-8')).hexdigest()
    results.at[index, 'hash_id'] = identifier
    return identifier

def create_new_model_input(mbti_trait, fold, learning_rate, batch_size,\
                           kernels_count, sentences_count):
    global results
    results = results.append({'models_name':MODELS_NAME, 'mbti_trait' : mbti_trait, 'fold' : fold, 'learning_rate' : learning_rate, 'batch_size' : batch_size, 'kernels_count' : kernels_count, 'sentences_count' : sentences_count}, ignore_index=True)
    return set_hash(len(results)-1)    

def update_models_val_results(models_identifier, val_f1, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_recall_1, val_recall_m, epoch):
    global results
    results.loc[results['hash_id'] == models_identifier, 'val_f1'] = val_f1
    results.loc[results['hash_id'] == models_identifier, 'val_precision_0'] = val_precision_0
    results.loc[results['hash_id'] == models_identifier, 'val_precision_1'] = val_precision_1
    results.loc[results['hash_id'] == models_identifier, 'val_precision_macro'] = val_precision_m
    results.loc[results['hash_id'] == models_identifier, 'val_recall_0'] = val_recall_0
    results.loc[results['hash_id'] == models_identifier, 'val_recall_1'] = val_recall_1
    results.loc[results['hash_id'] == models_identifier, 'val_recall_macro'] = val_recall_m
    results.loc[results['hash_id'] == models_identifier, 'epoch'] = epoch
    
def update_models_test_results(models_identifier, test_f1, test_precision_0, test_precision_1, precision_m, test_recall_0, test_recall_1, test_recall_m):
    global results
    results.loc[results['hash_id'] == models_identifier, 'test_f1'] = test_f1
    results.loc[results['hash_id'] == models_identifier, 'test_precision_0'] = test_precision_0
    results.loc[results['hash_id'] == models_identifier, 'test_precision_1'] = test_precision_1
    results.loc[results['hash_id'] == models_identifier, 'test_precision_macro'] = test_precision_m
    results.loc[results['hash_id'] == models_identifier, 'test_recall_0'] = test_recall_0
    results.loc[results['hash_id'] == models_identifier, 'test_recall_1'] = test_recall_1
    results.loc[results['hash_id'] == models_identifier, 'test_recall_macro'] = test_recall_m
    
def get_best_models_data(target, fold):
    return results.loc[results[(results.mbti_trait == target) & (results.fold == fold)]['val_f1'].idxmax()]
    

In [10]:
create_new_model_input('dfsfdsf',333,3,223333,1,13)

'e5a677ccc00e12a5dc84842655a04262'

[1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0

### FLOW

In [11]:
def get_input_output(data_authors, target):
    output_prep = authors_profiles_df[authors_profiles_df['author'].isin(data_authors)][target].tolist()
    input_indices = [index for author, index in authors_indices.items() if author in data_authors]
    return input_indices, output_prep

def balance_binary_data(input_df, output_df):
    positive_indices = [index for index, element in enumerate(output_df) if element == 1.0]
    negative_indices = [index for index, element in enumerate(output_df) if element == 0.0]
    negative_count = len(negative_indices)
    positive_count = len(positive_indices)
    
    if(positive_count > negative_count):
        return create_balanced_data(positive_indices, negative_indices, input_df, output_df)
    elif(negative_count > positive_count):
        return create_balanced_data(negative_indices, positive_indices, input_df, output_df)
        
def create_balanced_data(more_frequent, less_frequent, input_df, output_df):
    more_frequent_count = len(more_frequent)
    less_frequent_count = len(less_frequent)
    constant_multiplyer = more_frequent_count // less_frequent_count
    remaining_additions = more_frequent_count % less_frequent_count
    balanced_indices = less_frequent*constant_multiplyer + less_frequent[:remaining_additions] + more_frequent
    output_df_balanced = [output_df[index] for index in balanced_indices]
    input_df_balanced = [input_df[index] for index in balanced_indices]
    return input_df_balanced, output_df_balanced
    
def data_preparation(target, fold):
    test_data_authors = folds_df[(folds_df['fold'] == fold) & (folds_df['author'].isin(present_authors))]['author'].tolist()
    train_data_authors = folds_df[(folds_df['fold'] != fold) & (folds_df['author'].isin(present_authors))]['author'].tolist()
    train_input_indices, train_output = get_input_output(train_data_authors, target)
    test_input_indices, test_output = get_input_output(test_data_authors, target)

    train_input_indices, val_input_indices, train_output, val_output = train_test_split(train_input_indices, train_output, test_size=validation_p, random_state=STATE)
    assert len(train_input_indices) == len(train_output)
    assert len(test_input_indices) == len(test_output)
    assert len(val_input_indices) == len(val_output)
    train_input_indices, train_output = balance_binary_data(train_input_indices, train_output)
    return train_input_indices, train_output, val_input_indices, val_output, test_input_indices, test_output

def chunks(l, n):
    return [l[i:i + n] for i in range(0, len(l), n)]  

def create_minibatches(data_X, data_y, minibatch_size, cuda_dev):
    for idx_list in chunks(range(len(data_X)), minibatch_size):
        data_X_indices = [data_X[index] for index in idx_list]
        data_y_idx = [int(data_y[index]) for index in idx_list]
        
        minibatch_X = [input_x_df[index] for index in data_X_indices]
        minibatch_X = rnnutils.pad_sequence(minibatch_X, batch_first=True, padding_value = 0) 
        minibatch_X = minibatch_X.unsqueeze(1)
        minibatch_y = to.tensor(data_y_idx)
        if cuda_dev is not None:
            minibatch_X = minibatch_X#.to(device=cuda_dev, dtype=to.float)
            minibatch_y = minibatch_y.to(device=cuda_dev, dtype=to.long)

        yield((minibatch_X, minibatch_y))   


In [12]:
class ConvoCarpet(nn.Module):
    def __init__ (self, embedding_size = 1024, kernels_count=64, sentences_count=2):
        super(ConvoCarpet, self).__init__()
        self.batch_norm = nn.BatchNorm2d(1)
        self.conv_layer = nn.Conv2d(1, kernels_count, [sentences_count, embedding_size])
        self.pool_layer = nn.AdaptiveMaxPool2d((1, None))
        self.fc_layer = nn.Linear(kernels_count, NUMBER_OF_CLASSES)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input_batch):
        #normalized_input = self.batch_norm(input_batch)
        conv_output = func.relu(self.conv_layer(input_batch))
        maxpool_output = self.pool_layer(conv_output)
        maxpool_output = maxpool_output.flatten(start_dim=1)
        linear_output = self.fc_layer(maxpool_output)
        output = self.softmax(linear_output)
        #return func.log_softmax(linear_output)
        return output


In [13]:
def calcMeasures(total_logits, total_true, threshold):
    total_preds = []
    for minibatch_logits in total_logits:
        minibatch_preds = minibatch_logits[:,1] > threshold
        total_preds.append(minibatch_preds)
    total_preds = to.cat(total_preds)
    total_true = to.cat(total_true)
    F1 = f1_score(total_true.cpu(), total_preds.cpu(), pos_label = 1, average='macro')
    precision_m = precision_score(total_true.cpu(), total_preds.cpu(), average='macro')
    precision_1 = precision_score(total_true.cpu(), total_preds.cpu(), pos_label = 1)
    precision_0 = precision_score(total_true.cpu(), total_preds.cpu(), pos_label = 0)
    recall_m = recall_score(total_true.cpu(), total_preds.cpu(), average='macro')
    recall_1 = recall_score(total_true.cpu(), total_preds.cpu(), pos_label = 1)
    recall_0 = recall_score(total_true.cpu(), total_preds.cpu(), pos_label = 0)
    return F1, precision_0, precision_1, precision_m, recall_0, recall_1, recall_m

def apply_model(model, loss_func, data_loader):
    model.eval()
    total_loss = 0
    predicted = []
    true_output = []
    minibatch_loss = 0
    with to.no_grad():
        n_batch = 0
        for inputs, labels in data_loader:
            n_batch += 1
            minibatch_logits = model(inputs.to(device=cuda_device, dtype=to.float))
            minibatch_loss = loss_func(minibatch_logits, labels)
            
            total_loss += minibatch_loss
            predicted.append(minibatch_logits)
            true_output.append(labels)
            
    return total_loss, predicted, true_output

def optimize_models_params(trait, fold, train_input_indices, train_output, val_input_indices, val_output):
    print('Start training ...')
    
    loss = nn.CrossEntropyLoss()
    
    for batch_size in batch_sizes:
        for kernel_size in kernel_sizes:
            for sent_size in sentences_counts:
                for learning_rate in learning_rates:                
                    
                    train_loader = create_minibatches(train_input_indices, train_output, batch_size, cuda_device)
                    val_loader = create_minibatches(val_input_indices, val_output, batch_size, cuda_device)
                    
                    delimiter()
                    delimiter()
                    print(f'Starting training for BS: {batch_size} KS: {kernel_size} SS: {sent_size} LR: {learning_rate}')
                    delimiter()
                    
                    models_identifier = create_new_model_input(trait, fold, learning_rate, batch_size, kernel_size, sent_size)
                    
                    model = ConvoCarpet(kernels_count=kernel_size, sentences_count=sent_size)
                    if use_GPU:
                          model.to(cuda_device)

                    optimizer = to.optim.Adam(model.parameters(), learning_rate, amsgrad = True)

                    train_model(train_loader, val_loader, models_identifier, model, loss, optimizer)
                    results.to_csv(RESULTS)
    

def train_model(train_loader, val_loader, models_identifier, model, loss, optimizer):
    best_f1_val = 0
    last_f1 = 0
    counter = 0
    
    for epoch in range(0, n_epochs):
        epoch_start = time.time()
        model.train()
        total_loss = 0
        n_batch = 0
        train_loader, train_loader_backup = tee(train_loader)
        for inputs, labels in train_loader_backup:

            n_batch += 1
            #print(f"Current batch is {n_batch}")

            model.zero_grad()

            logits = model(inputs.to(device=cuda_device, dtype=to.float))
            minibatch_loss = loss(logits, labels)
            total_loss += minibatch_loss
            minibatch_loss.backward()
            optimizer.step()
            if(n_batch % print_status_batch == 0):
                
                delimiter()
                print(f"Train - Epoch {epoch} - batch {n_batch}, batch loss is {minibatch_loss:.6f}, total loss is {total_loss:.6f}")
                delimiter()

                
        delimiter()
        val_loader, val_loader_backup = tee(val_loader)
        val_loss, val_logits, true_val = apply_model(model, loss, val_loader_backup)
        val_f1_score, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_recall_1, val_recall_m = calcMeasures(val_logits, true_val, 0.5)
        
        
        print(f'Epoch {epoch} end: {time.time()-epoch_start}')
        print(f'Validation loss: {val_loss:.7f} - F1 score: {val_f1_score:.7f}')
        print(f'0 class -> precision: {val_precision_0:.7f} - recall: {val_recall_0:.7f}')
        print(f'1 class -> precision: {val_precision_1:.7f} - recall: {val_recall_1:.7f}')
        print(f'precision: {val_precision_m:.7f} - recall: {val_recall_m:.7f} - MACRO')
        delimiter()
        
        if (abs(val_f1_score - last_f1) <= RELEVANT_DIFFERENCE):
            counter += 1
        else:
            counter = 0
        
        last_f1 = val_f1_score
        
        if(counter > max_constant_f1):
            return
        
        if (val_f1_score > best_f1_val):
            update_models_val_results(models_identifier, val_f1_score, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_precision_1, val_recall_m, epoch)
            best_f1_val = val_f1_score
        # save a checkpoint
            best_checkpoint_filename = get_checkpoints_name(models_identifier)
            to.save({
              "models_identifier" : models_identifier, 
              "model_state_dict" : model.state_dict(),
              "optimizer_state_dict" : optimizer.state_dict()
            }, best_checkpoint_filename)
    

In [None]:
for target in mbti_labels:
    print(f"Creating model for target: {target}")
    for fold in range(0, no_of_folds):
        
        print(f'Processing fold {fold}...')
        
        train_input_indices, train_output, val_input_indices, val_output, test_input_indices, test_output = data_preparation(target, fold)
        
        optimize_models_params(target, fold, train_input_indices, train_output, val_input_indices, val_output)
        
        
        best_models = get_best_models_data(target, fold)
        #Recreate model
        models_identifier = best_models['hash_id']
        model = ConvoCarpet(kernels_count=best_models['kernels_count'] , sentences_count=best_models['sentences_count'])
        if use_GPU:
              model.to(cuda_device)

        checkpoint = to.load(get_checkpoints_name(models_identifier))
        model.load_state_dict(checkpoint["model_state_dict"])
        loss = nn.CrossEntropyLoss()

        
        print("."*15)
        print("."*15)
        
        print(f'Apply best model to test for {target} on fold {fold}.')
        test_loader = create_minibatches(test_input_indices, test_output, 1, cuda_device)
        test_loss, test_logits, test_true = apply_model(model, loss, test_loader)
        test_f1_score, test_precision_0, test_precision_1, test_precision_m, test_recall_0, test_recall_1, test_recall_m = calcMeasures(test_logits, test_true, 0.5)
        print(f'Test loss: {test_loss:.5f} - F1 score: {test_f1_score:.7f} ')
        print(f'0 class -> precision: {test_precision_0:.7f} - recall: {test_recall_0:.7f}')
        print(f'1 class -> precision: {test_precision_1:.7f} - recall: {test_recall_1:.7f}')
        print(f'precision: {test_precision_m:.7f} - recall: {test_recall_m:.7f} - MACRO')
              
        update_models_test_results(models_identifier, test_f1_score, test_precision_0, test_precision_1, test_precision_m, test_recall_0, test_recall_1, test_recall_m)
        with open(get_predictions_file_name(models_identifier, target, fold), 'w+') as f:
            for pre_l, true_l in zip(test_logits, test_true):
                f.write(f'{pre_l.cpu()[0][0]}, {pre_l.cpu()[0][1]}, {true_l.cpu().tolist()[0]}\n')
        results.to_csv(RESULTS)
        
        ##Save best models data
        
        print(f"+++ Finished with training and testing model for {target} on fold {fold}. +++")
        
        print("."*15)
        print("."*15)

Creating model for target: thinking
Processing fold 0...
Start training ...
-----------------------
-----------------------
Starting training for BS: 1 KS: 4 SS: 2 LR: 0.001
-----------------------
-----------------------
Train - Epoch 0 - batch 1000, batch loss is 0.313273, total loss is 314.536163
-----------------------
-----------------------
Train - Epoch 0 - batch 2000, batch loss is 0.313278, total loss is 627.818726
-----------------------
-----------------------
Train - Epoch 0 - batch 3000, batch loss is 0.313288, total loss is 941.108765
-----------------------
-----------------------
Train - Epoch 0 - batch 4000, batch loss is 1.313241, total loss is 1631.354614
-----------------------
-----------------------
Train - Epoch 0 - batch 5000, batch loss is 0.313262, total loss is 2504.381836
-----------------------
-----------------------
Train - Epoch 0 - batch 6000, batch loss is 0.313262, total loss is 2817.614258
-----------------------
-----------------------
Train - Epoch

-----------------------
Epoch 1 end: 15.349604606628418
Validation loss: 890.9356689 - F1 score: 0.4058824
0 class -> precision: 0.0000000 - recall: 0.0000000
1 class -> precision: 0.6831683 - recall: 1.0000000
precision: 0.3415842 - recall: 0.5000000 - MACRO
-----------------------
-----------------------
Train - Epoch 2 - batch 1000, batch loss is 1.313262, total loss is 1313.244263
-----------------------
-----------------------
Train - Epoch 2 - batch 2000, batch loss is 1.313262, total loss is 2626.476807
-----------------------
-----------------------
Train - Epoch 2 - batch 3000, batch loss is 1.313262, total loss is 3939.709229
-----------------------
-----------------------
Train - Epoch 2 - batch 4000, batch loss is 0.313262, total loss is 4876.156738
-----------------------
-----------------------
Train - Epoch 2 - batch 5000, batch loss is 0.313262, total loss is 5189.633301
-----------------------
-----------------------
Train - Epoch 2 - batch 6000, batch loss is 0.313262

-----------------------
Train - Epoch 3 - batch 7000, batch loss is 0.314167, total loss is 4169.565918
-----------------------
-----------------------
Epoch 3 end: 16.908008813858032
Validation loss: 890.6162109 - F1 score: 0.4058824
0 class -> precision: 0.0000000 - recall: 0.0000000
1 class -> precision: 0.6831683 - recall: 1.0000000
precision: 0.3415842 - recall: 0.5000000 - MACRO
-----------------------
-----------------------
Train - Epoch 4 - batch 1000, batch loss is 1.189385, total loss is 1293.090698
-----------------------
-----------------------
Train - Epoch 4 - batch 2000, batch loss is 0.314422, total loss is 1699.052612
-----------------------
-----------------------
Train - Epoch 4 - batch 3000, batch loss is 0.313344, total loss is 2012.830322
-----------------------
-----------------------
Train - Epoch 4 - batch 4000, batch loss is 1.311353, total loss is 2702.719238
-----------------------
-----------------------
Train - Epoch 4 - batch 5000, batch loss is 0.322381

-----------------------
Train - Epoch 0 - batch 5000, batch loss is 0.781751, total loss is 3375.162354
-----------------------
-----------------------
Train - Epoch 0 - batch 6000, batch loss is 0.703855, total loss is 4134.833496
-----------------------
-----------------------
Train - Epoch 0 - batch 7000, batch loss is 0.716696, total loss is 4842.217285
-----------------------
-----------------------
Epoch 0 end: 20.621358394622803
Validation loss: 969.2320557 - F1 score: 0.4657647
0 class -> precision: 0.3030303 - recall: 0.1116071
1 class -> precision: 0.6813451 - recall: 0.8809524
precision: 0.4921877 - recall: 0.4962798 - MACRO
-----------------------
-----------------------
Train - Epoch 1 - batch 1000, batch loss is 0.676416, total loss is 692.784180
-----------------------
-----------------------
Train - Epoch 1 - batch 2000, batch loss is 0.627053, total loss is 1338.777954
-----------------------
-----------------------
Train - Epoch 1 - batch 3000, batch loss is 0.561171,

-----------------------
Train - Epoch 7 - batch 5000, batch loss is 0.692232, total loss is 3482.989258
-----------------------
-----------------------
Train - Epoch 7 - batch 6000, batch loss is 0.653302, total loss is 4174.682129
-----------------------
-----------------------
Train - Epoch 7 - batch 7000, batch loss is 0.653024, total loss is 4818.866211
-----------------------
-----------------------
Epoch 7 end: 14.972870349884033
Validation loss: 938.7424927 - F1 score: 0.4080254
0 class -> precision: 0.5000000 - recall: 0.0022321
1 class -> precision: 0.6834278 - recall: 0.9989648
precision: 0.5917139 - recall: 0.5005985 - MACRO
-----------------------
-----------------------
Train - Epoch 8 - batch 1000, batch loss is 0.751035, total loss is 742.987915
-----------------------
-----------------------
Train - Epoch 8 - batch 2000, batch loss is 0.707142, total loss is 1432.931396
-----------------------
-----------------------
Train - Epoch 8 - batch 3000, batch loss is 0.595849,