In [17]:
import numpy as np
import pandas as pd 
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
import torch as to
import torch.nn as nn 
import torch.nn.functional as func
from torch.utils import data
from torch.utils.data import DataLoader
import random 
import time 
import torch.nn.utils.rnn as rnnutils
from sklearn.model_selection import train_test_split
import torchviz
from torchsummary import summary
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score 
from itertools import tee
import hashlib
from torch.optim.lr_scheduler import StepLR
import gc
import os 


### Info 
This notebook is a continuation of the 'baseline_take2_twin.ipynb' nootebook. 

### Setup 
+> we have only added one hidden layer 
+> added l1 and l2 regularization 

### Tech fixes 
+> added automatic directory creation

In [125]:
mbti_labels_1 = ['introverted', 'intuitive']
mbti_labels_2 = ['thinking', 'perceiving']

#Before each run 
RUN_IDENTIFICATOR = 'all_run5'
mbti_labels = mbti_labels_1+mbti_labels_2

In [126]:
#CELL METADATA
DATASET = '/home/mbosnjak/Datasets/embedded_comments_last_100_min_20com_per_auth_w_wc_10_200_no_mbti.csv'
FOLDS = '/home/mbosnjak/Datasets/comments_last_100_min_20com_per_auth_w_wc_10_200.csv.folds.csv'
AUTHORS = '/home/mbosnjak/Datasets/author_profiles.csv'

MODELS_NAME = 'baseline3'
OUTPUT_DIR = f'mpr_{MODELS_NAME}/'
RESULTS = OUTPUT_DIR + f'results_{MODELS_NAME}_{RUN_IDENTIFICATOR}.csv'

RELEVANT_DIFFERENCE = 0.0001
NUMBER_OF_CLASSES = 2
STATE = 156
cuda_device_index = 1

L1_Type = 'L1'
L2_Type = 'L2'
NO_REGULARIZATION = 'None'
MODELS_PREFORMANCE_COLUMNS = ['val_f1', 'val_precision_0', 'val_precision_1', 'val_precision_macro' ,\
                              'val_recall_0', 'val_recall_1', 'val_recall_macro', \
                              'test_f1', 'test_precision_0', 'test_precision_1', 'test_precision_macro',\
                              'test_recall_0', 'test_recall_1', 'test_recall_macro',\
                              'epoch']
MODELS_PARAMETER_COLUMNS = ['mbti_trait', 'fold', 'learning_rate', 'batch_size',\
                           'kernels_count', 'sentences_count', 'hidden_layer1', 'alpha', 'regularization_type',\
                            'decay_rate', 'decay_epoch', 'run_identificator']
MODELS_META_DATA_COLUMNS = ['models_name', 'hash_id']

to.manual_seed(STATE)
use_GPU = True
cuda_device = to.device(cuda_device_index) if use_GPU else None
validation_p = .2
print_status_batch = 100
max_constant_f1 = 6

In [128]:
#CELL PARAMETERS #12,4,12, 5*10^(-6)
batch_sizes = [64]
learning_rates = [5*10**-6, 10**-6, 10**-5, 10**-4]
kernel_sizes = [12, 16]
sentences_counts = [4]
hidden_layer1_sizes = [12, 16]

alphas = [0.001, 0.0001, 0.2, 0.3, 0.002, 0.27]
regularization_types = [L1_Type, L2_Type, NO_REGULARIZATION]

n_epochs = 150
STEP_SIZE = 60000
GAMMA = 0.5
no_of_folds = 5
#TODO: this is sentences per author not comments per author, quick fix is max size 200*100 (200 max sentences by comment * 100 comments)
comments_per_author = 20000

In [129]:
results = pd.DataFrame(data=None, columns=MODELS_META_DATA_COLUMNS+MODELS_PARAMETER_COLUMNS+MODELS_PREFORMANCE_COLUMNS)
for name in MODELS_PREFORMANCE_COLUMNS:
    results = results.astype({name: 'float64'})
for name in MODELS_META_DATA_COLUMNS:
    results = results.astype({name: 'object'})

In [20]:
if(not os.path.isdir(OUTPUT_DIR)):
    os.makedirs(OUTPUT_DIR)

In [32]:
def get_checkpoints_name(models_identifier):
    return OUTPUT_DIR+"save_" + models_identifier + f'_{RUN_IDENTIFICATOR}.pt"

def get_predictions_file_name(models_identifier, target, fold):
    return OUTPUT_DIR + f'{target}_{fold}_{RUN_IDENTIFICATOR}_{models_identifier}_predictions'

def delimiter():
    print('-'*23)

In [33]:
input_df = pd.read_csv(DATASET)
#input_df = pd.read_csv(DATASET, nrows = 2000) # TODO: remove nrows
folds_df = pd.read_csv(FOLDS, usecols =['author', 'fold'])
authors_profiles_df = pd.read_csv(AUTHORS)
authors_with_mbti = list(authors_profiles_df[authors_profiles_df.introverted.notnull()]['author'])

In [35]:
#TODO: add comment delimiter -> to recognize comments end and beginning
def merger(comments):
    npad = [(0, max(sentences_counts)), (0, 0)]
    carpet = comments[:comments_per_author].as_matrix(columns=comments.columns[1:1025])
    carpet_padded = np.pad(carpet, pad_width=npad, mode='constant', constant_values=0)
    #print(f'before {carpet.shape} - after {carpet_padded.shape}')
    return to.tensor(carpet_padded)

In [34]:
#Data preparation
data_df = input_df[input_df['author'].isin(authors_with_mbti)]
del input_df
present_authors = sorted(data_df['author'].unique())
authors_indices = dict(zip(present_authors , list(range(len(present_authors)))))
input_x_df = data_df.sort_values(by=['author', 'Unnamed: 0']).groupby(['author']).apply(merger).tolist()
del data_df

  after removing the cwd from sys.path.


In [130]:
gc.collect()

6643

In [137]:
def set_hash(index):
    global results
    identifier = hashlib.md5(''.join([str(x) for x in results[MODELS_PARAMETER_COLUMNS].iloc[index]]).encode('utf-8')).hexdigest()
    results.at[index, 'hash_id'] = identifier
    return identifier

def create_new_model_input(mbti_trait, fold, learning_rate, batch_size,\
                           kernels_count, sentences_count, hidden, alpha,\
                          regularization):
    global results
    results = results.append({'models_name':MODELS_NAME, 'mbti_trait' : mbti_trait,\
                              'fold' : fold, 'learning_rate' : learning_rate,\
                              'batch_size' : batch_size, 'kernels_count' : kernels_count,\
                              'sentences_count' : sentences_count, 'hidden_layer1': hidden,\
                              'alpha': alpha, 'regularization_type' : regularization, 'decay_rate' : GAMMA,\
                              'decay_epoch' : STEP_SIZE, 'run_identificator' : RUN_IDENTIFICATOR,\
                             'hash_id': 'DUMMY_VALUE'}, ignore_index=True)
    return set_hash(len(results)-1)    

def update_models_val_results(models_identifier, val_f1, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_recall_1, val_recall_m, epoch):
    global results
    results.loc[results['hash_id'] == models_identifier, 'val_f1'] = val_f1
    results.loc[results['hash_id'] == models_identifier, 'val_precision_0'] = val_precision_0
    results.loc[results['hash_id'] == models_identifier, 'val_precision_1'] = val_precision_1
    results.loc[results['hash_id'] == models_identifier, 'val_precision_macro'] = val_precision_m
    results.loc[results['hash_id'] == models_identifier, 'val_recall_0'] = val_recall_0
    results.loc[results['hash_id'] == models_identifier, 'val_recall_1'] = val_recall_1
    results.loc[results['hash_id'] == models_identifier, 'val_recall_macro'] = val_recall_m
    results.loc[results['hash_id'] == models_identifier, 'epoch'] = epoch
    
def update_models_test_results(models_identifier, test_f1, test_precision_0, test_precision_1, precision_m, test_recall_0, test_recall_1, test_recall_m):
    global results
    results.loc[results['hash_id'] == models_identifier, 'test_f1'] = test_f1
    results.loc[results['hash_id'] == models_identifier, 'test_precision_0'] = test_precision_0
    results.loc[results['hash_id'] == models_identifier, 'test_precision_1'] = test_precision_1
    results.loc[results['hash_id'] == models_identifier, 'test_precision_macro'] = test_precision_m
    results.loc[results['hash_id'] == models_identifier, 'test_recall_0'] = test_recall_0
    results.loc[results['hash_id'] == models_identifier, 'test_recall_1'] = test_recall_1
    results.loc[results['hash_id'] == models_identifier, 'test_recall_macro'] = test_recall_m
    
def get_best_models_data(target, fold):
    return results.loc[results[(results.mbti_trait == target) & (results.fold == fold)]['val_f1'].idxmax()]
    

In [132]:
results

Unnamed: 0,models_name,hash_id,mbti_trait,fold,learning_rate,batch_size,kernels_count,sentences_count,hidden_layer1,alpha,...,val_recall_1,val_recall_macro,test_f1,test_precision_0,test_precision_1,test_precision_macro,test_recall_0,test_recall_1,test_recall_macro,epoch


### FLOW

In [133]:
def get_input_output(data_authors, target):
    output_prep = authors_profiles_df[authors_profiles_df['author'].isin(data_authors)][target].tolist()
    input_indices = [index for author, index in authors_indices.items() if author in data_authors]
    return input_indices, output_prep

def balance_binary_data(input_df, output_df):
    positive_indices = [index for index, element in enumerate(output_df) if element == 1.0]
    negative_indices = [index for index, element in enumerate(output_df) if element == 0.0]
    negative_count = len(negative_indices)
    positive_count = len(positive_indices)
    
    if(positive_count > negative_count):
        return create_balanced_data(positive_indices, negative_indices, input_df, output_df)
    elif(negative_count > positive_count):
        return create_balanced_data(negative_indices, positive_indices, input_df, output_df)
        
def create_balanced_data(more_frequent, less_frequent, input_df, output_df):
    more_frequent_count = len(more_frequent)
    less_frequent_count = len(less_frequent)
    constant_multiplyer = more_frequent_count // less_frequent_count
    remaining_additions = more_frequent_count % less_frequent_count
    balanced_indices = less_frequent*constant_multiplyer + less_frequent[:remaining_additions] + more_frequent
    output_df_balanced = [output_df[index] for index in balanced_indices]
    input_df_balanced = [input_df[index] for index in balanced_indices]
    return input_df_balanced, output_df_balanced
    
def data_preparation(target, fold):
    test_data_authors = folds_df[(folds_df['fold'] == fold) & (folds_df['author'].isin(present_authors))]['author'].tolist()
    train_data_authors = folds_df[(folds_df['fold'] != fold) & (folds_df['author'].isin(present_authors))]['author'].tolist()
    train_input_indices, train_output = get_input_output(train_data_authors, target)
    test_input_indices, test_output = get_input_output(test_data_authors, target)

    train_input_indices, val_input_indices, train_output, val_output = train_test_split(train_input_indices, train_output, test_size=validation_p, random_state=STATE)
    assert len(train_input_indices) == len(train_output)
    assert len(test_input_indices) == len(test_output)
    assert len(val_input_indices) == len(val_output)
    return train_input_indices, train_output, val_input_indices, val_output, test_input_indices, test_output

def chunks(l, n):
    return [l[i:i + n] for i in range(0, len(l), n)]  

def create_minibatches(data_X, data_y, minibatch_size, cuda_dev):
    for idx_list in chunks(range(len(data_X)), minibatch_size):
        data_X_indices = [data_X[index] for index in idx_list]
        data_y_idx = [int(data_y[index]) for index in idx_list]
        
        minibatch_X = [input_x_df[index] for index in data_X_indices]
        minibatch_X = rnnutils.pad_sequence(minibatch_X, batch_first=True, padding_value = 0) 
        minibatch_X = minibatch_X.unsqueeze(1)
        minibatch_y = to.tensor(data_y_idx)
        if cuda_dev is not None:
            minibatch_X = minibatch_X#.to(device=cuda_dev, dtype=to.float)
            minibatch_y = minibatch_y.to(device=cuda_dev, dtype=to.long)

        yield((minibatch_X, minibatch_y))   


In [134]:
class ConvoCarpet(nn.Module):
    def __init__ (self, embedding_size = 1024, kernels_count=64, sentences_count=2, hidden1=4):
        super(ConvoCarpet, self).__init__()
        self.conv_layer = nn.Conv2d(1, kernels_count, [sentences_count, embedding_size])
        self.pool_layer = nn.AdaptiveMaxPool2d((1, None))
        self.fc_layer1 = nn.Linear(kernels_count, hidden1)
        self.fc_layer2 = nn.Linear(hidden1, NUMBER_OF_CLASSES)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input_batch):
        conv_output = func.relu(self.conv_layer(input_batch))
        maxpool_output = self.pool_layer(conv_output)
        maxpool_output = maxpool_output.flatten(start_dim=1)
        linear_output1 = self.fc_layer1(maxpool_output)
        linear_output2 = self.fc_layer2(linear_output1)
        output = self.softmax(linear_output2)
        return output


In [138]:
def calcMeasures(total_logits, total_true, threshold):
    total_preds = []
    for minibatch_logits in total_logits:
        minibatch_preds = minibatch_logits[:,1] > threshold
        total_preds.append(minibatch_preds)
    total_preds = to.cat(total_preds)
    total_true = to.cat(total_true)
    F1 = f1_score(total_true.cpu(), total_preds.cpu(), pos_label = 1, average='macro')
    precision_m = precision_score(total_true.cpu(), total_preds.cpu(), average='macro')
    precision_1 = precision_score(total_true.cpu(), total_preds.cpu(), pos_label = 1)
    precision_0 = precision_score(total_true.cpu(), total_preds.cpu(), pos_label = 0)
    recall_m = recall_score(total_true.cpu(), total_preds.cpu(), average='macro')
    recall_1 = recall_score(total_true.cpu(), total_preds.cpu(), pos_label = 1)
    recall_0 = recall_score(total_true.cpu(), total_preds.cpu(), pos_label = 0)
    return F1, precision_0, precision_1, precision_m, recall_0, recall_1, recall_m

def apply_model(model, loss_func, data_loader):
    model.eval()
    total_loss = 0
    predicted = []
    true_output = []
    minibatch_loss = 0
    with to.no_grad():
        n_batch = 0
        for inputs, labels in data_loader:
            n_batch += 1
            minibatch_logits = model(inputs.to(device=cuda_device, dtype=to.float))
            minibatch_loss = loss_func(minibatch_logits, labels)
            
            total_loss += minibatch_loss
            predicted.append(minibatch_logits)
            true_output.append(labels)
            
    return total_loss, predicted, true_output

def optimize_models_params(trait, fold, train_indices, output, val_input_indices, val_output):
    print('Start training ...')
    
    all_sum = len(output)
    positive = sum(output)
    negative = all_sum - positive
    print(f'Postive: {positive}: Negative: {negative}')
    loss_weights = to.tensor([negative, positive])
    loss_weights = loss_weights.to(device=cuda_device, dtype=to.float)
    loss = nn.CrossEntropyLoss(weight=loss_weights)
    
    train_input_indices, train_output = balance_binary_data(train_indices, output)
    
    for batch_size in batch_sizes:
        for kernel_size in kernel_sizes:
            for sent_size in sentences_counts:
                for hidden in hidden_layer1_sizes:
                    for regularization_type in regularization_types:
                        for alpha in alphas:
                            for learning_rate in learning_rates:                

                                train_loader = create_minibatches(train_input_indices, train_output, batch_size, cuda_device)
                                val_loader = create_minibatches(val_input_indices, val_output, batch_size, cuda_device)

                                delimiter()
                                delimiter()
                                print(f'Starting training for BS: {batch_size} KS: {kernel_size} SS: {sent_size} LR: {learning_rate} HL: {hidden}')
                                delimiter()

                                models_identifier = create_new_model_input(trait, fold, learning_rate, batch_size, kernel_size, sent_size, hidden, alpha, regularization_type)

                                model = ConvoCarpet(kernels_count=kernel_size, sentences_count=sent_size, hidden1=hidden)
                                if use_GPU:
                                      model.to(cuda_device)

                                optimizer = to.optim.Adam(model.parameters(), learning_rate, amsgrad = True)

                                exp_lr_scheduler = StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

                                train_model(train_loader, val_loader, models_identifier, model, loss, optimizer, exp_lr_scheduler, alpha, regularization_type)
                                results.to_csv(RESULTS)
def calculate_L2_reg(parameters, alpha):
    l2_regularization = 0
    for parameter in parameters:
        l2_regularization += alpha * to.pow(to.norm(parameter, 2), 2.0)
    return l2_regularization

def calculate_L1_reg(parameters, alpha):
    l1_regularization = 0
    for parameter in parameters:
        l1_regularization += alpha * to.sum(to.abs(parameter))
    return l1_regularization

def train_model(train_loader, val_loader, models_identifier, model, loss, optimizer, exp_lr_scheduler, alpha, regularization_type):
    best_f1_val = 0
    last_f1 = 0
    counter = 0
    
    for epoch in range(0, n_epochs):
        epoch_start = time.time()
        model.train()
        total_loss = 0
        n_batch = 0
        
        predicted = []
        true_output = []
        
        train_loader, train_loader_backup = tee(train_loader)
        for inputs, labels in train_loader_backup:
            n_batch += 1
            model.zero_grad()

            logits = model(inputs.to(device=cuda_device, dtype=to.float))
                
            minibatch_loss = loss(logits, labels) 
            if(regularization_type == L1_Type):
                minibatch_loss += calculate_L1_reg(model.parameters(), alpha)
            elif(regularization_type == L2_Type):
                minibatch_loss += calculate_L2_reg(model.parameters(), alpha)
                
            total_loss += minibatch_loss
            
            minibatch_loss.backward()
            optimizer.step()
            
            predicted.append(logits)
            true_output.append(labels)
            
            if(n_batch % print_status_batch == 0):
                
                delimiter()               
                #preds = logits[:,1] > 0.5
                #train_f1_score = f1_score(labels.cpu().detach().numpy(), preds.cpu().detach().numpy(), pos_label = 1, average='macro')
                print(f"Train - Epoch {epoch} - batch {n_batch}, batch loss is {minibatch_loss:.6f}")
                delimiter()

        
        
        delimiter()
        exp_lr_scheduler.step()
        val_loader, val_loader_backup = tee(val_loader)
        val_loss, val_logits, true_val = apply_model(model, loss, val_loader_backup)
        val_f1_score, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_recall_1, val_recall_m = calcMeasures(val_logits, true_val, 0.5)
        train_f1_score, _, _, _, _, _,_ = calcMeasures(predicted, true_output, 0.5)
        
        print(f'Epoch {epoch} end: {time.time()-epoch_start}, TRAIN F1 is: {train_f1_score}')
        print(f'Validation loss: {val_loss:.7f} - F1 score: {val_f1_score:.7f}')
        print(f'0 class -> precision: {val_precision_0:.7f} - recall: {val_recall_0:.7f}')
        print(f'1 class -> precision: {val_precision_1:.7f} - recall: {val_recall_1:.7f}')
        print(f'precision: {val_precision_m:.7f} - recall: {val_recall_m:.7f} - MACRO')
        delimiter()
        
        if (abs(val_f1_score - last_f1) <= RELEVANT_DIFFERENCE):
            counter += 1
        else:
            counter = 0
        
        last_f1 = val_f1_score
        
        if(counter >= max_constant_f1):
            return
        
        if (val_f1_score > best_f1_val):
            update_models_val_results(models_identifier, val_f1_score, val_precision_0, val_precision_1, val_precision_m, val_recall_0, val_precision_1, val_recall_m, epoch)
            best_f1_val = val_f1_score
        # save a checkpoint
            best_checkpoint_filename = get_checkpoints_name(models_identifier)
            to.save({
              "models_identifier" : models_identifier, 
              "model_state_dict" : model.state_dict(),
              "optimizer_state_dict" : optimizer.state_dict()
            }, best_checkpoint_filename)
    

In [None]:
for target in mbti_labels:
    print(f"Creating model for target: {target}")
    for fold in range(0, no_of_folds):
        
        print(f'Processing fold {fold}...')
        
        train_input_indices, train_output, val_input_indices, val_output, test_input_indices, test_output = data_preparation(target, fold)
        
        optimize_models_params(target, fold, train_input_indices, train_output, val_input_indices, val_output)
        
        
        best_models = get_best_models_data(target, fold)
        #Recreate model
        models_identifier = best_models['hash_id']
        model = ConvoCarpet(kernels_count=best_models['kernels_count'], sentences_count=best_models['sentences_count'], hidden1=best_models['hidden_layer1'])
        if use_GPU:
              model.to(cuda_device)

        checkpoint = to.load(get_checkpoints_name(models_identifier))
        model.load_state_dict(checkpoint["model_state_dict"])
        loss = nn.CrossEntropyLoss()

        print("."*15)
        print("."*15)
        
        print(f'Apply best model to test for {target} on fold {fold}.')
        test_loader = create_minibatches(test_input_indices, test_output, 1, cuda_device)
        test_loss, test_logits, test_true = apply_model(model, loss, test_loader)
        test_f1_score, test_precision_0, test_precision_1, test_precision_m, test_recall_0, test_recall_1, test_recall_m = calcMeasures(test_logits, test_true, 0.5)
        print(f'Test loss: {test_loss:.5f} - F1 score: {test_f1_score:.7f} ')
        print(f'0 class -> precision: {test_precision_0:.7f} - recall: {test_recall_0:.7f}')
        print(f'1 class -> precision: {test_precision_1:.7f} - recall: {test_recall_1:.7f}')
        print(f'precision: {test_precision_m:.7f} - recall: {test_recall_m:.7f} - MACRO')
              
        update_models_test_results(models_identifier, test_f1_score, test_precision_0, test_precision_1, test_precision_m, test_recall_0, test_recall_1, test_recall_m)
        with open(get_predictions_file_name(models_identifier, target, fold), 'w+') as f:
            for pre_l, true_l in zip(test_logits, test_true):
                f.write(f'{pre_l.cpu()[0][0]}, {pre_l.cpu()[0][1]}, {true_l.cpu().tolist()[0]}\n')
        results.to_csv(RESULTS)
        
        ##Save best models data
        
        print(f"+++ Finished with training and testing model for {target} on fold {fold}. +++")
        
        print("."*15)
        print("."*15)

Creating model for target: introverted
Processing fold 0...
Start training ...
Postive: 4433.0: Negative: 1219.0
-----------------------
-----------------------
Starting training for BS: 64 KS: 12 SS: 4 LR: 4.9999999999999996e-06 HL: 12
-----------------------
-----------------------
Train - Epoch 0 - batch 100, batch loss is 1.114566
-----------------------
-----------------------
Epoch 0 end: 11.966232538223267, TRAIN F1 is: 0.38438705117217664
Validation loss: 15.3951015 - F1 score: 0.4492005
0 class -> precision: 0.1351351 - recall: 0.0166667
1 class -> precision: 0.7857662 - recall: 0.9712747
precision: 0.4604506 - recall: 0.4939707 - MACRO
-----------------------
-----------------------
Train - Epoch 1 - batch 100, batch loss is 1.100512
-----------------------
-----------------------
Epoch 1 end: 4.985890865325928, TRAIN F1 is: 0.3926140399869825
Validation loss: 15.3510017 - F1 score: 0.4505623
0 class -> precision: 0.2105263 - recall: 0.0133333
1 class -> precision: 0.7878136 

-----------------------
Epoch 19 end: 4.541491746902466, TRAIN F1 is: 0.45940887388061985
Validation loss: 15.1507006 - F1 score: 0.4400000
0 class -> precision: 0.0000000 - recall: 0.0000000
1 class -> precision: 0.7873848 - recall: 0.9973070
precision: 0.3936924 - recall: 0.4986535 - MACRO
-----------------------
-----------------------
Train - Epoch 20 - batch 100, batch loss is 0.925501
-----------------------
-----------------------
Epoch 20 end: 4.448367118835449, TRAIN F1 is: 0.4616104914878634
Validation loss: 15.1488609 - F1 score: 0.4400000
0 class -> precision: 0.0000000 - recall: 0.0000000
1 class -> precision: 0.7873848 - recall: 0.9973070
precision: 0.3936924 - recall: 0.4986535 - MACRO
-----------------------
-----------------------
Train - Epoch 21 - batch 100, batch loss is 0.919566
-----------------------
-----------------------
Epoch 21 end: 5.0403032302856445, TRAIN F1 is: 0.4643419484413212
Validation loss: 15.1463947 - F1 score: 0.4404432
0 class -> precision: 0.0

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Epoch 0 end: 9.833331108093262, TRAIN F1 is: 0.3333333333333333
Validation loss: 18.8877449 - F1 score: 0.1750292
0 class -> precision: 0.2121641 - recall: 1.0000000
1 class -> precision: 0.0000000 - recall: 0.0000000
precision: 0.1060820 - recall: 0.5000000 - MACRO
-----------------------
-----------------------
Train - Epoch 1 - batch 100, batch loss is 1.255581
-----------------------
-----------------------
Epoch 1 end: 4.259723663330078, TRAIN F1 is: 0.3333333333333333
Validation loss: 18.7921391 - F1 score: 0.1750292
0 class -> precision: 0.2121641 - recall: 1.0000000
1 class -> precision: 0.0000000 - recall: 0.0000000
precision: 0.1060820 - recall: 0.5000000 - MACRO
-----------------------
-----------------------
Train - Epoch 2 - batch 100, batch loss is 1.247833
-----------------------
-----------------------
Epoch 2 end: 4.24547004699707, TRAIN F1 is: 0.3333333333333333
Validation loss: 18.6992893 - F1 score: 0.1750292
0 class -> precision: 0.2121641 - recall: 1.0000000
1 cla

-----------------------
Train - Epoch 13 - batch 100, batch loss is 0.855033
-----------------------
-----------------------
Epoch 13 end: 5.535979509353638, TRAIN F1 is: 0.43220565499187236
Validation loss: 15.4514141 - F1 score: 0.4436964
0 class -> precision: 0.3333333 - recall: 0.0033333
1 class -> precision: 0.7880936 - recall: 0.9982047
precision: 0.5607134 - recall: 0.5007690 - MACRO
-----------------------
-----------------------
Train - Epoch 14 - batch 100, batch loss is 0.845431
-----------------------
-----------------------
Epoch 14 end: 4.72149920463562, TRAIN F1 is: 0.4355677281207511
Validation loss: 15.4199982 - F1 score: 0.4439288
0 class -> precision: 0.5000000 - recall: 0.0033333
1 class -> precision: 0.7882436 - recall: 0.9991023
precision: 0.6441218 - recall: 0.5012178 - MACRO
-----------------------
-----------------------
Train - Epoch 15 - batch 100, batch loss is 0.836814
-----------------------
-----------------------
Epoch 15 end: 4.244459629058838, TRAIN F1

-----------------------
Epoch 1 end: 4.4327874183654785, TRAIN F1 is: 0.39054363336888775
Validation loss: 15.4036713 - F1 score: 0.4946357
0 class -> precision: 0.2275862 - recall: 0.1100000
1 class -> precision: 0.7895981 - recall: 0.8994614
precision: 0.5085922 - recall: 0.5047307 - MACRO
-----------------------
-----------------------
Train - Epoch 2 - batch 100, batch loss is 0.767915
-----------------------
-----------------------
Epoch 2 end: 4.422712087631226, TRAIN F1 is: 0.39571242582093613
Validation loss: 15.3162680 - F1 score: 0.4795128
0 class -> precision: 0.2142857 - recall: 0.0700000
1 class -> precision: 0.7879939 - recall: 0.9308797
precision: 0.5011398 - recall: 0.5004399 - MACRO
-----------------------
-----------------------
Train - Epoch 3 - batch 100, batch loss is 0.763142
-----------------------
-----------------------
Epoch 3 end: 4.471920728683472, TRAIN F1 is: 0.4037226644571361
Validation loss: 15.2448626 - F1 score: 0.4724038
0 class -> precision: 0.22388

-----------------------
Train - Epoch 21 - batch 100, batch loss is 0.733458
-----------------------
-----------------------
Epoch 21 end: 4.331095933914185, TRAIN F1 is: 0.49316922577438027
Validation loss: 14.8630505 - F1 score: 0.4546418
0 class -> precision: 0.2941176 - recall: 0.0166667
1 class -> precision: 0.7888332 - recall: 0.9892280
precision: 0.5414754 - recall: 0.5029473 - MACRO
-----------------------
-----------------------
Train - Epoch 22 - batch 100, batch loss is 0.732821
-----------------------
-----------------------
Epoch 22 end: 4.642135381698608, TRAIN F1 is: 0.4972174895630286
Validation loss: 14.8564901 - F1 score: 0.4549151
0 class -> precision: 0.3125000 - recall: 0.0166667
1 class -> precision: 0.7889843 - recall: 0.9901257
precision: 0.5507421 - recall: 0.5033962 - MACRO
-----------------------
-----------------------
Train - Epoch 23 - batch 100, batch loss is 0.732206
-----------------------
-----------------------
Epoch 23 end: 4.662025690078735, TRAIN F

-----------------------
Train - Epoch 41 - batch 100, batch loss is 0.724177
-----------------------
-----------------------
Epoch 41 end: 4.477135419845581, TRAIN F1 is: 0.5594586936141039
Validation loss: 14.7672491 - F1 score: 0.4695238
0 class -> precision: 0.3846154 - recall: 0.0333333
1 class -> precision: 0.7910663 - recall: 0.9856373
precision: 0.5878408 - recall: 0.5094853 - MACRO
-----------------------
-----------------------
Train - Epoch 42 - batch 100, batch loss is 0.723783
-----------------------
-----------------------
Epoch 42 end: 4.675894737243652, TRAIN F1 is: 0.5620449498687841
Validation loss: 14.7625303 - F1 score: 0.4695238
0 class -> precision: 0.3846154 - recall: 0.0333333
1 class -> precision: 0.7910663 - recall: 0.9856373
precision: 0.5878408 - recall: 0.5094853 - MACRO
-----------------------
-----------------------
Train - Epoch 43 - batch 100, batch loss is 0.723379
-----------------------
-----------------------
Epoch 43 end: 4.647476673126221, TRAIN F1

-----------------------
Train - Epoch 61 - batch 100, batch loss is 0.717533
-----------------------
-----------------------
Epoch 61 end: 4.0234599113464355, TRAIN F1 is: 0.6027238036633091
Validation loss: 14.6784859 - F1 score: 0.4751324
0 class -> precision: 0.3250000 - recall: 0.0433333
1 class -> precision: 0.7911208 - recall: 0.9757630
precision: 0.5580604 - recall: 0.5095482 - MACRO
-----------------------
-----------------------
Train - Epoch 62 - batch 100, batch loss is 0.717223
-----------------------
-----------------------
Epoch 62 end: 4.183553695678711, TRAIN F1 is: 0.6041887404196701
Validation loss: 14.6737757 - F1 score: 0.4751324
0 class -> precision: 0.3250000 - recall: 0.0433333
1 class -> precision: 0.7911208 - recall: 0.9757630
precision: 0.5580604 - recall: 0.5095482 - MACRO
-----------------------
-----------------------
-----------------------
Starting training for BS: 64 KS: 12 SS: 4 LR: 1e-06 HL: 12
-----------------------
-----------------------
Train - Ep

In [52]:
gc.collect()

1543