In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# import os
# os.chdir('drive/MyDrive/NLP Project')

In [3]:
# !pip install bertviz

In [41]:
import torch
from tqdm.notebook import tqdm
from torch.utils.data import TensorDataset
import pandas as pd
import numpy as np

In [42]:
# !pip install transformers
from transformers import BertForSequenceClassification
from transformers import AutoModelForSequenceClassification
from transformers import BertTokenizer, BertModel
from bertviz import head_view, model_view

In [43]:
train_df = pd.read_csv('train.csv')
train_df = train_df[['MASKED_DOCUMENT', 'TRUE_SENTIMENT']]

In [44]:
test_df = pd.read_csv('fixed_test.csv')
test_df = test_df[['MASKED_DOCUMENT', 'TRUE_SENTIMENT']]

In [45]:
dev_df = pd.read_csv('dev.csv')
dev_df = train_df[['MASKED_DOCUMENT', 'TRUE_SENTIMENT']]

In [46]:
possible_labels = train_df.TRUE_SENTIMENT.unique()

label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
label_dict

{'Negative': 0, 'Neutral': 1, 'Positive': 2}

In [47]:
train_df['label'] = train_df.TRUE_SENTIMENT.replace(label_dict)

In [48]:
test_df['label'] = test_df.TRUE_SENTIMENT.replace(label_dict)

In [49]:
dev_df['label'] = dev_df.TRUE_SENTIMENT.replace(label_dict)

In [50]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=True)

In [51]:
encoded_data_train = tokenizer.batch_encode_plus(
    train_df.MASKED_DOCUMENT.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    max_length=256, 
    return_tensors='pt',
    truncation=True,
    padding='max_length'
)

In [52]:
encoded_data_val = tokenizer.batch_encode_plus(
    dev_df.MASKED_DOCUMENT.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    max_length=256, 
    return_tensors='pt',
    truncation=True,
    padding='max_length'
)

In [53]:
encoded_data_test = tokenizer.batch_encode_plus(
    test_df.MASKED_DOCUMENT.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    max_length=256, 
    return_tensors='pt',
    truncation=True,
    padding='max_length'
)

In [54]:
input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(test_df.label.values)

In [55]:
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(train_df.label.values)

In [56]:
input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(dev_df.label.values)

In [57]:
# print(type(labels_train),type(labels_test))

In [58]:
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

In [59]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
batch_size = 16
dataloader_train = DataLoader(dataset_train, 
                              sampler=RandomSampler(dataset_train), 
                              batch_size=batch_size)
dataloader_validation = DataLoader(dataset_val, 
                                   sampler=SequentialSampler(dataset_val), 
                                   batch_size=batch_size)
dataloader_test = DataLoader(dataset_test,
                            sampler=SequentialSampler(dataset_test),
                            batch_size=batch_size)

In [60]:
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='macro')

def accuracy_per_class(preds, labels):
    label_dict_inverse = {v: k for k, v in label_dict.items()}
    
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')

In [68]:
import random
epochs = 30
seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

def evaluate(model, dataloader_val):
    model.to(device)
    model.eval()
    loss_val_total = 0
    predictions, true_vals = [], []
    for batch in dataloader_val:
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }
        with torch.no_grad():        
            outputs = model(**inputs)
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)

    loss_val_avg = loss_val_total/len(dataloader_val) 
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
    return loss_val_avg, predictions, true_vals
    
def train(model, optimizer, scheduler, name):
    for epoch in tqdm(range(1, epochs+1)):
        model.to(device)
        model.train()
        loss_train_total = 0
        progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
        for batch in progress_bar:
            model.zero_grad()
            batch = tuple(b.to(device) for b in batch)
            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'labels':         batch[2],
                    }       
            outputs = model(**inputs)
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
        torch.save(model.state_dict(), f'data_volume/finetuned_{name}_unmasked_epoch_{epoch}.model')
        tqdm.write(f'Epoch {epoch}')
        loss_train_avg = loss_train_total/len(dataloader_train)            
        tqdm.write(f'Training loss: {loss_train_avg}')
        val_loss, predictions, true_vals = evaluate(model, dataloader_validation)
        val_f1 = f1_score_func(predictions, true_vals)
        tqdm.write(f'Validation loss: {val_loss}')
        tqdm.write(f'Val F1 (Macro): {val_f1}')
        _, test_predictions, test_true_vals = evaluate(model, dataloader_test)
        test_f1 = f1_score_func(test_predictions, test_true_vals)
        tqdm.write(f'Test F1: {test_f1}')

In [69]:
model_bert = AutoModelForSequenceClassification.from_pretrained("bert-base-cased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=True,
                                                      output_hidden_states=False)

model_bert_distil = AutoModelForSequenceClassification.from_pretrained("distilbert-base-cased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=True,
                                                      output_hidden_states=False)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [70]:
torch.cuda.is_available()

False

In [71]:
MODELS = [
#     {
#         "model": model_bert,
#         "name": "BERT_BASE_UNCASED"
#     },
    {
        "model": model_bert_distil,
        "name": "DISTILBERT_BASE_UNCASED"
    }
]


In [72]:
# Hyper-parameters
from transformers import AdamW, get_linear_schedule_with_warmup

def multi_model_train(model, name=""):
    optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps=0,
                                                num_training_steps=len(dataloader_train)*epochs)
    print("Training ", name)
    train(model, optimizer, scheduler, name)
    print()

for i in MODELS:
    multi_model_train(i["model"], i["name"])

Training  DISTILBERT_BASE_UNCASED


  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 1
Training loss: 0.9265302297614869
Validation loss: 0.862181754339309
Val F1 (Macro): 0.40261793815022257
Test F1: 0.3442043043452425


Epoch 2:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 2
Training loss: 0.8563833991686504
Validation loss: 0.7891822542463031
Val F1 (Macro): 0.41837420046878265
Test F1: 0.350944274698094


Epoch 3:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 3
Training loss: 0.7990057298115322
Validation loss: 0.7197669132834389
Val F1 (Macro): 0.477316968870535
Test F1: 0.3427148982704538


Epoch 4:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 4
Training loss: 0.7189947563977469
Validation loss: 0.5896555525915963
Val F1 (Macro): 0.5581185496512154
Test F1: 0.361955735152179


Epoch 5:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 5
Training loss: 0.6246559222539266
Validation loss: 0.5083581456825846
Val F1 (Macro): 0.6085674725039797
Test F1: 0.3395830881487584


Epoch 6:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 6
Training loss: 0.5103401848248073
Validation loss: 0.3491844105933394
Val F1 (Macro): 0.8735612660099458
Test F1: 0.40604616588757353


Epoch 7:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 7
Training loss: 0.41313203197150006
Validation loss: 0.2318815356563954
Val F1 (Macro): 0.9107233251353989
Test F1: 0.3993776844783607


Epoch 8:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 8
Training loss: 0.31315812819770406
Validation loss: 0.16441296259207386
Val F1 (Macro): 0.9438805136044207
Test F1: 0.39170618486785647


Epoch 9:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 9
Training loss: 0.2423791657601084
Validation loss: 0.11373455745744564
Val F1 (Macro): 0.9635181339714389
Test F1: 0.3888069211055111


Epoch 10:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 10
Training loss: 0.1805266172403381
Validation loss: 0.07336135107048211
Val F1 (Macro): 0.970256584405654
Test F1: 0.42174515461657514


Epoch 11:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 11
Training loss: 0.14795135366952136
Validation loss: 0.04782606355757231
Val F1 (Macro): 0.9796572966640884
Test F1: 0.40761035007610347


Epoch 12:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 12
Training loss: 0.12655085879821507
Validation loss: 0.03585086101444349
Val F1 (Macro): 0.9884902725415955
Test F1: 0.38858985996908846


Epoch 13:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 13
Training loss: 0.10540230817416506
Validation loss: 0.030708044607170103
Val F1 (Macro): 0.9865183941982559
Test F1: 0.39003133688346653


Epoch 14:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 14
Training loss: 0.09363936842862694
Validation loss: 0.024883115983712265
Val F1 (Macro): 0.9900718730592762
Test F1: 0.3840784743015981


Epoch 15:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 15
Training loss: 0.08310791055416866
Validation loss: 0.02818076098559513
Val F1 (Macro): 0.991174136149923
Test F1: 0.37410162121388596


Epoch 16:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 16
Training loss: 0.061336154106261566
Validation loss: 0.02064012108270877
Val F1 (Macro): 0.9935947500878147
Test F1: 0.39000848781958225


Epoch 17:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 17
Training loss: 0.058429576884650274
Validation loss: 0.013193665674563298
Val F1 (Macro): 0.9951404133454024
Test F1: 0.3830535405504249


Epoch 18:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 18
Training loss: 0.0703211901527885
Validation loss: 0.009656572305816336
Val F1 (Macro): 0.9958727942911622
Test F1: 0.4024021338779722


Epoch 19:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 19
Training loss: 0.04638263802834055
Validation loss: 0.010680086109953532
Val F1 (Macro): 0.9968121477532844
Test F1: 0.3877288081582413


Epoch 20:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 20
Training loss: 0.030974942136284274
Validation loss: 0.011711178623954765
Val F1 (Macro): 0.9928762907236252
Test F1: 0.40854150518016064


Epoch 21:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 21
Training loss: 0.033451351189744724
Validation loss: 0.00504273792148091
Val F1 (Macro): 0.997378708841156
Test F1: 0.3874749447386964


Epoch 22:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 22
Training loss: 0.03257426153729847
Validation loss: 0.00504042826207643
Val F1 (Macro): 0.9972744270520281
Test F1: 0.3908075110497851


Epoch 23:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 23
Training loss: 0.02658608761537055
Validation loss: 0.00711073756077288
Val F1 (Macro): 0.9981774267314053
Test F1: 0.36996257667455773


Epoch 24:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 24
Training loss: 0.026788667873361625
Validation loss: 0.0051365032589403975
Val F1 (Macro): 0.9981774267314053
Test F1: 0.3860908675682471


Epoch 25:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 25
Training loss: 0.019188977014398135
Validation loss: 0.005810169928978818
Val F1 (Macro): 0.9990857357118813
Test F1: 0.37155649256055195


Epoch 26:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 26
Training loss: 0.02199257969768951
Validation loss: 0.003076653317596841
Val F1 (Macro): 0.9987451776191221
Test F1: 0.3810392931876308


Epoch 27:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 27
Training loss: 0.01808539780904539
Validation loss: 0.003180505731991919
Val F1 (Macro): 0.9989738238703348
Test F1: 0.37999939663373644


Epoch 28:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 28
Training loss: 0.024504379783597652
Validation loss: 0.0029976215316959757
Val F1 (Macro): 0.9989738238703348
Test F1: 0.3764338665538815


Epoch 29:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 29
Training loss: 0.026551249919915185
Validation loss: 0.003231283230204662
Val F1 (Macro): 0.9993142215587145
Test F1: 0.3802492395648948


Epoch 30:   0%|          | 0/210 [00:00<?, ?it/s]

Epoch 30
Training loss: 0.01674863118751091
Validation loss: 0.0031011756051330373
Val F1 (Macro): 0.9989738238703348
Test F1: 0.3763063040738352



In [None]:
def model_evaluattion(model, name):
    model.load_state_dict(torch.load('data_volume_unmasked_microf1/finetuned_{}_unmasked_epoch_10.model'.format(name), map_location=torch.device('cuda')))
    _, predictions, true_vals = evaluate(model, dataloader_validation)
    accuracy_per_class(predictions, true_vals)

for i in MODELS:
    model_evaluattion(i["model"], i["name"])

In [None]:
# !zip -r data_volume.zip data_volume
# from google.colab import files
# files.download("data_volume.zip")

In [29]:
def model_evaluattion_test(model, name):
    for i in range(1,10):
        print(f'i: {i}')
        model.load_state_dict(torch.load('data_volume/finetuned_{}_epoch_{}.model'.format(name, i), map_location=torch.device('mps')))
        _, predictions, true_vals = evaluate(model, dataloader_test)
        val_f1 = f1_score_func(predictions, true_vals)
        print(f'f1: {val_f1}')
        accuracy_per_class(predictions, true_vals)

In [30]:
model_evaluattion_test(model_bert, "BERT_BASE_UNCASED")

i: 1
f1: 0.3404925378875298
Class: Negative
Accuracy: 0/139

Class: Neutral
Accuracy: 131/320

Class: Positive
Accuracy: 268/368

i: 2
f1: 0.3420584213710453
Class: Negative
Accuracy: 0/139

Class: Neutral
Accuracy: 164/320

Class: Positive
Accuracy: 227/368

i: 3
f1: 0.3528718782779657
Class: Negative
Accuracy: 2/139

Class: Neutral
Accuracy: 184/320

Class: Positive
Accuracy: 205/368

i: 4
f1: 0.3847307013659715
Class: Negative
Accuracy: 16/139

Class: Neutral
Accuracy: 177/320

Class: Positive
Accuracy: 182/368

i: 5
f1: 0.3902969789333479
Class: Negative
Accuracy: 16/139

Class: Neutral
Accuracy: 126/320

Class: Positive
Accuracy: 250/368

i: 6
f1: 0.3916872331563053
Class: Negative
Accuracy: 22/139

Class: Neutral
Accuracy: 189/320

Class: Positive
Accuracy: 157/368

i: 7
f1: 0.39763004643006034
Class: Negative
Accuracy: 19/139

Class: Neutral
Accuracy: 185/320

Class: Positive
Accuracy: 177/368

i: 8
f1: 0.37568853178559464
Class: Negative
Accuracy: 11/139

Class: Neutral
Accurac

In [None]:
# def evaluate(model, dataloader_val):
#     model.to(device)
#     model.eval()
#     loss_val_total = 0
#     predictions, true_vals = [], []
#     for batch in dataloader_val:
#         batch = tuple(b.to(device) for b in batch)
#         inputs = {'input_ids':      batch[0],
#                   'attention_mask': batch[1],
#                   'labels':         batch[2],
#                  }
#         with torch.no_grad():        
#             outputs = model(**inputs)
#         loss = outputs[0]
#         logits = outputs[1]
#         loss_val_total += loss.item()
#         logits = logits.detach().cpu().numpy()
#         label_ids = inputs['labels'].cpu().numpy()
#         predictions.append(logits)
#         true_vals.append(label_ids)

#     loss_val_avg = loss_val_total/len(dataloader_val) 
#     predictions = np.concatenate(predictions, axis=0)
#     true_vals = np.concatenate(true_vals, axis=0)
#     return loss_val_avg, predictions, true_vals

In [None]:
# _, predictions, true_vals = evaluate(model_bert_distil, dataloader_test)


In [None]:
rint()

i: 1
f1: 0.18738096344125985
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 311/320

Class: Positive
Accuracy: 0/368

i: 2
f1: 0.23109008607750556
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 260/320

Class: Positive
Accuracy: 41/368

i: 3


KeyboardInterrupt: 

In [34]:
def model_evaluation_sentiment_test(model, name):
    for i in range(1,11):
        print(f'i: {i}')
        model.load_state_dict(torch.load('data_volume_sentiment_analysis/finetuned_{}_.model'.format(i), map_location=torch.device('mps')))
        _, predictions, true_vals = evaluate(model, dataloader_test)
        val_f1 = f1_score_func(predictions, true_vals)
        print(f'f1: {val_f1}')
        accuracy_per_class(predictions, true_vals)

In [36]:
test_df_sent = pd.read_csv('fixed_test.csv')
test_df_sent = test_df[['MASKED_DOCUMENT', 'TRUE_SENTIMENT']]
label_dict_sent = {'Negative': 2, 'Neutral': 0, 'Positive': 1}
test_df_sent['label'] = test_df_sent.TRUE_SENTIMENT.replace(label_dict_sent)
encoded_data_test_sent = tokenizer.batch_encode_plus(
    test_df_sent.MASKED_DOCUMENT.values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    max_length=256, 
    return_tensors='pt',
    truncation=True,
    padding='max_length'
)

In [38]:
dataloader_test_sent = DataLoader(dataset_test,
                            sampler=SequentialSampler(dataset_test),
                            batch_size=batch_size)

In [39]:
input_ids_test_sent = encoded_data_test_sent['input_ids']
attention_masks_test_sent = encoded_data_test_sent['attention_mask']
labels_test_sent = torch.tensor(test_df_sent.label.values)

In [40]:
model_bert = AutoModelForSequenceClassification.from_pretrained("bert-base-cased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=True,
                                                      output_hidden_states=False)

model_evaluation_sentiment_test(model_bert, '')

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

i: 1
f1: 0.18738096344125985
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 311/320

Class: Positive
Accuracy: 0/368

i: 2
f1: 0.23109008607750556
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 260/320

Class: Positive
Accuracy: 41/368

i: 3
f1: 0.23141282925495954
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 262/320

Class: Positive
Accuracy: 41/368

i: 4
f1: 0.25608106857482166
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 212/320

Class: Positive
Accuracy: 86/368

i: 5
f1: 0.2533569477844752
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 180/320

Class: Positive
Accuracy: 106/368

i: 6
f1: 0.25613310351896507
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 206/320

Class: Positive
Accuracy: 90/368

i: 7
f1: 0.23895496585509235
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 242/320

Class: Positive
Accuracy: 55/368

i: 8
f1: 0.2495700358494087
Class: Negative
Accuracy: 1/139

Class: Neutral
Accuracy: 209/3