In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import re
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer, BertForSequenceClassification, AdamW
from transformers import get_linear_schedule_with_warmup
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm 

In [2]:
# Adjusting the cleaning function to handle non-string values
def clean_text(text):
    # Check if the text is a string
    if not isinstance(text, str):
        return ""
    
    # Remove HTML tags and non-alphanumeric characters
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    
    # Convert to lowercase
    text = text.lower()
    
    return text

# # Reapply the cleaning function
# data['Statement_clean'] = data['statement'].apply(clean_text)

In [5]:
new_data = pd.read_csv("politifact_plus_data.csv",)

In [7]:
# Load train, test, validation data
train_data = pd.read_csv("train2.tsv",sep='\t', header=None)
test_data = pd.read_csv("test2.tsv",sep='\t', header=None)
val_data = pd.read_csv("val2.tsv",sep='\t', header=None)

In [8]:
# Rename all the columns
train_data.rename({1: 'id', 2: 'label', 3: 'statement', 4: 'subject', 5: 'speaker', 6: 'job-title',
           7: 'state_info', 8: 'party_affiliation', 9: 'barely_true_counts', 10: 'false_counts',
           11: 'half_true_counts', 12: 'mostly_true_counts', 13: 'pants_on_fire_counts', 14: 'context',
           15: 'justification'
          }, axis = 1, inplace = True)
test_data.rename({1: 'id', 2: 'label', 3: 'statement', 4: 'subject', 5: 'speaker', 6: 'job-title',
           7: 'state_info', 8: 'party_affiliation', 9: 'barely_true_counts', 10: 'false_counts',
           11: 'half_true_counts', 12: 'mostly_true_counts', 13: 'pants_on_fire_counts', 14: 'context',
           15: 'justification'
          }, axis = 1, inplace = True)
val_data.rename({1: 'id', 2: 'label', 3: 'statement', 4: 'subject', 5: 'speaker', 6: 'job-title',
           7: 'state_info', 8: 'party_affiliation', 9: 'barely_true_counts', 10: 'false_counts',
           11: 'half_true_counts', 12: 'mostly_true_counts', 13: 'pants_on_fire_counts', 14: 'context',
           15: 'justification'
          }, axis = 1, inplace = True)

In [8]:
# Preprocess data, eliminate null values and clean texts
train_data['Statement_clean'] = train_data['statement'].apply(clean_text)
# train_data['Justification_clean'] = train_data['justification'].apply(clean_text)
train_data = train_data[~train_data['label'].isna()]

test_data['Statement_clean'] = test_data['statement'].apply(clean_text)
# test_data['Justification_clean'] = test_data['justification'].apply(clean_text)
test_data = test_data[~test_data['label'].isna()]

val_data['Statement_clean'] = val_data['statement'].apply(clean_text)
# val_data['Justification_clean'] = val_data['justification'].apply(clean_text)
val_data = val_data[~val_data['label'].isna()]

# Simple preprocessing
train_data['text'] = train_data['Statement_clean']  # Combining Justification + statement
train_data = train_data[['text', 'label']]  

val_data['text'] = val_data['Statement_clean'] 
val_data = val_data[['text', 'label']] 

test_data['text'] = test_data['Statement_clean']
test_data = test_data[['text', 'label']] 

In [9]:
# Initialize label encoder
label_encoder1 = LabelEncoder()
label_encoder2 = LabelEncoder()
label_encoder3 = LabelEncoder()

# Encode the labels
train_data['label'] = label_encoder1.fit_transform(train_data['label'])
val_data['label'] = label_encoder2.fit_transform(val_data['label'])
test_data['label'] = label_encoder3.fit_transform(test_data['label'])

# Split the data (with encoded labels)
train_texts = train_data['text'][:5000]
val_texts = val_data['text'][:5000]
test_texts = test_data['text'][:5000]

train_labels = train_data['label'][:5000]
val_labels = val_data['label'][:5000]
test_labels = test_data['label'][:5000]

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('distilbert-base-uncased')

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples, padding='max_length', truncation=True, max_length=512)

# Apply the tokenizer to the dataset
train_encodings = tokenize_function(train_texts.tolist())
val_encodings = tokenize_function(val_texts.tolist())
test_encodings = tokenize_function(test_texts.tolist())

# Dataset class
class PoliticalBiasDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Convert to dataset format
train_dataset = PoliticalBiasDataset(train_encodings, train_labels.tolist())
val_dataset = PoliticalBiasDataset(val_encodings, val_labels.tolist())
test_dataset = PoliticalBiasDataset(test_encodings, test_labels.tolist())

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Load pre-trained BERT model
model = BertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=len(train_data['label'].unique()))

# Optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=1e-6)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * 3)

# Move model to GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

from sklearn.utils.class_weight import compute_class_weight

# Compute class weights for training loss
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels), y=train_labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# Training loop
model.train()
for epoch in range(3):
    for batch in tqdm(train_loader):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(outputs.logits.view(-1, len(train_data['label'].unique())), labels.view(-1))
        # outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()

# Function to evaluate the model
def evaluate_model(model, data_loader):
    model.eval()
    predictions = []
    references = []
    for batch in tqdm(data_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=-1).tolist())
        references.extend(labels.tolist())
    return classification_report(references, predictions, output_dict=True)

# Evaluate on training, validation, and test sets
train_report = evaluate_model(model, train_loader)
val_report = evaluate_model(model, val_loader)
test_report = evaluate_model(model, test_loader) 

print("Training Set Evaluation:\n", train_report)
print("\nValidation Set Evaluation:\n", val_report)
print("\nTest Set Evaluation:\n", test_report)

# Classification report
# print(classification_report(references, predictions))

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['encoder.layer.3.attention.self.key.bias', 'encoder.layer.2.attention.output.dense.weight', 'encoder.layer.4.attention.output.dense.weight', 'encoder.layer.3.attention.self.key.weight', 'encoder.layer.3.intermediate.dense.weight', 'encoder.layer.10.attention.output.LayerNorm.weight', 'encoder.layer.1.attention.output.LayerNorm.weight', 'encoder.layer.8.attention.self.value.weight', 'encoder.layer.1.ou

Training Set Evaluation:
 {'0': {'precision': 0.2358490566037736, 'recall': 0.03221649484536082, 'f1-score': 0.05668934240362811, 'support': 776.0}, '1': {'precision': 0.2718978102189781, 'recall': 0.14723320158102768, 'f1-score': 0.191025641025641, 'support': 1012.0}, '2': {'precision': 0.23503569467325644, 'recall': 0.42502482621648463, 'f1-score': 0.3026874115983027, 'support': 1007.0}, '3': {'precision': 0.2138364779874214, 'recall': 0.3497942386831276, 'f1-score': 0.2654176424668228, 'support': 972.0}, '4': {'precision': 0.6666666666666666, 'recall': 0.004901960784313725, 'f1-score': 0.009732360097323601, 'support': 408.0}, '5': {'precision': 0.2017167381974249, 'recall': 0.22787878787878788, 'f1-score': 0.21400113830392717, 'support': 825.0}, 'accuracy': 0.2264, 'macro avg': {'precision': 0.3041670740579202, 'recall': 0.19784158499818372, 'f1-score': 0.17325892264927423, 'support': 5000.0}, 'weighted avg': {'precision': 0.26822515240375044, 'recall': 0.2264, 'f1-score': 0.1961245




# Trying with Pytorch (Two BERT models in a Siamese Network)

**Basic model architecture and ideas from https://github.com/manideep2510/siamese-BERT-fake-news-detection-LIAR**

In [16]:
# Preprocess data, eliminate null values and clean texts
train_data['Statement_clean'] = train_data['statement'].apply(clean_text)
train_data['Justification_clean'] = train_data['justification'].apply(clean_text)
train_data = train_data[~train_data['label'].isna()]

test_data['Statement_clean'] = test_data['statement'].apply(clean_text)
test_data['Justification_clean'] = test_data['justification'].apply(clean_text)
test_data = test_data[~test_data['label'].isna()]

val_data['Statement_clean'] = val_data['statement'].apply(clean_text)
val_data['Justification_clean'] = val_data['justification'].apply(clean_text)
val_data = val_data[~val_data['label'].isna()]

In [13]:
new_data['Statement_clean'] = new_data['statement'].apply(clean_text)
new_data['Justification_clean'] = new_data['justification'].apply(clean_text)
new_data = new_data[~new_data['label'].isna()]

In [20]:
df1 = train_data[['label', 'Statement_clean', 'Justification_clean']]

In [19]:
df2 = new_data[['label', 'Statement_clean', 'Justification_clean']]

In [22]:
train_data = pd.concat([df1, df2], ignore_index=True)
train_data

Unnamed: 0,label,Statement_clean,Justification_clean
0,false,says the annies list political group supports ...,thats a premise that he fails to back up annie...
1,half-true,when did the decline of coal start it started ...,surovell said the decline of coal started when...
2,mostly-true,hillary clinton agrees with john mccain by vot...,obama said he would have voted against the ame...
3,false,health care reform legislation is likely to ma...,the release may have a point that mikulskis co...
4,half-true,the economic turnaround started at the end of ...,crist said that the economic turnaround starte...
...,...,...,...
11124,pants-fire,chemtrails are being put into the atmosphere a...,atmospheric chemists and geochemists dispute t...
11125,pants-fire,there is no biden presidency the real biden wa...,president joe biden and former secretary of st...
11126,pants-fire,video shows someone impersonating joe biden in...,if this video shows anything its that biden h...
11127,pants-fire,el ceo de pfizer es un lagarto demonio,el ceo de pfizer albert bourla es humanola teo...


In [23]:
# Building the Network
class SiameseBERTNetwork(nn.Module):
    def __init__(self, num_labels):
        super(SiameseBERTNetwork, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.1)
        # Since we are concatenating the outputs, the input features to the linear layer are doubled
        self.classifier = nn.Linear(768 * 2, num_labels)

    def forward(self, input_ids1, attention_mask1, input_ids2, attention_mask2):
        output1 = self.bert(input_ids1, attention_mask=attention_mask1)
        output2 = self.bert(input_ids2, attention_mask=attention_mask2)

        pooled_output1 = self.dropout(output1.pooler_output)
        pooled_output2 = self.dropout(output2.pooler_output)

        # Concatenate the outputs
        concat_output = torch.cat((pooled_output1, pooled_output2), dim=1)

        # Pass through the classifier
        logits = self.classifier(concat_output)

        return logits

In [24]:
# Example dataset class
class TextPairDataset(Dataset):
    def __init__(self, text_pairs, labels, tokenizer, max_len=128):
        self.text_pairs = text_pairs
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.text_pairs)
    
    def __getitem__(self, idx):
        statement, justification = self.text_pairs[idx]
        label = self.labels[idx]

        # Tokenize the statement
        encoded_statement = self.tokenizer(
            statement, 
            padding='max_length', 
            truncation=True, 
            max_length=self.max_len, 
            return_tensors='pt'
        )
        input_ids_statement = encoded_statement['input_ids'].squeeze(0)
        attention_mask_statement = encoded_statement['attention_mask'].squeeze(0)

        # Tokenize the justification
        encoded_justification = self.tokenizer(
            justification, 
            padding='max_length', 
            truncation=True, 
            max_length=self.max_len, 
            return_tensors='pt'
        )
        input_ids_justification = encoded_justification['input_ids'].squeeze(0)
        attention_mask_justification = encoded_justification['attention_mask'].squeeze(0)

        # Return all the elements as separate items
        return input_ids_statement, attention_mask_statement, input_ids_justification, attention_mask_justification, torch.tensor(label)

In [25]:
# Load and prepare data
def prepare_data(data):
    # Mapping labels to numerical values
    label_mapping = {'false': 0, 'half-true': 1, 'mostly-true': 2, 'true': 3, 'barely-true': 4, 'pants-fire': 5}
    data['label'] = data['label'].map(label_mapping)
    
    text_pairs = data[['Statement_clean', 'Justification_clean']].values.tolist()
    labels = data['label'].values.tolist()

    return text_pairs, labels

In [26]:
# Prepare train, validation, and test sets
train_text_pairs, train_labels = prepare_data(train_data)
val_text_pairs, val_labels = prepare_data(val_data)
test_text_pairs, test_labels = prepare_data(test_data)

In [27]:
# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Creating instances of TextPairDataset
train_dataset = TextPairDataset(train_text_pairs, train_labels, tokenizer)
val_dataset = TextPairDataset(val_text_pairs, val_labels, tokenizer)
test_dataset = TextPairDataset(test_text_pairs, test_labels, tokenizer)

# Creating DataLoader instances
batch_size = 8  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [28]:
loss_function = nn.CrossEntropyLoss()

# Training loop 
def train(model, data_loader, optimizer, scheduler, device):
    model.train()
    total_loss = 0
    for batch in tqdm(data_loader):

        # Unpack the data from the dataset
        input_ids_statement, attention_mask_statement, input_ids_justification, attention_mask_justification, labels = [b.to(device) for b in batch]
        # # Unpack the data from the dataset
        # input_ids_statement = batch['input_ids_statement'].to(device)
        # attention_mask_statement = batch['attention_mask_statement'].to(device)
        # input_ids_justification = batch['input_ids_justification'].to(device)
        # attention_mask_justification = batch['attention_mask_justification'].to(device)
        # labels = batch['labels'].to(device)

        optimizer.zero_grad()
        logits = model(input_ids_statement, attention_mask_statement, input_ids_justification, attention_mask_justification)
        
        loss = loss_function(logits, labels)

        loss.backward()
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()

    return total_loss / len(data_loader)

def evaluate(model, data_loader, device):
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader):
            # Unpack the data from the dataset
            input_ids_statement, attention_mask_statement, input_ids_justification, attention_mask_justification, labels = [b.to(device) for b in batch]
            logits = model(input_ids_statement, attention_mask_statement, input_ids_justification, attention_mask_justification)
            logits = logits.detach().cpu().numpy()
            label_ids = labels.to('cpu').numpy()

            batch_predictions = np.argmax(logits, axis=1)
            predictions.extend(batch_predictions)
            true_labels.extend(label_ids)

    return classification_report(true_labels, predictions, output_dict=True)

num_epochs = 5

# Model, Optimizer, and Scheduler
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_labels = 6 
model = SiameseBERTNetwork(num_labels)
optimizer = AdamW(model.parameters(), lr=5e-5)
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Training and evaluation loop
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, scheduler, device)
    val_report = evaluate(model, val_loader, device)
    print(f"Epoch {epoch}, Training Loss: {train_loss}")
    print(f"Validation Report: {val_report}")

# Evaluate on the test set 
test_report = evaluate(model, test_loader, device)
print(f"Test Set Evaluation Report: {test_report}")

100%|██████████| 1392/1392 [29:32<00:00,  1.27s/it]
100%|██████████| 161/161 [01:10<00:00,  2.29it/s]


Epoch 0, Training Loss: 1.7269615991704765
Validation Report: {'0': {'precision': 0.2876712328767123, 'recall': 0.3193916349809886, 'f1-score': 0.3027027027027027, 'support': 263.0}, '1': {'precision': 0.2040169133192389, 'recall': 0.7782258064516129, 'f1-score': 0.323283082077052, 'support': 248.0}, '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 251.0}, '3': {'precision': 1.0, 'recall': 0.005917159763313609, 'f1-score': 0.011764705882352941, 'support': 169.0}, '4': {'precision': 0.20930232558139536, 'recall': 0.0379746835443038, 'f1-score': 0.0642857142857143, 'support': 237.0}, '5': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 116.0}, 'accuracy': 0.2235202492211838, 'macro avg': {'precision': 0.2834984119628911, 'recall': 0.19025154745670314, 'f1-score': 0.11700603415797033, 'support': 1284.0}, 'weighted avg': {'precision': 0.2685812927667736, 'recall': 0.2235202492211838, 'f1-score': 0.1378574491789343, 'support': 1284.0}}


100%|██████████| 1392/1392 [29:38<00:00,  1.28s/it]
100%|██████████| 161/161 [01:10<00:00,  2.28it/s]


Epoch 1, Training Loss: 1.6068484474016331
Validation Report: {'0': {'precision': 0.2849162011173184, 'recall': 0.19391634980988592, 'f1-score': 0.23076923076923075, 'support': 263.0}, '1': {'precision': 0.2680851063829787, 'recall': 0.2540322580645161, 'f1-score': 0.26086956521739135, 'support': 248.0}, '2': {'precision': 0.25513196480938416, 'recall': 0.3466135458167331, 'f1-score': 0.2939189189189189, 'support': 251.0}, '3': {'precision': 0.22676579925650558, 'recall': 0.3609467455621302, 'f1-score': 0.27853881278538817, 'support': 169.0}, '4': {'precision': 0.25877192982456143, 'recall': 0.2489451476793249, 'f1-score': 0.2537634408602151, 'support': 237.0}, '5': {'precision': 0.4375, 'recall': 0.1206896551724138, 'f1-score': 0.1891891891891892, 'support': 116.0}, 'accuracy': 0.26090342679127726, 'macro avg': {'precision': 0.2885285002317914, 'recall': 0.2541906170175007, 'f1-score': 0.251174859623389, 'support': 1284.0}, 'weighted avg': {'precision': 0.2771484095691273, 'recall': 0

100%|██████████| 1392/1392 [29:39<00:00,  1.28s/it]
100%|██████████| 161/161 [01:10<00:00,  2.28it/s]


Epoch 2, Training Loss: 1.2711485921908383
Validation Report: {'0': {'precision': 0.25316455696202533, 'recall': 0.22813688212927757, 'f1-score': 0.24, 'support': 263.0}, '1': {'precision': 0.2422680412371134, 'recall': 0.3790322580645161, 'f1-score': 0.29559748427672955, 'support': 248.0}, '2': {'precision': 0.2747747747747748, 'recall': 0.24302788844621515, 'f1-score': 0.25792811839323465, 'support': 251.0}, '3': {'precision': 0.2372093023255814, 'recall': 0.30177514792899407, 'f1-score': 0.26562499999999994, 'support': 169.0}, '4': {'precision': 0.22702702702702704, 'recall': 0.17721518987341772, 'f1-score': 0.1990521327014218, 'support': 237.0}, '5': {'precision': 0.4594594594594595, 'recall': 0.14655172413793102, 'f1-score': 0.22222222222222224, 'support': 116.0}, 'accuracy': 0.2531152647975078, 'macro avg': {'precision': 0.2823171936309969, 'recall': 0.2459565150967253, 'f1-score': 0.24673749293226807, 'support': 1284.0}, 'weighted avg': {'precision': 0.2669971152429994, 'recall'

100%|██████████| 1392/1392 [29:43<00:00,  1.28s/it]
100%|██████████| 161/161 [01:10<00:00,  2.28it/s]


Epoch 3, Training Loss: 0.6394956305853209
Validation Report: {'0': {'precision': 0.2696245733788396, 'recall': 0.30038022813688214, 'f1-score': 0.2841726618705036, 'support': 263.0}, '1': {'precision': 0.22580645161290322, 'recall': 0.28225806451612906, 'f1-score': 0.2508960573476703, 'support': 248.0}, '2': {'precision': 0.2947761194029851, 'recall': 0.3147410358565737, 'f1-score': 0.3044315992292871, 'support': 251.0}, '3': {'precision': 0.28368794326241137, 'recall': 0.23668639053254437, 'f1-score': 0.25806451612903225, 'support': 169.0}, '4': {'precision': 0.21008403361344538, 'recall': 0.2109704641350211, 'f1-score': 0.21052631578947367, 'support': 237.0}, '5': {'precision': 0.4411764705882353, 'recall': 0.12931034482758622, 'f1-score': 0.2, 'support': 116.0}, 'accuracy': 0.25934579439252337, 'macro avg': {'precision': 0.28752593197647, 'recall': 0.24572442133412276, 'f1-score': 0.2513485250609945, 'support': 1284.0}, 'weighted avg': {'precision': 0.2724374748713033, 'recall': 0.

100%|██████████| 1392/1392 [29:40<00:00,  1.28s/it]
100%|██████████| 161/161 [01:10<00:00,  2.28it/s]


Epoch 4, Training Loss: 0.24794589627625144
Validation Report: {'0': {'precision': 0.24642857142857144, 'recall': 0.2623574144486692, 'f1-score': 0.2541436464088398, 'support': 263.0}, '1': {'precision': 0.23765432098765432, 'recall': 0.31048387096774194, 'f1-score': 0.2692307692307692, 'support': 248.0}, '2': {'precision': 0.3076923076923077, 'recall': 0.3346613545816733, 'f1-score': 0.3206106870229008, 'support': 251.0}, '3': {'precision': 0.271523178807947, 'recall': 0.24260355029585798, 'f1-score': 0.25625000000000003, 'support': 169.0}, '4': {'precision': 0.22641509433962265, 'recall': 0.20253164556962025, 'f1-score': 0.21380846325167038, 'support': 237.0}, '5': {'precision': 0.45454545454545453, 'recall': 0.1724137931034483, 'f1-score': 0.25000000000000006, 'support': 116.0}, 'accuracy': 0.26401869158878505, 'macro avg': {'precision': 0.29070982130025963, 'recall': 0.2541752714945018, 'f1-score': 0.26067392765236336, 'support': 1284.0}, 'weighted avg': {'precision': 0.27512057821

100%|██████████| 159/159 [01:09<00:00,  2.28it/s]

Test Set Evaluation Report: {'0': {'precision': 0.2901023890784983, 'recall': 0.3413654618473896, 'f1-score': 0.31365313653136534, 'support': 249.0}, '1': {'precision': 0.23547400611620795, 'recall': 0.29056603773584905, 'f1-score': 0.26013513513513514, 'support': 265.0}, '2': {'precision': 0.29642857142857143, 'recall': 0.34439834024896265, 'f1-score': 0.31861804222648754, 'support': 241.0}, '3': {'precision': 0.2929936305732484, 'recall': 0.22115384615384615, 'f1-score': 0.2520547945205479, 'support': 208.0}, '4': {'precision': 0.26857142857142857, 'recall': 0.22169811320754718, 'f1-score': 0.24289405684754523, 'support': 212.0}, '5': {'precision': 0.42857142857142855, 'recall': 0.16304347826086957, 'f1-score': 0.23622047244094488, 'support': 92.0}, 'accuracy': 0.2786108918705604, 'macro avg': {'precision': 0.3020235757232305, 'recall': 0.26370421290907736, 'f1-score': 0.2705959396170044, 'support': 1267.0}, 'weighted avg': {'precision': 0.2868064575063748, 'recall': 0.27861089187056


