In [17]:
import pandas as pd
import numpy as np
import datasets
import nltk
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from scipy.sparse import hstack
from sklearn.model_selection import RandomizedSearchCV
from tabulate import tabulate

from transformers import Trainer
from sklearn.metrics import classification_report
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Fetch the NLTK resources this notebook asks for (tokenizer data, stop-word lists, WordNet).
for nltk_resource in ('punkt', 'stopwords', 'wordnet'):
	nltk.download(nltk_resource)

# Locations of the two JSON-lines splits, relative to the working directory.
train_data_path = "./data/English dataset/train.jsonl"
test_data_path = "./data/English dataset/test.jsonl"

# Single lemmatizer instance, reused by preprocess_text.
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\timna\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\timna\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\timna\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
def pretty_print_report_dict(report):
	"""Print a classification report dict as two formatted tables.

	Args:
		report: mapping in the shape produced by
			``classification_report(..., output_dict=True)``; it is turned into a
			DataFrame with one row per report entry.

	Returns:
		None. The tables are written to stdout.
	"""
	rounded = pd.DataFrame(report).transpose().round(3)

	# The last three rows are treated as the aggregate rows (accuracy, macro avg and
	# weighted avg in the reports printed in this notebook); everything above them
	# is shown as per-class metrics.
	per_class = rounded.iloc[:-3, :].copy()
	aggregates = rounded.iloc[-3:, :].drop(columns=['support'])

	table_options = dict(headers='keys', tablefmt='heavy_outline', numalign="center")

	print("CLASS PERFORMANCE")
	print(tabulate(per_class, **table_options))
	print()
	print("GLOBAL AVERAGES")
	print(tabulate(aggregates, **table_options))

# Data preprocessing

In [3]:
# Stop-word sets keyed by language, filled lazily so the corpus is read once
# instead of on every preprocess_text call.
_STOP_WORDS_BY_LANGUAGE = {}


def _stop_words(language='english'):
	"""Return the NLTK stop-word list for `language` as a frozenset, loading it only once."""
	if language not in _STOP_WORDS_BY_LANGUAGE:
		_STOP_WORDS_BY_LANGUAGE[language] = frozenset(stopwords.words(language))
	return _STOP_WORDS_BY_LANGUAGE[language]


def preprocess_text(text): # From the labs
	"""Normalise raw text into a space-separated string of lemmas.

	Steps: lowercase, tokenise with ``word_tokenize``, keep only purely
	alphabetic tokens, drop English stop words, lemmatise with the shared
	``lemmatizer``.

	Tokens that contain any non-alphabetic character (punctuation, digits,
	hyphenated words, contraction pieces such as "n't") are discarded as a
	whole. A separate punctuation-stripping pass is therefore unnecessary: a
	token that passes ``isalpha()`` cannot contain punctuation.

	NOTE(review): recent NLTK releases may require the ``punkt_tab`` resource
	for ``word_tokenize`` - confirm the download cell covers the installed version.

	Args:
		text: the raw string to clean.

	Returns:
		The cleaned text as a single string; empty if no token survives.
	"""
	tokens = word_tokenize(text.lower())
	stop_words = _stop_words()

	# One pass: alphabetic-only filter plus stop-word removal.
	kept_tokens = [token for token in tokens if token.isalpha() and token not in stop_words]

	return ' '.join(lemmatizer.lemmatize(token) for token in kept_tokens)

(We load the dataset. We merge the Entailment and NotMentioned labels into a single class, so that the task reduces to predicting whether a hypothesis is a contradiction.)

In [4]:
# Read both JSON-lines splits; each load is read back through the "train" key.
train_data = pd.DataFrame(datasets.load_dataset("json", data_files=train_data_path)["train"])
test_data = pd.DataFrame(datasets.load_dataset("json", data_files=test_data_path)["train"])

# Binary target: 1 = contradiction, 0 = entailment or not mentioned.
label_map = {"Contradiction": 1, "Entailment": 0, "NotMentioned": 0}

# Series.map turns any value missing from label_map into NaN without complaint,
# so reject unexpected labels up front instead of carrying NaN targets downstream.
for split_name, split_frame in (("train", train_data), ("test", test_data)):
	unexpected_labels = set(split_frame["label"].unique()) - set(label_map)
	if unexpected_labels:
		raise ValueError(
			f"Unmapped labels in {split_name} split: {sorted(map(str, unexpected_labels))}"
		)

train_data["label"] = train_data["label"].map(label_map)
test_data["label"] = test_data["label"].map(label_map)

# Drop the doc_id / key columns; the rest of the notebook only reads
# premise, hypothesis and label.
train_data = train_data.drop(columns=["doc_id", "key"])
test_data = test_data.drop(columns=["doc_id", "key"])

(After loading the dataset, we inspect it for class imbalance.)

In [5]:
# Relative frequency of each binary label in the training split (normalize=True gives fractions).
train_data["label"].value_counts(normalize=True)

label
0    0.883048
1    0.116952
Name: proportion, dtype: float64

(We can see that only about 12% of the examples are contradictions, so the data is quite imbalanced.)

In [6]:
# len() of every training premise (character counts, assuming the column holds strings);
# computed once and reused for the maximum, mean and standard deviation below.
train_premise_lengths = train_data['premise'].apply(len)

longest_premise = max(
	train_premise_lengths.max(),
	test_data['premise'].apply(len).max(),
)
longest_hypotises = max(
	train_data['hypothesis'].apply(len).max(),
	test_data['hypothesis'].apply(len).max(),
)
longest_sentance = max(longest_premise, longest_hypotises)

print("Longest premise: ", longest_premise)
print("Longest hypothesis: ", longest_hypotises)
print("---------------------------------")

mean = np.mean(train_premise_lengths)
std = np.std(train_premise_lengths)

print("Mean premise length: ", mean)
# Upper bounds at one, two and three standard deviations above the mean.
for caption, band in (("+1 std: ", mean+std), ("+2 std: ", mean+2*std), ("+3 std: ", mean+3*std)):
	print(caption, band)

Longest premise:  3098
Longest hypothesis:  162
---------------------------------
Mean premise length:  296.27826449728826
+1 std:  651.2505192635402
+2 std:  1006.2227740297922
+3 std:  1361.1950287960442


(We inspect the length of the texts, measured in characters. We do this to see whether it would be worthwhile to keep only the shorter examples, so that they fit cleanly into a BERT model. We conclude that we would have to discard too much of the data for this to be worth it.)

# Traditional ML

In [20]:
# Premises and hypotheses get their own vectorizer, i.e. separate vocabularies and IDF weights.
vectorizer_premise = TfidfVectorizer()
vectorizer_hypothesis = TfidfVectorizer()

# Fit on the training split only; the test split is transformed with the fitted
# vectorizers further down.
X_premise = vectorizer_premise.fit_transform(train_data["premise"])
X_hypothesis = vectorizer_hypothesis.fit_transform(train_data["hypothesis"])

# Premise and hypothesis features side by side, one row per example.
# Stacking straight into CSR keeps the matrix row-sliceable for cross-validation.
train_data_vectorised = hstack([X_premise, X_hypothesis], format="csr")

# Test-split features (the Y_ prefix marks the test split here, not targets).
Y_premise = vectorizer_premise.transform(test_data["premise"])
Y_hypothesis = vectorizer_hypothesis.transform(test_data["hypothesis"])
test_data_vectorised = hstack([Y_premise, Y_hypothesis], format="csr")

## Logistic regression

In [79]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Candidate hyperparameters; RandomizedSearchCV samples n_iter combinations from them.
# NOTE(review): in scikit-learn releases where `penalty` defaults to "l2", `l1_ratio`
# is ignored unless penalty="elasticnet" is also set - confirm against the installed
# version whether this dimension of the search has any effect.
grid_serch_dict = {
	"l1_ratio": [0, 0.5, 1],
	"C": [0.1, 0.5, 1.0, 2.0, 10, 50],
	"class_weight": [None, "balanced"],
	"solver": ["saga"]
}

# Both the candidate sampling and the saga solver are stochastic. Seed them (same
# seed as the random-forest cell) so a re-run reproduces the selected
# hyperparameters and scores.
logreg_model = RandomizedSearchCV(
	LogisticRegression(max_iter=5000, random_state=67),
	grid_serch_dict,
	n_iter=10,
	cv=3,
	scoring='f1',  # binary F1, i.e. F1 of label 1
	random_state=67,
)
logreg_model.fit(train_data_vectorised, train_data["label"])

# predict() goes through the best estimator, refitted on the full training split.
predictions = logreg_model.predict(test_data_vectorised)

display_params = [[k, str(v)] for k, v in logreg_model.best_params_.items()]
print(tabulate(display_params, headers=["Hyperparameter", "Value"], tablefmt="heavy_outline"))



┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
┃ Hyperparameter   ┃ Value    ┃
┣━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━┫
┃ solver           ┃ saga     ┃
┃ l1_ratio         ┃ 0.5      ┃
┃ class_weight     ┃ balanced ┃
┃ C                ┃ 50       ┃
┗━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━┛


In [82]:
# Scores whatever `predictions` currently holds (set by the most recently run
# model cell) against the test labels.
report_dict = classification_report(
	test_data["label"],
	predictions,
	zero_division=0,
	output_dict=True,
)
pretty_print_report_dict(report_dict)

CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.982    ┃  0.974   ┃   0.978    ┃   1871    ┃
┃ 1  ┃    0.795    ┃  0.845   ┃   0.819    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.961    ┃  0.961   ┃   0.961    ┃
┃ macro avg    ┃    0.888    ┃   0.91   ┃   0.899    ┃
┃ weighted avg ┃    0.962    ┃  0.961   ┃   0.961    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Candidate hyperparameters; RandomizedSearchCV samples n_iter combinations from them.
grid_serch_dict = {
	"n_estimators": [50, 100, 200],
	"max_depth": [None, 5, 10, 20],
	"min_samples_split": [2, 5, 10, 20, 50],
	"class_weight": [None, "balanced"]
}

# The forest was already seeded, but the random choice of candidate combinations
# was not. Seed the search too so the same 15 candidates are evaluated on every run.
rf_model = RandomizedSearchCV(
	RandomForestClassifier(random_state=67),
	grid_serch_dict,
	n_iter=15,
	cv=3,
	scoring='f1',  # binary F1, i.e. F1 of label 1
	random_state=67,
)
rf_model.fit(train_data_vectorised, train_data["label"])

# predict() goes through the best estimator, refitted on the full training split.
predictions = rf_model.predict(test_data_vectorised)

display_params = [[k, str(v)] for k, v in rf_model.best_params_.items()]
print(tabulate(display_params, headers=["Hyperparameter", "Value"], tablefmt="heavy_outline"))

┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
┃ Hyperparameter    ┃ Value    ┃
┣━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━┫
┃ n_estimators      ┃ 50       ┃
┃ min_samples_split ┃ 2        ┃
┃ max_depth         ┃ None     ┃
┃ class_weight      ┃ balanced ┃
┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━┛


In [74]:
# Scores whatever `predictions` currently holds (set by the most recently run
# model cell) against the test labels.
report_dict = classification_report(
	test_data["label"],
	predictions,
	zero_division=0,
	output_dict=True,
)
pretty_print_report_dict(report_dict)

CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.975    ┃   0.99   ┃   0.983    ┃   1871    ┃
┃ 1  ┃    0.905    ┃  0.782   ┃   0.839    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.968    ┃  0.968   ┃   0.968    ┃
┃ macro avg    ┃    0.94     ┃  0.886   ┃   0.911    ┃
┃ weighted avg ┃    0.967    ┃  0.968   ┃   0.967    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


## SVC

In [85]:
from sklearn.svm import SVC

# Candidate hyperparameters; RandomizedSearchCV samples n_iter combinations from them.
grid_serch_dict = {
	"C": [0.1, 0.5, 1.0, 2.0, 10, 50],
	"kernel": ["linear", "sigmoid", "rbf"],
	"class_weight": [None, "balanced"]
}

# Seed the candidate sampling (same seed as the random-forest cell) so the same
# 10 combinations are evaluated on every run.
svm_model = RandomizedSearchCV(
	SVC(),
	grid_serch_dict,
	n_iter=10,
	cv=3,
	scoring='f1',  # binary F1, i.e. F1 of label 1
	random_state=67,
)
svm_model.fit(train_data_vectorised, train_data["label"])

# predict() goes through the best estimator, refitted on the full training split.
predictions = svm_model.predict(test_data_vectorised)

display_params = [[k, str(v)] for k, v in svm_model.best_params_.items()]
print(tabulate(display_params, headers=["Hyperparameter", "Value"], tablefmt="heavy_outline"))

┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ Hyperparameter   ┃ Value   ┃
┣━━━━━━━━━━━━━━━━━━╋━━━━━━━━━┫
┃ kernel           ┃ rbf     ┃
┃ class_weight     ┃ None    ┃
┃ C                ┃ 10      ┃
┗━━━━━━━━━━━━━━━━━━┻━━━━━━━━━┛


In [86]:
# Scores whatever `predictions` currently holds (set by the most recently run
# model cell) against the test labels.
report_dict = classification_report(
	test_data["label"],
	predictions,
	zero_division=0,
	output_dict=True,
)
pretty_print_report_dict(report_dict)

CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.981    ┃  0.989   ┃   0.985    ┃   1871    ┃
┃ 1  ┃    0.898    ┃  0.841   ┃   0.869    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.973    ┃  0.973   ┃   0.973    ┃
┃ macro avg    ┃    0.94     ┃  0.915   ┃   0.927    ┃
┃ weighted avg ┃    0.973    ┃  0.973   ┃   0.973    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


# Transformer-Based Classifier

## Training

In [19]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset

def preprocess_function(examples):
	"""Tokenise premise/hypothesis pairs and attach a global-attention mask.

	Uses the module-level ``tokenizer``. Every pair is padded to 1024 tokens;
	when a pair is too long only the first sequence (the premise) is truncated.

	Args:
		examples: a batch with "premise" and "hypothesis" entries, as passed by
			``Dataset.map(..., batched=True)``.

	Returns:
		The tokenizer output with an extra "global_attention_mask" entry: one
		list per example, 1 at position 0 and 0 everywhere else.
	"""
	encoded = tokenizer(
		examples["premise"],
		examples["hypothesis"],
		max_length=1024,
		truncation="only_first",
		padding="max_length",
	)

	# Global attention only on the first position of each sequence
	# (presumably the leading special token - confirm for this tokenizer).
	first_position_masks = []
	for token_ids in encoded["input_ids"]:
		row_mask = [0] * len(token_ids)
		row_mask[0] = 1
		first_position_masks.append(row_mask)

	encoded["global_attention_mask"] = first_position_masks
	return encoded

# Checkpoint identifier shared by the tokenizer and the classifier below.
model_name = "kiddothe2b/longformer-mini-1024"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Two output classes, matching the binary 0/1 label column.
model = AutoModelForSequenceClassification.from_pretrained(
	model_name,
	num_labels=2,
)

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/longformer-mini-1024 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Wrap both DataFrames as datasets objects first ...
train_dataset, eval_dataset = (
	Dataset.from_pandas(frame) for frame in (train_data, test_data)
)

# ... then tokenise them in batches (train first, then the test split used for evaluation).
tokenized_train_dataset, tokenized_eval_dataset = (
	split.map(preprocess_function, batched=True)
	for split in (train_dataset, eval_dataset)
)

Map: 100%|██████████| 7191/7191 [00:09<00:00, 774.18 examples/s]
Map: 100%|██████████| 2091/2091 [00:02<00:00, 767.17 examples/s]


In [21]:
# Training switch: the guard at the bottom of this cell only trains when RUN is True
# and the output folder does not exist yet; otherwise it just prints a notice.
RUN = False

from transformers import TrainingArguments, Trainer
from sklearn.metrics import f1_score
from torch import nn

class WeightedTrainer(Trainer):
	"""Trainer whose training loss can be a class-weighted cross-entropy.

	Args:
		class_weights: optional sequence with one weight per class. When given,
			it is stored as a float tensor on ``self.args.device`` and used to
			weight the cross-entropy loss; when omitted, the loss returned by
			the model is used unchanged.
		*args, **kwargs: forwarded to ``Trainer.__init__``.
	"""

	def __init__(self, *args, class_weights=None, **kwargs):
		super().__init__(*args, **kwargs)
		# self.args is available only after the parent constructor has run.
		self.class_weights = (
			None
			if class_weights is None
			else torch.tensor(class_weights, dtype=torch.float).to(self.args.device)
		)

	def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None, **kwargs):
		"""Compute the loss for one batch.

		The full ``inputs`` (labels included) are passed to the model. With
		class weights configured, the loss is recomputed from the logits as a
		weighted cross-entropy; otherwise the model's own loss is returned.
		``num_items_in_batch`` and extra keyword arguments are accepted but not used.

		Returns:
			``(loss, outputs)`` when ``return_outputs`` is true, else ``loss``.
		"""
		labels = inputs.get("labels")
		outputs = model(**inputs)
		logits = outputs.get("logits")

		if self.class_weights is None:
			# No weights supplied: fall back to the loss computed by the model itself.
			loss = outputs.loss if isinstance(outputs, dict) else outputs[0]
		else:
			# Flatten to (N, num_labels) logits and (N,) targets before weighting.
			loss = nn.functional.cross_entropy(
				logits.view(-1, self.model.config.num_labels),
				labels.view(-1),
				weight=self.class_weights,
			)

		if return_outputs:
			return (loss, outputs)
		return loss

def compute_metrics(eval_pred):
	"""Return the F1 score of class 1 for an evaluation pass.

	Args:
		eval_pred: a ``(logits, labels)`` pair.

	Returns:
		A dict with the single key "f1_score_class_1", the key that
		``metric_for_best_model`` refers to in the training arguments.
	"""
	logits, labels = eval_pred
	# Predicted class = index of the largest logit along the last axis.
	predicted_classes = np.argmax(logits, axis=-1)
	return {"f1_score_class_1": f1_score(labels, predicted_classes, pos_label=1, average='binary')}

class_weights = [1.0, 9.0] # Weight class 0, weight class 1

training_args = TrainingArguments(
	output_dir="./artifacts",
	# --- optimisation ---
	num_train_epochs=2,
	learning_rate=2e-4,
	weight_decay=0.01,
	per_device_train_batch_size=2,
	gradient_accumulation_steps=16,  # 2 x 16 = 32 examples per optimiser step, per device
	# --- evaluate and checkpoint on the same 50-step cadence ---
	eval_strategy="steps",
	eval_steps=50,
	save_strategy="steps",
	save_steps=50,
	save_total_limit=3,
	# --- checkpoint selection: highest class-1 F1 reported by compute_metrics ---
	load_best_model_at_end=True,
	metric_for_best_model="f1_score_class_1",
	greater_is_better=True,
	resume_from_checkpoint=False
)


# Destination folder for the fine-tuned model and its tokenizer.
path = "./trained_model_ex3_f1_class1_weighted"

# Train only when explicitly enabled (RUN) and no saved model exists at `path` yet.
if (not os.path.exists(path) and RUN):
	from transformers.trainer_utils import get_last_checkpoint

	# NOTE(review): eval_dataset is the tokenised test split, and training_args sets
	# load_best_model_at_end, so the best checkpoint is selected on test data. The
	# test scores reported in the evaluation section are therefore optimistic -
	# consider holding out a validation split for checkpoint selection.
	trainer = WeightedTrainer(
		model=model,
		args=training_args,
		train_dataset=tokenized_train_dataset,
		eval_dataset=tokenized_eval_dataset,
		compute_metrics=compute_metrics,
		class_weights=class_weights,
	)

	# Resume only if output_dir already holds a checkpoint. Passing
	# resume_from_checkpoint=True unconditionally raises on a fresh run, because
	# there is nothing to resume from.
	checkpoint_root = training_args.output_dir
	last_checkpoint = get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
	trainer.train(resume_from_checkpoint=last_checkpoint)

	tokenizer.save_pretrained(path)
	trainer.save_model(path)
else:
	print("Model ni treniran")

Model ni treniran


## Evaluation

In [30]:
path = "trained_model_ex3_precision_class1_v1"
if not os.path.exists(path):
	print("Model not found")
else:
	# Load the saved checkpoint and wrap it in a default Trainer, used only for inference.
	model = AutoModelForSequenceClassification.from_pretrained(path, num_labels=2)
	trainer = Trainer(model=model)

	# First field of the prediction output: the per-class scores for every test example.
	predictions_procentages = trainer.predict(tokenized_eval_dataset).predictions
	# Predicted label = index of the highest score.
	predictions = predictions_procentages.argmax(-1)

	report_dict = classification_report(
		test_data["label"],
		predictions,
		zero_division=0,
		output_dict=True,
	)
	pretty_print_report_dict(report_dict)



CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.981    ┃  0.972   ┃   0.976    ┃   1871    ┃
┃ 1  ┃    0.776    ┃  0.836   ┃   0.805    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.957    ┃  0.957   ┃   0.957    ┃
┃ macro avg    ┃    0.878    ┃  0.904   ┃   0.891    ┃
┃ weighted avg ┃    0.959    ┃  0.957   ┃   0.958    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


In [31]:
path = "trained_model_ex3_v2_macro_f1_v1"
if not os.path.exists(path):
	print("Model not found")
else:
	# Load the saved checkpoint and wrap it in a default Trainer, used only for inference.
	model = AutoModelForSequenceClassification.from_pretrained(path, num_labels=2)
	trainer = Trainer(model=model)

	# First field of the prediction output: the per-class scores for every test example.
	predictions_procentages = trainer.predict(tokenized_eval_dataset).predictions
	# Predicted label = index of the highest score.
	predictions = predictions_procentages.argmax(-1)

	report_dict = classification_report(
		test_data["label"],
		predictions,
		zero_division=0,
		output_dict=True,
	)
	pretty_print_report_dict(report_dict)



CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.935    ┃  0.975   ┃   0.955    ┃   1871    ┃
┃ 1  ┃    0.667    ┃  0.427   ┃   0.521    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.917    ┃  0.917   ┃   0.917    ┃
┃ macro avg    ┃    0.801    ┃  0.701   ┃   0.738    ┃
┃ weighted avg ┃    0.907    ┃  0.917   ┃   0.909    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


In [32]:
path = "trained_model_ex3_f1_class1_weighted"
if not os.path.exists(path):
	print("Model not found")
else:
	# Load the saved checkpoint and wrap it in a default Trainer, used only for inference.
	model = AutoModelForSequenceClassification.from_pretrained(path, num_labels=2)
	trainer = Trainer(model=model)

	# First field of the prediction output: the per-class scores for every test example.
	predictions_procentages = trainer.predict(tokenized_eval_dataset).predictions
	# Predicted label = index of the highest score.
	predictions = predictions_procentages.argmax(-1)

	report_dict = classification_report(
		test_data["label"],
		predictions,
		zero_division=0,
		output_dict=True,
	)
	pretty_print_report_dict(report_dict)



CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.981    ┃  0.975   ┃   0.978    ┃   1871    ┃
┃ 1  ┃    0.797    ┃  0.836   ┃   0.816    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.96     ┃   0.96   ┃    0.96    ┃
┃ macro avg    ┃    0.889    ┃  0.906   ┃   0.897    ┃
┃ weighted avg ┃    0.961    ┃   0.96   ┃   0.961    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛


In [33]:
path = "trained_model_ex3_f1_class1_weighted_2_epoc"
if not os.path.exists(path):
	print("Model not found")
else:
	# Load the saved checkpoint and wrap it in a default Trainer, used only for inference.
	model = AutoModelForSequenceClassification.from_pretrained(path, num_labels=2)
	trainer = Trainer(model=model)

	# First field of the prediction output: the per-class scores for every test example.
	predictions_procentages = trainer.predict(tokenized_eval_dataset).predictions
	# Predicted label = index of the highest score.
	predictions = predictions_procentages.argmax(-1)

	report_dict = classification_report(
		test_data["label"],
		predictions,
		zero_division=0,
		output_dict=True,
	)
	pretty_print_report_dict(report_dict)



CLASS PERFORMANCE
┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    ┃  precision  ┃  recall  ┃  f1-score  ┃  support  ┃
┣━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━╋━━━━━━━━━━━┫
┃ 0  ┃    0.982    ┃  0.974   ┃   0.978    ┃   1871    ┃
┃ 1  ┃    0.791    ┃  0.845   ┃   0.818    ┃    220    ┃
┗━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━┛

GLOBAL AVERAGES
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
┃              ┃  precision  ┃  recall  ┃  f1-score  ┃
┣━━━━━━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━╋━━━━━━━━━━━━┫
┃ accuracy     ┃    0.96     ┃   0.96   ┃    0.96    ┃
┃ macro avg    ┃    0.887    ┃   0.91   ┃   0.898    ┃
┃ weighted avg ┃    0.962    ┃   0.96   ┃   0.961    ┃
┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━┻━━━━━━━━━━━━┛
