<a href="https://colab.research.google.com/github/RyuichiSaito1/inflation-reddit-usa/blob/main/src/roberta_and_deberta_performances.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab helpers: Drive mounting and user authentication.
from google.colab import auth, drive

# Mount Google Drive at /content/drive; the data and checkpoint paths used
# further down in this notebook all live under that mount point.
drive.mount('/content/drive')

# Authenticate the current Colab user.
auth.authenticate_user()

In [None]:
# Install transformers, torch and accelerate with pip (IPython shell escapes).
!pip install transformers
!pip install torch
!pip install accelerate
# Second pass with --upgrade so an already-installed accelerate is brought up to date.
!pip install --upgrade accelerate
# After installing, restart the runtime.

In [None]:

import torch

class TestDataset(torch.utils.data.Dataset):
    """Dataset that serves tokenizer encodings together with their labels.

    ``encodings`` is a mapping from field name to a per-example sequence and
    ``labels`` is a sequence with one label per example.  Indexing returns a
    dict holding one tensor per encoding field plus a ``'labels'`` tensor.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is defined by the number of labels.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        # Convert the idx-th entry of every encoding field to a tensor.
        for field_name, field_values in self.encodings.items():
            sample[field_name] = torch.tensor(field_values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# RoBERTa: Training + validation: 1040

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the RoBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from the CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/roberta-large-fine-tuning/checkpoint-392/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)


# RoBERTa: Training + validation: 520

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the RoBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from the CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/roberta-large-fine-tuning-520/checkpoint-392/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)


# RoBERTa: Training + validation: 260

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the RoBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from the CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/roberta-large-fine-tuning-260/checkpoint-147/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)


# RoBERTa: Training + validation: 130

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the RoBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from the CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/roberta-large-fine-tuning-130/checkpoint-75/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)


# RoBERTa: Training + validation: 65

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the RoBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from the CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/roberta-large-fine-tuning-65/checkpoint-60/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)


# DeBERTa: Training + validation: 1040

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the DeBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model (Update path to your DeBERTaV3 checkpoint)
model = DebertaV2ForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/deberta-large-fine-tuning-1040/checkpoint-588/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)

# DeBERTa: Training + validation: 65

In [None]:
import pandas as pd
from torch.utils.data import DataLoader
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report, confusion_matrix

# # Function to read data from TSV file using pandas
# def read_tsv(file_path):
#     data = pd.read_table(file_path, names=['body', 'inflation'], header=0, dtype='object', engine='python')
#     return data

def read_csv_file(file_path):
    """Load the CSV at ``file_path`` into a two-column DataFrame.

    The first row of the file is consumed as a header and the columns are
    named ``body`` and ``inflation``; all values are read with ``object`` dtype.

    Returns:
        The DataFrame on success, or ``None`` after printing a message when
        the file does not exist or any other error occurs while reading.
    """
    column_names = ['body', 'inflation']
    try:
        return pd.read_csv(file_path, names=column_names, header=0, dtype='object')
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Reached only when one of the handlers above ran.
    return None

# Evaluate the DeBERTa checkpoint on the held-out test CSV and print a
# classification report, a confusion matrix and a per-class metric table.

# Test data file path (Replace with your Google Drive directory and file)
file_path = '/content/drive/MyDrive/world-inflation/data/reddit/production/test-data-200.csv'

# Read data from CSV file using pandas
test_data = read_csv_file(file_path)
# read_csv_file signals failure by printing a message and returning None, so stop
# here with a clear error instead of crashing on test_data['body'] below.
if test_data is None:
    raise RuntimeError(f"Could not load the test data from {file_path}")

# Initialize the tokenizer
tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')

# Encode the test data
test_encodings = tokenizer(test_data['body'].tolist(), truncation=True, padding=True)

# Convert the string labels to integers
test_labels = [int(label) for label in test_data['inflation']]

# Create the test dataset
test_dataset = TestDataset(test_encodings, test_labels)

# Initialize the model (Update path to your DeBERTaV3 checkpoint)
model = DebertaV2ForSequenceClassification.from_pretrained('/content/drive/MyDrive/world-inflation/data/model/deberta-large-fine-tuning-65/checkpoint-96/')
# Run inference on the GPU when one is available, with dropout disabled
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
model.eval()

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=16)

# Lists to store true and predicted labels
true_labels = []
predicted_labels = []

# Use the model to predict the labels of the test data
for batch in test_loader:
    inputs = {key: val.to(model.device) for key, val in batch.items() if key != 'labels'}
    labels = batch['labels'].to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    true_labels.extend(labels.tolist())
    predicted_labels.extend(predictions.tolist())

# Classes to report: every label id seen in the ground truth or the predictions.
# Passing them explicitly ties each position of the per-class arrays to a label id
# and avoids assuming that exactly three classes are present.
class_ids = sorted(set(true_labels) | set(predicted_labels))

# Calculate accuracy and per-class recall, precision, and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels, labels=class_ids, average=None)
precision = precision_score(true_labels, predicted_labels, labels=class_ids, average=None)
f1 = f1_score(true_labels, predicted_labels, labels=class_ids, average=None)

# Display classification report and confusion matrix
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels, predicted_labels))

# Macro average: unweighted mean of the per-class scores.
macro_avg = precision.mean(), recall.mean(), f1.mean()
# Micro average: computed from the pooled per-sample counts. (Dividing the sum of
# the per-class scores by the number of classes would just repeat the macro average.)
micro_avg = (
    precision_score(true_labels, predicted_labels, average='micro'),
    recall_score(true_labels, predicted_labels, average='micro'),
    f1_score(true_labels, predicted_labels, average='micro'),
)
macro_precision, macro_recall, macro_f1 = macro_avg
micro_precision, micro_recall, micro_f1 = micro_avg

# Display metrics for each class and macro/micro averages.
# The Accuracy column is the overall accuracy, repeated on every row.
border = "+--------------+-----------+----------+----------+----------+"
print(border)
print("|   Metric     | Accuracy  |  Recall  | Precision|  F1 Score |")
print(border)
for idx, class_id in enumerate(class_ids):
    print(f"| Class {class_id}      |    {accuracy:.2f}   |   {recall[idx]:.2f}   |   {precision[idx]:.2f}   |   {f1[idx]:.2f}   |")
print(border)
print(f"| Macro Average|    {accuracy:.2f}   |   {macro_recall:.2f}   |   {macro_precision:.2f}   |   {macro_f1:.2f}   |")
print(border)
print(f"| Micro Average|    {accuracy:.2f}   |   {micro_recall:.2f}   |   {micro_precision:.2f}   |   {micro_f1:.2f}   |")
print(border)