# **Step 1: Import necessary libraries**

In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, classification_report
import torch
import argparse
!pip install datasets



# **Step 2: Define a function to accept input parameters**

In [8]:
def parse_args(argv=None):
    """Parse the options of the evaluation run.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, exactly as before. Pass
            an explicit list to call this from a notebook cell or a test,
            where the process arguments do not carry these options.

    Returns:
        argparse.Namespace with the attributes ``hf_token`` and
        ``validation_path`` (both strings).

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser(description="Evaluate a Huggingface model on a validation dataset")
    parser.add_argument('--hf_token', type=str, required=True, help="Huggingface token for downloading the trained model")
    parser.add_argument('--validation_path', type=str, required=True, help="Path to the validation CSV file")
    return parser.parse_args(argv)

# **Step 3: Load input parameters (for local or Colab execution)**

In [9]:
# Notebook stand-in for the command-line parameters.
# Do not paste a Hugging Face token into this cell. If the model repository
# needs authentication, supply the token outside the notebook, for example
# through the HF_TOKEN environment variable, which huggingface_hub reads.
import os

# Set the VALIDATION_PATH environment variable to evaluate a different CSV;
# the Colab location below remains the default.
validation_path = os.environ.get("VALIDATION_PATH", "/content/test_data.csv")

# **Step 4: Load the validation dataset**

In [10]:
# Read the validation CSV into a DataFrame.
validation_df = pd.read_csv(validation_path)

# Fail fast when a column used by the later cells is absent: 'ticket' is the
# text that gets tokenized, 'label' supplies the true class ids, and
# 'sentiment' supplies the class names shown in the classification report.
missing_columns = {'ticket', 'label', 'sentiment'} - set(validation_df.columns)
if missing_columns:
    raise ValueError(
        f"{validation_path} is missing required column(s): {sorted(missing_columns)}"
    )

# Wrap the DataFrame in a Hugging Face Dataset for tokenization and prediction.
validation_dataset = Dataset.from_pandas(validation_df)

# **Step 5: Load the model and tokenizer using the Huggingface token**

In [11]:
# Pick the GPU when torch can see one, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Identifier of the fine-tuned checkpoint handed to from_pretrained
model_dir = 'BharathBOLT/5class_sentimentClassifier'

# Instantiate the classifier and its matching tokenizer from that checkpoint
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Move the model to the chosen device; as the last expression of the cell,
# the returned module is what the notebook displays
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

# **Step 6: Tokenize the validation dataset**

In [12]:
def tokenize(batch):
    """Tokenize the 'ticket' texts of a batch.

    Args:
        batch: Mapping whose 'ticket' entry holds the texts to encode.

    Returns:
        The result of calling the module-level ``tokenizer`` on those texts
        with padding and truncation enabled.
    """
    tickets = batch['ticket']
    encoded = tokenizer(tickets, padding=True, truncation=True)
    return encoded

# Apply `tokenize` to the validation set in batched mode (batch_size=None)
validation_encoded = validation_dataset.map(
    function=tokenize,
    batched=True,
    batch_size=None,
)

Map:   0%|          | 0/240 [00:00<?, ? examples/s]

# **Step 7: Define the Trainer for evaluation**

In [13]:
# Wrap the loaded model and tokenizer in a Trainer; it is used for prediction only
trainer = Trainer(model=model, tokenizer=tokenizer)

# **Step 8: Make predictions on the validation dataset**

In [14]:
# Run inference over the tokenized validation set
predictions = trainer.predict(validation_encoded)

# Ground-truth class ids come straight from the dataset's 'label' column
y_true = validation_encoded['label']

# Predicted class id = position of the largest score along axis 1
logits = predictions.predictions
y_pred = np.argmax(logits, axis=1)

# **Step 9: Compute precision and recall**

In [15]:
# Print precision and recall under micro and then macro averaging.
# The F1 value is returned as well but is not printed.
for average in ('micro', 'macro'):
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average=average)
    print(f"{average.capitalize()} Precision: {precision:.4f}")
    print(f"{average.capitalize()} Recall: {recall:.4f}")

# classification_report pairs target_names with the label ids in order, so the
# names must be ordered by label id. Series.unique() returns values in order
# of first appearance in the CSV, which matches the label ids only by
# coincidence and can silently mislabel the rows of the report.
# NOTE(review): assumes each 'label' id maps to exactly one 'sentiment' name.
class_table = (
    validation_df[['label', 'sentiment']]
    .drop_duplicates()
    .sort_values('label')
)
assert class_table['label'].is_unique, "a label id maps to more than one sentiment name"

# Optionally, print a detailed classification report
print("\nClassification Report:")
print(classification_report(
    y_true,
    y_pred,
    labels=class_table['label'].tolist(),
    target_names=class_table['sentiment'].tolist(),
))

Micro Precision: 0.7958
Micro Recall: 0.7958
Macro Precision: 0.7983
Macro Recall: 0.7958

Classification Report:
                 precision    recall  f1-score   support

        Neutral       0.93      0.88      0.90        48
  Mild Negative       0.74      0.73      0.74        48
  Mild Positive       0.75      0.81      0.78        48
Strong Negative       0.76      0.77      0.76        48
Strong Positive       0.81      0.79      0.80        48

       accuracy                           0.80       240
      macro avg       0.80      0.80      0.80       240
   weighted avg       0.80      0.80      0.80       240

