In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Directory where the fine-tuned model and tokenizer were saved
# (presumably a mounted Google Drive path in Colab — adjust for other runtimes).
model_directory = "/content/drive/MyDrive/saved_model"

# Load the fine-tuned model and its tokenizer from the same directory
loaded_model = AutoModelForSequenceClassification.from_pretrained(model_directory)
loaded_tokenizer = AutoTokenizer.from_pretrained(model_directory)

# Use the first GPU when one is available; otherwise fall back to CPU (-1)
# so the cell does not fail on a CPU-only runtime.
pipeline_device = 0 if torch.cuda.is_available() else -1

# Create a pipeline for sequence classification.
# The deprecated `return_all_scores=False` argument is intentionally omitted:
# the default already yields a single {'label', 'score'} dict per input, which
# is the shape the evaluation code relies on.
classifier = pipeline(
    "text-classification",
    model=loaded_model,
    tokenizer=loaded_tokenizer,
    device=pipeline_device,
)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset and drop rows missing either the text or its label.
# A single non-mutating dropna over both columns keeps the same rows as two
# separate in-place calls, and leaves the cell safe to re-run.
twitter_data = pd.read_csv('Twitter_Data.csv').dropna(subset=['clean_text', 'category'])

# Split the data into training and testing sets (10% held out for testing).
# The fixed random_state makes the split reproducible across runs.
# NOTE(review): presumably this must match the split used when the model was
# fine-tuned so the test rows are unseen — confirm against the training notebook.
train_data, test_data = train_test_split(twitter_data, test_size=0.1, random_state=43)

# Texts and ground-truth categories of the held-out test split
test_texts = test_data['clean_text'].tolist()
test_labels = test_data['category'].tolist()

# Predict labels for the test set. truncation=True clips any text longer than
# the model's maximum sequence length instead of failing on it.
predictions = classifier(test_texts, truncation=True)

# Extract predicted labels (each prediction is a {'label', 'score'} dict)
predicted_labels = [pred['label'] for pred in predictions]

# Convert predicted labels to the numerical values used in the 'category'
# column. Any label other than LABEL_2 / LABEL_1 maps to -1.0.
# NOTE(review): assumes the model's label ids line up with categories as
# LABEL_2 -> 1.0 and LABEL_1 -> 0.0 — verify against the training label encoding.
label_to_category = {'LABEL_2': 1.0, 'LABEL_1': 0.0}
_predicted_labels = [label_to_category.get(label, -1.0) for label in predicted_labels]

# Evaluate the performance
accuracy = accuracy_score(test_labels, _predicted_labels)
print(f"Accuracy on the test set: {accuracy}")