In [None]:
# Install the necessary library
!pip install transformers

# Import necessary libraries
import pandas as pd
from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load the dataset (upload your dataset to Colab before running this code)
file_path = '/content/Twitter_Data.csv'
df = pd.read_csv(file_path)

# Rename the columns for clarity.
# NOTE: this assignment requires the CSV to have exactly two columns;
# pandas raises a ValueError on a length mismatch.
df.columns = ['text', 'sentiment']

# Filter out rows with missing values (in either column)
df_clean = df.dropna()

# Select a reproducible sample of up to 10,000 rows. The size is clamped to
# the number of available rows because DataFrame.sample raises a ValueError
# when asked for more rows than exist (sampling is without replacement).
sample_size = min(10000, len(df_clean))
df_sample = df_clean.sample(n=sample_size, random_state=1)

# Hugging Face model checkpoints to benchmark, evaluated in the order listed
models = [
    "distilbert-base-uncased-finetuned-sst-2-english",
    "nlptown/bert-base-multilingual-uncased-sentiment",
    "cardiffnlp/twitter-roberta-base-sentiment",
    "siebert/sentiment-roberta-large-english",
    "finiteautomata/bertweet-base-sentiment-analysis",
]

# Named labels (compared case-insensitively) and the sentiment they stand for.
# The LABEL_<n> entries follow the 0=negative / 1=neutral / 2=positive
# convention; confirm against the model card before adding a checkpoint that
# emits LABEL_<n> with a different meaning.
_NAMED_SENTIMENTS = {
    'negative': -1, 'neg': -1, 'label_0': -1,
    'neutral': 0, 'neu': 0, 'label_1': 0,
    'positive': 1, 'pos': 1, 'label_2': 1,
}


# Function to map the pipeline's output to our sentiment labels
def map_sentiment(label):
    """Map a model-specific label string to -1 (negative), 0 (neutral) or 1 (positive).

    The label is stripped and lower-cased before matching, so 'NEGATIVE',
    'Negative' and 'negative' are equivalent. Recognised forms:

    * 'negative' / 'neutral' / 'positive'
    * 'neg' / 'neu' / 'pos'
    * 'label_0' / 'label_1' / 'label_2'
    * star ratings such as '1 star' or '4 stars': 1-2 stars are negative,
      3 stars neutral, 4 or more positive

    Args:
        label: The 'label' value from a pipeline prediction. Non-string
            values are converted with ``str`` first.

    Returns:
        -1, 0 or 1. Any unrecognised label maps to 0.
    """
    normalized = str(label).strip().lower()

    if normalized in _NAMED_SENTIMENTS:
        return _NAMED_SENTIMENTS[normalized]

    # Star-rating labels look like "<digit> star" or "<digit> stars".
    parts = normalized.split()
    if len(parts) == 2 and parts[1] in ('star', 'stars') and parts[0].isdigit():
        stars = int(parts[0])
        if stars <= 2:
            return -1
        if stars >= 4:
            return 1
        return 0

    # Unknown labels are treated as neutral.
    return 0

# Dictionary to store performance metrics for each model
performance_metrics = {}

# Use the first GPU when one is available and fall back to CPU (-1) otherwise,
# so the notebook also runs on a runtime without an accelerator.
try:
    import torch
    pipeline_device = 0 if torch.cuda.is_available() else -1
except ImportError:
    pipeline_device = -1

# The inputs are the same for every model, so build them once up front.
texts = df_sample['text'].astype(str).tolist()
true_labels = df_sample['sentiment']

# Evaluate each model
for model_name in models:
    # Load pre-trained sentiment analysis pipeline for the current model
    sentiment_pipeline = pipeline('sentiment-analysis', model=model_name, device=pipeline_device)

    # Run the whole sample through the pipeline in one call instead of one
    # call per row. truncation=True clips texts that exceed the model's
    # maximum sequence length, which would otherwise raise inside the model.
    raw_predictions = sentiment_pipeline(texts, truncation=True, batch_size=32)

    # Add the mapped predictions to the dataset (overwritten for each model)
    df_sample['hf_predicted_sentiment'] = [
        map_sentiment(prediction['label']) for prediction in raw_predictions
    ]
    predicted_labels = df_sample['hf_predicted_sentiment']

    # Calculate the accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # Calculate support-weighted precision, recall, and F1-score.
    # zero_division=0 scores a class that is never predicted (e.g. neutral
    # for a model with only positive/negative outputs) as 0 without warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, predicted_labels, average='weighted', zero_division=0
    )

    # Compile the metrics into a dictionary
    performance_metrics[model_name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

# Print a labelled report for every evaluated model, one metric per line,
# followed by a blank separator
report_fields = (
    ('Accuracy', 'accuracy'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
    ('F1-score', 'f1_score'),
)

for model_name, metrics in performance_metrics.items():
    print(f"Performance Metrics for {model_name}:")
    for heading, metric_key in report_fields:
        print(f"{heading}: {metrics[metric_key]}")
    print("\n")

# Convert performance metrics to a DataFrame for better visualization:
# after the transpose there is one row per model and one column per metric.
metrics_df = pd.DataFrame(performance_metrics).transpose()

# Plot the results
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Comparison of Hugging Face Transformer Models', fontsize=16)

# (metric column, subplot title, bar color), listed in the row-major order
# in which axes.flat walks the 2x2 grid.
panels = [
    ('accuracy', 'Accuracy', 'skyblue'),
    ('precision', 'Precision', 'lightgreen'),
    ('recall', 'Recall', 'lightcoral'),
    ('f1_score', 'F1 Score', 'lightsalmon'),
]

# Draw the bars at explicit integer positions and pin the ticks to those
# positions before labelling them, so each model name is tied to its bar
# rather than to whatever ticks the default locator would choose.
bar_positions = list(range(len(metrics_df.index)))

for ax, (column, title, color) in zip(axes.flat, panels):
    ax.bar(bar_positions, metrics_df[column], color=color)
    ax.set_title(title)
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(metrics_df.index, rotation=45, ha='right')

# Adjust layout (leaving room at the top for the figure title) and show plots
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
