In [1]:
import pandas as pd
from transformers import pipeline
from sklearn.metrics import accuracy_score

# Load the CSV file (assuming the file has a 'label' column with ground truth labels)
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/T20_WorldCup/T20_cleaned_comments.csv')
tot_count = len(df['all_eng'])

# Initialize the zero-shot classification pipeline
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli"
)

comment_list = []
sentiment_list = []


# Lists to hold predicted labels and true labels
predicted_labels = []
true_labels = df['label'].tolist() if 'label' in df.columns else None  # Ground truth labels, if available

# Sentiment count dictionary to summarize sentiment distribution
sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}

# Define the candidate labels for classification
candidate_labels = ["positive", "negative", "neutral"]

# Loop through each comment in the DataFrame, classify, and gather results
for comment in df['all_eng']:
    # Classify the comment using zero-shot classification
    result = classifier(comment, candidate_labels, multi_label=True)

    # Get the label with the highest score
    predicted_label = result['labels'][result['scores'].index(max(result['scores']))]
    predicted_labels.append(predicted_label)

    # Count the sentiment type for summary
    sentiment_counts[predicted_label] += 1

    comment_list.append(comment)
    sentiment_list.append(predicted_label)

    # Optional: Print the original comment and predicted sentiment
    print(f"Original Comment: {comment}")
    print(f"Predicted Sentiment: {predicted_label}")
    print()

# Calculate sentiment percentages for each category
total_comments = len(df['all_eng'])
percent_positive = (sentiment_counts['positive'] / total_comments) * 100
percent_negative = (sentiment_counts['negative'] / total_comments) * 100
percent_neutral = (sentiment_counts['neutral'] / total_comments) * 100

# Print the overall sentiment summary
print(f"Overall Sentiment of Comments:")
print(f"Positive: {percent_positive:.2f}%")
print(f"Negative: {percent_negative:.2f}%")
print(f"Neutral: {percent_neutral:.2f}%")

# Calculate accuracy if true labels are provided
if true_labels is not None:
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f"Accuracy of the model: {accuracy * 100:.2f}%")
else:
    print("No ground truth labels found for accuracy calculation.")

dict = {'Cleaned_Comment': comment_list,
        'Predicted_Sentiment': sentiment_list,}

df_new = pd.DataFrame(dict)
#df_new.to_csv('final_output.csv', index=False)

df_new.to_csv('/content/drive/MyDrive/Colab Notebooks/T20WC_final_facebook.csv', index=False)
df_new

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cuda:0


Original Comment: watching today
Predicted Sentiment: positive

Original Comment: sa donated the cup to india these sa players cannot break the shackles of a choker tag
Predicted Sentiment: negative

Original Comment: i am crying
Predicted Sentiment: negative

Original Comment: uff the commentary is at its best
Predicted Sentiment: positive

Original Comment: forever india
Predicted Sentiment: positive

Original Comment: one of the most memorable final match ever
Predicted Sentiment: positive

Original Comment: epic final catch amp match ever
Predicted Sentiment: positive

Original Comment: what to typei dont know hoping for the champions trophy now
Predicted Sentiment: positive

Original Comment: came back to see that catch by surya
Predicted Sentiment: positive



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Original Comment: sach me rona to rukh hi nehi raha he jai hind
Predicted Sentiment: negative

Original Comment: kaun kaun hardik pandya ko maaf kar chuke ho like kare
Predicted Sentiment: negative

Original Comment: yes sir
Predicted Sentiment: positive

Original Comment: thnku king kohli
Predicted Sentiment: positive

Original Comment: india ke agi koi jeet sakta hai kya
Predicted Sentiment: positive

Original Comment: love rohit
Predicted Sentiment: positive

Original Comment: blkl indai ur the bast team end i love india or indain team congratulations my indian brothers koli i miss you
Predicted Sentiment: positive

Original Comment: hardik dikh gya jaspreet dikh gya arsh ka pta nhi kisi ko
Predicted Sentiment: negative

Original Comment: congratulations indian brother from pakistan
Predicted Sentiment: positive

Original Comment: please sir kise ke upar khami mat nikaliye ga
Predicted Sentiment: positive

Original Com

Unnamed: 0,Cleaned_Comment,Predicted_Sentiment
0,watching today,positive
1,sa donated the cup to india these sa players c...,negative
2,i am crying,negative
3,uff the commentary is at its best,positive
4,forever india,positive
...,...,...
14095,cack de india,negative
14096,love you sarr,positive
14097,virat is best,positive
14098,virat retire,negative
