In [None]:
import pandas as pd
from transformers import pipeline

# Load the preprocessed dataset; every model below reads its text from the
# 'English' column, so fail fast when that column is absent.
data = pd.read_csv("preprocessed_data.csv")

if 'English' not in data:
    raise ValueError("The input CSV file must contain a column named 'English'")

# Text-classification pipelines. Each one returns its own label vocabulary,
# which the map_* helpers translate to positive / neutral / negative.
twitter_roberta = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
# NOTE(review): this checkpoint appears to target Spanish text while the
# input column is named 'English' -- confirm this is intended.
beto_sentiment = pipeline(task="sentiment-analysis", model="finiteautomata/beto-sentiment-analysis")
# NOTE(review): could not confirm that "nlptown/vader_sentiment" is a
# published checkpoint -- verify that it actually loads.
vader_sentiment = pipeline(task="sentiment-analysis", model="nlptown/vader_sentiment")
bert_base = pipeline(task="sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
# Zero-shot classifier; it is given the candidate labels at call time.
bart_sentiment = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

def map_twitter_roberta(label):
    """Translate a twitter-roberta label into a sentiment name.

    'LABEL_2' -> 'positive', 'LABEL_1' -> 'neutral', 'LABEL_0' -> 'negative'.
    Any other value yields None.
    """
    label_table = (
        ('LABEL_2', 'positive'),
        ('LABEL_1', 'neutral'),
        ('LABEL_0', 'negative'),
    )
    for raw_label, sentiment in label_table:
        if label == raw_label:
            return sentiment
    return None

def map_beto_sentiment(label):
    """Translate a beto label into a sentiment name.

    'POS' -> 'positive', 'NEG' -> 'negative', 'NEU' -> 'neutral'.
    Any other value yields None.
    """
    label_table = (
        ('POS', 'positive'),
        ('NEG', 'negative'),
        ('NEU', 'neutral'),
    )
    # First exact match wins; fall back to None for unrecognised labels.
    return next(
        (sentiment for raw_label, sentiment in label_table if label == raw_label),
        None,
    )

def map_bert_base(stars):
    """Translate a star-rating label into a sentiment name.

    A label containing '5 star' or '4 star' is 'positive', one containing
    '3 star' is 'neutral', and one containing '2 star' or '1 star' is
    'negative'. Buckets are tested in that order; a label matching none of
    them yields None.
    """
    buckets = (
        ('positive', ('5 star', '4 star')),
        ('neutral', ('3 star',)),
        ('negative', ('2 star', '1 star')),
    )
    for sentiment, markers in buckets:
        if any(marker in stars for marker in markers):
            return sentiment
    return None

def extract_label(result):
    """Return the 'label' entry of a model result.

    *result* is either a single prediction dict or an indexable whose first
    element is such a dict.
    """
    prediction = result if isinstance(result, dict) else result[0]
    return prediction['label']

def _classify_text(text, classify):
    """Return ``classify(text)``, or None when *text* is missing or blank.

    Empty CSV cells are loaded by pandas as NaN (a float), which the
    pipelines cannot process, and an empty string is rejected by the
    zero-shot pipeline. Such rows are left unlabelled instead of aborting
    the whole run.
    """
    if not isinstance(text, str) or not text.strip():
        return None
    return classify(text)


candidate_labels = ["positive", "negative", "neutral"]

# One labeller per output column, listed in the order the columns are added.
# truncation=True clips inputs longer than the model's maximum sequence
# length rather than letting the model call fail on them.
# NOTE(review): vader_sentiment output is mapped with map_bert_base, which
# assumes that model emits 'N star' labels -- confirm against the model card.
labellers = {
    'twitter_roberta': lambda text: map_twitter_roberta(
        extract_label(twitter_roberta(text, truncation=True))),
    'beto_sentiment': lambda text: map_beto_sentiment(
        extract_label(beto_sentiment(text, truncation=True))),
    'vader_sentiment': lambda text: map_bert_base(
        extract_label(vader_sentiment(text, truncation=True))),
    'bert_base': lambda text: map_bert_base(
        extract_label(bert_base(text, truncation=True))),
    # The zero-shot result lists labels best-first; keep the top one.
    'bart_sentiment': lambda text: bart_sentiment(text, candidate_labels)['labels'][0],
}
for column, labeller in labellers.items():
    data[column] = data['English'].apply(_classify_text, args=(labeller,))

# Persist the original columns plus one sentiment column per model.
output_file = "data_with_all_sentiment.csv"
data.to_csv(output_file, index=False)

print(data.head())