# Import libraries

In [2]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np

# Class SentimentAnalysisReport

In [3]:
class SentimentAnalysisReport:
    """
    Score, categorize and report VADER sentiment for the texts in a CSV file.

    The CSV must contain a 'tokenized_and_lemmatized' column holding the text
    to analyze. Typical usage is to call calculate_sentiment_scores(), then
    categorize_sentiments(), and finally the display/save helpers.
    """

    # Column holding the pre-processed text to score.
    TEXT_COLUMN = 'tokenized_and_lemmatized'

    # Output column name -> key in the dict returned by polarity_scores().
    SCORE_KEYS = {
        "Compound": "compound",
        "Positive": "pos",
        "Negative": "neg",
        "Neutral": "neu",
    }

    # Scores assigned to entries that are not strings (e.g. NaN).
    NEUTRAL_SCORES = {
        "Compound": 0.0,
        "Positive": 0.0,
        "Negative": 0.0,
        "Neutral": 1.0,
    }

    def __init__(self, data_path):
        """
        Load the dataset and create the sentiment analyzer.

        Args:
            data_path: Path (or any source accepted by pandas.read_csv) of
                the CSV file to analyze.
        """
        # Load dataset
        self.df = pd.read_csv(data_path)

        # Create a SentimentIntensityAnalyzer object
        self.analyzer = SentimentIntensityAnalyzer()

    def calculate_sentiment_scores(self):
        """
        Calculate sentiment scores and store them in the DataFrame.

        Adds (or overwrites) the 'Compound', 'Positive', 'Negative' and
        'Neutral' columns of self.df. Entries that are not strings (e.g. NaN)
        get a fully neutral score.
        """
        rows = []

        # Iterate over the values themselves so the result does not depend on
        # the DataFrame having a default 0..n-1 index.
        for text in self.df[self.TEXT_COLUMN]:
            if isinstance(text, str):
                # Score each text once and read all four values from the result
                polarity = self.analyzer.polarity_scores(text)
                rows.append({
                    column: polarity[key]
                    for column, key in self.SCORE_KEYS.items()
                })
            else:
                # Handle NaN or non-string values
                rows.append(dict(self.NEUTRAL_SCORES))

        # Create a DataFrame from the sentiment scores
        sentiments_score = pd.DataFrame(rows, columns=list(self.SCORE_KEYS))

        # Assign positionally, column by column: rows are already in DataFrame
        # order, and re-running the method overwrites the score columns
        # instead of appending duplicates.
        for column in self.SCORE_KEYS:
            self.df[column] = sentiments_score[column].to_numpy()

    def categorize_sentiments(self):
        """
        Categorize sentiments based on the Compound score.

        Adds a 'Category' column to self.df:
            Compound <= -0.5  -> 'Negative'
            Compound >=  0.5  -> 'Positive'
            anything else     -> 'Neutral' (including a missing score)
        """
        compound = self.df['Compound']

        # Both thresholds are inclusive so that a score of exactly 0.5 is
        # categorized rather than falling through to np.select's default.
        conditions = [
            compound <= -0.5,
            compound >= 0.5,
        ]
        values = ['Negative', 'Positive']

        self.df['Category'] = np.select(conditions, values, default='Neutral')

    def display_sentiment_distribution(self):
        """
        Display the distribution of sentiment categories.

        Prints a table indexed by Category with the number of rows ('Counts')
        and their share of the total ('Percentage').
        """
        sentiment_counts = pd.DataFrame(self.df.groupby(['Category'])['Category'].count())\
                            .rename(columns={"Category": "Counts"})\
                            .assign(Percentage=lambda x: (x.Counts / x.Counts.sum()) * 100)

        print(sentiment_counts)

    def display_top_positive_texts(self, n=10):
        """
        Display the top n positive texts.

        Args:
            n: Number of texts to print, ranked by the highest Compound score.
        """
        top_text = self.df.nlargest(n=n, columns=['Compound'])[self.TEXT_COLUMN]

        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        for index, text in top_text.items():
            print(f"Index: {index}, Text: {text}")

    def save_to_csv(self, output_path="SentimentsTweets.csv"):
        """
        Save the 'tokenized_and_lemmatized' and 'Category' columns as CSV.

        Args:
            output_path: Destination file; defaults to "SentimentsTweets.csv".
        """
        result_df = self.df[[self.TEXT_COLUMN, 'Category']]
        result_df.to_csv(output_path, index=False)
        print("Data saved")


In [4]:
# Instantiate the SentimentAnalysisReport class; the constructor loads
# 'CleanedTweets.csv' into report.df and creates the VADER analyzer
report = SentimentAnalysisReport('CleanedTweets.csv')

In [5]:
# Calculate sentiment scores: adds the Compound, Positive, Negative and
# Neutral columns to report.df
report.calculate_sentiment_scores()

In [6]:
# Categorize sentiments: adds a 'Category' column derived from the Compound score
report.categorize_sentiments()

# Display sentiment distribution: prints the count and percentage per category
report.display_sentiment_distribution()

          Counts  Percentage
Category                    
Negative    4028   38.420450
Neutral     5250   50.076307
Positive    1206   11.503243


In [7]:
# Display 10 top positive texts (n defaults to 10), ranked by Compound score
report.display_top_positive_texts()


Index: 3359, Text: itay perry idf soldier died fighting hamas gaza following farewell text written wife hilum word describe much love body contain sorrow happiness gave balance soothed always supported believed always optimistic saw positive aspect every situation people like one love heart win ever heartfelt smile glowing eye huge heart gold huge loved u much loved ido gil ori know never missed opportunity show u fact want reassure strong overcome take care child honestly right know want reassure got see three amazing child want thank leaving present buy god little get see grow want reassure full perfect life everything good incredibly brief want reassure took advantage every moment taken u fight war two month ago want reassure hero want hero wanted life back beautiful time october home oriki born got know well short period time knew worst going happen missing two month spoken week longing beginning allow afraid really optimistic maybe honest promised danger allow think may come back 

  for index, text in top_text.iteritems():


In [8]:
# Save the 'tokenized_and_lemmatized' and 'Category' columns to SentimentsTweets.csv
report.save_to_csv()

Data saved
