In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [12]:
# Read the processed Reddit data produced by the previous step of the pipeline.
df = pd.read_csv(filepath_or_buffer="../data/processed_reddit_data.csv")

In [13]:
# Work on a separate (deep) copy so the frame loaded into `df` stays untouched,
# then preview the first five rows.
df_copy = df.copy(deep=True)
df_copy.head(n=5)

Unnamed: 0,cleaned_text,topic
0,kimmel make meaning person donat kirk famili t...,Other Concern Related
1,compani never address chairman abus virginia g...,Other Concern Related
2,kneel totalitarian save skin stupid simpli mov...,Other Concern Related
3,sinclair lift suspens jimmi kimmel live statio...,Other Concern Related
4,never enough peopl appeas authoritarian polit ...,Political Related


#### Sentiment analysis with VADER (part of the `nltk` library)

In [None]:
# Download the VADER lexicon only when it is not already available locally,
# so re-running the notebook does not require a network round-trip each time.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

In [16]:
# initializing the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()


def get_sentiment_score(text):
    """Return the VADER compound sentiment score for ``text``.

    Parameters
    ----------
    text : object
        The text to score. Anything that is not a ``str`` (for example a
        missing value read from the CSV) is treated as "no text".

    Returns
    -------
    float
        The ``'compound'`` entry of ``analyzer.polarity_scores(text)``, or
        ``NaN`` when ``text`` is not a string, so the resulting column stays
        numeric and missing rows are skipped by later aggregations.
    """
    if not isinstance(text, str):
        return np.nan
    return analyzer.polarity_scores(text)['compound']


# NOTE(review): `cleaned_text` appears to be stemmed ("famili", "compani", ...).
# VADER looks words up in a lexicon, so stemmed tokens may not match and scores
# may be pulled towards 0.0 - consider scoring the raw text instead; confirm.
df_copy['sentiment_score'] = df_copy['cleaned_text'].apply(get_sentiment_score)

# Rich display instead of print(): avoids the wrapped, truncated plain-text table.
df_copy.head(10)

                                        cleaned_text                  topic  \
0  kimmel make meaning person donat kirk famili t...  Other Concern Related   
1  compani never address chairman abus virginia g...  Other Concern Related   
2  kneel totalitarian save skin stupid simpli mov...  Other Concern Related   
3  sinclair lift suspens jimmi kimmel live statio...  Other Concern Related   
4  never enough peopl appeas authoritarian polit ...      Political Related   
5                        well sinclair farright turd  Other Concern Related   
6  note jimmi didnt say anyth disparag kirk put j...      Political Related   
7  jimmi kimmel walk away sue disney remaind cont...  Other Concern Related   
8       welp never watch abc regardless happen jimmi  Other Concern Related   
9                                   guy america fuck  Other Concern Related   

   sentiment_score  
0           0.0000  
1           0.0000  
2          -0.5106  
3           0.2960  
4           0.0000  
5   

**Analysis:**
- Rows 3, 7 and 8 have a positive compound score, which indicates a positive tone.
- Rows 2, 6 and 9 have a negative compound score, which indicates a negative tone.
- Rows with a score of 0.0 (e.g. rows 0, 1 and 4) indicate a neutral tone.

In [17]:
# grouping by 'topic' and calculating the average sentiment score
topic_sentiment = df_copy.groupby('topic')['sentiment_score'].mean().reset_index()
print(topic_sentiment)

                           topic  sentiment_score
0                     AI Related         0.046235
1         AI and Privacy Related         0.048647
2          Other Concern Related        -0.010486
3              Political Related        -0.087601
4                Privacy Related        -0.023920
5  Privacy and Political Related        -0.120059


**Analysis:**
- The `AI Related` and `AI and Privacy Related` topics have positive average sentiment scores (≈ `0.05`). This suggests the community's tone towards AI is slightly positive, although both averages sit just below the `0.05` threshold that VADER conventionally uses to call a single text positive, so the tone is close to neutral.
- The `Privacy and Political Related` topic has the most negative average score (`-0.12`), followed by `Political Related` (`-0.09`). This shows that discussions about politics and privacy in the `r/technology` subreddit are often critical or contentious.
- When the topic is only `Privacy Related`, the average tone is close to neutral, leaning slightly negative (`-0.02`).