In [1]:
import os

import pandas as pd

In [2]:
# Load the Reddit dataset CSV; later cells read its `neighborhood` and `text` columns.
df = pd.read_csv('sentiment_dataset/reddit_data.csv')

In [10]:
# Summary statistics of the numeric columns in `df`.
df.describe()

Unnamed: 0,upvote_ratio
count,2198.0
mean,0.836133
std,0.202531
min,0.06
25%,0.75
50%,0.91
75%,1.0
max,1.0


In [17]:
# Standard deviation, per column, of the number of non-null values each
# neighborhood has (i.e. how unevenly the posts are spread across areas).
(
    df.groupby('neighborhood')
    .count()
    .std()
)

upvote_ratio    32.934011
text            32.934011
dtype: float64

In [5]:
# NOTE(review): same call as the earlier `df.describe()` cell; the recorded output is identical.
df.describe()

Unnamed: 0,upvote_ratio
count,2198.0
mean,0.836133
std,0.202531
min,0.06
25%,0.75
50%,0.91
75%,1.0
max,1.0


# VaderSentiment

In [86]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Initialize the sentiment analyzer
# A single VADER analyzer instance is created here and reused for every post
# in the scoring loop below.
analyzer = SentimentIntensityAnalyzer()

In [87]:
# Using texts for sentiment analysis
# Sum the VADER `compound` score of every post, grouped by neighborhood.
# Result: `neighborhood_score` maps neighborhood -> running total of compound
# scores (keys in first-seen order). The averaging cell that follows divides
# these totals by the number of posts per neighborhood.
neighborhood_score = {}
# Walk only the two needed columns in lockstep; `iterrows()` would build a
# full Series object for every row just to read two fields.
for hood, post_text in zip(df['neighborhood'], df['text']):
    compound = analyzer.polarity_scores(post_text)['compound']
    # `.get(hood, 0)` handles the first occurrence of a neighborhood without
    # a separate initialise-then-add branch.
    neighborhood_score[hood] = neighborhood_score.get(hood, 0) + compound

In [88]:
# take the average
# Turn each neighborhood's summed score into a mean by dividing by the number
# of rows that neighborhood has in `df`.
# NOTE: the division happens in place, so running this cell a second time
# without first re-running the cell that builds `neighborhood_score` would
# divide the values again.
# Count rows per neighborhood once, instead of filtering the whole frame for
# every neighborhood inside the loop.
rows_per_area = df['neighborhood'].value_counts()
for area in neighborhood_score:
    # int(...) keeps the result a plain Python float, as before; a key with
    # no matching rows still raises ZeroDivisionError, as before.
    count = int(rows_per_area.get(area, 0))
    neighborhood_score[area] /= count

In [90]:
# make dataframe
# One row per neighborhood, ranked by OverallScore from highest to lowest,
# with the index renumbered 0..n-1 after sorting.
df_vader = (
    pd.DataFrame(list(neighborhood_score.items()), columns=['Neighborhood', 'OverallScore'])
    .sort_values(by='OverallScore', ascending=False)
    .reset_index(drop=True)
)

In [91]:
# save
# Write the ranked VADER scores to CSV without the row index. The output
# folder is created first so the cell also works when `sentiment_result/`
# does not exist yet (`to_csv` does not create missing directories).
vader_path = 'sentiment_result/vader_sentiment.csv'
os.makedirs(os.path.dirname(vader_path), exist_ok=True)
df_vader.to_csv(vader_path, index=False)

# TextBlob

In [73]:
from textblob import TextBlob

In [74]:
# Using texts for sentiment analysis
# Sum TextBlob's polarity and subjectivity of every post, grouped by
# neighborhood. Both dicts map neighborhood -> running total (keys in
# first-seen order); the averaging cell that follows divides the totals by
# the number of posts per neighborhood.
polarity_score = {}
subjectivity_score = {}
# Walk only the two needed columns in lockstep; `iterrows()` would build a
# full Series object for every row just to read two fields.
for hood, post_text in zip(df['neighborhood'], df['text']):
    sentiment = TextBlob(post_text).sentiment
    # `.get(hood, 0)` handles the first occurrence of a neighborhood without
    # a separate initialise-then-add branch.
    polarity_score[hood] = polarity_score.get(hood, 0) + sentiment.polarity
    subjectivity_score[hood] = subjectivity_score.get(hood, 0) + sentiment.subjectivity

In [75]:
# take the average
# Turn each neighborhood's summed polarity/subjectivity into a mean by
# dividing by the number of rows that neighborhood has in `df`.
# NOTE: the division happens in place, so running this cell a second time
# without first re-running the cell that builds the two dicts would divide
# the values again.
# Count rows per neighborhood once, instead of filtering the whole frame for
# every neighborhood inside the loop.
rows_per_area = df['neighborhood'].value_counts()
for area in polarity_score:
    # int(...) keeps the results plain Python floats, as before; a key with
    # no matching rows still raises ZeroDivisionError, as before.
    count = int(rows_per_area.get(area, 0))
    polarity_score[area] /= count
    subjectivity_score[area] /= count

In [76]:
# Collect one (neighborhood, polarity, subjectivity) tuple per neighborhood,
# in the key order of `polarity_score`.
area_list = [
    (name, polarity_score[name], subjectivity_score[name])
    for name in polarity_score
]

In [77]:
# make dataframe
# One row per neighborhood, ranked by PolarityScore from highest to lowest,
# with the index renumbered 0..n-1 after sorting.
df_textbolb = (
    pd.DataFrame(area_list, columns=['Neighborhood', 'PolarityScore','SubjectivityScore'])
    .sort_values(by='PolarityScore', ascending=False)
    .reset_index(drop=True)
)

In [78]:
# save
# Write the ranked TextBlob scores to CSV without the row index. The output
# folder is created first so the cell also works when `sentiment_result/`
# does not exist yet (`to_csv` does not create missing directories).
# NOTE(review): "textbolb" looks like a typo of "textblob"; the file name is
# kept unchanged because other code may read it.
textbolb_path = 'sentiment_result/textbolb_sentiment.csv'
os.makedirs(os.path.dirname(textbolb_path), exist_ok=True)
df_textbolb.to_csv(textbolb_path, index=False)

# AFINN

In [79]:
from afinn import Afinn
# Initialize the sentiment analyzer
# A single Afinn instance (default constructor arguments) is created here and
# reused for every post in the scoring loop below.
afinn = Afinn()

In [80]:
# Using texts for sentiment analysis
# Sum the AFINN score of every post, grouped by neighborhood.
# Result: `neighborhood_score` maps neighborhood -> running total (keys in
# first-seen order); the averaging cell that follows divides the totals by
# the number of posts per neighborhood.
# NOTE: this rebinds `neighborhood_score`, the same name the VADER section
# uses, so the VADER totals are no longer reachable after this cell runs.
neighborhood_score = {}
# Walk only the two needed columns in lockstep; `iterrows()` would build a
# full Series object for every row just to read two fields.
for hood, post_text in zip(df['neighborhood'], df['text']):
    score = afinn.score(post_text)
    # `.get(hood, 0)` handles the first occurrence of a neighborhood without
    # a separate initialise-then-add branch.
    neighborhood_score[hood] = neighborhood_score.get(hood, 0) + score

In [81]:
# take the average
# Turn each neighborhood's summed AFINN score into a mean by dividing by the
# number of rows that neighborhood has in `df`.
# NOTE: the division happens in place, so running this cell a second time
# without first re-running the cell that builds `neighborhood_score` would
# divide the values again.
# Count rows per neighborhood once, instead of filtering the whole frame for
# every neighborhood inside the loop.
rows_per_area = df['neighborhood'].value_counts()
for area in neighborhood_score:
    # int(...) keeps the result a plain Python float, as before; a key with
    # no matching rows still raises ZeroDivisionError, as before.
    count = int(rows_per_area.get(area, 0))
    neighborhood_score[area] /= count

In [82]:
# make dataframe
# One row per neighborhood, ranked by SentimentScore from highest to lowest,
# with the index renumbered 0..n-1 after sorting.
df_afinn = (
    pd.DataFrame(list(neighborhood_score.items()), columns=['Neighborhood', 'SentimentScore'])
    .sort_values(by='SentimentScore', ascending=False)
    .reset_index(drop=True)
)

In [83]:
# save
# Write the ranked AFINN scores to CSV without the row index. The output
# folder is created first so the cell also works when `sentiment_result/`
# does not exist yet (`to_csv` does not create missing directories).
afinn_path = 'sentiment_result/afinn_sentiment.csv'
os.makedirs(os.path.dirname(afinn_path), exist_ok=True)
df_afinn.to_csv(afinn_path, index=False)

# Some Ideas
1. Neural Network
- This seems feasible, but it would be a classification problem. We could weight the class obtained from the classification together with the scores from the previous analysis.
2. Transformers
- This is the most powerful option; I will try to implement it if I have time.