### Applying Sentiment Analysis on Reddit Data

In [103]:
import flair

In [104]:
# Load the 'en-sentiment' TextClassifier from flair once, at module level;
# get_sentiment() relies on this global `model`.
model = flair.models.TextClassifier.load('en-sentiment')

In [105]:
def get_sentiment(text):
    """Return the first sentiment label flair predicts for ``text``.

    The text is wrapped in a ``flair.data.Sentence`` and passed to the
    module-level ``model``; the prediction is then read back from the
    sentence's ``labels``. Callers in this notebook use the returned label's
    ``.value`` and ``.score`` attributes.
    """
    flair_sentence = flair.data.Sentence(text)
    model.predict(flair_sentence)
    return flair_sentence.labels[0]

In [106]:
import pandas as pd

In [107]:
# Read the pipe-delimited CSV of Reddit posts.
# NOTE(review): going by the file name and the `organizations` column, this is
# presumably the output of an earlier NER step — confirm.
df = pd.read_csv('ner_reddit_investing.csv', sep='|')

In [108]:
# Preview the first rows of the loaded posts.
df.head()

Unnamed: 0,name,created_utc,subreddit,title,selftext,upvote_ratio,ups,downs,score,organizations
0,t3_157aure,1690107000.0,investing,Honest opinions? Good idea/bad idea,"Hi all,\n\nAbout to lock in £20,000 (yearly ta...",1.0,2.0,0.0,2.0,['ISA']
1,t3_1579rid,1690103000.0,investing,Daily General Discussion and Advice Thread - J...,Have a general question? Want to offer some c...,1.0,1.0,0.0,1.0,"['FAQ', 'wiki']"
2,t3_15791ul,1690100000.0,investing,Thoughts about TSLA as a long term investment.,Would love to hear your thoughts about TSLA as...,0.5,0.0,0.0,0.0,[]
3,t3_1573gnn,1690082000.0,investing,Warren buffet quote y’all might like,"""It is a terrible mistake for investors with l...",0.78,41.0,0.0,41.0,[]
4,t3_15714rh,1690075000.0,investing,How can I make investing into the stock market...,Hello I’ve recently been researching about inv...,0.2,0.0,0.0,0.0,[]


In [109]:
# Classify every post body. Each entry of the new `sentiment` column is the
# label object returned by get_sentiment (its .value / .score are read later),
# not a plain string or number.
df['sentiment'] = df['selftext'].apply(get_sentiment)

In [110]:
# Preview again to check the newly added `sentiment` column.
df.head()

Unnamed: 0,name,created_utc,subreddit,title,selftext,upvote_ratio,ups,downs,score,organizations,sentiment
0,t3_157aure,1690107000.0,investing,Honest opinions? Good idea/bad idea,"Hi all,\n\nAbout to lock in £20,000 (yearly ta...",1.0,2.0,0.0,2.0,['ISA'],"Sentence[114]: ""Hi all, About to lock in £20,..."
1,t3_1579rid,1690103000.0,investing,Daily General Discussion and Advice Thread - J...,Have a general question? Want to offer some c...,1.0,1.0,0.0,1.0,"['FAQ', 'wiki']","Sentence[378]: ""Have a general question? Want..."
2,t3_15791ul,1690100000.0,investing,Thoughts about TSLA as a long term investment.,Would love to hear your thoughts about TSLA as...,0.5,0.0,0.0,0.0,[],"Sentence[78]: ""Would love to hear your thought..."
3,t3_1573gnn,1690082000.0,investing,Warren buffet quote y’all might like,"""It is a terrible mistake for investors with l...",0.78,41.0,0.0,41.0,[],"Sentence[56]: """"It is a terrible mistake for i..."
4,t3_15714rh,1690075000.0,investing,How can I make investing into the stock market...,Hello I’ve recently been researching about inv...,0.2,0.0,0.0,0.0,[],"Sentence[110]: ""Hello I’ve recently been resea..."


### Extracting a Sentiment Score for Each Organization in the Data

In [111]:
import ast

In [112]:
# The CSV stores each organization list as its string repr
# (e.g. "['FAQ', 'wiki']"), so parse it back into a real Python list.
# Values that are already lists are passed through unchanged: without this
# guard, re-running the cell would hand a list to ast.literal_eval, which
# raises ValueError on anything that is not a string or AST node.
df['organizations'] = df['organizations'].apply(
    lambda orgs: orgs if isinstance(orgs, list) else ast.literal_eval(orgs)
)

In [113]:
# Collect, for every organization, the classifier score of each post that
# lists it, bucketed by the direction predicted for that post.
# Resulting shape: {org: {'POSITIVE': [scores...], 'NEGATIVE': [scores...]}}
sentiment = {}

for label, orgs in zip(df['sentiment'], df['organizations']):
    direction, score = label.value, label.score
    for org in orgs:
        # Create the two empty buckets the first time an organization is seen.
        buckets = sentiment.setdefault(org, {'POSITIVE': [], 'NEGATIVE': []})
        buckets[direction].append(score)


In [114]:
# Spot-check the scores collected for a single organization.
sentiment['Apple']

{'POSITIVE': [0.5901525020599365],
 'NEGATIVE': [0.9996248483657837,
  0.9965734481811523,
  0.9994951486587524,
  0.9867174625396729,
  0.9999927282333374,
  0.5588285326957703,
  0.9994569420814514,
  0.9920511841773987,
  0.9998519420623779]}

In [115]:
# Reduce the per-post scores in `sentiment` to one summary record per
# organization:
#   positive  - mean score of the posts labelled POSITIVE (0 if there are none)
#   negative  - mean score of the posts labelled NEGATIVE (0 if there are none)
#   frequency - number of scored posts that list the organization
#   score     - (sum of positive scores - sum of negative scores) / frequency
avg_sentiment = []

for org, scores_by_direction in sentiment.items():
    pos_scores = scores_by_direction['POSITIVE']
    neg_scores = scores_by_direction['NEGATIVE']
    pos_freq = len(pos_scores)
    neg_freq = len(neg_scores)

    # Keep the sums in locals rather than writing them back into `sentiment`.
    # Overwriting the score lists with floats would make this cell fail on a
    # second run (len() of a float) and would silently change what
    # `sentiment[...]` holds for every other cell.
    pos_total = sum(pos_scores)
    neg_total = sum(neg_scores)

    # Net score: negative posts count against the organization, and the
    # result is normalised by the total number of posts.
    avg = (pos_total - neg_total) / (pos_freq + neg_freq)
    pos_avg = pos_total / pos_freq if pos_freq != 0 else 0
    neg_avg = neg_total / neg_freq if neg_freq != 0 else 0

    avg_sentiment.append({
        'entity': org,
        'positive': pos_avg,
        'negative': neg_avg,
        'frequency': pos_freq + neg_freq,
        'score': avg
    })


In [116]:
# Inspect the first five summary records.
avg_sentiment[:5]

[{'entity': 'ISA',
  'positive': 0.9783388376235962,
  'negative': 0.9410438438256582,
  'frequency': 7,
  'score': -0.6668463179043361},
 {'entity': 'FAQ',
  'positive': 0,
  'negative': 0.9986215829849243,
  'frequency': 47,
  'score': -0.9986215829849243},
 {'entity': 'wiki',
  'positive': 0,
  'negative': 0.9986215829849243,
  'frequency': 47,
  'score': -0.9986215829849243},
 {'entity': 'Anheuser Busch',
  'positive': 0,
  'negative': 0.9999972581863403,
  'frequency': 1,
  'score': -0.9999972581863403},
 {'entity': 'SCHD',
  'positive': 0.7488003373146057,
  'negative': 0,
  'frequency': 1,
  'score': 0.7488003373146057}]

In [117]:
# One row per organization; columns come from the record keys
# (entity, positive, negative, frequency, score).
sentiment_df = pd.DataFrame(avg_sentiment)

In [118]:
# Preview the per-organization summary table.
sentiment_df.head()

Unnamed: 0,entity,positive,negative,frequency,score
0,ISA,0.978339,0.941044,7,-0.666846
1,FAQ,0.0,0.998622,47,-0.998622
2,wiki,0.0,0.998622,47,-0.998622
3,Anheuser Busch,0.0,0.999997,1,-0.999997
4,SCHD,0.7488,0.0,1,0.7488


In [120]:
# Keep only organizations whose `frequency` (number of scored posts) is
# greater than 3, then show the first ten of the remaining rows.
is_frequent = sentiment_df['frequency'] > 3
sentiment_df = sentiment_df.loc[is_frequent]
sentiment_df.head(10)

Unnamed: 0,entity,positive,negative,frequency,score
0,ISA,0.978339,0.941044,7,-0.666846
1,FAQ,0.0,0.998622,47,-0.998622
2,wiki,0.0,0.998622,47,-0.998622
5,BND,0.87377,0.965144,6,-0.352173
13,Fidelity,0.817407,0.975444,44,-0.567978
14,EJ,0.920585,0.996959,4,-0.517573
17,HSA,0.571505,0.985512,12,-0.855761
19,NVDA,0.917582,0.853537,4,-0.410757
21,SPY,0.861085,0.99665,17,-0.778093
22,NAV,0.886413,0.999761,4,-0.528217


In [122]:
# Ten organizations with the highest net `score` (sorted in descending order).
sentiment_df.sort_values('score', ascending=False).head(10)

Unnamed: 0,entity,positive,negative,frequency,score
163,Amazon,0.997841,0.982153,4,0.007844
250,NVIDIA,0.76488,0.999854,4,-0.117487
29,EU,0.790761,0.917415,5,-0.234145
5,BND,0.87377,0.965144,6,-0.352173
210,ATH,0.952607,0.827895,4,-0.38277
19,NVDA,0.917582,0.853537,4,-0.410757
24,ROTH,0.948825,0.997869,7,-0.44167
130,MSFT,0.907754,0.998903,7,-0.454144
65,Tesla,0.940494,0.923639,9,-0.509387
14,EJ,0.920585,0.996959,4,-0.517573
