In [1]:
import spacy

In [2]:
import flair

In [3]:
# Load spaCy's small English pipeline once; get_org() reuses it for
# named-entity recognition on every post.
nlp=spacy.load('en_core_web_sm')



In [4]:
import pandas as pd

In [5]:
# Read the Reddit posts; the file is pipe-delimited ('|'), not comma-delimited.
# NOTE(review): the path is relative to the notebook's working directory, and
# the 'Unnamed: 0' column in the preview suggests the file was saved together
# with its index - confirm whether that column is needed.
df=pd.read_csv('reddit-investing.csv/reddit-investing.csv',sep='|')

In [6]:
df.head()

Unnamed: 0,created_utc,downs,id,score,selftext,subreddit,title,ups,upvote_ratio
0,1614290000.0,0.0,t3_lshtjn,10.0,Bloomberg article: [https://www.bloomberg.com/...,investing,Fed Views Rising Yields as Bullish Sign Reflec...,10.0,0.86
1,1614286000.0,0.0,t3_lsgahw,56.0,Given the recent downturn in stocks especially...,investing,ARK ETFs implosion risk ------------------------,56.0,0.83
2,1614283000.0,0.0,t3_lsf8td,1.0,[https://twitter.com/desogames/status/13649710...,investing,The Counter-Party Risk Bubble,1.0,0.53
3,1614282000.0,0.0,t3_lsf3nh,6.0,"When you think of futures, what comes to your ...",investing,Futures were made for days like these,6.0,0.62
4,1614278000.0,0.0,t3_lsdcib,3.0,I've been on this sub for quite some time and ...,investing,Let's talk about liquidity premiums,3.0,0.67


In [9]:
def get_org(text):
    """Return the distinct organization names spaCy finds in ``text``.

    Runs the module-level ``nlp`` pipeline over ``text`` and collects the
    surface text of every entity labelled ``ORG``.

    Parameters
    ----------
    text : str
        Raw post body. Non-string values (for example the float NaN pandas
        uses for a missing ``selftext``) and blank strings produce an empty
        list instead of being handed to spaCy.

    Returns
    -------
    list of str
        Unique organization mentions, in order of first appearance.
    """
    # Missing text arrives as float NaN / None; there is nothing to parse, and
    # the spaCy pipeline is only meant to be called with text.
    if not isinstance(text, str) or not text.strip():
        return []
    doc = nlp(text)
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is reproducible across kernels (the order of list(set(...))
    # depends on string-hash randomization).
    return list(dict.fromkeys(ent.text for ent in doc.ents if ent.label_ == 'ORG'))

In [10]:
# Extract ORG entities from every post body; each cell of the new column holds
# a (possibly empty) list of organization strings.
df['Organization']=df['selftext'].apply(get_org)

In [11]:
df.head()

Unnamed: 0,created_utc,downs,id,score,selftext,subreddit,title,ups,upvote_ratio,Organization
0,1614290000.0,0.0,t3_lshtjn,10.0,Bloomberg article: [https://www.bloomberg.com/...,investing,Fed Views Rising Yields as Bullish Sign Reflec...,10.0,0.86,"[the Atlanta Fed’s, Bostic, St. Louis Fed, Fed..."
1,1614286000.0,0.0,t3_lsgahw,56.0,Given the recent downturn in stocks especially...,investing,ARK ETFs implosion risk ------------------------,56.0,0.83,"[Bear, ARK]"
2,1614283000.0,0.0,t3_lsf8td,1.0,[https://twitter.com/desogames/status/13649710...,investing,The Counter-Party Risk Bubble,1.0,0.53,"[OWN, Citadel, &gt;write, ITM]"
3,1614282000.0,0.0,t3_lsf3nh,6.0,"When you think of futures, what comes to your ...",investing,Futures were made for days like these,6.0,0.62,[NQ]
4,1614278000.0,0.0,t3_lsdcib,3.0,I've been on this sub for quite some time and ...,investing,Let's talk about liquidity premiums,3.0,0.67,[]


In [12]:
# Load the flair text classifier registered as 'en-sentiment'; get_sentiment()
# uses this module-level object for every prediction.
model=flair.models.TextClassifier.load('en-sentiment')

2022-12-08 12:02:59,745 loading file C:\Users\ratna\.flair\models\sentiment-en-mix-distillbert_4.pt


In [13]:
def get_sentiment(text):
    """Classify ``text`` with the loaded flair model and return its first label.

    The text is wrapped in a ``flair.data.Sentence``, scored in place by the
    module-level ``model``, and the first entry of the sentence's ``labels``
    is returned unchanged (later cells read its ``.value`` and ``.score``).
    """
    flair_sentence = flair.data.Sentence(text)
    model.predict(flair_sentence)
    return flair_sentence.labels[0]

In [14]:
# Classify every post body. The column stores the label objects returned by
# get_sentiment(); their .value / .score attributes are read during aggregation.
df['Sentiment']=df['selftext'].apply(get_sentiment)

In [15]:
df.head()

Unnamed: 0,created_utc,downs,id,score,selftext,subreddit,title,ups,upvote_ratio,Organization,Sentiment
0,1614290000.0,0.0,t3_lshtjn,10.0,Bloomberg article: [https://www.bloomberg.com/...,investing,Fed Views Rising Yields as Bullish Sign Reflec...,10.0,0.86,"[the Atlanta Fed’s, Bostic, St. Louis Fed, Fed...",NEGATIVE (0.9916)
1,1614286000.0,0.0,t3_lsgahw,56.0,Given the recent downturn in stocks especially...,investing,ARK ETFs implosion risk ------------------------,56.0,0.83,"[Bear, ARK]",NEGATIVE (0.9975)
2,1614283000.0,0.0,t3_lsf8td,1.0,[https://twitter.com/desogames/status/13649710...,investing,The Counter-Party Risk Bubble,1.0,0.53,"[OWN, Citadel, &gt;write, ITM]",NEGATIVE (0.9996)
3,1614282000.0,0.0,t3_lsf3nh,6.0,"When you think of futures, what comes to your ...",investing,Futures were made for days like these,6.0,0.62,[NQ],NEGATIVE (0.9999)
4,1614278000.0,0.0,t3_lsdcib,3.0,I've been on this sub for quite some time and ...,investing,Let's talk about liquidity premiums,3.0,0.67,[],NEGATIVE (0.9893)


In [19]:
# Map each organization to the confidence scores of the posts that mention it,
# bucketed by the predicted direction of the post.
sentiment = dict()

for label, orgs in zip(df['Sentiment'], df['Organization']):
    direction, score = label.value, label.score
    for org in orgs:
        # First sighting of an organization creates its two empty buckets.
        buckets = sentiment.setdefault(org, {'POSITIVE': [], 'NEGATIVE': []})
        buckets[direction].append(score)

In [20]:
# Summarize sentiment per organization.
#
# The totals are kept in local variables and `sentiment` itself is left
# untouched, so this cell can be re-run without first rebuilding `sentiment`
# (overwriting the score lists with their sums would make a second run fail
# on len() of a float).
avg_sentiment = []

for org, buckets in sentiment.items():
    pos_scores = buckets['POSITIVE']
    neg_scores = buckets['NEGATIVE']
    pos_freq = len(pos_scores)
    neg_freq = len(neg_scores)

    # Start from 0.0 so an empty bucket still sums to a float.
    pos_total = sum(pos_scores, 0.0)
    neg_total = sum(neg_scores, 0.0)

    # Net score: positive confidences count for, negative ones against,
    # averaged over every post that mentions the organization. The divisor is
    # non-zero as long as each key in `sentiment` was created together with an
    # appended score.
    avg = (pos_total - neg_total) / (pos_freq + neg_freq)

    # Per-direction mean confidence; 0 when there are no posts in that direction.
    pos_avg = pos_total / pos_freq if pos_freq != 0 else 0
    neg_avg = neg_total / neg_freq if neg_freq != 0 else 0

    avg_sentiment.append({
        "entity":org,
        "positive":pos_avg,
        "negative":neg_avg,
        "frequency": pos_freq + neg_freq,
        "score":avg
    })

In [21]:
avg_sentiment[:2]

[{'entity': 'the Atlanta Fed’s',
  'positive': 0,
  'negative': 0.9916453957557678,
  'frequency': 1,
  'score': -0.9916453957557678},
 {'entity': 'Bostic',
  'positive': 0,
  'negative': 0.9916453957557678,
  'frequency': 1,
  'score': -0.9916453957557678}]

In [22]:
# One row per entity; columns come from the keys of the dicts in avg_sentiment
# (entity, positive, negative, frequency, score).
sentiment_df=pd.DataFrame(avg_sentiment)

In [23]:
sentiment_df.head()

Unnamed: 0,entity,positive,negative,frequency,score
0,the Atlanta Fed’s,0.0,0.991645,1,-0.991645
1,Bostic,0.0,0.991645,1,-0.991645
2,St. Louis Fed,0.0,0.991645,1,-0.991645
3,Federal Reserve,0.981378,0.967832,3,-0.318095
4,Powell’s,0.0,0.991645,1,-0.991645


In [24]:
# Keep only entities with frequency strictly greater than 3 (i.e. at least 4
# mentions) - presumably to drop entities with too few posts for a meaningful
# average. The original index labels are retained, so they are no longer contiguous.
sentiment_df = sentiment_df[sentiment_df['frequency'] > 3]

In [25]:
sentiment_df.head()

Unnamed: 0,entity,positive,negative,frequency,score
6,Fed,0.775549,0.993919,14,-0.614747
7,Treasury,0.798257,0.997683,6,-0.399036
9,ARK,0.925879,0.905395,22,-0.239477
13,ITM,0.0,0.947678,4,-0.947678
15,eBay,0.939906,0.9933,5,-0.220018


In [26]:
# Show the 10 entities with the highest net score, most positive first.
sentiment_df.sort_values('score',ascending=False).head(10)

Unnamed: 0,entity,positive,negative,frequency,score
2718,Samsung,0.866619,0.0,4,0.866619
1507,IBM,0.988368,0.88397,4,0.520283
2447,Sony,0.97761,0.985206,7,0.416806
403,Google,0.884704,0.766915,7,0.412813
402,&,0.84915,0.982522,4,0.391232
580,Intel,0.845206,0.97697,7,0.324584
497,Company,0.879456,0.980217,13,0.307249
1134,ESG,0.994327,0.996255,5,0.198094
418,YouTube,0.978773,0.726951,6,0.125911
341,MA,0.855846,0.998887,5,0.113953


In [27]:
# Persist the summary to the working directory. No index argument is passed,
# so pandas also writes the DataFrame index as an unnamed first column.
sentiment_df.to_csv('NER_Sentiment_using_flair.csv')