# sentiment-analysis.py
# Scrapes hot posts from several investing subreddits, tallies stock-ticker
# mentions in well-upvoted comments, and reports average VADER sentiment
# per ticker plus a treemap of mention counts.
import praw
import heapq
import squarify
import pandas as pd
from data import *
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Uncomment this for preloaded results
# from contextlib import redirect_stdout
# from out import *
# Read-only Reddit API client; fill in credentials from your Reddit app.
reddit = praw.Reddit(
    user_agent="Comment Extraction",
    client_id="",      # TODO: set your Reddit app's client id
    client_secret="",  # TODO: set your Reddit app's client secret
)
# How many top-mentioned tickers to analyze and display.
PICKS_NUMBER = 10
# Subreddits to scan and the post flairs considered relevant.
subs = ['superstonk', 'wallstreetbets', 'stocks', 'investing']
flairs = ['Company Analysis', 'Gain', 'Discussion', 'Daily Discussion', 'Weekend Discussion', 'DD', 'Fundamentals/DD', "Technical Analysis", "YOLO", "Industry Discussion"]
# Minimum upvotes for a post / comment to be considered.
post_min_upvotes = 20
comment_min_upvote = 3
# ticker -> total mention count; ticker -> list of comment bodies mentioning it.
ticker_mentions, ticker_comments = {}, {}
# Walk each subreddit's "hot" listing and tally ticker mentions in
# sufficiently upvoted comments under posts with a whitelisted flair.
for sub in subs:
    subreddit = reddit.subreddit(sub)
    for submission in subreddit.hot():
        # Skip posts without a relevant flair or with too few upvotes.
        if submission.link_flair_text in flairs and submission.ups > post_min_upvotes:
            submission.comment_sort = 'new'
            comments = submission.comments
            # Expand up to 10 "load more comments" stubs for deeper coverage.
            submission.comments.replace_more(limit=10)
            for comment in comments:
                if comment.score > comment_min_upvote:
                    for word in comment.body.split():
                        # A mention is an all-caps known ticker not on the skip list
                        # (tickers/skip come from `data` via the star import above).
                        if word in tickers and word not in skip and word.isupper():
                            if word in ticker_mentions:
                                ticker_mentions[word] += 1
                                ticker_comments[word].append(comment.body)
                            else:
                                ticker_mentions[word] = 1
                                ticker_comments[word] = [comment.body]
# Uncomment (with the redirect_stdout import above) to dump results to out.py:
# with open('out.py', 'w') as f:
#     with redirect_stdout(f):
#         print(ticker_comments)
#         print(ticker_mentions)
# Rank tickers by mention count, then average VADER sentiment components
# (neg/neu/pos/compound) over every comment that mentioned each ticker.
top_picks = heapq.nlargest(PICKS_NUMBER, ticker_mentions, key=ticker_mentions.get)
print(top_picks)
scores = {}
count = {ticker: 0 for ticker in top_picks}
vader = SentimentIntensityAnalyzer()
# Bias VADER's lexicon with finance-specific slang weights (from `data`).
vader.lexicon.update(words_to_update)
for stock in top_picks:
    # Accumulate component sums explicitly instead of mutating VADER's
    # returned dict and reading a leftover loop variable afterwards.
    totals = {}
    for comment in ticker_comments[stock]:
        polarity = vader.polarity_scores(comment)
        count[stock] += 1
        for key, value in polarity.items():
            totals[key] = totals.get(key, 0.0) + value
    # Average each component and format to 3 decimals for display.
    scores[stock] = {key: f"{total / count[stock]:.3f}" for key, total in totals.items()}
# Present per-ticker sentiment as a table: rows = tickers, columns = components.
df = pd.DataFrame(scores)
# Rename VADER's component keys by NAME rather than assigning labels by
# position, so a change in key order cannot silently mislabel columns.
df = df.rename(index={'neg': 'Bearish', 'neu': 'Neutral', 'pos': 'Bullish', 'compound': 'Compound'})
df = df.T
print(df)
# Treemap of the top picks: tile area is proportional to mention count.
mention_sizes = [ticker_mentions[ticker] for ticker in top_picks]
squarify.plot(sizes=mention_sizes, label=list(top_picks), alpha=.7)
plt.axis('off')
plt.title(f"{PICKS_NUMBER} most mentioned picks")
plt.show()