In [5]:
!pip install -U plotly
!pip install text2emotion

Requirement already up-to-date: plotly in /usr/local/lib/python3.7/dist-packages (4.14.3)


In [6]:
# Import Data

import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import text2emotion as te
from time import sleep

In [8]:
posts = pd.read_csv("reddit_wsb.csv")

In [9]:
# Get day and hour of each post, and create dummy column for post count (for aggregation)

posts["date"] = pd.to_datetime(posts.timestamp).dt.date
posts["hour"] = pd.to_datetime(posts.timestamp).dt.hour
posts["post_count"] = 1
posts

Unnamed: 0,title,score,id,url,comms_num,created,body,timestamp,date,hour,post_count
0,"It's not about the money, it's about sending a...",55,l6ulcx,https://v.redd.it/6j75regs72e61,6,1.611863e+09,,2021-01-28 21:37:41,2021-01-28,21,1
1,Math Professor Scott Steiner says the numbers ...,110,l6uibd,https://v.redd.it/ah50lyny62e61,23,1.611862e+09,,2021-01-28 21:32:10,2021-01-28,21,1
2,Exit the system,0,l6uhhn,https://www.reddit.com/r/wallstreetbets/commen...,47,1.611862e+09,The CEO of NASDAQ pushed to halt trading “to g...,2021-01-28 21:30:35,2021-01-28,21,1
3,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,29,l6ugk6,https://sec.report/Document/0001193125-21-019848/,74,1.611862e+09,,2021-01-28 21:28:57,2021-01-28,21,1
4,"Not to distract from GME, just thought our AMC...",71,l6ufgy,https://i.redd.it/4h2sukb662e61.jpg,156,1.611862e+09,,2021-01-28 21:26:56,2021-01-28,21,1
...,...,...,...,...,...,...,...,...,...,...,...
36785,"A M C YOLO Update — Feb 25, 2021",110,lsj29c,https://i.redd.it/dcq98dqccpj61.jpg,57,1.614322e+09,,2021-02-26 08:43:14,2021-02-26,8,1
36786,Hold the line you diamond-handed apes!,17,lsizzx,https://v.redd.it/7vc3xe5tbpj61,5,1.614322e+09,,2021-02-26 08:40:30,2021-02-26,8,1
36787,Did I do it right by not selling when I was up...,670,lsiy4p,https://i.redd.it/f3vg8ttfbpj61.jpg,89,1.614321e+09,,2021-02-26 08:38:05,2021-02-26,8,1
36788,"Rocket Companies ($RKT), who owns Rocket Mortg...",198,lsiwsi,https://www.reddit.com/r/wallstreetbets/commen...,79,1.614321e+09,Rocket Companies (ticker RKT) is Rocket Mortga...,2021-02-26 08:36:22,2021-02-26,8,1


In [10]:
# Aggregate by day, and by hour

by_day = posts.groupby("date").aggregate({
    "score": "mean", 
    "comms_num": "mean", 
    "post_count": "sum"
    })
by_hour = posts.groupby(["date", "hour"]).aggregate({
    "score": "mean", 
    "comms_num": "mean", 
    "post_count": "sum"
    })

In [11]:
# Plot mean comment count, mean reddit post score (a metric of engagement on a post), and the number of total posts by hour

import plotly.express as px
pd.options.plotting.backend = "plotly"
by_hour.reset_index().plot(y = "comms_num")

In [12]:
by_hour.reset_index().plot(y = "score")

In [13]:
by_hour.reset_index().plot(y = "post_count")

In [14]:
# Plot mean comment count, mean reddit post score (a metric of engagement on a post), 
# and the number of total posts by day

by_day.plot(y = "comms_num")

In [15]:
by_day.plot(y = "score")

In [16]:
by_day.plot(y = "post_count")

In [17]:
# Combine post title and body text for each post

posts["alltexts"] = posts.title + " "+posts.body.astype(str)

In [18]:
# Get ratio of each emotion

emotions = []
from tqdm.notebook import tqdm
tqdm().pandas()
for text in tqdm(posts.alltexts):
    emotions.append(te.get_emotion(str(text)))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version



HBox(children=(FloatProgress(value=0.0, max=36790.0), HTML(value='')))




KeyboardInterrupt: ignored

In [19]:
emotion_data = pd.DataFrame.from_dict(emotions)
posts['happy'] = emotion_data.Happy
posts['angry'] = emotion_data.Angry
posts['surprise'] = emotion_data.Surprise
posts['sad'] = emotion_data.Sad
posts['fear'] = emotion_data.Fear

In [20]:
posts

Unnamed: 0,title,score,id,url,comms_num,created,body,timestamp,date,hour,post_count,alltexts,happy,angry,surprise,sad,fear
0,"It's not about the money, it's about sending a...",55,l6ulcx,https://v.redd.it/6j75regs72e61,6,1.611863e+09,,2021-01-28 21:37:41,2021-01-28,21,1,"It's not about the money, it's about sending a...",0.0,0.00,0.00,0.00,1.00
1,Math Professor Scott Steiner says the numbers ...,110,l6uibd,https://v.redd.it/ah50lyny62e61,23,1.611862e+09,,2021-01-28 21:32:10,2021-01-28,21,1,Math Professor Scott Steiner says the numbers ...,0.0,0.25,0.25,0.25,0.25
2,Exit the system,0,l6uhhn,https://www.reddit.com/r/wallstreetbets/commen...,47,1.611862e+09,The CEO of NASDAQ pushed to halt trading “to g...,2021-01-28 21:30:35,2021-01-28,21,1,Exit the system The CEO of NASDAQ pushed to ha...,0.0,0.07,0.00,0.25,0.68
3,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,29,l6ugk6,https://sec.report/Document/0001193125-21-019848/,74,1.611862e+09,,2021-01-28 21:28:57,2021-01-28,21,1,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,0.0,0.00,0.00,1.00,0.00
4,"Not to distract from GME, just thought our AMC...",71,l6ufgy,https://i.redd.it/4h2sukb662e61.jpg,156,1.611862e+09,,2021-01-28 21:26:56,2021-01-28,21,1,"Not to distract from GME, just thought our AMC...",0.0,0.00,0.50,0.50,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36785,"A M C YOLO Update — Feb 25, 2021",110,lsj29c,https://i.redd.it/dcq98dqccpj61.jpg,57,1.614322e+09,,2021-02-26 08:43:14,2021-02-26,8,1,"A M C YOLO Update — Feb 25, 2021 nan",,,,,
36786,Hold the line you diamond-handed apes!,17,lsizzx,https://v.redd.it/7vc3xe5tbpj61,5,1.614322e+09,,2021-02-26 08:40:30,2021-02-26,8,1,Hold the line you diamond-handed apes! nan,,,,,
36787,Did I do it right by not selling when I was up...,670,lsiy4p,https://i.redd.it/f3vg8ttfbpj61.jpg,89,1.614321e+09,,2021-02-26 08:38:05,2021-02-26,8,1,Did I do it right by not selling when I was up...,,,,,
36788,"Rocket Companies ($RKT), who owns Rocket Mortg...",198,lsiwsi,https://www.reddit.com/r/wallstreetbets/commen...,79,1.614321e+09,Rocket Companies (ticker RKT) is Rocket Mortga...,2021-02-26 08:36:22,2021-02-26,8,1,"Rocket Companies ($RKT), who owns Rocket Mortg...",,,,,


In [25]:
posts.to_csv('5_emotions.csv')

In [21]:
# Aggregate emotional word data by day and hour

by_day = posts.groupby("date").mean()
by_hour = posts.groupby(["date", "hour"]).mean()

In [23]:
by_hour

Unnamed: 0_level_0,Unnamed: 1_level_0,score,comms_num,created,post_count,happy,angry,surprise,sad,fear
date,hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-09-29,3,4.000000,11.000000,1.601340e+09,1.0,,,,,
2021-01-28,9,140.176471,26.254902,1.611818e+09,1.0,0.089804,0.056471,0.195098,0.169020,0.313137
2021-01-28,10,24384.785714,2935.285714,1.611824e+09,1.0,0.110714,0.103571,0.120000,0.230000,0.360000
2021-01-28,11,6259.631579,632.105263,1.611826e+09,1.0,0.118947,0.063158,0.178421,0.219474,0.367368
2021-01-28,12,12820.333333,674.750000,1.611830e+09,1.0,0.147500,0.040000,0.158333,0.168333,0.485000
...,...,...,...,...,...,...,...,...,...,...
2021-03-01,13,29.000000,36.500000,1.614599e+09,1.0,,,,,
2021-03-01,14,6421.250000,137.750000,1.614602e+09,1.0,,,,,
2021-03-01,15,79.166667,22.500000,1.614605e+09,1.0,,,,,
2021-03-01,16,59.500000,29.000000,1.614609e+09,1.0,,,,,


In [24]:
# Plot ratio of words of each emotion by hour

fig = by_hour.reset_index().plot(y = "happy", labels={
                     "happy": "Ratio of words of each emotion", 
                     "index": "Hours since 9:00am on 1/28/2021"
                 },
                title="Trends in emotions expressed in r/WallStreetBets posts by hour")
fig.add_scatter(y=by_hour['sad'], mode='lines', name = "Sad")
fig.add_scatter(y=by_hour['angry'], mode='lines', name = "Angry")
fig.add_scatter(y=by_hour['surprise'], mode='lines', name = "Surprise")
fig.add_scatter(y=by_hour['fear'], mode='lines', name = "Fear")
fig.show()

In [None]:
# Plot ratio of words of each emotion by day

fig = by_day.reset_index().plot(y = "happy", labels={
                     "happy": "Ratio of words of each emotion", 
                     "index": "Days since 1/28/2021"
                 },
title="Trends in emotions expressed in r/WallStreetBets posts by day")
fig.add_scatter(y=by_day['sad'], mode='lines', name = "Sad")
fig.add_scatter(y=by_day['angry'], mode='lines', name = "Angry")
fig.add_scatter(y=by_day['surprise'], mode='lines', name = "Surprise")
fig.add_scatter(y=by_day['fear'], mode='lines', name = "Fear")
fig.show()