In [1]:
import beneath
import pandas as pd
import plotly.express as px
from datetime import datetime

In [2]:
# Symbols that are removed from the stock-mention data before ranking.
# Presumably these are everyday words and abbreviations that get mistaken for
# tickers -- confirm before adding a real ticker here.
# Kept alphabetical, grouped by first letter, so duplicates are easy to spot.
BLACKLIST = {
    "AI", "ALL", "AM", "ANY", "ARE", "ATH",
    "BE", "BIG", "BY",
    "CAN", "CASH",
    "DD",
    "EOD", "ET",
    "FOMO", "FOR",
    "GO", "GOOD",
    "HAS", "HUGE",
    "III", "IMO", "IT",
    "LOVE", "LOW",
    "ME",
    "NEW", "NEXT", "NOW",
    "ON", "ONE", "OR", "OUT",
    "PLAY", "PM", "PT",
    "RE", "REAL", "RNG", "RSI",
    "SEE", "SI", "SO",
    "TA", "TV",
    "UK", "USA",
    "VERY",
}

In [71]:
# Account names excluded from the per-author rankings (moderator and bot
# accounts, going by the variable name and entries such as "AutoModerator").
MODERATORS = {
    "AutoModerator",
    "CHAINSAW_VASECTOMY",
    "OPINION_IS_UNPOPULAR",
    "VisualMod",
    "WallStreetBot",
    "WaterCups69",
    "XvGTM17",
    "bawse1",
    "notmikjaash",
    "premier_",
    "zjz",
}

In [18]:
# Show up to 100 characters of each text cell in rendered tables.
# The option is addressed by its full dotted name: pandas resolves abbreviated
# names by pattern matching, which can break (or become ambiguous) if a
# similarly named option is ever added.
pd.set_option('display.max_colwidth', 100)

# June report

## Posts

In [33]:
# Per-post statistics for posts created in June 2021, fetched from the Beneath
# warehouse. Each post is joined to its June 2021 comments and aggregated into:
# comment count, total and average comment length, number of distinct
# commenters, and rocket (U+1F680) / diamond (U+1F48E) emoji counts summed over
# the post's title, body and comments.
# The comments join is an inner join, so posts without any June comment are not
# returned; comments created outside June are not counted either.
# NOTE(review): rows are grouped by the post's attributes rather than its id --
# confirm no two posts share all grouped fields.
posts = await beneath.query_warehouse("""
with
    posts as (
        select 
            created_on, 
            id, 
            author, 
            title, 
            text, 
            flair, 
            permalink,
            length(text) as post_length,
            array_length(regexp_extract_all(title, r"\\x{1F680}")) + array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(title, r"\\x{1F48E}")) + array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds,
        from `examples/reddit/r-wallstreetbets-posts`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    comments as (
        select 
            created_on, 
            id, 
            post_id, 
            author, 
            text,
            length(text) as comment_length,
            array_length(regexp_extract_all(c.text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(c.text, r"\\x{1F48E}")) as num_diamonds,
        from `examples/reddit/r-wallstreetbets-comments` c
        where timestamp_trunc(created_on, month) = "2021-06-01"
    )
select 
    p.created_on, 
    p.title, 
    p.text, 
    p.author, 
    p.flair, 
    p.permalink,
    p.post_length,
    count(c.id) as num_comments,
    sum(c.comment_length) as sum_comments_length, 
    sum(c.comment_length)/count(c.id) as avg_comment_length,
    p.num_rockets + sum(c.num_rockets) as num_rockets,
    p.num_diamonds + sum(c.num_diamonds) as num_diamonds,
    count(distinct c.author) as nunique_commenters,
from posts p
join comments c on p.id = c.post_id
group by p.created_on, p.title, p.text, p.author, p.flair, p.permalink, p.post_length, p.num_rockets, p.num_diamonds
""")

Post with most comments

In [34]:
# Three posts with the highest comment count.
(
    posts
    .sort_values(by='num_comments', ascending=False)
    .head(3)
)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
371,2021-06-11 20:00:17+00:00,"Weekend Discussion Thread for the Weekend of June 11, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nxosm3/weekend_discussion_thread_for_the_weekend_of_june/,181,63166,4528066,71.685179,3028,440,4668,2021-07-13 14:27:25.308000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
210,2021-06-02 10:00:15+00:00,"Daily Discussion Thread for June 02, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nqi9f6/daily_discussion_thread_for_june_02_2021/,2359,40962,2273866,55.511596,5528,437,8586,2021-07-13 14:27:25.307000+00:00


Longest discussion (largest total length of all comments on the post)

In [35]:
# Three posts whose comments add up to the greatest total length.
(
    posts
    .sort_values(by='sum_comments_length', ascending=False)
    .head(3)
)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
371,2021-06-11 20:00:17+00:00,"Weekend Discussion Thread for the Weekend of June 11, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nxosm3/weekend_discussion_thread_for_the_weekend_of_june/,181,63166,4528066,71.685179,3028,440,4668,2021-07-13 14:27:25.308000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
2674,2021-06-04 20:00:15+00:00,"Weekend Discussion Thread for the Weekend of June 04, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nse1dg/weekend_discussion_thread_for_the_weekend_of_june/,181,37246,2797369,75.105219,1263,239,4761,2021-07-13 14:27:28.740000+00:00


Post with the highest avg comment length (at least 5 comments)

In [26]:
# Three posts with the longest average comment, restricted to posts that have
# at least 5 comments so a single long comment does not dominate.
(
    posts
    .loc[posts['num_comments'].ge(5)]
    .sort_values(by='avg_comment_length', ascending=False)
    .head(3)
)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,nunique_commenters,@meta.timestamp
24581,2021-06-17 18:25:55+00:00,$DKNG YOLOO. Still holding. Still bullish. 💎🙌 -> 🚀🚀,,arjunav,YOLO,/r/wallstreetbets/comments/o241a9/dkng_yoloo_still_holding_still_bullish/,0,5,6974,1394.8,2,5,2021-07-13 14:10:38.334000+00:00
28707,2021-06-08 21:00:43+00:00,Sorrento Therapeutics Bull Case = EZ Squeeze.,"""Wall Street analysts also predicted that in 2021, the company’s y-o-y revenues would reach $1.1...",Siphen_,Discussion,/r/wallstreetbets/comments/nvecr9/sorrento_therapeutics_bull_case_ez_squeeze/,5008,13,9433,725.615385,0,8,2021-07-13 14:10:42.956000+00:00
26653,2021-06-21 20:07:22+00:00,Be objective and look data and take your seat,Times like these illustrate why having a cool head in a crisis is so important. The ability to b...,Kimaxw,DD,/r/wallstreetbets/comments/o546n6/be_objective_and_look_data_and_take_your_seat/,3226,8,5414,676.75,3,6,2021-07-13 14:10:40.689000+00:00


Post with the most rockets (including comments)

In [39]:
# Three posts with the most rocket emoji (post and comments combined).
(
    posts
    .sort_values(by='num_rockets', ascending=False)
    .head(3)
)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
210,2021-06-02 10:00:15+00:00,"Daily Discussion Thread for June 02, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nqi9f6/daily_discussion_thread_for_june_02_2021/,2359,40962,2273866,55.511596,5528,437,8586,2021-07-13 14:27:25.307000+00:00
0,2021-06-08 11:07:59+00:00,"Daily Popular Ticker Thread for June 08, 2021 - BB | AMC | CLOV",Apologies for the delay. The global Fastly outage knocked Reddit offline.,OPINION_IS_UNPOPULAR,,/r/wallstreetbets/comments/nv1ig9/daily_popular_ticker_thread_for_june_08_2021_bb/,73,26140,1572924,60.173068,5487,481,6103,2021-07-13 14:27:25.305000+00:00


Post with the most diamonds (including comments)

In [38]:
# Three posts with the most diamond emoji (post and comments combined).
(
    posts
    .sort_values(by='num_diamonds', ascending=False)
    .head(3)
)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
3334,2021-06-11 10:00:19+00:00,"Daily Popular Tickers Thread for June 11, 2021 - AMC | BB | CLOV",\nYour daily hype thread. Please keep the shitposting to a maximum.\n\n^Navigate ^WSB |^We ^reco...,VisualMod,,/r/wallstreetbets/comments/nxcb5h/daily_popular_tickers_thread_for_june_11_2021_amc/,1782,9492,670418,70.629794,3163,1167,2523,2021-07-13 14:27:30.164000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
950,2021-06-02 20:00:20+00:00,"What Are Your Moves Tomorrow, June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nquubf/what_are_your_moves_tomorrow_june_03_2021/,2357,35642,2113514,59.298412,4250,706,8936,2021-07-13 14:27:25.313000+00:00


## Authors

In [54]:
# Per-author activity for June 2021. Posts and comments are aggregated per
# author separately (item counts, word counts, rocket U+1F680 and diamond
# U+1F48E emoji counts) and then combined with a full join, so authors who only
# posted or only commented still appear, with 0 filled in for the missing side.
# NOTE(review): word counts come from split(text, " "), i.e. they count
# space-delimited chunks rather than linguistic words.
authors = await beneath.query_warehouse("""
with
    posts_enhanced as (
        select *,
            array_length(split(title, " ")) as num_words_title,
            array_length(split(text, " ")) as num_words_body,
            array_length(regexp_extract_all(title, r"\\x{1F680}")) as num_rockets_title,
            array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets_body,
            array_length(regexp_extract_all(title, r"\\x{1F48E}")) as num_diamonds_title,
            array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds_body
        from `examples/reddit/r-wallstreetbets-posts`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    comments_enhanced as (
        select *,
            array_length(split(text, " ")) as num_words,
            array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds
        from `examples/reddit/r-wallstreetbets-comments`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    author_posts_stats as (
        select 
            author, 
            count(*) as num_posts,
            sum(num_words_title) + sum(num_words_body) as num_words,
            sum(num_rockets_title) + sum(num_rockets_body) as num_rockets,
            sum(num_diamonds_title) + sum(num_diamonds_body) as num_diamonds,
        from posts_enhanced
        group by author
    ),
    author_comments_stats as (
        select
            author,
            count(*) as num_comments,
            sum(num_words) as num_words,
            sum(num_rockets) as num_rockets,
            sum(num_diamonds) as num_diamonds
        from comments_enhanced
        group by author
    )
select 
    coalesce(p.author, c.author) as author,
    ifnull(p.num_posts, 0) as num_posts,
    ifnull(c.num_comments, 0) as num_comments,
    ifnull(p.num_words, 0) + ifnull(c.num_words, 0) as num_words,
    ifnull(p.num_rockets, 0) + ifnull(c.num_rockets, 0) as num_rockets,
    ifnull(p.num_diamonds, 0) + ifnull(c.num_diamonds, 0) as num_diamonds,
from author_posts_stats p
full join author_comments_stats c on p.author = c.author
""")

In [72]:
# Author statistics with every account listed in MODERATORS removed.
authors_no_mods = authors.loc[~authors['author'].isin(MODERATORS)]

Author with the most posts

In [73]:
# Three non-moderator authors with the most posts.
(
    authors_no_mods
    .sort_values(by='num_posts', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
15518,BackgroundProgram389,45,323,2280,0,0,2021-07-14 10:15:56.625000+00:00
7314,DerekZ1985,37,393,6352,0,0,2021-07-14 10:15:53.283000+00:00
3098,Z3r0Confidence,37,276,12328,49,0,2021-07-14 10:15:50.287000+00:00


Author with the most comments

In [74]:
# Three non-moderator authors with the most comments.
(
    authors_no_mods
    .sort_values(by='num_comments', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
1607,MoonArmy1977,0,3458,43605,25,32,2021-07-14 10:15:48.965000+00:00
5051,Dinosaur_Eats_Pizza,1,3034,34533,82,0,2021-07-14 10:15:52.077000+00:00
13272,toydan,1,2811,45763,0,0,2021-07-14 10:15:55.755000+00:00


Author who wrote the most words

In [75]:
# Three non-moderator authors with the highest combined word count.
(
    authors_no_mods
    .sort_values(by='num_words', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
13272,toydan,1,2811,45763,0,0,2021-07-14 10:15:55.755000+00:00
1607,MoonArmy1977,0,3458,43605,25,32,2021-07-14 10:15:48.965000+00:00
5051,Dinosaur_Eats_Pizza,1,3034,34533,82,0,2021-07-14 10:15:52.077000+00:00


Author who posted the most rockets

In [76]:
# Three non-moderator authors who used the most rocket emoji.
(
    authors_no_mods
    .sort_values(by='num_rockets', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
6907,mateace,0,27,295,2717,0,2021-07-14 10:15:52.885000+00:00
11097,unipaulie,2,377,3032,1760,23,2021-07-14 10:15:54.937000+00:00
4766,LordTender,0,372,2931,1039,0,2021-07-14 10:15:51.527000+00:00


Author who posted the most diamonds

In [77]:
# Three non-moderator authors who used the most diamond emoji.
(
    authors_no_mods
    .sort_values(by='num_diamonds', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
15373,Sc0ttyMinz,1,335,2026,590,710,2021-07-14 10:15:56.624000+00:00
8323,Tazzer57,0,875,8127,926,385,2021-07-14 10:15:53.750000+00:00
107348,FriendlyhoodKomrad,3,2,246,0,330,2021-07-14 10:16:33.776000+00:00


Author whose posts received the most comments (moderators excluded)

In [90]:
# Total number of comments received across each non-moderator author's posts;
# show the three authors with the highest totals.
(
    posts
    .loc[~posts['author'].isin(MODERATORS)]
    .groupby('author')['num_comments']
    .sum()
    .reset_index()
    .rename(columns={'num_comments': 'num_comments_on_posts'})
    .sort_values(by='num_comments_on_posts', ascending=False)
    .head(3)
)

Unnamed: 0,author,num_comments_on_posts
17874,pittluke,8297
19883,yolocallking,5311
13198,Your_Boy_Roy_,4566


## Stock mentions

In [109]:
# Daily mention counts per symbol for June 2021, adding together the mentions
# found in posts and in comments. The full join keeps symbol/day pairs that
# occur in only one of the two sources (the missing side counts as 0).
# Rows come back ordered by symbol, then day.
mentions = await beneath.query_warehouse("""
with
    stock_mentions_posts as (
        select 
            symbol, 
            timestamp_trunc(timestamp, day) as day, 
            count(*) as num_mentions
        from `examples/wallstreetbets-analytics/r-wallstreetbets-posts-stock-mentions`
        where timestamp_trunc(timestamp, month) = "2021-06-01"
        group by symbol, timestamp_trunc(timestamp, day)
    ),
    stock_mentions_comments as (
        select 
            symbol, 
            timestamp_trunc(timestamp, day) as day, 
            count(*) as num_mentions
        from `examples/wallstreetbets-analytics/r-wallstreetbets-comments-stock-mentions`
        where timestamp_trunc(timestamp, month) = "2021-06-01"
        group by symbol, timestamp_trunc(timestamp, day)
    )
select 
    coalesce(p.symbol, c.symbol) as symbol,
    coalesce(p.day, c.day) as day,
    ifnull(p.num_mentions, 0) + ifnull(c.num_mentions,0) as num_mentions
from stock_mentions_posts p
full join stock_mentions_comments c on p.symbol = c.symbol and p.day = c.day
order by symbol, day
""")

In [110]:
# Drop every row whose symbol is on the BLACKLIST. Re-running this cell is
# harmless: filtering an already filtered frame yields the same rows.
mentions = mentions.loc[~mentions['symbol'].isin(BLACKLIST)]

In [111]:
# Total mentions per symbol over the whole period covered by `mentions`,
# ordered from most to least mentioned.
top_symbols_df = (
    mentions
    .groupby('symbol')['num_mentions']
    .sum()
    .reset_index()
    .sort_values(by='num_mentions', ascending=False)
)
# The 100 most-mentioned symbols (fewer if there are not that many).
top_symbols = top_symbols_df['symbol'].iloc[:100]

In [112]:
# Horizontal bar chart of the ten most-mentioned symbols; the legend is hidden
# because each bar is already labelled on the y axis.
fig = px.bar(
    top_symbols_df.head(10),
    x="num_mentions",
    y="symbol",
    text="num_mentions",
    color='symbol',
    orientation='h',
    title='Most mentioned symbols in June',
    labels={'num_mentions': 'Mentions', 'symbol': 'Symbol'},
)
fig.update_layout(showlegend=False)
fig.show()

In [113]:
# One line per symbol showing daily mention counts for the ten most-mentioned
# symbols.
fig = px.line(
    mentions.loc[mentions['symbol'].isin(top_symbols[:10])],
    x="day",
    y="num_mentions",
    line_group="symbol",
    color="symbol",
    title="Mentions by day",
    labels={'num_mentions': 'mentions'},
)
fig.show()

In [126]:
# Daily share of mentions: each symbol's mentions on a day divided by that
# day's total over all symbols present in `mentions`.
# TODO: give `tmp` a descriptive name (the plotting cell below reads it).
total_mentions = mentions.groupby('day')['num_mentions'].sum().reset_index()
# Both inputs are renamed before merging, so the result gets explicit column
# names instead of pandas' automatic _x/_y suffixes.
tmp = (
    mentions
    .rename(columns={'num_mentions': 'num_stock_mentions'})
    .merge(
        total_mentions.rename(columns={'num_mentions': 'num_total_mentions'}),
        on="day",
    )
)
tmp['mention_perc'] = tmp['num_stock_mentions'] / tmp['num_total_mentions']
tmp.loc[tmp['symbol'].isin(top_symbols[:10])].head(100)

Unnamed: 0,symbol,day,num_stock_mentions,@meta.timestamp,num_total_mentions,mention_perc
30,AMC,2021-06-01 00:00:00+00:00,3986,2021-07-14 12:49:21.290000+00:00,14007,0.284572
71,BB,2021-06-01 00:00:00+00:00,3421,2021-07-14 12:49:21.661000+00:00,14007,0.244235
136,CLF,2021-06-01 00:00:00+00:00,3,2021-07-14 12:49:22.696000+00:00,14007,0.000214
137,CLNE,2021-06-01 00:00:00+00:00,8,2021-07-14 12:49:22.696000+00:00,14007,0.000571
138,CLOV,2021-06-01 00:00:00+00:00,59,2021-07-14 12:49:22.696000+00:00,14007,0.004212
...,...,...,...,...,...,...
8438,TLRY,2021-06-07 00:00:00+00:00,616,2021-07-14 12:49:29.413000+00:00,25576,0.024085
8487,UWMC,2021-06-07 00:00:00+00:00,122,2021-07-14 12:49:29.769000+00:00,25576,0.004770
8522,WISH,2021-06-07 00:00:00+00:00,45,2021-07-14 12:49:30.168000+00:00,25576,0.001759
8524,WKHS,2021-06-07 00:00:00+00:00,594,2021-07-14 12:49:30.168000+00:00,25576,0.023225


In [128]:
# Daily mention share for the ten most-mentioned symbols, one line per symbol.
# `mention_perc` is a 0-1 fraction of the day's total mentions.
fig = px.line(
    tmp.loc[tmp['symbol'].isin(top_symbols[:10])].sort_values(by='day', ascending=False),
    x="day",
    y="mention_perc",
    line_group="symbol",
    color="symbol",
    title="Percent mentions by day",
)
fig.show()

In [None]:
# TODO: 
# - find the weekly peak fraction of every stock
# - re-align/normalize the curves so you can see how stocks trended +/- n weeks from their peak
# - programmatically segment the stocks into rising stars / fading / holding steady

# Extra

In [None]:
# Weekly mention counts per symbol, taken from post mentions only (the comments
# table is not queried) and without any date filter, so every week present in
# the table is included. Rows are ordered by week, then by descending count.
mentions_weekly = await beneath.query_warehouse("""
    SELECT symbol, timestamp_trunc(timestamp, week) as week, count(*) as num_mentions
    FROM `examples/wallstreetbets-analytics/r-wallstreetbets-posts-stock-mentions`
    GROUP BY symbol, week
    ORDER BY week, num_mentions desc
""")

In [None]:
# Symbols that reached more than 100 post mentions in at least one week.
# Stored under its own name: reusing `top_symbols` here would overwrite the
# June top-100 list, which later cells still filter on when the notebook is
# run from top to bottom.
weekly_top_symbols = mentions_weekly.loc[mentions_weekly['num_mentions'] > 100, 'symbol'].unique()
# Full weekly history for those symbols (including their quieter weeks).
top_mentions_weekly = mentions_weekly.loc[mentions_weekly['symbol'].isin(weekly_top_symbols)]
fig = px.line(
    top_mentions_weekly,
    x="week",
    y="num_mentions",
    line_group="symbol",
    color="symbol",
    title="Weekly mentions of most popular companies",
)
fig.show()

### Mentions & Flair

In [279]:
# For posts created in June 2021: row counts per (symbol, day, flair), exposed
# as `num_posts`, obtained by joining post stock mentions to the posts table on
# the post id.
# NOTE(review): count(*) counts joined mention rows -- this equals the number
# of posts only if a symbol appears at most once per post in the mentions table.
symbol_flair = await beneath.query_warehouse("""
    SELECT symbol, timestamp_trunc(created_on, day) as day, flair, count(*) as num_posts
    FROM `examples/wallstreetbets-analytics/r-wallstreetbets-posts-stock-mentions` m
    JOIN `examples/reddit/r-wallstreetbets-posts` p ON m.post_id = p.id
    WHERE timestamp_trunc(created_on, month) = "2021-06-01"
    GROUP BY symbol, day, flair
""")

In [295]:
# Total `num_posts` per flair, largest first.
# The column is selected explicitly before summing: the first positional
# argument of GroupBy.sum() is `numeric_only`, not a column name, so
# .sum('num_posts') only worked by accident of the string being truthy.
flair_dist = (
    symbol_flair
    .groupby('flair')['num_posts']
    .sum()
    .reset_index()
    .sort_values('num_posts', ascending=False)
)
fig = px.bar(flair_dist, x="num_posts", y="flair", color='flair', orientation="h", title='Flair distribution')
fig.show()

In [340]:
# Total `num_posts` per day and flair.
# The column is selected explicitly before summing: the first positional
# argument of GroupBy.sum() is `numeric_only`, not a column name, so
# .sum('num_posts') only worked by accident of the string being truthy.
flair_daily = (
    symbol_flair
    .groupby(['day', 'flair'])['num_posts']
    .sum()
    .reset_index()
)
fig = px.line(flair_daily, x="day", y="num_posts", color='flair', line_group='flair', title='Flair by day')
fig.show()

In [319]:
# Keep only rows for the month's top symbols. This expects `top_symbols` to
# hold the June top-100 list derived from `mentions`.
symbol_flair = symbol_flair.loc[symbol_flair['symbol'].isin(top_symbols)]

In [306]:
# Posts per symbol (rows) and flair (columns); symbol/flair combinations that
# never occur become 0. Only `num_posts` is aggregated -- a bare
# groupby(...).sum() also tries to sum the timestamp columns, which newer
# pandas versions reject -- and pivot_table does the grouping itself, so no
# separate groupby step is needed.
wide_flair = (
    symbol_flair
    .pivot_table(index=['symbol'], columns='flair', values='num_posts', aggfunc='sum')
    .fillna(0)
)
# The row total must be computed before the share columns are appended,
# otherwise they would be included in the sum.
wide_flair['total_posts'] = wide_flair.sum(axis=1)
# Share of each symbol's posts carrying a given flair. These are 0-1 fractions
# despite the "_percent" suffix, which later cells rely on.
wide_flair['DD_percent'] = wide_flair['DD'] / wide_flair['total_posts']
wide_flair['Meme_percent'] = wide_flair['Meme'] / wide_flair['total_posts']
wide_flair['Gain_percent'] = wide_flair['Gain'] / wide_flair['total_posts']
wide_flair['Loss_percent'] = wide_flair['Loss'] / wide_flair['total_posts']
wide_flair

flair,DD,Discussion,Gain,Loss,Meme,News,Shitpost,Technical Analysis,YOLO,total_posts,DD_percent,Meme_percent,Gain_percent,Loss_percent
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AAPL,21.0,17.0,1.0,0.0,3.0,0.0,0.0,3.0,5.0,50.0,0.420000,0.060000,0.020000,0.000000
AHT,4.0,11.0,18.0,0.0,3.0,3.0,0.0,2.0,12.0,53.0,0.075472,0.056604,0.339623,0.000000
AMC,521.0,2302.0,472.0,112.0,472.0,222.0,3.0,234.0,875.0,5213.0,0.099942,0.090543,0.090543,0.021485
AMD,27.0,44.0,5.0,3.0,0.0,10.0,0.0,2.0,18.0,109.0,0.247706,0.000000,0.045872,0.027523
AMZN,34.0,31.0,10.0,0.0,2.0,2.0,0.0,0.0,4.0,83.0,0.409639,0.024096,0.120482,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WOOF,26.0,40.0,14.0,1.0,13.0,1.0,0.0,4.0,36.0,135.0,0.192593,0.096296,0.103704,0.007407
WPG,5.0,9.0,5.0,0.0,5.0,1.0,0.0,3.0,11.0,39.0,0.128205,0.128205,0.128205,0.000000
WWE,26.0,18.0,3.0,0.0,2.0,3.0,0.0,4.0,7.0,63.0,0.412698,0.031746,0.047619,0.000000
XL,21.0,16.0,5.0,0.0,1.0,3.0,0.0,1.0,13.0,60.0,0.350000,0.016667,0.083333,0.000000


In [307]:
# Top symbols ranked by the share of their posts flaired "DD", highest first.
dd_symbols = (
    wide_flair
    .loc[wide_flair.index.isin(top_symbols)]
    .sort_values(by='DD_percent', ascending=False)
    .reset_index()
)
dd_symbols.head(10)

flair,symbol,DD,Discussion,Gain,Loss,Meme,News,Shitpost,Technical Analysis,YOLO,total_posts,DD_percent,Meme_percent,Gain_percent,Loss_percent
0,UPS,40.0,19.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,60.0,0.666667,0.0,0.0,0.0
1,BP,34.0,18.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,58.0,0.586207,0.034483,0.034483,0.0
2,MA,27.0,11.0,2.0,0.0,1.0,0.0,0.0,5.0,1.0,47.0,0.574468,0.021277,0.042553,0.0
3,OI,34.0,24.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,63.0,0.539683,0.0,0.0,0.0
4,GM,55.0,26.0,5.0,0.0,3.0,8.0,0.0,2.0,5.0,104.0,0.528846,0.028846,0.048077,0.0
5,TWNK,34.0,8.0,3.0,1.0,14.0,0.0,0.0,1.0,4.0,65.0,0.523077,0.215385,0.046154,0.015385
6,IP,22.0,15.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,43.0,0.511628,0.0,0.046512,0.023256
7,JP,25.0,22.0,1.0,0.0,0.0,0.0,0.0,5.0,0.0,53.0,0.471698,0.0,0.018868,0.0
8,LNG,21.0,13.0,4.0,0.0,0.0,3.0,0.0,5.0,3.0,49.0,0.428571,0.0,0.081633,0.0
9,SP,21.0,18.0,1.0,0.0,0.0,0.0,0.0,3.0,7.0,50.0,0.42,0.0,0.02,0.0


In [309]:
# Bar chart of the 20 symbols with the highest DD share; each bar is annotated
# with the absolute number of DD posts.
fig = px.bar(
    dd_symbols.head(20),
    x="DD_percent",
    y="symbol",
    color='symbol',
    orientation='h',
    text='DD',
    title='Top DD Stocks',
)
fig.show()

In [320]:
# Top symbols ranked by the share of their posts flaired "Meme", highest first.
meme_symbols = (
    wide_flair
    .loc[wide_flair.index.isin(top_symbols)]
    .sort_values(by='Meme_percent', ascending=False)
    .reset_index()
)
meme_symbols.head(10)

flair,symbol,DD,Discussion,Gain,Loss,Meme,News,Shitpost,Technical Analysis,YOLO,total_posts,DD_percent,Meme_percent,Gain_percent,Loss_percent
0,BLNK,7.0,5.0,1.0,1.0,9.0,2.0,0.0,1.0,10.0,36.0,0.194444,0.25,0.027778,0.027778
1,TWNK,34.0,8.0,3.0,1.0,14.0,0.0,0.0,1.0,4.0,65.0,0.523077,0.215385,0.046154,0.015385
2,BABA,18.0,7.0,2.0,0.0,7.0,1.0,0.0,5.0,13.0,53.0,0.339623,0.132075,0.037736,0.0
3,WPG,5.0,9.0,5.0,0.0,5.0,1.0,0.0,3.0,11.0,39.0,0.128205,0.128205,0.128205,0.0
4,VIAC,11.0,18.0,9.0,0.0,9.0,2.0,0.0,9.0,15.0,73.0,0.150685,0.123288,0.123288,0.0
5,NOK,17.0,133.0,14.0,8.0,32.0,11.0,0.0,11.0,62.0,288.0,0.059028,0.111111,0.048611,0.027778
6,NKLA,13.0,23.0,4.0,0.0,6.0,4.0,0.0,3.0,5.0,58.0,0.224138,0.103448,0.068966,0.0
7,SNDL,33.0,185.0,40.0,5.0,47.0,11.0,0.0,20.0,123.0,464.0,0.071121,0.101293,0.086207,0.010776
8,SDC,5.0,18.0,6.0,3.0,6.0,2.0,0.0,2.0,19.0,61.0,0.081967,0.098361,0.098361,0.04918
9,WOOF,26.0,40.0,14.0,1.0,13.0,1.0,0.0,4.0,36.0,135.0,0.192593,0.096296,0.103704,0.007407


In [321]:
# Bar chart of the ten symbols with the highest Meme share.
fig = px.bar(
    meme_symbols.head(10),
    x="Meme_percent",
    y="symbol",
    color='symbol',
    orientation='h',
    title='Top Meme Stocks',
)
fig.show()

In [332]:
# Fetch the individual June 2021 posts that mention BLNK and carry the "Meme"
# flair, with their id and permalink so they can be inspected by hand.
blnk_memes = await beneath.query_warehouse("""
    SELECT symbol, timestamp_trunc(created_on, day) as day, flair, id, permalink
    FROM `examples/wallstreetbets-analytics/r-wallstreetbets-posts-stock-mentions` m
    JOIN `examples/reddit/r-wallstreetbets-posts` p ON m.post_id = p.id
    WHERE timestamp_trunc(created_on, month) = "2021-06-01" AND
    symbol="BLNK" AND 
    flair="Meme"
""")

In [335]:
# Show the matched BLNK meme posts, one row per post with its permalink.
blnk_memes
# Observation from manual inspection: most (possibly all) of these posts were
# deleted by the moderators.

Unnamed: 0,symbol,day,flair,id,permalink,@meta.timestamp
0,BLNK,2021-06-03 00:00:00+00:00,Meme,nrcewp,/r/wallstreetbets/comments/nrcewp/shorting_bln...,2021-06-29 17:33:09.526000+00:00
1,BLNK,2021-06-03 00:00:00+00:00,Meme,nremfo,/r/wallstreetbets/comments/nremfo/time_to_shor...,2021-06-29 17:33:09.526000+00:00
2,BLNK,2021-06-03 00:00:00+00:00,Meme,nrftwz,/r/wallstreetbets/comments/nrftwz/anyone_ready...,2021-06-29 17:33:09.526000+00:00
3,BLNK,2021-06-06 00:00:00+00:00,Meme,ntnxie,/r/wallstreetbets/comments/ntnxie/amcx_still_o...,2021-06-29 17:33:09.526000+00:00
4,BLNK,2021-06-07 00:00:00+00:00,Meme,nufcgg,/r/wallstreetbets/comments/nufcgg/stop_buying_...,2021-06-29 17:33:09.526000+00:00
5,BLNK,2021-06-08 00:00:00+00:00,Meme,nvbmhi,/r/wallstreetbets/comments/nvbmhi/blnk_is_at_3...,2021-06-29 17:33:09.526000+00:00
6,BLNK,2021-06-08 00:00:00+00:00,Meme,nvbp01,/r/wallstreetbets/comments/nvbp01/blnk_is_at_3...,2021-06-29 17:33:09.526000+00:00
7,BLNK,2021-06-02 00:00:00+00:00,Meme,nqtj2o,/r/wallstreetbets/comments/nqtj2o/the_meme_etf...,2021-06-29 17:33:09.526000+00:00
8,BLNK,2021-06-02 00:00:00+00:00,Meme,nqwrwz,/r/wallstreetbets/comments/nqwrwz/puts_on_blnk...,2021-06-29 17:33:09.526000+00:00


In [322]:
# Top symbols ranked by the share of their posts flaired "Gain", highest first.
gain_symbols = (
    wide_flair
    .loc[wide_flair.index.isin(top_symbols)]
    .sort_values(by='Gain_percent', ascending=False)
    .reset_index()
)
gain_symbols.head(10)

flair,symbol,DD,Discussion,Gain,Loss,Meme,News,Shitpost,Technical Analysis,YOLO,total_posts,DD_percent,Meme_percent,Gain_percent,Loss_percent
0,AHT,4.0,11.0,18.0,0.0,3.0,3.0,0.0,2.0,12.0,53.0,0.075472,0.056604,0.339623,0.0
1,GTT,3.0,13.0,9.0,0.0,1.0,3.0,0.0,1.0,9.0,39.0,0.076923,0.025641,0.230769,0.0
2,TRCH,9.0,31.0,19.0,3.0,1.0,6.0,0.0,5.0,16.0,90.0,0.1,0.011111,0.211111,0.033333
3,RMED,10.0,14.0,10.0,0.0,3.0,2.0,0.0,2.0,7.0,48.0,0.208333,0.0625,0.208333,0.0
4,SPCE,28.0,103.0,50.0,4.0,14.0,11.0,0.0,11.0,38.0,259.0,0.108108,0.054054,0.19305,0.015444
5,GEO,8.0,26.0,16.0,0.0,5.0,5.0,0.0,6.0,21.0,87.0,0.091954,0.057471,0.183908,0.0
6,SENS,37.0,44.0,27.0,1.0,5.0,7.0,0.0,9.0,32.0,162.0,0.228395,0.030864,0.166667,0.006173
7,IVR,5.0,26.0,11.0,1.0,1.0,1.0,0.0,1.0,21.0,67.0,0.074627,0.014925,0.164179,0.014925
8,NVDA,18.0,18.0,9.0,1.0,0.0,2.0,0.0,2.0,5.0,55.0,0.327273,0.0,0.163636,0.018182
9,ASTS,6.0,9.0,8.0,2.0,2.0,1.0,0.0,8.0,14.0,50.0,0.12,0.04,0.16,0.04


In [329]:
# Bar chart of the five symbols with the highest Gain share.
fig = px.bar(
    gain_symbols.head(5),
    x="Gain_percent",
    y="symbol",
    color='symbol',
    orientation='h',
    title='Top Gain Stocks',
)
fig.show()

In [326]:
# Top symbols ranked by the share of their posts flaired "Loss", highest first.
loss_symbols = (
    wide_flair
    .loc[wide_flair.index.isin(top_symbols)]
    .sort_values(by='Loss_percent', ascending=False)
    .reset_index()
)
loss_symbols.head(10)

flair,symbol,DD,Discussion,Gain,Loss,Meme,News,Shitpost,Technical Analysis,YOLO,total_posts,DD_percent,Meme_percent,Gain_percent,Loss_percent
0,ORPH,0.0,27.0,6.0,4.0,1.0,0.0,0.0,1.0,10.0,49.0,0.0,0.020408,0.122449,0.081633
1,QS,6.0,15.0,2.0,2.0,1.0,2.0,0.0,3.0,5.0,36.0,0.166667,0.027778,0.055556,0.055556
2,TD,20.0,57.0,2.0,5.0,1.0,7.0,0.0,4.0,4.0,100.0,0.2,0.01,0.02,0.05
3,SDC,5.0,18.0,6.0,3.0,6.0,2.0,0.0,2.0,19.0,61.0,0.081967,0.098361,0.098361,0.04918
4,GSAT,4.0,14.0,3.0,2.0,4.0,4.0,0.0,2.0,10.0,43.0,0.093023,0.093023,0.069767,0.046512
5,PSFE,16.0,9.0,2.0,3.0,1.0,0.0,0.0,2.0,32.0,65.0,0.246154,0.015385,0.030769,0.046154
6,BB,246.0,1085.0,230.0,130.0,252.0,66.0,0.0,114.0,845.0,2968.0,0.082884,0.084906,0.077493,0.043801
7,NAKD,3.0,41.0,15.0,4.0,8.0,2.0,0.0,5.0,20.0,98.0,0.030612,0.081633,0.153061,0.040816
8,ASTS,6.0,9.0,8.0,2.0,2.0,1.0,0.0,8.0,14.0,50.0,0.12,0.04,0.16,0.04
9,UWMC,86.0,135.0,35.0,19.0,20.0,9.0,0.0,45.0,142.0,491.0,0.175153,0.040733,0.071283,0.038697


In [328]:
# Bar chart of the five symbols with the highest Loss share.
fig = px.bar(
    loss_symbols.head(5),
    x="Loss_percent",
    y="symbol",
    color='symbol',
    orientation='h',
    title='Top Loss Stocks',
)
fig.show()