# Reddit "COVID" Monitoring Dashboard
Testing Jupyter Notebook deployment to Heroku using Voila.

*Yose Marthin Giyay*

In [31]:
# import libraries

import requests
import pandas
import textblob
import plotly.express as px
import nbformat
import nltk
nltk.download('punkt')
pandas.set_option('display.max_colwidth', None) # don't cut my pandas dataframes
import plotly.io as pio


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [32]:
# define variables

COMMENT_COLOR         = "blueviolet"
SUBMISSION_COLOR      = "darkorange"
TEXT_PREVIEW_SIZE     = 240
TERM_OF_INTEREST      = "covid"
SUBREDDIT_OF_INTEREST = "covid"
TIMEFRAME             = "48h" # see more options in the pushshift api docs: https://github.com/pushshift/api
SIZE                  = 500 #no of records to return

In [33]:
def get_reddit_data(data_type, **kwargs):
    """
    Gets data from the pushshift api and returns it as json.

    data_type can be 'comment' or 'submission'
    The rest of the args are interpreted as payload.
    Read more: https://github.com/pushshift/api
    """

    base_url = f"https://api.pushshift.io/reddit/search/{data_type}/"
    payload = kwargs
    request = requests.get(base_url, params=payload)
    return request.json()
    

def make_clickable(val):
    """
    Makes a pandas column clickable.
    """
    
    return '<a href="{}">Link</a>'.format(val)

In [34]:
data = get_reddit_data(data_type="submission",
                          q="covid",
                          after="48h",
                          size=1000,
                          aggs="subreddit").get("data")
df = pandas.DataFrame(data)
df_plt= df["subreddit"].value_counts()[0:10]
x = df_plt.keys() 
y = df_plt.values 
px.bar(df_plt,
       x=x,
       y=y,
       template="plotly_dark",
       title= f"Subreddits with most submissions having the term covid in the last 48h",
       labels={"x": "Subreddits", "y": "# comments"},
       color_discrete_sequence=[COMMENT_COLOR],
       height=500,
       width=800)

In [35]:
df_plt= df["subreddit"].value_counts()[0:10] #top 10 subreddits and their counts
x = df_plt.keys() #subreddits
y = df_plt.values #counts
px.bar(df_plt,
       x=x,
       y=y,
       template="plotly_dark",
       title=f"Subreddits with most comments having the term covid in the last 48h",
       labels={"x": "Subreddits", "y": "# comments"},
       color_discrete_sequence=[COMMENT_COLOR],
       height=500,
       width=800)



## Comments Overview

In [36]:
data = get_reddit_data(data_type="comment", q="covid", after="48h", size=10, sort_type="score", sort="desc").get("data")

# retain only the relevant columns
df = pandas.DataFrame(data)[["author", "subreddit", "score", "body", "permalink"]]

# we only keep the first 100 characters of the body 
df.body = df.body.str[0:100] + "..."

# we append the string to all the permalink entries
df.permalink = "https://reddit.com" + df.permalink.astype(str)
print(f"\nTop 10 most upvoted comments having term covid in the last 48 hours\n")

# style the last column to be clickable and print
df.style.format({'permalink': make_clickable})


Top 10 most upvoted comments having term covid in the last 48 hours



Unnamed: 0,author,subreddit,score,body,permalink
0,GonzoPunchi,kpop,7,"people forget how fun award shows are, its just covid ruining it...",Link
1,Just-an-MP,PoliticalCompassMemes,2,COVID is more of a state of mind. One day we’ll realize the real COVID was inside us all along. Wait...,Link
2,dvusMynd,GavinNewsomAward,2,Wtf!? How can anyone watch this shit and think it’s totally ok to have no end point with boosters. T...,Link
3,Kiramiraa,kpop,2,yeah it's probably not the best idea to get a whole bunch of members from the most popular groups to...,Link
4,archfeybaby,straykids,2,"they are there, but they can't scream bc of covid regulations...",Link
5,MmaOverSportsball,MMA,2,Post covid nunes...,Link
6,Tiptip4me,doordash_drivers,2,Covid would be the least of your worries with that situation 😂 😂 💩...,Link
7,Rbfam8191,nfl,2,Packers spreading Aaron's COVID Lambeau leaping...,Link
8,Aggressive-Toe9807,CasualUK,2,"The show really is just light hearted fluff but it’s always a nice form of escapism on these dark, w...",Link
9,BeMurlala,houston,2,I just mentioned today while driving.. how did we go from Be Someone to Deadbaybees and Covid 1984.....,Link


## --

In [37]:
data = get_reddit_data(data_type="submission", 
                        q="covid", 
                        after="48h", 
                        size=10, 
                        sort_type="num_comments", 
                        sort="desc").get("data")

# retain only the relevant columns
df = pandas.DataFrame(data)[['author', 'subreddit', 'num_comments', 'title', 'permalink']]


# we append the string to all the permalink entries
df.permalink = "https://reddit.com" + df.permalink.astype(str)
print(f"\nTop 10 most commented submissions having the term covid in the last 48 hours\n")

# style the last column to be clickable and print
df.style.format({'permalink': make_clickable})


Top 10 most commented submissions having the term covid in the last 48 hours



Unnamed: 0,author,subreddit,num_comments,title,permalink
0,transcribersofreddit,TranscribersOfReddit,3,"CasualUK | Image | ""I’m going to rock down (for my covid booster)""",Link
1,transcribersofreddit,TranscribersOfReddit,3,"britposting | Image | ""CoVID iS pOlitiCAL""",Link
2,Insane212,LegalAdviceUK,3,Can i drive with a french licence in the Uk if im banned?,Link
3,P4TR10T_03,CanadaPolitics,2,Ontario set to announce stronger COVID-19 measures for proof of vaccination,Link
4,doublefirstname,LockdownSkepticism,2,"The Big Pivot: Merck Falters on Covid-19 Vaccine, Then Makes One for Rival J&J (Wall Street Journal, 12/11/2021)",Link
5,recipe881,LockdownSkepticism,2,FDA expands authorization for Pfizer’s Covid-19 booster to cover 16- and 17-year-olds,Link
6,Topadmin77,conspiracy,2,"Dr. Robert Malone, inventor of the mRNA technology opposes Covid vaccines!",Link
7,BBJackie,conspiracy,2,Covid-19: Researcher blows the whistle on data integrity issues in Pfizer’s vaccine trial,Link
8,Pure_Film6680,LegalAdviceUK,2,"As a worker who is off with suspected covid-19, upon receiving my test result, do I need to share the resit with my manager as proof that I do in fact have the illness?",Link
9,allenbyNY,conspiracy,2,BOMBSHELL: Harvard Study (Finally) Shows that mRNA Therapeutic Drugs do NOT slow the Spread of Covid-19,Link
