In [1]:

# Load .env enviroment variables
import os
from dotenv import load_dotenv
load_dotenv("/Users/ludovicschneider/Bootcamp/LS.env")

import pandas as pd


## Retrieving the data via Reddit API directly ##
It is not the best way to retrieve large dataset but it is a good way to stream and update new feeds. 

In [2]:
# Set Alpaca API key and secret
client_id = os.getenv("REDDIT_CLIENT_ID")
reddit_secret_key = os.getenv("REDDIT_SECRET_KEY")
reddit_pw = os.getenv('REDDIT_PW')

# Request a temporary OAuth token from Reddit. We need our username and password for this:

In [3]:
import requests

In [4]:
auth = requests.auth.HTTPBasicAuth(client_id, reddit_secret_key)

In [5]:
# Setting up your Reddit logins
data_id = {'grant_type': 'password',
          'username': 'diveride',
          'password': reddit_pw
}

In [6]:
headers = {'User-Agent': 'API_project'}

In [7]:
# Retrieve the access_token
res = requests.post('https://www.reddit.com/api/v1/access_token', auth=auth, data=data_id, headers=headers)

In [8]:
# Store the access_token to a variable
access_token = res.json()['access_token']

In [9]:
# adding the access token to our headers and format it as a string
headers['authorization'] = f'bearer {access_token}'
# Print the access_token to make sure it worked in a proper format
headers

{'User-Agent': 'API_project',
 'authorization': 'bearer 656422675188-DoZfaJ89SEeeQ0clhdhGfHjLvH2Hkg'}

In [10]:
# Checking that we can access the website with a result 200
requests.get('https://oauth.reddit.com/api/v1/me', headers=headers)


<Response [200]>

In [12]:
# Retreiving the data from WSB
last_id=""
max_batch= 2
num=float()
df_request= pd.DataFrame()
while num <=max_batch:
    num += 1
    wsb_data1 = requests.get('https://oauth.reddit.com/r/CryptoCurrency', headers=headers, params={'limit': '100', 'after': 'last_id'})
    for post in wsb_data1.json()['data']['children']:
        df_request= df_request.append({
            'author': post['data']['author'],
            'title': post['data']['title'],
            'selftext': post['data']['selftext'],
            'score': post['data']['score'],
            'time': post['data']['created_utc']
        }, ignore_index=True)
    last_id= post['kind'] + '_' + post['data']['id']


In [95]:

df_request['time'] = df_request['time'].apply(lambda x: dt.datetime.utcfromtimestamp(x).date())
df_request

Unnamed: 0,author,score,selftext,time,title
0,CryptoMaximalist,139.0,"Hello everyone,\n\n​\n\nPer the passing of the...",2021-07-06,Moon Week Reminder - Submit governance polls b...
1,AutoModerator,83.0,**Welcome to the Daily Discussion. Please read...,2021-07-07,"Daily Discussion - July 7, 2021 (GMT+0)"
2,SatOnMyBalls_,6809.0,,2021-07-06,Exposed Congressman Rep Brad Sherman Who is Tr...
3,AutisticDalekOnSpeed,2000.0,,2021-07-06,Swiss Bank Sygnum Becomes First Bank to Offer ...
4,MAGICwhiteMICE,1162.0,When I send my cryptos too or from a exchange ...,2021-07-06,Does anyone else ever get scared transferring ...
5,r0ughnex,2351.0,# Updates\n\n* Swap **staking contract may all...,2021-07-06,SCAM Alert! Very concerned about the new ShibS...
6,tct2274,1281.0,,2021-07-06,Shibaswap: Staked funds are NOT in a smart con...
7,graytleapforward,949.0,"I'm 60 years old, broke ass bartender for 40 y...",2021-07-06,Any Boomers out there stacking Crypto for reti...
8,Set1Less,855.0,The double standards are just incredible. Bill...,2021-07-06,China banned Didi (Chinese Uber) just 2 days a...
9,Cookiesnap,1917.0,I’ve seen there is a bit of confusion on under...,2021-07-06,Be aware of burned supply coins


## Retrieving the data via the Pushift API ##
This is a better way to extract large amount of data

In [14]:
from psaw import PushshiftAPI
api = PushshiftAPI()

In [89]:
import datetime as dt

start_time=int(dt.datetime(2021, 7, 5).timestamp())

wsb_data = api.search_submissions(after=start_time,
                            #subreddit='wallstreetbets',
                            subreddit= 'CryptoCurrency',
                            filter=['author', 'title', 'selftext', 'score'],
                            limit= 5000
                            )

In [90]:
print(wsb_data)

<generator object PushshiftAPIMinimal._search at 0x7f8a98f31750>


In [91]:
# Use "thing.d_" a dict containing all of the data attributes attached to the "thing" (which otherwise would be accessed via dot notation). 
# One specific convenience this enables is simplifying pushing results into a pandas dataframe 

df = pd.DataFrame([thing.d_ for thing in wsb_data])

In [92]:
df

Unnamed: 0,author,created,created_utc,score,selftext,title
0,erdal_mutlu,1.625646e+09,1625631921,1,,Cryptocurrency Could Buy You a 101-Carat Diamo...
1,Esk0l,1.625646e+09,1625631872,1,[removed],What do you think it's happening first?
2,buddyfake,1.625646e+09,1625631797,1,,DeFi Coins Surge by Double Digits as Bitcoin N...
3,ComradeSuphi,1.625646e+09,1625631768,1,,One of the biggest Turkish bank's servers are ...
4,timestampmagazine,1.625646e+09,1625631632,1,,KuCoin the Latest to Come Under Scanner by Ont...
5,Esk0l,1.625646e+09,1625631596,1,[removed],"Being aware of the market state, what do you t..."
6,xam66,1.625646e+09,1625631205,1,[removed],Binance still safe uk?
7,SnoBoCho,1.625646e+09,1625631199,1,Hey wonderful community!\n\nI got lucky a few ...,Crypto Baby Needs Help
8,buddyfake,1.625646e+09,1625631162,1,,Banking Giant UBS Warns That Regulatory Crackd...
9,MrBleubols,1.625646e+09,1625631157,1,[removed],I just completed a free transfer to Robin Hood...


In [93]:
# transforming utc_date
df_clean = df.drop(columns='created')
df_clean['created_utc'] = df['created_utc'].apply(lambda x: dt.datetime.utcfromtimestamp(x).date())
df_clean

Unnamed: 0,author,created_utc,score,selftext,title
0,erdal_mutlu,2021-07-07,1,,Cryptocurrency Could Buy You a 101-Carat Diamo...
1,Esk0l,2021-07-07,1,[removed],What do you think it's happening first?
2,buddyfake,2021-07-07,1,,DeFi Coins Surge by Double Digits as Bitcoin N...
3,ComradeSuphi,2021-07-07,1,,One of the biggest Turkish bank's servers are ...
4,timestampmagazine,2021-07-07,1,,KuCoin the Latest to Come Under Scanner by Ont...
5,Esk0l,2021-07-07,1,[removed],"Being aware of the market state, what do you t..."
6,xam66,2021-07-07,1,[removed],Binance still safe uk?
7,SnoBoCho,2021-07-07,1,Hey wonderful community!\n\nI got lucky a few ...,Crypto Baby Needs Help
8,buddyfake,2021-07-07,1,,Banking Giant UBS Warns That Regulatory Crackd...
9,MrBleubols,2021-07-07,1,[removed],I just completed a free transfer to Robin Hood...
