## Data Collection

### Imports

In [1]:
import requests 
import pandas as pd

### Collecting Reddit Posts via the Pushshift API

In [2]:
def get_submission(subreddit, num, batch_size=100):
    """Collect submission titles for one subreddit from the Pushshift search API.

    Results are fetched page by page. After each page, the smallest
    ``created_utc`` seen on that page is sent as the ``before`` parameter of
    the next request, so consecutive pages walk backwards in time.

    Parameters
    ----------
    subreddit : str
        Subreddit name sent as the ``subreddit`` query parameter.
    num : int
        Total number of submissions to request. At most
        ``ceil(num / batch_size)`` requests are made.
    batch_size : int, default 100
        Number of submissions requested per call (the ``size`` parameter).

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``subreddit``, one row per collected submission.
        The ``subreddit`` values are those reported by the API for each post.
        Fewer than ``num`` rows are returned if requests fail or the API runs
        out of results.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    url = 'https://api.pushshift.io/reddit/search/submission'
    titles = []
    subreddits = []
    min_time = None  # paging cursor: oldest 'created_utc' collected so far

    max_requests = -(-num // batch_size)  # ceiling division
    for _ in range(max_requests):
        para = {
            'subreddit': subreddit,
            # Ask only for what is still missing, one page at a time.
            'size': min(batch_size, num - len(titles)),
        }
        if min_time is not None:
            para['before'] = min_time

        res = requests.get(url, params=para)
        if res.status_code != 200:
            # Best effort: a failed request uses up one attempt; the cursor
            # is left untouched so the next attempt asks for the same page.
            continue

        list_post = res.json()['data']
        if not list_post:
            # No older submissions are available. Stop here rather than
            # dropping the cursor and re-downloading the newest page.
            break

        for post in list_post:
            titles.append(post['title'])
            subreddits.append(post['subreddit'])
        min_time = min(post['created_utc'] for post in list_post)

    return pd.DataFrame(
        {'title': titles, 'subreddit': subreddits},
        columns=['title', 'subreddit'],
    )

In [3]:
# Request 3000 submissions from r/StockMarket.
stock = get_submission(subreddit='StockMarket', num=3000)

In [4]:
# Request 3000 submissions from r/CryptoCurrency.
crypto = get_submission(subreddit='CryptoCurrency', num=3000)

### Combining & Saving Dataframe

In [5]:
# Stack both subreddits into one frame. ignore_index=True renumbers the rows
# 0..n-1; without it each input keeps its own labels and they repeat.
data = pd.concat([stock, crypto], ignore_index=True)

In [6]:
# Write the combined posts to CSV; the row index is left out of the file.
data.to_csv(path_or_buf="../data/reddit_data.csv", index=False)

### Continued in the next notebook, `Cleaning_EDA`