In [2]:
import os
import sys
import time
from datetime import date
from datetime import datetime
import time
import json
import platform

import stoneburner
#//*** Custom Functions:
#//*** mr_clean_text(input_series)
#//*** tokenize_series(input_series)
#//*** remove_stop_words(input_series)

# //*** Imports and Load Data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#//*** Use the whole window in the IPYNB editor
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
subreddits = ["wallstreetbets", "stocks", "wallstreetbetsOGs", "spacs", "investing", "pennystocks", "stockmarket", "options", "robinhoodpennystocks", "wallstreetbetsnew", "smallstreetbets"]
filepath = ".\\data\\"
filename_suffix = "_comments.csv.zip"
#//*** Maximize columns and rows displayed by pandas
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [27]:
#//*** Input_filename: Comments to Process.
#//*** This will eventually be a list of files
#input_filename  =".\\data\\wallstreetbets_comments.csv.zip"

#//*** Path to processed files
output_filename = ".\\data\\processed_reddit_basic_v3.csv.zip"

#//*** Path to the stock ticker JSON file
stock_ticker_filename = ".\\data\\stock_tickers.json"

#//*** Convert Path to Mac formatting if needed
if platform.system() == 'Darwin':
    output_filename = output_filename.replace("\\","/")
    stock_ticker_filename = stock_ticker_filename.replace("\\","/")

#//*** Load the Stock Tickers
f = open(stock_ticker_filename, "r")
symbols = json.loads(f.read())['symbols']
f.close()

print(symbols)
#//*** Convert symbols to lower case
symbols = [x.lower() for x in symbols]



raw_df = pd.DataFrame()

start_time = time.time()

#//*** Load each Subreddit for Aggregation
for subreddit in subreddits:
    #//*** Filepath + subreddit name + csv.zip
    input_filename = filepath+subreddit+filename_suffix

    #//*** Convert Path to Mac formatting if needed
    if platform.system() == 'Darwin':
        input_filename = input_filename.replace("\\","/")
   
    print(f"Reading Compressed CSV: {input_filename}")
    
    #//*** Read Each DataFrame and combine with raw_df
    raw_df = pd.concat([raw_df,pd.read_csv(input_filename,compression='zip' )])

#//*** Reset the index, since multiple indexes have been combined
raw_df.reset_index(drop=True, inplace=True)

print(f"Files Loaded: {round(time.time()-start_time,2)}s")
print(f"Total Records: {len(raw_df)}")
    

['clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
Reading Compressed CSV: .\data\wallstreetbets_comments.csv.zip
Reading Compressed CSV: .\data\stocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsOGs_comments.csv.zip
Reading Compressed CSV: .\data\spacs_comments.csv.zip
Reading Compressed CSV: .\data\investing_comments.csv.zip
Reading Compressed CSV: .\data\pennystocks_comments.csv.zip
Reading Compressed CSV: .\data\stockmarket_comments.csv.zip
Reading Compressed CSV: .\data\options_comments.csv.zip
Reading Compressed CSV: .\data\robinhoodpennystocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsnew_comments.csv.zip
Reading Compressed CSV: .\data\smallstreetbets_comments.csv.zip
Files Loaded: 60.8s
Total Records: 4432533


In [29]:
#raw_df[raw_df['body'].str.len() == 0]

raw_df['body'] = raw_df['body'].astype('str')

In [56]:
#//*** Convert UTC to date (not datetime)
#//** Second pass goes from 12-21 to 4-19
try:
    raw_df['created_utc'] = raw_df['created_utc'].apply(lambda x: date.fromtimestamp(x))
except:
    print()

#//*************************************************************************
#//*** Clean the Body Text, Tokenize and Remove Stop Words.
#//*************************************************************************
raw_df['clean'] = stoneburner.remove_stop_words(stoneburner.tokenize_series(stoneburner.mr_clean_text(raw_df['body'],{"remove_empty":False})))

#//*** Detokenize the clean column as tfidf
raw_df['tfidf'] = raw_df['clean'].apply(lambda x: ' '.join(x))

raw_df


remove_empty False
Text Cleaning Time: 79.25578355789185
Tokenize Time: 1215.794196844101
Stop Words Time: 219.91515040397644


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean,amd,x,clne,mu,bb,amc,nio,gme,spce,wkhs,sofi,clov,tfidf
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit...",0,0,0,0,0,0,0,0,0,0,0,0,is fantastic idea ill toss mine in bit jpm wel...
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]",0,0,0,0,0,0,0,0,0,0,0,0,intc on 417
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra...",0,0,0,0,0,0,0,0,0,0,0,0,straddle call straddle put put put straddle ca...
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,...",0,0,0,0,0,0,0,0,0,0,0,0,gmcr falls goog falls slightly grpn go a death...
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti...",0,0,0,0,0,0,0,0,0,0,0,0,crox 426bzh 51im expecting to beat estimates
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah...",0,0,0,0,0,0,0,0,0,0,0,0,jun 30 9 30pm est just saw posted yahoofinance...
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha...",0,0,0,0,0,0,0,0,0,0,0,0,good run mill running were to roll thanks the ...
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]",0,0,0,0,0,0,0,0,0,0,0,0,back normal there yet
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀],0,0,0,0,0,0,0,0,0,0,0,0,💎👐🚀🚀🚀🚀


In [57]:
#raw_df['clean'].apply(x lambda for y in x: y.join(x) + " " )



In [58]:
#//*************************************************************
#//*** Load the Encode_comments Function
#//*** Counts the Stock mentions in each Post.
#//*** Adds the stock as a column to the Dataframe
#//*************************************************************

def encode_comments(input_df):
    import time
    
    print("Begin dataframe ticker symbol coding")
    start_time = time.time()
       
    
    
    #//*** Count each Stock mention add it to a dictionary of lists. Each list is filled with 0s. The Specific row index is updated with the relevant count. 
    #//*** This Generates a word count matrix
    stock_dict = {}

    #//*** Keep Track of Rows
    index = 0

    for row in input_df.iterrows():

        #//*** Get the cleaned body text
        body = row[1]['clean']

        #//*** For Each Stock Symbol
        for stock in symbols:
            
            #//*** Check if Stock exists in Body
            if stock in body:

                #//*** Reset the stock counter
                count = 0

                #//*** Loop through body and county ticker mentions
                for word in body:
                    #//*** If word found increment count
                    if stock == word:
                        count += 1

                #//*** Check if symbol is in stock_dict
                if stock not in stock_dict.keys():    

                    #//*** If not, then build it
                    stock_dict[stock] = np.zeros(len(raw_df))

                #//*** Update the stock value at the 
                stock_dict[stock][index] = count

        #//*** Increment Index to keep with row index
        index +=1   

    #//*** Loop through the dictionary key and lists
    for col,values in stock_dict.items():

        #//*** Add each key (which is a stock ticker symbol) as a column using the list of ticker counts for Data
        raw_df[col] = values.astype('int')

    print(f"Encoding Time: {round(time.time()-start_time,2)}s")
    
    return input_df

In [59]:
#//*** Encodes the dataframe with a count of Ticker symbols in each comment.
#//*** Called from update_subreddit(). This is broken out since we will likely need to adjust encoding parameters
def aggregate_comments(input_df):
    
    from sklearn.feature_extraction.text import TfidfVectorizer
    
    to_sum_cols = ['score','total_awards_received']
    to_count_col = ['author_fullname','link_id']
    
    
    df_cols = ['date','total_posts','tfidf']
    
    rename_cols = {
        'total_awards_received' : 'awards',
        'author_fullname' : 'authors',
        'link_id' : 'threads'
    }
    
    #//*** Build the OUtput Dataframe Column names from the Columns to sum, the columns to count, and the stock ticker columns
    #//*** Loop through each list
    for cols in [ to_sum_cols, to_count_col, symbols ]:
        
        #//*** Get individual column name from each column list
        for col in cols:
            print
            #//*** Rename the column if in rename_col
            #//*** Add col to df_cols....The out_df column names
            if col in rename_cols.keys():
                df_cols.append(rename_cols[col])
            else:
                df_cols.append(col)
                
    print(df_cols)
    
    out_df = pd.DataFrame(columns = df_cols)
    
   
    #//*** Group 
    for group in input_df.groupby('created_utc'):
        #//*** Start Timing the process
        start_time = time.time()

        loop_df = group[1].copy()
        
        loop_list = []

        
        #//*** Build the aggregated row for the Dataframe.
        #//*** 5 Parts: 
        #//******** 1.) Date & Total Posts
        #//******** 2.) tfidf - Bag of Words for the Day
        #//******** 2.) Columns to sum
        #//******** 3.) Columns to count
        #//******** 4.) Stock Ticker columns to sum
        
        #//********************************************
        #//******** 1.) Date & Total Posts
        #//********************************************
        #//*** Add the Date
        loop_list.append(group[0])
        
        #//*** Add Total number of posts
        loop_list.append(len(loop_df))
        
        #//********************************************
        #//******** 2.) Build tfidf
        #//********************************************
        
        
        
        #//*** Initialize the Vectorizer
        tfidf = TfidfVectorizer()

        #//*** Build the feature matrix, which is a weighted sparse matrix
        loop_list.append(tfidf.fit_transform(input_df['tfidf']))
        
        #//********************************************
        #//******** 2.) Columns to sum
        #//********************************************
        for col in to_sum_cols:
            loop_list.append(loop_df[col].sum())

            
        #//********************************************
        #//******** 3.) Columns to count
        #//********************************************
        for col in to_count_col:
            loop_list.append(len(loop_df[col].unique()))
    
        
        #//********************************************
        #//******** 4.) Stock Ticker columns to sum
        #//********************************************
        for col in symbols:
            loop_list.append(loop_df[col].sum())

        #print(len(out_df.columns),len(loop_list))
        #print(out_df.columns)
        out_df.loc[len(out_df.index)] = loop_list 
        
        print(f"{group[0]} {len(loop_df)} Comments in {round(time.time() - start_time,2)}s")
    print("Aggregation Complete!")
    return out_df

#for col in df.columns[16:]:
#    print(df[df[col] > 0 ].iloc[0]['created_utc'],col)

In [60]:
#//*** Encode Comments
df = encode_comments(raw_df)

df

Begin dataframe ticker symbol coding
Encoding Time: 944.65s


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean,amd,x,clne,mu,bb,amc,nio,gme,spce,wkhs,sofi,clov,tfidf
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit...",0,0,0,0,0,0,0,0,0,0,0,0,is fantastic idea ill toss mine in bit jpm wel...
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]",0,0,0,0,0,0,0,0,0,0,0,0,intc on 417
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra...",0,0,0,0,0,0,0,0,0,0,0,0,straddle call straddle put put put straddle ca...
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,...",0,0,0,0,0,0,0,0,0,0,0,0,gmcr falls goog falls slightly grpn go a death...
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti...",0,0,0,0,0,0,0,0,0,0,0,0,crox 426bzh 51im expecting to beat estimates
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah...",0,0,0,0,0,0,0,0,0,0,0,0,jun 30 9 30pm est just saw posted yahoofinance...
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha...",0,0,0,0,0,0,0,0,0,0,0,0,good run mill running were to roll thanks the ...
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]",0,0,0,0,0,0,0,0,0,0,0,0,back normal there yet
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀],0,0,0,0,0,0,0,0,0,0,0,0,💎👐🚀🚀🚀🚀


In [34]:
df

Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean,amd,x,clne,mu,bb,amc,nio,gme,spce,wkhs,sofi,clov
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit...",0,0,0,0,0,0,0,0,0,0,0,0
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]",0,0,0,0,0,0,0,0,0,0,0,0
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra...",0,0,0,0,0,0,0,0,0,0,0,0
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,...",0,0,0,0,0,0,0,0,0,0,0,0
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti...",0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah...",0,0,0,0,0,0,0,0,0,0,0,0
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha...",0,0,0,0,0,0,0,0,0,0,0,0
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]",0,0,0,0,0,0,0,0,0,0,0,0
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀],0,0,0,0,0,0,0,0,0,0,0,0


In [61]:
#//*** Aggregate and Process Comments
ag_df = aggregate_comments(df)
ag_df

['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
2012-04-11 17 Comments in 93.91s
2012-04-12 1 Comments in 90.51s
2012-04-13 2 Comments in 90.19s
2012-04-16 13 Comments in 92.16s
2012-05-02 44 Comments in 89.57s
2012-05-03 43 Comments in 90.17s
2012-05-04 15 Comments in 89.96s
2012-05-05 6 Comments in 89.82s
2012-05-06 1 Comments in 92.02s
2012-05-21 30 Comments in 89.72s
2012-05-22 42 Comments in 89.97s
2012-06-05 8 Comments in 89.56s
2012-06-06 35 Comments in 88.96s
2012-06-07 64 Comments in 88.5s
2012-06-08 20 Comments in 89.98s
2012-06-09 2 Comments in 90.02s
2012-06-10 7 Comments in 89.92s


MemoryError: 

In [25]:
#output_filename = ".\\data\\processed_reddit_basic_v2.csv.zip"

In [None]:
#//*** Write File to disk
ag_df.to_csv(output_filename,compression="zip",index=False) 