In [1]:
import os
import sys
import time
from datetime import date
from datetime import datetime
import time
import json
import platform

import stoneburner
#//*** Custom Functions:
#//*** mr_clean_text(input_series)
#//*** tokenize_series(input_series)
#//*** remove_stop_words(input_series)

# //*** Imports and Load Data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#//*** Use the whole window in the IPYNB editor
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
subreddits = ["wallstreetbets", "stocks", "wallstreetbetsOGs", "spacs", "investing", "pennystocks", "stockmarket", "options", "robinhoodpennystocks", "wallstreetbetsnew", "smallstreetbets"]
filepath = ".\\data\\"
filename_suffix = "_comments.csv.zip"
#//*** Maximize columns and rows displayed by pandas
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [16]:
#//*** Input_filename: Comments to Process.
#//*** This will eventually be a list of files
#input_filename  =".\\data\\wallstreetbets_comments.csv.zip"

#//*** Path to processed files
output_filename = ".\\data\\processed_reddit_basic_v3_tfidf.csv.zip"

#//*** Path to the stock ticker JSON file
stock_ticker_filename = ".\\data\\stock_tickers.json"

#//*** Convert Path to Mac formatting if needed
if platform.system() == 'Darwin':
    output_filename = output_filename.replace("\\","/")
    stock_ticker_filename = stock_ticker_filename.replace("\\","/")

#//*** Load the Stock Tickers
f = open(stock_ticker_filename, "r")
symbols = json.loads(f.read())['symbols']
f.close()

process_tfidf = True

print(symbols)
#//*** Convert symbols to lower case
symbols = [x.lower() for x in symbols]



raw_df = pd.DataFrame()

start_time = time.time()

#//*** Load each Subreddit for Aggregation
for subreddit in subreddits:
    #//*** Filepath + subreddit name + csv.zip
    input_filename = filepath+subreddit+filename_suffix

    #//*** Convert Path to Mac formatting if needed
    if platform.system() == 'Darwin':
        input_filename = input_filename.replace("\\","/")
   
    print(f"Reading Compressed CSV: {input_filename}")
    
    #//*** Read Each DataFrame and combine with raw_df
    raw_df = pd.concat([raw_df,pd.read_csv(input_filename,compression='zip' )])

#//*** Reset the index, since multiple indexes have been combined
raw_df.reset_index(drop=True, inplace=True)

print(f"Files Loaded: {round(time.time()-start_time,2)}s")
print(f"Total Records: {len(raw_df)}")
    

['clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
Reading Compressed CSV: .\data\wallstreetbets_comments.csv.zip
Reading Compressed CSV: .\data\stocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsOGs_comments.csv.zip
Reading Compressed CSV: .\data\spacs_comments.csv.zip
Reading Compressed CSV: .\data\investing_comments.csv.zip
Reading Compressed CSV: .\data\pennystocks_comments.csv.zip
Reading Compressed CSV: .\data\stockmarket_comments.csv.zip
Reading Compressed CSV: .\data\options_comments.csv.zip
Reading Compressed CSV: .\data\robinhoodpennystocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsnew_comments.csv.zip
Reading Compressed CSV: .\data\smallstreetbets_comments.csv.zip
Files Loaded: 67.01s
Total Records: 4432533


In [17]:
#raw_df[raw_df['body'].str.len() == 0]

raw_df['body'] = raw_df['body'].astype('str')

#//*** Convert UTC to date (not datetime)
#//** Second pass goes from 12-21 to 4-19
try:
    raw_df['created_utc'] = raw_df['created_utc'].apply(lambda x: date.fromtimestamp(x))
except:
    print()

#//*************************************************************************
#//*** Clean the Body Text, Tokenize and Remove Stop Words.
#//*************************************************************************
raw_df['clean'] = stoneburner.remove_stop_words(stoneburner.tokenize_series(stoneburner.mr_clean_text(raw_df['body'],{"remove_empty":False})))

if process_tfidf == True:
    #//*** Detokenize the clean column as tfidf
    raw_df['tfidf'] = raw_df['clean'].apply(lambda x: ' '.join(x))

raw_df

remove_empty False
Text Cleaning Time: 61.89023971557617
Tokenize Time: 974.1647193431854
Stop Words Time: 151.5292809009552


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit..."
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]"
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra..."
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,..."
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah..."
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha..."
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]"
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀]


In [None]:
#raw_df['clean'].apply(x lambda for y in x: y.join(x) + " " )



In [4]:
#//*************************************************************
#//*** Load the Encode_comments Function
#//*** Counts the Stock mentions in each Post.
#//*** Adds the stock as a column to the Dataframe
#//*************************************************************

def encode_comments(input_df):
    import time
    
    print("Begin dataframe ticker symbol coding")
    start_time = time.time()
       
    
    
    #//*** Count each Stock mention add it to a dictionary of lists. Each list is filled with 0s. The Specific row index is updated with the relevant count. 
    #//*** This Generates a word count matrix
    stock_dict = {}

    #//*** Keep Track of Rows
    index = 0

    for row in input_df.iterrows():

        #//*** Get the cleaned body text
        body = row[1]['clean']

        #//*** For Each Stock Symbol
        for stock in symbols:
            
            #//*** Check if Stock exists in Body
            if stock in body:

                #//*** Reset the stock counter
                count = 0

                #//*** Loop through body and county ticker mentions
                for word in body:
                    #//*** If word found increment count
                    if stock == word:
                        count += 1

                #//*** Check if symbol is in stock_dict
                if stock not in stock_dict.keys():    

                    #//*** If not, then build it
                    stock_dict[stock] = np.zeros(len(raw_df))

                #//*** Update the stock value at the 
                stock_dict[stock][index] = count

        #//*** Increment Index to keep with row index
        index +=1   

    #//*** Loop through the dictionary key and lists
    for col,values in stock_dict.items():

        #//*** Add each key (which is a stock ticker symbol) as a column using the list of ticker counts for Data
        raw_df[col] = values.astype('int')

    print(f"Encoding Time: {round(time.time()-start_time,2)}s")
    
    return input_df

In [None]:
#//*** Encodes the dataframe with a count of Ticker symbols in each comment.
#//*** Called from update_subreddit(). This is broken out since we will likely need to adjust encoding parameters
def aggregate_comments(input_df,is_tfidf):
    
    from sklearn.feature_extraction.text import TfidfVectorizer
    
    to_sum_cols = ['score','total_awards_received']
    to_count_col = ['author_fullname','link_id']
    
    
    df_cols = ['date','total_posts','tfidf']
    
    if is_tfidf == False:
        df_cols = ['date','total_posts']
    
    rename_cols = {
        'total_awards_received' : 'awards',
        'author_fullname' : 'authors',
        'link_id' : 'threads'
    }
    
    #//*** Build the OUtput Dataframe Column names from the Columns to sum, the columns to count, and the stock ticker columns
    #//*** Loop through each list
    for cols in [ to_sum_cols, to_count_col, symbols ]:
        
        #//*** Get individual column name from each column list
        for col in cols:
            print
            #//*** Rename the column if in rename_col
            #//*** Add col to df_cols....The out_df column names
            if col in rename_cols.keys():
                df_cols.append(rename_cols[col])
            else:
                df_cols.append(col)
                
    print(df_cols)
    
    out_df = pd.DataFrame(columns = df_cols)
    
    output_counter = 0
   
    #//*** Group 
    for group in input_df.groupby('created_utc'):
        
        output_counter += 1
        
        #//*** Start Timing the process
        start_time = time.time()

        loop_df = group[1].copy()
        
        loop_list = []

        
        #//*** Build the aggregated row for the Dataframe.
        #//*** 5 Parts: 
        #//******** 1.) Date & Total Posts
        #//******** 2.) tfidf - Bag of Words for the Day
        #//******** 2.) Columns to sum
        #//******** 3.) Columns to count
        #//******** 4.) Stock Ticker columns to sum
        
        #//********************************************
        #//******** 1.) Date & Total Posts
        #//********************************************
        #//*** Add the Date
        loop_list.append(group[0])
        
        #//*** Add Total number of posts
        loop_list.append(len(loop_df))
        
        #//********************************************
        #//******** 2.) Build tfidf
        #//********************************************
        

        #//*** Perform tfidf if True
        if is_tfidf == True:
        
            #//*** Initialize the Vectorizer
            tfidf = TfidfVectorizer()

            #//*** Build the feature matrix, which is a weighted sparse matrix
            loop_list.append(tfidf.fit_transform(input_df['tfidf']))
            #loop_list.append("TEST")
        #//********************************************
        #//******** 2.) Columns to sum
        #//********************************************
        for col in to_sum_cols:
            loop_list.append(loop_df[col].sum())

            
        #//********************************************
        #//******** 3.) Columns to count
        #//********************************************
        for col in to_count_col:
            loop_list.append(len(loop_df[col].unique()))
    
        
        #//********************************************
        #//******** 4.) Stock Ticker columns to sum
        #//********************************************
        for col in symbols:
            loop_list.append(loop_df[col].sum())

        if is_tfidf == False:
            #print(len(out_df.columns),len(loop_list))
            #print(out_df.columns)
            out_df.loc[len(out_df.index)] = loop_list 
        else:
            out_df = pd.DataFrame(columns = df_cols)
            print(out_df)
            print(df_cols)
            
            out_df.loc[len(out_df.index)] = loop_list
            
            print(out_df)
            
            outfile_name = f".\\data\\ag\\{output_counter}_ag_csv.zip"
            print(f"{group[0]} - Writing File: {outfile_name}")
            out_df.to_csv(outfile_name,compression="zip",index=False) 

        
        print(f"{group[0]} {len(loop_df)} Comments in {round(time.time() - start_time,2)}s")
    print("Aggregation Complete!")
    return out_df

#//*** Aggregate and Process Comments
ag_df = aggregate_comments(df,True)
ag_df
#for col in df.columns[16:]:
#    print(df[df[col] > 0 ].iloc[0]['created_utc'],col)

['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2012-04-11          17    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    28     0.0      12       1    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2012-04-11 - Writing File: .\data\ag\1_ag_csv.zip
2012-04-11 17 Comments in 96.66s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, 

2012-05-22 42 Comments in 98.67s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2012-06-05           8    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    20     0.0       4       2    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2012-06-05 - Writing File: .\data\ag\12_ag_csv.zip
2012-06-05 8 Comments in 99.61s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads',

         date total_posts                                              tfidf  \
0  2012-07-13          11    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    13     0.0      10       7    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2012-07-13 - Writing File: .\data\ag\23_ag_csv.zip
2012-07-13 11 Comments in 97.17s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2012-07-14           6    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0     7     0.0       6

2012-08-25 19 Comments in 92.22s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2012-08-26          13    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    11     0.0       9       5    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2012-08-26 - Writing File: .\data\ag\35_ag_csv.zip
2012-08-26 13 Comments in 92.0s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads',

         date total_posts                                              tfidf  \
0  2014-04-11          65    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   173     0.0      35      11    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2014-04-11 - Writing File: .\data\ag\46_ag_csv.zip
2014-04-11 65 Comments in 91.22s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2014-07-30          20    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    41     0.0      13

2016-02-05 84 Comments in 92.25s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2016-12-30          86    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   560     0.0      55      29    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2016-12-30 - Writing File: .\data\ag\58_ag_csv.zip
2016-12-30 86 Comments in 91.62s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads'

         date total_posts                                              tfidf  \
0  2018-04-19          41    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   230     0.0      30      18    0    0    0   1   0  0   0    0   0  1   

  spce bb  
0    0  0  
2018-04-19 - Writing File: .\data\ag\69_ag_csv.zip
2018-04-19 41 Comments in 91.22s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-05-25          91    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   476     0.0      55

2018-07-12 134 Comments in 91.24s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-07-13          16    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    31     0.0      11       7    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2018-07-13 - Writing File: .\data\ag\81_ag_csv.zip
2018-07-13 16 Comments in 91.14s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads

         date total_posts                                              tfidf  \
0  2018-07-26         181    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   902     0.0     128      27    0    0    0   0   0  2   2    0   0  0   

  spce bb  
0    0  0  
2018-07-26 - Writing File: .\data\ag\92_ag_csv.zip
2018-07-26 181 Comments in 90.95s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-07-27           4    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0     6     0.0       

2018-08-06 340 Comments in 93.08s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-08-07          70    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   250     0.0      43      16    0    0    0   2   0  0   0    0   0  0   

  spce bb  
0    0  0  
2018-08-07 - Writing File: .\data\ag\104_ag_csv.zip
2018-08-07 70 Comments in 92.59s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thread

         date total_posts                                              tfidf  \
0  2018-08-19          39    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   203     0.0      29       7    0    0    0   1   0  2   0    0   0  1   

  spce bb  
0    0  0  
2018-08-19 - Writing File: .\data\ag\115_ag_csv.zip
2018-08-19 39 Comments in 88.28s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-08-20         311    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio  mu  \
0  1623     0.0     2

2018-09-01 23 Comments in 100.92s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-09-02          64    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   330     0.0      51       8    0    0    0   1   0  0   0    0   0  3   

  spce bb  
0    0  0  
2018-09-02 - Writing File: .\data\ag\127_ag_csv.zip
2018-09-02 64 Comments in 94.09s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thread

         date total_posts                                              tfidf  \
0  2018-09-14         296    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   849     0.0     193      48    0    0    0   2   0  0   0    0   2  5   

  spce bb  
0    0  0  
2018-09-14 - Writing File: .\data\ag\138_ag_csv.zip
2018-09-14 296 Comments in 90.18s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-09-15          81    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   334     0.0      

2018-09-28 13 Comments in 89.88s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-09-29          83    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   503     0.0      69      15    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2018-09-29 - Writing File: .\data\ag\150_ag_csv.zip
2018-09-29 83 Comments in 90.03s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads

         date total_posts                                              tfidf  \
0  2018-10-11         351    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1479     0.0     253      55    0    0    0   3   0  3   0    0   0  0   

  spce bb  
0    0  0  
2018-10-11 - Writing File: .\data\ag\161_ag_csv.zip
2018-10-11 351 Comments in 91.52s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-10-12          13    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    20     0.0      

2018-10-22 5 Comments in 90.29s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-10-23         179    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   854     0.0     140      45    0    0    0   1   0  1   0    0   0  3   

  spce bb  
0    0  0  
2018-10-23 - Writing File: .\data\ag\173_ag_csv.zip
2018-10-23 179 Comments in 92.91s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads

         date total_posts                                              tfidf  \
0  2018-11-05          55    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   610     0.0      50      13    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2018-11-05 - Writing File: .\data\ag\184_ag_csv.zip
2018-11-05 55 Comments in 103.78s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-11-06          86    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   307     0.0      

2018-11-16 106 Comments in 99.89s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-11-17         165    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   756     0.0      88      21    0    0    0   1   0  0   0    0   0  1   

  spce bb  
0    0  0  
2018-11-17 - Writing File: .\data\ag\196_ag_csv.zip
2018-11-17 165 Comments in 100.23s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thre

         date total_posts                                              tfidf  \
0  2018-11-28         256    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1312     0.0     175      60    0    0    0   0   0  0   0    0   0  4   

  spce bb  
0    0  0  
2018-11-28 - Writing File: .\data\ag\207_ag_csv.zip
2018-11-28 256 Comments in 101.49s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-11-29           3    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    32     0.0     

2018-12-10 84 Comments in 99.73s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-12-11           8    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    26     0.0       7       7    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2018-12-11 - Writing File: .\data\ag\219_ag_csv.zip
2018-12-11 8 Comments in 100.73s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads

         date total_posts                                              tfidf  \
0  2018-12-25         178    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   719     0.0     119      35    0    0    0   2   0  0   0    0   0  2   

  spce bb  
0    0  0  
2018-12-25 - Writing File: .\data\ag\230_ag_csv.zip
2018-12-25 178 Comments in 100.02s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2018-12-26          85    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   315     0.0     

2019-01-07 31 Comments in 102.43s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-01-10          90    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    90     0.0      53      19    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-01-10 - Writing File: .\data\ag\242_ag_csv.zip
2019-01-10 90 Comments in 105.61s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-01-24          98    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   269     0.0      84      18    0    0    0   1   0  1   0    0   0  0   

  spce bb  
0    0  0  
2019-01-24 - Writing File: .\data\ag\253_ag_csv.zip
2019-01-24 98 Comments in 101.76s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-01-28         109    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   326     0.0      

2019-02-12 69 Comments in 101.55s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-02-15          92    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    92     0.0      73      19    0    0    0   0   0  0   0    0   0  1   

  spce bb  
0    0  0  
2019-02-15 - Writing File: .\data\ag\265_ag_csv.zip
2019-02-15 92 Comments in 101.27s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-02-27         186    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   186     0.0     118      38    0    0    0   1   0  0   0    0   1  0   

  spce bb  
0    0  0  
2019-02-27 - Writing File: .\data\ag\276_ag_csv.zip
2019-02-27 186 Comments in 105.63s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-03-02          98    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    98     0.0     

2019-03-17 1 Comments in 102.93s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-03-18          93    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    93     0.0      71      19    0    0    0   0   0  0   0    0   1  3   

  spce bb  
0    0  0  
2019-03-18 - Writing File: .\data\ag\288_ag_csv.zip
2019-03-18 93 Comments in 103.23s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thread

         date total_posts                                              tfidf  \
0  2019-04-11         144    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   465     0.0      92      29    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-04-11 - Writing File: .\data\ag\299_ag_csv.zip
2019-04-11 144 Comments in 92.12s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-04-12         137    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   317     0.0     1

2019-05-01 38 Comments in 90.98s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-05-02         189    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1115     0.0     102      40    0    0    0   3   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-05-02 - Writing File: .\data\ag\311_ag_csv.zip
2019-05-02 189 Comments in 91.72s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thread

         date total_posts                                              tfidf  \
0  2019-05-20         151    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   624     0.0      89      38    0    0    0   1   0  1   0    0   0  2   

  spce bb  
0    0  0  
2019-05-20 - Writing File: .\data\ag\322_ag_csv.zip
2019-05-20 151 Comments in 90.43s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-05-21         133    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   388     0.0      

2019-06-01 82 Comments in 90.95s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-06-02           9    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    15     0.0       9       6    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-06-02 - Writing File: .\data\ag\334_ag_csv.zip
2019-06-02 9 Comments in 90.65s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads'

         date total_posts                                              tfidf  \
0  2019-06-20           7    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0    13     0.0       6       3    0    0    0   0   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-06-20 - Writing File: .\data\ag\345_ag_csv.zip
2019-06-20 7 Comments in 90.93s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-06-21          79    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   390     0.0      58

2019-07-05 683 Comments in 90.92s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-07-06         445    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1495     0.0     244      65    0    0    0   3   0  1   0    0   0  0   

  spce bb  
0    0  0  
2019-07-06 - Writing File: .\data\ag\357_ag_csv.zip
2019-07-06 445 Comments in 90.43s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-07-17         479    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1522     0.0     278      75    0    0    2  21   0  8   0    0   0  2   

  spce bb  
0    0  0  
2019-07-17 - Writing File: .\data\ag\368_ag_csv.zip
2019-07-17 479 Comments in 90.68s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-07-18         706    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  3084     0.0     4

2019-07-28 618 Comments in 91.47s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-07-29         620    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   620     0.0     390      91    0    0    1   3   0  8   0    0   2  0   

  spce bb  
0    0  0  
2019-07-29 - Writing File: .\data\ag\380_ag_csv.zip
2019-07-29 620 Comments in 90.41s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-08-09         686    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   687     0.0     451     141    0    0    2   6   0  1   0    0   0  0   

  spce bb  
0    0  1  
2019-08-09 - Writing File: .\data\ag\391_ag_csv.zip
2019-08-09 686 Comments in 90.62s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-08-10         704    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   704     0.0     3

2019-08-20 867 Comments in 90.22s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-08-21         482    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   483     0.0     280      80    0    0    3   1   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-08-21 - Writing File: .\data\ag\403_ag_csv.zip
2019-08-21 482 Comments in 89.93s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-09-01         270    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   270     0.0     153      49    0    0    0   1   0  0   0    0   0  0   

  spce bb  
0    0  0  
2019-09-01 - Writing File: .\data\ag\414_ag_csv.zip
2019-09-01 270 Comments in 90.16s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-09-02         342    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   342     0.0     2

2019-09-12 605 Comments in 90.4s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-09-13         464    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   464     0.0     302      80    0    0    0   0   2  6   0    0   0  0   

  spce bb  
0    0  0  
2019-09-13 - Writing File: .\data\ag\426_ag_csv.zip
2019-09-13 464 Comments in 90.03s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thread

         date total_posts                                              tfidf  \
0  2019-09-24         447    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   447     0.0     234      71    0    0    0   9   1  1   0    0   5  0   

  spce bb  
0    0  0  
2019-09-24 - Writing File: .\data\ag\437_ag_csv.zip
2019-09-24 447 Comments in 90.67s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-09-25         590    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   590     0.0     2

2019-10-05 505 Comments in 89.74s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-10-06         368    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   988     0.0     232      46    0    0    0   0   0  3   0    0   0  0   

  spce bb  
0    0  0  
2019-10-06 - Writing File: .\data\ag\449_ag_csv.zip
2019-10-06 368 Comments in 91.96s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-10-17         523    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   636     0.0     293      72    0    0    0   7   0  3   0    0   0  0   

  spce bb  
0    0  0  
2019-10-17 - Writing File: .\data\ag\460_ag_csv.zip
2019-10-17 523 Comments in 90.27s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-10-18         538    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   541     0.0     3

2019-10-28 489 Comments in 90.64s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-10-29         489    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   491     0.0     276      89    0    0    0  12   0  2   0    0   0  0   

  spce bb  
0    0  0  
2019-10-29 - Writing File: .\data\ag\472_ag_csv.zip
2019-10-29 489 Comments in 90.35s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-11-09         417    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   454     0.0     237      63    0    0    0   2   0  5   0    0   4  0   

  spce bb  
0    0  0  
2019-11-09 - Writing File: .\data\ag\483_ag_csv.zip
2019-11-09 417 Comments in 90.88s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-11-10         517    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   638     0.0     2

2019-11-20 526 Comments in 91.48s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-11-21         710    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   710     0.0     381      86    0    0    0   1   0  6   0    0   0  0   

  spce bb  
0    0  0  
2019-11-21 - Writing File: .\data\ag\495_ag_csv.zip
2019-11-21 710 Comments in 90.79s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-12-02         589    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   591     0.0     309      83    0    0    0   0   0  1   0    0   0  0   

  spce bb  
0    0  0  
2019-12-02 - Writing File: .\data\ag\506_ag_csv.zip
2019-12-02 589 Comments in 90.66s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-12-03         657    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   657     0.0     3

2019-12-13 588 Comments in 91.25s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-12-14         459    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   595     0.0     264      59    0    0    0   0   0  3   0    0   0  4   

  spce bb  
0    0  0  
2019-12-14 - Writing File: .\data\ag\518_ag_csv.zip
2019-12-14 459 Comments in 90.44s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2019-12-25         409    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   409     0.0     237      74    0    0    0   9   0  4   0    0   0  0   

  spce bb  
0    0  0  
2019-12-25 - Writing File: .\data\ag\529_ag_csv.zip
2019-12-25 409 Comments in 92.55s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2019-12-26         794    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   794     0.0     4

2020-01-05 424 Comments in 90.87s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-01-06        1852    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  6189     2.0     709     223    0    0    0   6   0  5   0    0   0  0   

  spce bb  
0    0  0  
2020-01-06 - Writing File: .\data\ag\541_ag_csv.zip
2020-01-06 1852 Comments in 90.94s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thre

         date total_posts                                              tfidf  \
0  2020-01-17         786    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   860     0.0     430     169    0    0    1  16   0  6   0    0   5  0   

  spce bb  
0    5  0  
2020-01-17 - Writing File: .\data\ag\552_ag_csv.zip
2020-01-17 786 Comments in 91.02s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-01-18         443    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   709     0.0     2

2020-01-28 768 Comments in 91.19s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-01-29         859    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   863     0.0     513     128    0    0    0  14   0  2   2    0   2  1   

  spce bb  
0    4  0  
2020-01-29 - Writing File: .\data\ag\564_ag_csv.zip
2020-01-29 859 Comments in 90.92s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threa

         date total_posts                                              tfidf  \
0  2020-02-09         619    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0   877     0.0     405      97    0    0    0   5   0  7   0    0   0  0   

  spce bb  
0    3  0  
2020-02-09 - Writing File: .\data\ag\575_ag_csv.zip
2020-02-09 619 Comments in 90.33s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-02-10         985    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1063     0.0     4

2020-02-20 1194 Comments in 90.92s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-02-21        1418    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1420     0.0     673     208    0    0    0  23   0  5   0    0   0  1   

  spce bb  
0   15  0  
2020-02-21 - Writing File: .\data\ag\587_ag_csv.zip
2020-02-21 1418 Comments in 90.9s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thre

Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-03-03        3762    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  6408     3.0    1531     507    0    1    0   4   0  13   0    0   2  0   

  spce bb  
0   10  0  
2020-03-03 - Writing File: .\data\ag\598_ag_csv.zip
2020-03-03 3762 Comments in 90.92s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'a

         date total_posts                                              tfidf  \
0  2020-03-14        1548    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  1559     0.0     813     197    0    0    0   3   0  12   2    0   0  0   

  spce bb  
0    0  0  
2020-03-14 - Writing File: .\data\ag\609_ag_csv.zip
2020-03-14 1548 Comments in 89.87s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-03-15        2322    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  2457     0.0   

2020-03-25 2266 Comments in 90.21s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-03-26        3041    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  3432     2.0    1418     286    0    0    0   2  10  8   0    0   0  0   

  spce bb  
0    4  0  
2020-03-26 - Writing File: .\data\ag\621_ag_csv.zip
2020-03-26 3041 Comments in 90.48s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thr

Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-04-06        2264    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  2741     0.0    1193     245    0    0    0   9  11  6   0    0   0  0   

  spce bb  
0    0  0  
2020-04-06 - Writing File: .\data\ag\632_ag_csv.zip
2020-04-06 2264 Comments in 90.7s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd'

         date total_posts                                              tfidf  \
0  2020-04-17        2343    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  3011     1.0    1222     255    0    0    0   8   4  4  10    0   0  0   

  spce bb  
0    8  0  
2020-04-17 - Writing File: .\data\ag\643_ag_csv.zip
2020-04-17 2343 Comments in 89.99s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-04-18        1389    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1411     0.0     

2020-04-28 2558 Comments in 90.82s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-04-29        3322    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  4899     0.0    1566     383    0    0    0  11   0  9   9    0   2  0   

  spce bb  
0    1  0  
2020-04-29 - Writing File: .\data\ag\655_ag_csv.zip
2020-04-29 3322 Comments in 91.69s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thr

Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-05-10        1636    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  2125     0.0     872     172    0    0    2   3   0  6  16    0   0  0   

  spce bb  
0    1  0  
2020-05-10 - Writing File: .\data\ag\666_ag_csv.zip
2020-05-10 1636 Comments in 89.97s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd

         date total_posts                                              tfidf  \
0  2020-05-21        2168    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  3124     1.0    1219     263    0    0    0   2   1  8   6    0   6  0   

  spce bb  
0    1  0  
2020-05-21 - Writing File: .\data\ag\677_ag_csv.zip
2020-05-21 2168 Comments in 90.84s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-05-22        2168    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  2800     0.0    1

2020-06-01 1851 Comments in 92.31s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-06-02        2518    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  4055     1.0    1241     283    0    0    0   2   1  6   0    0   0  0   

  spce bb  
0    3  1  
2020-06-02 - Writing File: .\data\ag\689_ag_csv.zip
2020-06-02 2518 Comments in 92.54s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thr

Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-06-13        1657    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1658     0.0     948     250    0    0    0   7   0  6   0    0   0  0   

  spce bb  
0    1  0  
2020-06-13 - Writing File: .\data\ag\700_ag_csv.zip
2020-06-13 1657 Comments in 90.99s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd

         date total_posts                                              tfidf  \
0  2020-06-24        2612    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  3479     2.0    1301     284    0    0   12   3   0  7   0    0   1  0   

  spce bb  
0    7  0  
2020-06-24 - Writing File: .\data\ag\711_ag_csv.zip
2020-06-24 2612 Comments in 94.1s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-06-25        3004    (0, 449455)\t0.19297662695869383\n  (0, 2429...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  5160     1.0    1

In [20]:
#//*** Encode Comments
df = encode_comments(raw_df)

df

Begin dataframe ticker symbol coding
Encoding Time: 721.56s


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean,amd,x,clne,mu,bb,amc,nio,gme,spce,wkhs,sofi,clov
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit...",0,0,0,0,0,0,0,0,0,0,0,0
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]",0,0,0,0,0,0,0,0,0,0,0,0
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra...",0,0,0,0,0,0,0,0,0,0,0,0
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,...",0,0,0,0,0,0,0,0,0,0,0,0
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti...",0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah...",0,0,0,0,0,0,0,0,0,0,0,0
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha...",0,0,0,0,0,0,0,0,0,0,0,0
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]",0,0,0,0,0,0,0,0,0,0,0,0
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀],0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
df['tfidf'] = df['clean'].apply(lambda x: ' '.join(x))

In [30]:
#//*** Aggregate and Process Comments
ag_df = aggregate_comments(df,True)
ag_df

['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']


ValueError: Length mismatch: Expected axis has 1 elements, new values have 19 elements

In [None]:
#output_filename = ".\\data\\processed_reddit_basic_v2.csv.zip"

In [12]:
#//*** Write File to disk
ag_df.to_csv(output_filename,compression="zip",index=False) 

In [15]:
ag_df

Unnamed: 0,date,total_posts,score,awards,authors,threads,clov,sofi,wkhs,amd,gme,x,amc,clne,nio,mu,spce,bb
0,2012-04-11,17,28,0.0,12,1,0,0,0,0,0,0,0,0,0,0,0,0
1,2012-04-12,1,1,0.0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,2012-04-13,2,2,0.0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
3,2012-04-16,13,20,0.0,8,1,0,0,0,0,0,0,0,0,0,0,0,0
4,2012-05-02,44,117,0.0,11,6,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054,2021-06-26,6848,33891,38.0,3758,623,23,14,20,22,153,39,158,6,12,6,47,35
1055,2021-06-27,8054,41278,52.0,4412,683,41,18,24,13,164,30,157,13,8,4,44,24
1056,2021-06-28,10927,46375,64.0,4959,754,19,201,31,38,258,32,169,12,52,7,21,17
1057,2021-06-29,11201,57036,72.0,5289,787,43,204,27,95,164,44,296,12,39,6,31,7
