In [1]:
import os
import sys
import time
from datetime import date
from datetime import datetime
import time
import json
import platform

import stoneburner
#//*** Custom Functions:
#//*** mr_clean_text(input_series)
#//*** tokenize_series(input_series)
#//*** remove_stop_words(input_series)

# //*** Imports and Load Data
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#//*** Use the whole window in the IPYNB editor
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
subreddits = ["wallstreetbets", "stocks", "wallstreetbetsOGs", "spacs", "investing", "pennystocks", "stockmarket", "options", "robinhoodpennystocks", "wallstreetbetsnew", "smallstreetbets"]
filepath = ".\\data\\"
filename_suffix = "_comments.csv.zip"
#//*** Maximize columns and rows displayed by pandas
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [16]:
#//*** Input_filename: Comments to Process.
#//*** This will eventually be a list of files
#input_filename  =".\\data\\wallstreetbets_comments.csv.zip"

#//*** Path to processed files
output_filename = ".\\data\\processed_reddit_basic_v3_tfidf.csv.zip"

#//*** Path to the stock ticker JSON file
stock_ticker_filename = ".\\data\\stock_tickers.json"

#//*** Convert Path to Mac formatting if needed
if platform.system() == 'Darwin':
    output_filename = output_filename.replace("\\","/")
    stock_ticker_filename = stock_ticker_filename.replace("\\","/")

#//*** Load the Stock Tickers
f = open(stock_ticker_filename, "r")
symbols = json.loads(f.read())['symbols']
f.close()

process_tfidf = True

print(symbols)
#//*** Convert symbols to lower case
symbols = [x.lower() for x in symbols]



raw_df = pd.DataFrame()

start_time = time.time()

#//*** Load each Subreddit for Aggregation
for subreddit in subreddits:
    #//*** Filepath + subreddit name + csv.zip
    input_filename = filepath+subreddit+filename_suffix

    #//*** Convert Path to Mac formatting if needed
    if platform.system() == 'Darwin':
        input_filename = input_filename.replace("\\","/")
   
    print(f"Reading Compressed CSV: {input_filename}")
    
    #//*** Read Each DataFrame and combine with raw_df
    raw_df = pd.concat([raw_df,pd.read_csv(input_filename,compression='zip' )])

#//*** Reset the index, since multiple indexes have been combined
raw_df.reset_index(drop=True, inplace=True)

print(f"Files Loaded: {round(time.time()-start_time,2)}s")
print(f"Total Records: {len(raw_df)}")
    

['clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
Reading Compressed CSV: .\data\wallstreetbets_comments.csv.zip
Reading Compressed CSV: .\data\stocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsOGs_comments.csv.zip
Reading Compressed CSV: .\data\spacs_comments.csv.zip
Reading Compressed CSV: .\data\investing_comments.csv.zip
Reading Compressed CSV: .\data\pennystocks_comments.csv.zip
Reading Compressed CSV: .\data\stockmarket_comments.csv.zip
Reading Compressed CSV: .\data\options_comments.csv.zip
Reading Compressed CSV: .\data\robinhoodpennystocks_comments.csv.zip
Reading Compressed CSV: .\data\wallstreetbetsnew_comments.csv.zip
Reading Compressed CSV: .\data\smallstreetbets_comments.csv.zip
Files Loaded: 67.01s
Total Records: 4432533


In [17]:
#raw_df[raw_df['body'].str.len() == 0]

raw_df['body'] = raw_df['body'].astype('str')

#//*** Convert UTC to date (not datetime)
#//** Second pass goes from 12-21 to 4-19
try:
    raw_df['created_utc'] = raw_df['created_utc'].apply(lambda x: date.fromtimestamp(x))
except:
    print()

#//*************************************************************************
#//*** Clean the Body Text, Tokenize and Remove Stop Words.
#//*************************************************************************
raw_df['clean'] = stoneburner.remove_stop_words(stoneburner.tokenize_series(stoneburner.mr_clean_text(raw_df['body'],{"remove_empty":False})))

if process_tfidf == True:
    #//*** Detokenize the clean column as tfidf
    raw_df['tfidf'] = raw_df['clean'].apply(lambda x: ' '.join(x))

raw_df

remove_empty False
Text Cleaning Time: 61.89023971557617
Tokenize Time: 974.1647193431854
Stop Words Time: 151.5292809009552


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit..."
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]"
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra..."
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,..."
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah..."
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha..."
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]"
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀]


In [None]:
#raw_df['clean'].apply(x lambda for y in x: y.join(x) + " " )



In [4]:
#//*************************************************************
#//*** Load the Encode_comments Function
#//*** Counts the Stock mentions in each Post.
#//*** Adds the stock as a column to the Dataframe
#//*************************************************************

def encode_comments(input_df):
    import time
    
    print("Begin dataframe ticker symbol coding")
    start_time = time.time()
       
    
    
    #//*** Count each Stock mention add it to a dictionary of lists. Each list is filled with 0s. The Specific row index is updated with the relevant count. 
    #//*** This Generates a word count matrix
    stock_dict = {}

    #//*** Keep Track of Rows
    index = 0

    for row in input_df.iterrows():

        #//*** Get the cleaned body text
        body = row[1]['clean']

        #//*** For Each Stock Symbol
        for stock in symbols:
            
            #//*** Check if Stock exists in Body
            if stock in body:

                #//*** Reset the stock counter
                count = 0

                #//*** Loop through body and county ticker mentions
                for word in body:
                    #//*** If word found increment count
                    if stock == word:
                        count += 1

                #//*** Check if symbol is in stock_dict
                if stock not in stock_dict.keys():    

                    #//*** If not, then build it
                    stock_dict[stock] = np.zeros(len(raw_df))

                #//*** Update the stock value at the 
                stock_dict[stock][index] = count

        #//*** Increment Index to keep with row index
        index +=1   

    #//*** Loop through the dictionary key and lists
    for col,values in stock_dict.items():

        #//*** Add each key (which is a stock ticker symbol) as a column using the list of ticker counts for Data
        raw_df[col] = values.astype('int')

    print(f"Encoding Time: {round(time.time()-start_time,2)}s")
    
    return input_df

In [56]:
#//*** Encodes the dataframe with a count of Ticker symbols in each comment.
#//*** Called from update_subreddit(). This is broken out since we will likely need to adjust encoding parameters
def aggregate_comments(input_df,is_tfidf,output_counter=0):
    
    from sklearn.feature_extraction.text import TfidfVectorizer
    
    to_sum_cols = ['score','total_awards_received']
    to_count_col = ['author_fullname','link_id']
    
    
    df_cols = ['date','total_posts','tfidf']
    
    if is_tfidf == False:
        df_cols = ['date','total_posts']
    
    rename_cols = {
        'total_awards_received' : 'awards',
        'author_fullname' : 'authors',
        'link_id' : 'threads'
    }
    
    #//*** Build the OUtput Dataframe Column names from the Columns to sum, the columns to count, and the stock ticker columns
    #//*** Loop through each list
    for cols in [ to_sum_cols, to_count_col, symbols ]:
        
        #//*** Get individual column name from each column list
        for col in cols:
            print
            #//*** Rename the column if in rename_col
            #//*** Add col to df_cols....The out_df column names
            if col in rename_cols.keys():
                df_cols.append(rename_cols[col])
            else:
                df_cols.append(col)
                
    print(df_cols)
    
    out_df = pd.DataFrame(columns = df_cols)
    
    
   
    #//*** Group 
    for group in input_df.groupby('created_utc'):
        
        output_counter += 1
        
        #//*** Start Timing the process
        start_time = time.time()

        loop_df = group[1].copy()
        
        loop_list = []

        
        #//*** Build the aggregated row for the Dataframe.
        #//*** 5 Parts: 
        #//******** 1.) Date & Total Posts
        #//******** 2.) tfidf - Bag of Words for the Day
        #//******** 2.) Columns to sum
        #//******** 3.) Columns to count
        #//******** 4.) Stock Ticker columns to sum
        
        #//********************************************
        #//******** 1.) Date & Total Posts
        #//********************************************
        #//*** Add the Date
        loop_list.append(group[0])
        
        #//*** Add Total number of posts
        loop_list.append(len(loop_df))
        
        #//********************************************
        #//******** 2.) Build tfidf
        #//********************************************
        

        #//*** Perform tfidf if True
        if is_tfidf == True:
        
            #//*** Initialize the Vectorizer
            tfidf = TfidfVectorizer()

            #//*** Build the feature matrix, which is a weighted sparse matrix
            loop_list.append(tfidf.fit_transform(input_df['tfidf']))
            #loop_list.append("TEST")
        #//********************************************
        #//******** 2.) Columns to sum
        #//********************************************
        for col in to_sum_cols:
            loop_list.append(loop_df[col].sum())

            
        #//********************************************
        #//******** 3.) Columns to count
        #//********************************************
        for col in to_count_col:
            loop_list.append(len(loop_df[col].unique()))
    
        
        #//********************************************
        #//******** 4.) Stock Ticker columns to sum
        #//********************************************
        for col in symbols:
            loop_list.append(loop_df[col].sum())

        if is_tfidf == False:
            #print(len(out_df.columns),len(loop_list))
            #print(out_df.columns)
            out_df.loc[len(out_df.index)] = loop_list 
        else:
            out_df = pd.DataFrame(columns = df_cols)
            print(out_df)
            print(df_cols)
            
            out_df.loc[len(out_df.index)] = loop_list
            
            print(out_df)
            
            outfile_name = f".\\data\\ag\\{output_counter}_ag_csv.zip"
            print(f"{group[0]} - Writing File: {outfile_name}")
            out_df.to_csv(outfile_name,compression="zip",index=False) 

        
        print(f"{group[0]} {len(loop_df)} Comments in {round(time.time() - start_time,2)}s")
    print("Aggregation Complete!")
    return out_df

#//*** Aggregate and Process Comments
#ag_df = aggregate_comments(df,True)
#ag_df
#for col in df.columns[16:]:
#    print(df[df[col] > 0 ].iloc[0]['created_utc'],col)

In [20]:
#//*** Encode Comments
df = encode_comments(raw_df)

df

Begin dataframe ticker symbol coding
Encoding Time: 721.56s


Unnamed: 0,score,total_awards_received,created_utc,is_submitter,author_fullname,body,id,link_id,parent_id,stickied,permalink,retrieved_on,subreddit,subreddit_id,hash,clean,amd,x,clne,mu,bb,amc,nio,gme,spce,wkhs,sofi,clov
0,2,0.0,2012-04-11,False,t2_59t5b,This is a fantastic idea! I'll toss mine up in...,c4b0pvu,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,6827bc9e2385d87ecf7e53c54baab15186a20b47d0dde0...,"[is, fantastic, idea, ill, toss, mine, in, bit...",0,0,0,0,0,0,0,0,0,0,0,0
1,2,0.0,2012-04-11,False,t2_3zydq,INTC is on 4/17,c4b1fpf,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,4fed03e0fa9c5ba63e56efab305bcc5e553b7b7a025a10...,"[intc, on, 417]",0,0,0,0,0,0,0,0,0,0,0,0
2,1,0.0,2012-04-11,False,t2_5u44p,"straddle, call, straddle, put, put, put, strad...",c4b1rmm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,94668e4fae99d099b9e2e26dead327ff6e4a27f4461807...,"[straddle, call, straddle, put, put, put, stra...",0,0,0,0,0,0,0,0,0,0,0,0
3,6,0.0,2012-04-11,False,t2_54yfv,"GMCR falls, GOOG falls *slightly*, GRPN will g...",c4b2egm,t3_s4jw1,t3_s4jw1,False,0,1.428700e+09,wallstreetbets,t5_2th52,696323be0ebe2d321ff84012dfb14fe4cdd711b1091328...,"[gmcr, falls, goog, falls, slightly, grpn, go,...",0,0,0,0,0,0,0,0,0,0,0,0
4,1,0.0,2012-04-11,False,t2_69krh,CROX 4/26\n\nBZH 5/1\n\ni'm expecting both to ...,c4b389t,t3_s4jw1,t3_s4jw1,False,0,1.428701e+09,wallstreetbets,t5_2th52,87f6c0c26cb7d161801dabd02515a01c059528cb9b5c86...,"[crox, 426bzh, 51im, expecting, to, beat, esti...",0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4432528,1,0.0,2021-06-30,True,t2_8kmwyads,"Jun 30, 9.30pm EST.\n\n \nI just saw this pos...",h3mqxmd,t3_ob3sgh,t3_ob3sgh,False,/r/smallstreetbets/comments/ob3sgh/crnt_cerago...,1.625356e+09,smallstreetbets,t5_29phiw,885ed1dadf72a4c0c0f9b068d3d76c1df65bbca1e938aa...,"[jun, 30, 9, 30pm, est, just, saw, posted, yah...",0,0,0,0,0,0,0,0,0,0,0,0
4432529,1,0.0,2021-06-30,False,t2_ah5dhj6a,Good run through. The mill is running and we’r...,h3mv22t,t3_nccfsi,t3_nccfsi,False,/r/smallstreetbets/comments/nccfsi/possibly_th...,1.625358e+09,smallstreetbets,t5_29phiw,b11e15159ffd02cc8ec526090e2718dbcd97dd2323efa6...,"[good, run, mill, running, were, to, roll, tha...",0,0,0,0,0,0,0,0,0,0,0,0
4432530,0,0.0,2021-06-30,False,t2_6ypqa,Back to normal not there yet,h3n2dw6,t3_oaj3cj,t1_h3n2a0q,False,/r/smallstreetbets/comments/oaj3cj/poww_blew_a...,1.625361e+09,smallstreetbets,t5_29phiw,fb485aa07854235476bc643e08520cb03c3fe6eb81b198...,"[back, normal, there, yet]",0,0,0,0,0,0,0,0,0,0,0,0
4432531,1,0.0,2021-06-30,False,t2_9ypzayi0,💎👐🚀🚀🚀🚀,h3n3265,t3_nwlrf9,t3_nwlrf9,False,/r/smallstreetbets/comments/nwlrf9/sabr_dd_19_...,1.625362e+09,smallstreetbets,t5_29phiw,d6a8d682761db330ccbcf19e97135365f2c39569bd2a80...,[💎👐🚀🚀🚀🚀],0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
df['tfidf'] = df['clean'].apply(lambda x: ' '.join(x))

In [30]:
#//*** Aggregate and Process Comments
ag_df = aggregate_comments(df,True)
ag_df

['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']


ValueError: Length mismatch: Expected axis has 1 elements, new values have 19 elements

In [None]:
#output_filename = ".\\data\\processed_reddit_basic_v2.csv.zip"

In [12]:
#//*** Write File to disk
ag_df.to_csv(output_filename,compression="zip",index=False) 

In [41]:
#//*** Process Stopped due to memory error.
pickup_filename = "./data/processed_reddit_basic_v3_tfidf.csv.zip"

#//*** Write File to disk
pickup_df = pd.read_csv(pickup_filename,compression="zip") 
pickup_df

Unnamed: 0,date,total_posts,tfidf,score,awards,authors,threads,clov,sofi,wkhs,amd,gme,x,amc,clne,nio,mu,spce,bb
0,2012-04-11,17,"(0, 449455)\t0.19297662695869383\n (0, 2429...",28,0.0,12,1,0,0,0,0,0,0,0,0,0,0,0,0
1,2012-04-12,1,"(0, 449455)\t0.19297662695869383\n (0, 2429...",1,0.0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,2012-04-13,2,"(0, 449455)\t0.19297662695869383\n (0, 2429...",2,0.0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
3,2012-04-16,13,"(0, 449455)\t0.19297662695869383\n (0, 2429...",20,0.0,8,1,0,0,0,0,0,0,0,0,0,0,0,0
4,2012-05-02,44,"(0, 449455)\t0.19297662695869383\n (0, 2429...",117,0.0,11,6,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,2020-07-29,8229,"(0, 449455)\t0.19297662695869383\n (0, 2429...",15684,9.0,4327,1215,0,0,4,131,0,28,5,0,9,4,8,1
746,2020-07-30,8726,"(0, 449455)\t0.19297662695869383\n (0, 2429...",16956,10.0,4691,1238,0,0,4,72,0,25,3,0,14,2,4,1
747,2020-07-31,7676,"(0, 449455)\t0.19297662695869383\n (0, 2429...",14308,10.0,4234,1084,0,0,3,42,0,33,0,0,9,1,11,2
748,2020-08-01,6411,"(0, 449455)\t0.19297662695869383\n (0, 2429...",7760,13.0,3403,887,0,1,1,37,0,28,1,0,8,1,7,2


In [57]:
output_index = len(pickup_df)
target_date = datetime.strptime(pickup_df['date'].max(), "%Y-%m-%d").date()
print(output_index)
print(target_date)
#//*** Trim up the dataframe
#df = df[df['created_utc'] > target_date]

#//*** Check to see of we lost a day around 2020-08-03
ag_df = aggregate_comments(df,True,output_index)
ag_df

750
2020-08-02
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-08-03        7902    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  13787     3.0    4251    1067    0    2   25  39   0  21   0    0  32  0   

  spce bb  
0    8  1  
2020-08-03 - Writing File: .\data\ag\751_ag_csv.zip
2020-08-03 7902 Comments in 80.42s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, thre

2020-08-13 8664 Comments in 75.45s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-08-14        7985    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  12347     5.0    4109    1057    0    0    0  55   0  30   8    0   9  4   

  spce bb  
0    5  2  
2020-08-14 - Writing File: .\data\ag\762_ag_csv.zip
2020-08-14 7985 Comments in 75.3s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', '

2020-08-24 8408 Comments in 76.46s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-08-25        7567    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  12420    16.0    3816    1093    0    0   13  14   3  24   2    0  75  1   

  spce bb  
0   10  1  
2020-08-25 - Writing File: .\data\ag\773_ag_csv.zip
2020-08-25 7567 Comments in 75.54s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 

2020-09-04 7729 Comments in 75.37s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-09-05        5433    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  5852     9.0    2836     748    0    0   14  52   9  15   6    0  20  1   

  spce bb  
0    6  1  
2020-09-05 - Writing File: .\data\ag\784_ag_csv.zip
2020-09-05 5433 Comments in 75.23s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 't

2020-09-15 8583 Comments in 75.09s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-09-16        8569    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  40619    19.0    4364     919    0    0    4  26  16  32   0    0   5  0   

  spce bb  
0    2  2  
2020-09-16 - Writing File: .\data\ag\795_ag_csv.zip
2020-09-16 8569 Comments in 75.01s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 

2020-09-26 6794 Comments in 75.28s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-09-27        5879    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  7978     9.0    2992     641    0    0   10  35  14  15   0    0  15  0   

  spce bb  
0    6  2  
2020-09-27 - Writing File: .\data\ag\806_ag_csv.zip
2020-09-27 5879 Comments in 74.95s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 't

2020-10-07 7693 Comments in 75.32s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-10-08        7525    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs  amd gme   x amc clne nio mu  \
0  10652     8.0    3895     916    0    0   17  111  92  25   0    0  23  1   

  spce bb  
0    8  1  
2020-10-08 - Writing File: .\data\ag\817_ag_csv.zip
2020-10-08 7525 Comments in 74.76s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors'

2020-10-18 5571 Comments in 76.66s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-10-19        1676    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme  x amc clne nio mu  \
0  1893     0.0    1144     313    0    0    0   9   6  6   6    0  30  1   

  spce bb  
0    3  0  
2020-10-19 - Writing File: .\data\ag\828_ag_csv.zip
2020-10-19 1676 Comments in 77.09s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'thr

2020-10-29 6325 Comments in 75.53s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-10-30        5545    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  7341     3.0    2963     643    0    2    3  47   9  17   1    0  89  2   

  spce bb  
0    4  0  
2020-10-30 - Writing File: .\data\ag\839_ag_csv.zip
2020-10-30 5545 Comments in 75.45s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 't

2020-11-10 6506 Comments in 75.61s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-11-11        6142    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne  nio mu  \
0  8419     0.0    3379     750    0    1    7  27   7  36   5    0  195  0   

  spce bb  
0   22  2  
2020-11-11 - Writing File: .\data\ag\850_ag_csv.zip
2020-11-11 6142 Comments in 74.71s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 

2020-11-21 5374 Comments in 74.99s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-11-22        5418    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  17191     5.0    3006     663    0    0    7  11   7  15   1    0  98  1   

  spce bb  
0    5  2  
2020-11-22 - Writing File: .\data\ag\861_ag_csv.zip
2020-11-22 5418 Comments in 74.61s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 

2020-12-02 8995 Comments in 75.06s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-12-03        5490    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  5809     1.0    3141     913    0    1    3  16  66  17   7    0  79  1   

  spce  bb  
0    8  10  
2020-12-03 - Writing File: .\data\ag\872_ag_csv.zip
2020-12-03 5490 Comments in 76.81s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 

2020-12-13 8025 Comments in 74.97s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-12-14        9854    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  15668     5.0    5129    1159    0    8    2  20  37  34  14    1  50  0   

  spce  bb  
0   26  12  
2020-12-14 - Writing File: .\data\ag\883_ag_csv.zip
2020-12-14 9854 Comments in 74.98s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors'

2020-12-24 10416 Comments in 75.13s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2020-12-25        8342    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

  score  awards authors threads clov sofi wkhs amd gme   x amc clne nio mu  \
0  8342     0.0    4341     945    1    2    3  24  59  39   6    0  39  0   

  spce bb  
0    4  1  
2020-12-25 - Writing File: .\data\ag\894_ag_csv.zip
2020-12-25 8342 Comments in 74.99s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', '

2021-01-04 10344 Comments in 75.15s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-01-05       10912    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd gme   x amc clne  nio mu  \
0  22970    23.0    5884    1300    0    4    1  14  29  31   4    1  130  4   

  spce bb  
0    4  1  
2021-01-05 - Writing File: .\data\ag\905_ag_csv.zip
2021-01-05 10912 Comments in 74.79s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'author

2021-01-15 10766 Comments in 75.25s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-01-16        9274    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x amc clne nio mu  \
0  52362    77.0    5279    1256    1   15    3  32  321  23   7    6  61  1   

  spce  bb  
0   10  99  
2021-01-16 - Writing File: .\data\ag\916_ag_csv.zip
2021-01-16 9274 Comments in 75.27s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'autho

2021-01-28 29810 Comments in 75.04s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-01-29       25106    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

    score  awards authors threads clov sofi wkhs amd   gme   x   amc clne nio  \
0  110102   119.0   14065    2766    1   77    1  50  2230  43  2283    1  19   

  mu spce   bb  
0  0   20  569  
2021-01-29 - Writing File: .\data\ag\927_ag_csv.zip
2021-01-29 25106 Comments in 75.63s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awar

2021-02-11 41051 Comments in 75.35s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-02-12       37050    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd   gme    x  amc clne nio  \
0  53993   524.0   15713    2967    5   33    4  79  1510  111  705    3  81   

  mu spce   bb  
0  8   35  198  
2021-02-12 - Writing File: .\data\ag\938_ag_csv.zip
2021-02-12 37050 Comments in 75.44s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards

2021-02-22 27685 Comments in 75.11s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-02-23       30186    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

    score  awards authors threads clov sofi wkhs amd  gme   x  amc clne  nio  \
0  155374   219.0   12860    2008    6   59   84  86  697  60  835    0  105   

  mu spce  bb  
0  3   14  61  
2021-02-23 - Writing File: .\data\ag\949_ag_csv.zip
2021-02-23 30186 Comments in 77.96s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards',

2021-03-10 26898 Comments in 76.24s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-03-11       25793    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  48894    71.0   11398    1851    4   22    8  24  869  92  310    0  93  4   

  spce  bb  
0   12  38  
2021-03-11 - Writing File: .\data\ag\960_ag_csv.zip
2021-03-11 25793 Comments in 75.31s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'au

2021-03-30 18131 Comments in 75.29s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-03-31       17858    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio  \
0  33894   115.0    7787    1286    3    8    8  26  350  52  123    4  30   

   mu spce  bb  
0  11    7  70  
2021-03-31 - Writing File: .\data\ag\971_ag_csv.zip
2021-03-31 17858 Comments in 75.85s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', '

2021-04-13 2241 Comments in 74.76s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-04-14       16514    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  43740   136.0    6771    1078    1   11    2  30  289  44  107    1  15  1   

  spce  bb  
0    8  19  
2021-04-14 - Writing File: .\data\ag\982_ag_csv.zip
2021-04-14 16514 Comments in 75.26s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'aut

2021-04-24 10176 Comments in 76.5s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-04-25        9744    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x amc clne nio mu  \
0  24233    50.0    4578     817    5    2    1  37  140  31  80    1  29  3   

  spce  bb  
0    7  12  
2021-04-25 - Writing File: .\data\ag\993_ag_csv.zip
2021-04-25 9744 Comments in 75.63s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'author

2021-05-05 12778 Comments in 75.04s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-05-06       13310    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  64608   120.0    6179    1032   11   24    2  33  207  35  128    2   9  1   

  spce  bb  
0    6  18  
2021-05-06 - Writing File: .\data\ag\1004_ag_csv.zip
2021-05-06 13310 Comments in 74.94s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'a

2021-05-16 10848 Comments in 74.78s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-05-17       13188    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  73295   120.0    6419     947   27   19    3  19  221  38  251    7  29  2   

  spce  bb  
0    5  17  
2021-05-17 - Writing File: .\data\ag\1015_ag_csv.zip
2021-05-17 13188 Comments in 75.56s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'a

2021-05-27 12927 Comments in 74.98s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-05-28       11368    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  44107   120.0    5534     999    3   23    1  65  368  31  732    0  11  0   

  spce  bb  
0   23  96  
2021-05-28 - Writing File: .\data\ag\1026_ag_csv.zip
2021-05-28 11368 Comments in 75.56s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'a

2021-06-07 12347 Comments in 74.99s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-06-08       12737    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  63689    86.0    6252    1129  352   20   35  30  594  41  616  105  18  1   

  spce   bb  
0    8  140  
2021-06-08 - Writing File: .\data\ag\1037_ag_csv.zip
2021-06-08 12737 Comments in 74.86s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 

2021-06-18 11534 Comments in 75.16s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-06-19        8416    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  41635    60.0    4538     864   36   18   27  33  154  36  204   28   8  3   

  spce  bb  
0    1  56  
2021-06-19 - Writing File: .\data\ag\1048_ag_csv.zip
2021-06-19 8416 Comments in 74.83s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'au

2021-06-29 11201 Comments in 74.93s
Empty DataFrame
Columns: [date, total_posts, tfidf, score, awards, authors, threads, clov, sofi, wkhs, amd, gme, x, amc, clne, nio, mu, spce, bb]
Index: []
['date', 'total_posts', 'tfidf', 'score', 'awards', 'authors', 'threads', 'clov', 'sofi', 'wkhs', 'amd', 'gme', 'x', 'amc', 'clne', 'nio', 'mu', 'spce', 'bb']
         date total_posts                                              tfidf  \
0  2021-06-30        3998    (0, 418755)\t0.9155295562090381\n  (0, 66200...   

   score  awards authors threads clov sofi wkhs amd  gme   x  amc clne nio mu  \
0  19233    30.0    2207     406   17   26    9   3  109  10  155    1   7  1   

  spce bb  
0    2  6  
2021-06-30 - Writing File: .\data\ag\1059_ag_csv.zip
2021-06-30 3998 Comments in 74.52s
Aggregation Complete!


Unnamed: 0,date,total_posts,tfidf,score,awards,authors,threads,clov,sofi,wkhs,amd,gme,x,amc,clne,nio,mu,spce,bb
0,2021-06-30,3998,"(0, 418755)\t0.9155295562090381\n (0, 66200...",19233,30.0,2207,406,17,26,9,3,109,10,155,1,7,1,2,6
