# Concatenate complete tweet files into monthly, quarterly, and yearly sections

In [1]:
from searchtweets import ResultStream, gen_rule_payload, load_credentials, collect_results

# general imports
import numpy as np
import pandas as pd
import subprocess
from textblob import TextBlob
import re
import time
import datetime

# plotting and visualization
import matplotlib
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
%matplotlib inline

In [31]:
def runCmd(cmd, run=False):
    '''
    Run a shell command given as a single string (executed with shell=True).

    Because the string is handed to the shell as-is, do not build `cmd`
    from untrusted input.

    Parameters
    ----------
    cmd : str
        The command line to execute.
    run : bool, default False
        Falsy (default): wait for the command to finish and return its
        captured output; stderr is merged into stdout.
        Truthy: launch the command with Popen without waiting for it and
        without capturing anything.

    Returns
    -------
    str
        Captured output with trailing whitespace removed, decoded as UTF-8,
        when `run` is falsy and the command exits successfully.
    list
        An empty list when `run` is falsy and the command exits with a
        non-zero status (a message is printed).
    None
        When `run` is truthy.
    '''
    if not run:
        try:
            output_cmd = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
            # rstrip() drops the trailing newline; decode turns bytes into str
            output_cmd = output_cmd.rstrip().decode('UTF-8')
        except subprocess.CalledProcessError as e:
            print("Command", e.cmd, "returned with error code", e.returncode)
            print("setting output_cmd to empty array")
            output_cmd = []
        return output_cmd
    else:
        # fire-and-forget: the process is started but never waited on
        subprocess.Popen(cmd, stderr=subprocess.STDOUT, shell=True)
        return None

def S2_from_csv(filename_A, filename_B):
    """
    Load a tweets CSV and its metadata CSV and join them column-wise.

    The two paths may be given in either order. If `filename_A` contains
    'Tweets.csv' it is taken as the tweets file; otherwise `filename_B` is.
    The metadata file is read with its 'date' column parsed as dates.

    Returns a single DataFrame with the tweets columns followed by the
    metadata columns (pd.concat along axis=1).
    """
    # assumption: the tweets file name carries the 'Tweets.csv' suffix
    a_is_tweets = 'Tweets.csv' in filename_A
    tweets_path = filename_A if a_is_tweets else filename_B
    meta_path = filename_B if a_is_tweets else filename_A

    tweets_frame = pd.read_csv(tweets_path, encoding='latin')
    meta_frame = pd.read_csv(meta_path, parse_dates=['date'], encoding='latin')
    return pd.concat([tweets_frame, meta_frame], axis=1)

In [57]:
# List the CSV files in complete_tweets/ so the user can pick pairs by number.
# runCmd can hand back an empty (non-string) result when `ls` fails, so guard
# before calling .split on it.
listing = runCmd('ls complete_tweets/')
file_names = listing.split('\n') if listing else []
current_dir = ['complete_tweets/' + x for x in file_names if '.csv' in x]
print("Data files available to load")
for i, x in enumerate(current_dir):
    print(str(i), " ", x)

value_selection = input("\nPlease input tuple of file numbers separated by a space, Eg (2,3) (4,5) (9,10) ")
# Raw string avoids invalid-escape warnings; the optional whitespace lets
# "(2, 3)" match as well as "(2,3)".
value_selection_regex = re.findall(r'\(\s*(\d+)\s*,\s*(\d+)\s*\)', value_selection)

# Validate every selection before loading anything, so a typo fails fast
# with a message that names the offending pair.
selected_pairs = [(int(select_a), int(select_b)) for select_a, select_b in value_selection_regex]
for index_a, index_b in selected_pairs:
    if index_a >= len(current_dir) or index_b >= len(current_dir):
        raise IndexError(
            "file number out of range in selection ({},{}); valid numbers are 0 to {}".format(
                index_a, index_b, len(current_dir) - 1))

# Collect one frame per selected pair and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
frames = []
for index_a, index_b in selected_pairs:
    print('concatenating', current_dir[index_a], 'and', current_dir[index_b])
    frames.append(S2_from_csv(current_dir[index_a], current_dir[index_b]))

# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing was selected (same result the original loop produced).
S2 = pd.concat(frames, axis=0) if frames else pd.DataFrame()

Data files available to load
0   complete_tweets/complete_tweets_2018_10.csv
1   complete_tweets/complete_tweets_2018_11.csv
2   complete_tweets/complete_tweets_2018_12.csv
3   complete_tweets/tweets_2018-0916_2018-0930_Metadata.csv
4   complete_tweets/tweets_2018-0916_2018-0930_Tweets.csv
5   complete_tweets/tweets_2018-10-01_2018-10-05_Metadata.csv
6   complete_tweets/tweets_2018-10-01_2018-10-05_Tweets.csv
7   complete_tweets/tweets_2018-10-06_2018-10-10_Metadata.csv
8   complete_tweets/tweets_2018-10-06_2018-10-10_Tweets.csv
9   complete_tweets/tweets_2018-10-11_2018-10-15_Metadata.csv
10   complete_tweets/tweets_2018-10-11_2018-10-15_Tweets.csv
11   complete_tweets/tweets_2018-10-16_2018-10-31_Metadata.csv
12   complete_tweets/tweets_2018-10-16_2018-10-31_Tweets.csv
13   complete_tweets/tweets_2018-11-01_2018-11-05_Metadata.csv
14   complete_tweets/tweets_2018-11-01_2018-11-05_Tweets.csv
15   complete_tweets/tweets_2018-11-01_2018-11-14_Metadata.csv
16   complete_tweets/tweets_201

In [58]:
# Display the last rows of the concatenated frame built in the previous cell.
S2.tail()

Unnamed: 0,tweets,date,user_name,user_screen_name,user_followers,user_friends,user_verified,user_language,retweet_count,favorite_count
15995,"Bitcoin Falls Below $3,900 Dragging Top Crypto...",2018-12-31 11:46:20,Smartereum,Smartereum,59759,79476,False,en,0,2
15996,RT @chodza35: #Bitcoin mining by a market lead...,2018-12-31 11:46:17,Iam_ElvisChoruma,chodza35,576,2529,False,en,0,0
15997,RT @JoseAlNino: You:\n\nSupports central banki...,2018-12-31 11:46:17,liberated,randleman1,69,241,False,en,0,0
15998,I'm against gold market\n#forexsignal #forex #...,2018-12-31 11:46:15,Sonha,SnVuinh1,15,4,False,en,0,1
15999,RT @jordanbpeterson: Time magazine praises Bit...,2018-12-31 11:46:14,Paalsky [ÅTC/BSV/XRP],paalsky,823,468,False,no,0,0


In [59]:
# Ask for the base name of the output file; it is written into complete_tweets/.
output_filename = input("what would you like to save these concatenated files as? ").strip()
# The '.csv' extension is appended below, so drop one the user already typed
# to avoid ending up with 'name.csv.csv'.
if output_filename.lower().endswith('.csv'):
    output_filename = output_filename[:-len('.csv')]
# An empty name would silently create the hidden file 'complete_tweets/.csv'.
if not output_filename:
    raise ValueError("output file name must not be empty")
# index=False: do not write the DataFrame's row index as an extra column
S2.to_csv('complete_tweets/' + output_filename + '.csv', index=False)

what would you like to save these concatenated files as? complete_tweets_2018_10-12
