# Twitter Miner Notebook with File Handling

### Setting environment variables

In [None]:
# `os` is imported here because this cell reads os.environ; the only other
# `import os` in the notebook lives in a later cell.
import os

from dotenv import load_dotenv

# Presumably loads key/value pairs from a local .env file into the process
# environment (python-dotenv behaviour; confirm against its documentation).
load_dotenv()

# OAuth keys. os.environ.get returns None for any variable that is not set.
CONSUMER_KEY = os.environ.get('CONSUMER_KEY')
CONSUMER_SECRET = os.environ.get('CONSUMER_SECRET')
ACCESS_TOKEN = os.environ.get('ACCESS_TOKEN')
ACCESS_TOKEN_SECRET = os.environ.get('ACCESS_TOKEN_SECRET')


### Defining Keywords

In [None]:
# Terms handed to the stream filter through its `track` argument.
keywords = ["bieber"]

### Defining a timer function for the thread

In [None]:
import threading
import sys
import os
import time

def count_time(threadName, delay, limit):
    """Print a timestamped line every `delay` seconds, `limit` times.

    Each tick sleeps for `delay` seconds and then prints `threadName`
    followed by the current local time as formatted by time.ctime. After
    the last tick a "finished" line naming `threadName` is printed.

    Args:
        threadName: Label printed on every line.
        delay: Seconds to sleep before each tick.
        limit: Ticks to emit; the loop runs while the tick count is below it.
    """
    ticks = 0
    while True:
        if not (ticks < limit):
            break
        time.sleep(delay)
        ticks += 1
        stamp = time.ctime(time.time())
        print("%s: %s" % (threadName, stamp))
    print('finished ', threadName)
    # A hard process exit (os._exit(0)) used to follow here; it is left
    # disabled, so the function simply returns.

### Creating the class for streaming

In [None]:
class TwitterListener(StreamListener):
    """Stream listener that flattens incoming tweets and stops after a limit.

    Every payload passed to `on_data` is parsed as JSON and reduced to a flat
    record of message and author fields. A counter tracks how many tweets
    were processed; once it reaches `limit`, `on_data` returns False.
    """

    # Layout of the `created_at` strings parsed below. The literal "+0000"
    # means the timestamps are expressed in UTC.
    _CREATED_AT_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"

    def __init__(self):
        super().__init__()
        # Number of tweets processed so far.
        self.counter = 0
        # on_data returns False once `counter` reaches this value.
        self.limit = 100

    @classmethod
    def _to_epoch(cls, created_at):
        """Convert a UTC `created_at` string to seconds since the Unix epoch.

        calendar.timegm is used instead of time.mktime because mktime
        interprets the parsed struct as *local* time, which shifts the result
        by the machine's UTC offset. The value is returned as a float to keep
        the type time.mktime produced.
        """
        import calendar  # local import so the class does not rely on cell order

        parsed = time.strptime(created_at, cls._CREATED_AT_FORMAT)
        return float(calendar.timegm(parsed))

    @classmethod
    def _build_record(cls, tweet):
        """Return the flat record for a decoded tweet.

        Raises:
            KeyError: If `tweet` lacks one of the expected fields.
        """
        user = tweet["user"]
        return {
            "message_created_at": cls._to_epoch(tweet["created_at"]),
            "message_id": tweet["id_str"],
            "message_text": tweet["text"],
            "author_user_id": user["id_str"],
            "author_created_at": cls._to_epoch(user["created_at"]),
            "author_user_name": user["name"],
            "author_screen_name": user["screen_name"],
        }

    def on_data(self, data):
        """Handle one raw stream payload.

        Args:
            data: JSON text of a single stream message.

        Returns:
            True while fewer than `limit` tweets have been processed, False
            once the limit is reached. Payloads that do not carry the tweet
            fields are skipped (not counted) and True is returned.
        """
        import json  # local import so the class does not rely on cell order

        tweet = json.loads(data)

        try:
            obj = self._build_record(tweet)
        except (KeyError, TypeError):
            # Presumably a non-tweet message (the streaming API is documented
            # to interleave notices with tweets); ignore it instead of
            # letting the exception escape from the callback.
            return True

        # insert line
        # `obj` is the record to persist; no storage call is wired in here.

        # Tweet limitation counter
        self.counter += 1
        return self.counter < self.limit

    def on_error(self, status):
        """Print the error status and return False."""
        print('error code: ', status)
        return False

### OAuth Implementation

In [None]:
from tweepy.streaming import StreamListener
from tweepy.auth import OAuthHandler
from tweepy import Stream

# Build the OAuth handler from the consumer credentials, then attach the
# access token pair; `auth` is what the Stream below is constructed with.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

### Initializing Stream

In [None]:
# A fresh listener (its tweet counter starts at 0) wired into a stream that
# uses the `auth` handler created earlier.
twitter_listener = TwitterListener()
twitter_stream = Stream(auth, listener = twitter_listener)

In [None]:
print('Start stream')

# NOTE(review): is_async=True presumably makes filter() return immediately
# and run the stream in the background (confirm against the tweepy docs);
# the sleep below then time-boxes the capture to 30 seconds.
twitter_stream.filter(track = keywords, is_async = True)

try:
    time.sleep(30)
finally:
    # Always disconnect, even if the wait is interrupted (for example by a
    # kernel interrupt); otherwise the stream would keep running.
    twitter_stream.disconnect()

print('Close stream')

### Saving from MongoDB to a .tsv file

In [None]:
import json
import csv
from bson.json_util import dumps

# NOTE(review): `json_data` and `json_values` are not defined in the visible
# cells. The header is taken from the keys of json_data[0] while the rows come
# from json_values; confirm both refer to the same records.
#
# newline='' is what the csv module requires for files it writes (it emits its
# own line terminators), and an explicit UTF-8 encoding keeps non-ASCII tweet
# text from depending on the platform default.
with open('output.tsv', 'w', newline='', encoding='utf-8') as output_file:
    dw = csv.DictWriter(
        output_file,
        sorted( json_data[0].keys() ),  # columns in alphabetical order
        delimiter='\t')

    dw.writeheader()
    dw.writerows(json_values)

In [None]:
import pandas as pd

from io import StringIO

# Sample record as JSON text. The literal is a raw string so that the JSON
# escape sequences (\n) reach the JSON parser unchanged; in a normal string
# Python would turn them into real newlines inside the JSON string value,
# which is not valid JSON.
single_obj = r"""[{
"message_created_at" : 1557088516,
"message_id" : "1125091606351183872",
"message_text" : "RT @felipeneto: Justin Bieber postou um texto enaltecendo Chris Brown e dizendo q as pessoas devem parar de julgá-lo por “um erro”.\n\nEspanc…",
"author_user_id" : "490564806",
"author_created_at" : 1329075905,
"author_user_name" : "Ni",
"author_screen_name" : "nilove__"
}]"""

# read_json is given a file-like object; passing the JSON text directly is
# deprecated in recent pandas releases.
df = pd.read_json(StringIO(single_obj))
df.head()


In [None]:
from io import StringIO

# Sample record as JSON text. The literal is a raw string so that the JSON
# escape sequences (\n and \") reach the JSON parser unchanged; in a normal
# string Python would turn \" into a bare quote, which ends the JSON string
# early and makes the document unparseable.
another_obj = r"""[{
"message_created_at" : 1557088519,
"message_id" : "1125091618921619456",
"message_text" : "RT @alph_salazar: Justin Bieber sobre Chris Brown:\n\"Dejaron de ver su talento por un error que cometió”.\n\nSu \"error\" fue desfigurarle la ca…",
"author_user_id" : "1090341546061381632",
"author_created_at" : 1548799856,
"author_user_name" : "@fiore",
"author_screen_name" : "fiore24327802"
}]"""

# read_json is given a file-like object; passing the JSON text directly is
# deprecated in recent pandas releases.
df = pd.read_json(StringIO(another_obj))
df.head()

