### Twitter Dev
1. Connect to redis data store
2. Build dataframe from reading redids datastore
3. Get ticker list
4. create attribute filter, setup stream and log
5. Setup log schedule
6. Run stream

In [1]:
#Imports
import redis
import urllib.request
import json
import numpy as np
import pandas as pd
import schedule
import time
import boto3
from datetime import date, datetime
import traceback
import os

#Twitter requirements
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API

#Log Requirements
import mysql.connector
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy import MetaData
from sqlalchemy import Table

kinesis = boto3.client('kinesis', region_name='us-east-1')


#Connect to Redis-DataStore
REDIS = redis.Redis(host='data_store')

In [2]:
#Get Environment Variables
# ACCESS_TOKEN = %env ACCESS_TOKEN
# ACCESS_TOKEN_SECRET = %env ACCESS_TOKEN_SECRET
# CONSUMER_KEY = %env CONSUMER_KEY
# CONSUMER_SECRET = %env CONSUMER_SECRET

ACCESS_TOKEN = os.environ['ACCESS_TOKEN'] 
ACCESS_TOKEN_SECRET = os.environ['ACCESS_TOKEN_SECRET']
CONSUMER_KEY = os.environ['CONSUMER_KEY']
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']


#### Build Company DataFrame

In [3]:
companies = json.loads(REDIS.get('companies').decode())
company_df = pd.DataFrame.from_dict(companies, orient='index')
company_df.index.name = 'Ticker'
company_df.columns=['Company']
#Add code to add ticker symbol
company_df['tweet_ticker']=company_df.index.map(lambda x: '$'+x)

company_df.head()

Unnamed: 0_level_0,Company,tweet_ticker
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,Apple,$AAPL
FB,Facebook,$FB
GOOG,Google Alphabet C,$GOOG
GOOGL,Google Alphabet A,$GOOGL
AMZN,Amazon,$AMZN


In [4]:
tickers = company_df['tweet_ticker'].tolist()

In [5]:
#Want to create a simple list, but how do I handle the users sub?
attributes = ['created_at',
             'id_str',
             'text',
              'quote_count',
              'reply_count',
              'retweet_count',
              'favorite_count',
              'retweeted',
              'lang',
              ['user','name'],
              ['user','followers_count'],
              ['user','statuses_count'],
              ['user','screen_name']
               ]
def filter_attr(data):
    output = {}
    #Choose filter attributes
    for element in attributes:
        if isinstance(element, str):
            output[element]=data[element]
        #Handle Nested Attributes
        else:
            string = str(element[0])+'_'+str(element[1])
            output[string]=data[element[0]][element[1]]
  
    #Need to also add the company name to output dictionary.
    #Add all companies tweet applies to in list
    attached_company = []
    
    for company in tickers:
        if data['text'].find(company) > -1:
            attached_company.append(company[1:])
            
    
    output['Company']=attached_company             
    
    return output

#This is a basic listener that just prints received tweets to stdout.
class TweetListener(StreamListener):
    
    def on_data(self, data):
        try:
            if int(REDIS.get('Data_On')) == 1:
                datajson = json.loads(data)
                filtered = filter_attr(datajson)
                #Check to see if a valid tweet
                if filtered['Company'] and filtered['lang']=='en':

                    print(filtered)
                    #Add counter to count stocks. 
                    REDIS.incr('Twitter_Stock_Count')
                    #---------- Insert to Kinesis Stream --------------
                    #response = kinesis.put_record(StreamName="Twitter_Stream", Data=json.dumps(filtered), PartitionKey="partitionkey")
                    return True
            
        except Exception as e:
            print(e)

    def on_error(self, status):
        error_string = 'The error code is: ' + repr(status)
        print(error_string)
        #Continue even if there is an error
        #Need to publish the error to the redies error handler in manager
        #Send Start event
        send_event('Twitter', 'Error', error_string)
        #Need to think about if I want to continue running
        return True


#Setup Log

global past_tweet_count
past_tweet_count = 0



#Serialize datetime.
def json_serial(obj):
    """JSON serializer for objects not serializable by default json code"""

    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    raise TypeError ("Type %s not serializable" % type(obj))
    


#Code to log to the event queue
def send_event(source, kind, message):
    event_time = datetime.now()
    event_time = json_serial(event_time)
    event = {
            "event_time": event_time,
            "source": source,
            "kind" : kind,
            "message" : message
            }
    payload = json.dumps(event)
    REDIS.publish('event_queue', payload)

def send_log(source, current_count, count_diff):
    log_time = datetime.now()
    log_time = json_serial(log_time)
    log = {
            "log_time": log_time,
            "source": source,
            "current_count" : current_count,
            "count_diff" : count_diff
            }
    payload = json.dumps(log)
    REDIS.publish('log_queue', payload)


#Send the log data to the Redis channel.
def log():
    #Need to log: Time, Source, Current Count, Count Diff
    #now = datetime.datetime.now()
    current_tweet_count = int(REDIS.get('Twitter_Stock_Count'))    
     
    global past_tweet_count
    tweet_count_diff = current_tweet_count - past_tweet_count
    past_tweet_count = current_tweet_count
    
    #Send the log event
    send_log(source='Twitter',current_count = current_tweet_count, count_diff=tweet_count_diff)    
    print('Logged Data')


In [6]:
REDIS.set('Twitter_Stock_Count', 0)

#This handles Twitter authetication and the connection to Twitter Streaming API
tweetlist = TweetListener(api=API(wait_on_rate_limit=True,wait_on_rate_limit_notify=True))
auth = OAuthHandler(CONSUMER_KEY,CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN,ACCESS_TOKEN_SECRET)
stream = Stream(auth, tweetlist)

#Filters by the ticker names
print('Filtering: ' + str(tickers))
stream.filter(track=tickers, async=True)

#Send Start event
send_event('Twitter', 'Activity', 'Data Source Started')

#Setup Schedule
schedule.clear()
schedule.every(30).seconds.do(log)

#Execute
while True:
    schedule.run_pending()
    #Cancel Schedule if an error occurs. and stop this loop. 
    time.sleep(1)

Filtering: ['$AAPL', '$FB', '$GOOG', '$GOOGL', '$AMZN', '$MSFT', '$BAC', '$BA', '$NFLX', '$JPM', '$TSLA', '$CSCO', '$XOM', '$WFC', '$V', '$JNJ', '$PFE', '$INTC', '$HD', '$C', '$UTX', '$CMCSA', '$TWTR', '$GE', '$UNH', '$PCLN', '$BABA', '$NVDA']
{'created_at': 'Mon Feb 19 19:43:05 +0000 2018', 'id_str': '965673111315189760', 'text': 'RT @MattBurns25: Weekly Stock Market Review: Top Tech Stock Picks For This Week \nhttps://t.co/M0blYSIPEm\n$AAPL $AMD $NVDA $TSLA $WMT $FB $M…', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'MattBurns', 'user_followers_count': 2806, 'user_statuses_count': 4091, 'user_screen_name': 'MattBurns25', 'Company': ['AAPL', 'FB', 'TSLA', 'NVDA']}
{'created_at': 'Mon Feb 19 19:43:20 +0000 2018', 'id_str': '965673172589842433', 'text': 'RT @BertiePelham: Me, nervous: “For the remainder of Q1, and into Q2 if need be, I would humbly ask you, dear $TWTR Board Members, if you—a…', 'quote_count':

{'created_at': 'Mon Feb 19 19:45:23 +0000 2018', 'id_str': '965673689705598977', 'text': 'New Post! 🚨BOOM🚨\n👇\nAfter massive growth streak, Amazon ambitions seem boundless? $AMZN https://t.co/Oxv12frelu Filed In #AFP.', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'G. Hedged', 'user_followers_count': 4807, 'user_statuses_count': 7883, 'user_screen_name': 'GloballyHedged', 'Company': ['AMZN']}
Logged Data
{'created_at': 'Mon Feb 19 19:45:40 +0000 2018', 'id_str': '965673758433464322', 'text': 'Torn between news sites? Read financial news on just 1 page $MAT $PAY $FB https://t.co/V46bM7veJM https://t.co/QIZtPqAYEw', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'FinBuzz', 'user_followers_count': 1161, 'user_statuses_count': 217651, 'user_screen_name': 'PortfolioBuzz', 'Company': ['FB']}


KeyboardInterrupt: 

{'created_at': 'Mon Feb 19 19:46:02 +0000 2018', 'id_str': '965673852729724928', 'text': 'Intel Corporation (ITLC34) Jumps 1.06% on Feb 17 https://t.co/nrprIzc8Wf $INTC #stocks #stockmarket', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'StockMarketScoop', 'user_followers_count': 1553, 'user_statuses_count': 892, 'user_screen_name': 'StockScoops', 'Company': ['INTC']}
{'created_at': 'Mon Feb 19 19:46:18 +0000 2018', 'id_str': '965673919649783808', 'text': '$FB : a455e383-6655-47fd-b470-3d49702aa863', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'Test Account1', 'user_followers_count': 225, 'user_statuses_count': 9692974, 'user_screen_name': 'test5f1798', 'Company': ['FB']}
{'created_at': 'Mon Feb 19 19:47:14 +0000 2018', 'id_str': '965674155835142144', 'text': '@TopTickrs Chat with traders on $TWTR is great, no need other platform

{'created_at': 'Mon Feb 19 19:56:02 +0000 2018', 'id_str': '965676369802874883', 'text': 'LEVERAGE VOLATILITY! EXPLORE STRATEGIES IN OUR GOAL ORIENTED CHAT ROOM AT https://t.co/35LuLR0k4f $APRN $TSLA $BTSC', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'TopTickers', 'user_followers_count': 3821, 'user_statuses_count': 93155, 'user_screen_name': 'TopTickrs', 'Company': ['TSLA']}
{'created_at': 'Mon Feb 19 19:56:04 +0000 2018', 'id_str': '965676375880470529', 'text': 'Facebook broke German privacy laws https://t.co/vhLi7WMtnw $FB https://t.co/IWRUjczsbz', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'Jörn Menninger', 'user_followers_count': 1414, 'user_statuses_count': 21390, 'user_screen_name': 'JoeMenninger', 'Company': ['FB']}
{'created_at': 'Mon Feb 19 19:56:32 +0000 2018', 'id_str': '965676493501292545', 'text': 'Screen through

{'created_at': 'Mon Feb 19 20:01:34 +0000 2018', 'id_str': '965677762030784512', 'text': '$FB is run by a man who has dictatorial control of his public company due to super voting shares *for life*, has se… https://t.co/zI8On4dxBT', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'The Rational Walk', 'user_followers_count': 7129, 'user_statuses_count': 3238, 'user_screen_name': 'rationalwalk', 'Company': ['FB']}
{'created_at': 'Mon Feb 19 20:01:37 +0000 2018', 'id_str': '965677775175782405', 'text': '@FoxBusiness → https://t.co/CuP9rxeekQ Story is $JNJ steals #diabetes tech from $DECN small Co. then tries 2 BKR w/… https://t.co/T7VF0sBwXj', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'Corie Ruption', 'user_followers_count': 9, 'user_statuses_count': 1111, 'user_screen_name': 'usconsumerscare', 'Company': ['JNJ']}
{'created_at': 'Mon

{'created_at': 'Mon Feb 19 20:07:40 +0000 2018', 'id_str': '965679296416178176', 'text': 'RT @JTCampanella: $TWTR Twitter Shorts are like... https://t.co/1WJpE8E0G5', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'Hken', 'user_followers_count': 9, 'user_statuses_count': 16, 'user_screen_name': 'HkenBSGO', 'Company': ['TWTR']}
{'created_at': 'Mon Feb 19 20:08:02 +0000 2018', 'id_str': '965679390150529025', 'text': 'FIND YOUR EDGE! TALK STOCKS IN OUR REAL TIME CHAT ROOM AT https://t.co/35LuLR0k4f $NVDA $COST $MARA', 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'retweeted': False, 'lang': 'en', 'user_name': 'TopTickers', 'user_followers_count': 3821, 'user_statuses_count': 93169, 'user_screen_name': 'TopTickrs', 'Company': ['C', 'NVDA']}
{'created_at': 'Mon Feb 19 20:08:03 +0000 2018', 'id_str': '965679394617548800', 'text': 'RT @HealthInnovSAS: $NVDA The Doctor Is In: AI-Powered 