In [40]:
import pandas as pd
import numpy as np
from time import sleep
from datetime import date, timedelta
import twitter
import pickle

from scipy.stats import randint as sp_randint

from config import REDSHIFT_CONFIG
from src.features import *
from src.utils import *
from src.validation import *
from src.exploration import *
import src.realtime as rt

from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn import svm

pd.set_option('display.max_columns', 500)

In [38]:
# The four inputs
pitcher = 434378
pitcher_name = "Justin Verlander"
gid = "gid_2015_08_15_detmlb_houmlb_1"
tweeting = False

# Establish a connection to the redshift database
conn = create_rs_conn(config=REDSHIFT_CONFIG)
cur = conn.cursor()

# Connect to Twitter
with open('twitter_auth.pkl', 'rb') as pkl_file:
    twitter_auth_dict = pickle.load(pkl_file)

api = twitter.Api(consumer_key = twitter_auth_dict['consumer_key'],
                      consumer_secret = twitter_auth_dict['consumer_secret'],
                      access_token_key = twitter_auth_dict['access_token_key'],
                      access_token_secret = twitter_auth_dict['access_token_secret'])

In [37]:
#Get the historical pitcher info from redshift and store it
raw_query = """SELECT * FROM all_pitch_data \
WHERE game_id IN \
(SELECT DISTINCT game_id FROM all_pitch_data \
WHERE pitcher = %d)
""" % pitcher

sample_header, sample_rows = run_rs_query(cur, raw_query)
pitch_df = pd.DataFrame(sample_rows)
pitch_df.columns = sample_header    

In [41]:
# Process GID information for realtime scraping
gid_components = gid.split("_")
game_date = "-".join(gid_components[1:4])
game_id = "/".join(gid_components[1:4]) + "/" + "-".join(gid_components[4:7])

teams = [x[0:3].upper() for x in gid_components[4:6]]
start_string = 'Tweeting predictions for ' + pitcher_name + '\'s pitches during today\'s game between ' + str(teams[0]) + ' and ' + str(teams[1]) + '.'

print start_string
if tweeting == True:
    api.PostUpdate(start_string)

# Set up object to make multiple predictions
pm = rt.Prediction_Machine(pitcher, pitch_df, game_date)

# Every three seconds, take a look at the server
# If there are new pitches, then load the whole game's information into the object
# Make predictions for all pitches, and report pitches that haven't been reported before

last_id = 0
preds_made = 0

for i in range(0,10000):
    print "Gettin' data!"

    # Peek at the server
    data = rt.AtBats(gid, game_id).save()

    new_data = data[data['id'].astype(int) > last_id]
    
    next_pitch = new_data.head(1)

    # No pitches since the last peek
    if next_pitch.empty:
        print "No new pitches yet"
        
    # There are new pitches
    else:
        new_id = int(next_pitch.id)

        if new_id > last_id:
            
            last_id = new_id
            
            # Make predictions on all pitches
            new_preds, new_targets = pm.get_new_pred(data)
                        
            if len(new_preds) > preds_made:
                
                # For each unreported pitch, report it
                for i in range(preds_made, len(new_preds)):
                    outcome = "right" if new_preds[i] == new_targets.values[i] else "wrong"
                    result_string = "Pitch " + str(i + 1) +": We'd have guessed " + \
                                    new_preds[i] + " and the answer was " + \
                                    new_targets.values[i] + " so we were " + outcome
                    
                    if tweeting == True:
                        api.PostUpdate(result_string)
                    print result_string
                    sleep(1)
                
                preds_made = len(new_preds)

    sleep(3) # No DOS attacks on MLB

Tweeting predictions for Justin Verlander's pitches during today's game between DET and HOU.
Gettin' data!
Pitch 1: We'd have guessed Fastball and the answer was Not_Fastball so we were wrong
Pitch 2: We'd have guessed Not_Fastball and the answer was Fastball so we were wrong
Pitch 3: We'd have guessed Fastball and the answer was Fastball so we were right
Pitch 4: We'd have guessed Fastball and the answer was Fastball so we were right
Pitch 5: We'd have guessed Fastball and the answer was Fastball so we were right
Pitch 6: We'd have guessed Fastball and the answer was Fastball so we were right
Pitch 7: We'd have guessed Fastball and the answer was Not_Fastball so we were wrong
Pitch 8: We'd have guessed Fastball and the answer was Not_Fastball so we were wrong
Pitch 9: We'd have guessed Not_Fastball and the answer was Not_Fastball so we were right
Pitch 10: We'd have guessed Not_Fastball and the answer was Fastball so we were wrong
Pitch 11: We'd have guessed Fastball and the answer wa

KeyboardInterrupt: 