In [35]:
import numpy as np
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

pd.set_option('display.max_columns', None)

# Set up the pandas dataframe
gt_play_data = pd.DataFrame()

# Query the API for the CPJ data. We only need to do this once, so it's commented out.
# for year in range(2005,2018):
#     response = requests.get("https://api.collegefootballdata.com/plays?seasonType=both&year={0}&offense=Georgia Tech".format(year))
#     df = pd.io.json.json_normalize(response.json())
#     gt_play_data = pd.concat([gt_play_data,df])

# Data is exported to a CSV, which we will be pulling from now
# gt_play_data.to_csv('./data/plays/GeorgiaTech.csv')

data = pd.read_csv('./data/plays/GeorgiaTech.csv')[['home','away','offense_score','defense_score','period','clock.minutes','clock.seconds','yards_to_goal','down','distance','play_type']]

# Is GT playing at home?
data['is_home'] = np.where(data['home'] == 'Georgia Tech', 1, 0)

# Convert this to a single column
data['seconds_remaining'] = (data['clock.minutes'] * 60) + data['clock.seconds'] + ((data['clock.minutes'] + (4 - data['period']) * 15) * 60)

# I want both the result, and the playcall
pass_types = ['Pass Reception', 'Pass Interception Return', 'Pass Incompletion', 'Sack', 'Passing Touchdown', 'Interception Return Touchdown']
rush_types = ['Rush', 'Rushing Touchdown']
punt_types = ['Punt', 'Punt Return Touchdown', 'Blocked Punt', 'Blocked Punt Touchdown']
fg_types = ['Field Goal Good', 'Field Goal Missed', 'Blocked Field Goal']

def getPlayCall(x):
    if x in pass_types:
            return 'pass'
    elif x in rush_types:
        return 'rush'
    elif x in punt_types:
        return 'punt'
    elif x in fg_types:
        return 'fg'
    else:
        return None
        
data['play_call'] = data['play_type'].apply(getPlayCall)
data.dropna(subset=['play_call'], inplace=True)

plays = data[['offense_score', 'defense_score', 'period', 'yards_to_goal', 'down', 'distance', 'is_home', 'seconds_remaining', 'play_call']].query('period <= 4')

third_down_plays = plays[plays.down.eq(3)]
third_down_plays.head()

Unnamed: 0,offense_score,defense_score,period,yards_to_goal,down,distance,is_home,seconds_remaining,play_call
5,0,0,1,57,3,2,0,4383,rush
43,10,7,2,68,3,1,0,2559,rush
50,10,7,2,16,3,3,0,2199,rush
64,17,14,3,57,3,10,0,1860,rush
76,17,14,4,37,3,1,0,1720,rush


In [36]:
fig = px.scatter(
    plays,
    x='seconds_remaining',
    y='yards_to_goal',
    color='play_call',
    trendline='ols'
)
fig.show()