# Initializing and cleaning play-by-play data

## Load the data

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy
import urllib.request
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib as mpl
from scipy.interpolate import make_interp_spline, BSpline
from adjustText import adjust_text

%config InlineBackend.figure_format = 'retina'

In [None]:
# Hard-coded lookup table: team abbreviation -> hex color used when plotting
# that team.
colors = {
    'ARI': '#97233F',
    'ATL': '#A71930',
    'BAL': '#241773',
    'BUF': '#00338D',
    'CAR': '#0085CA',
    'CHI': '#C83803',
    'CIN': '#FB4F14',
    'CLE': '#311D00',
    'DAL': '#041E42',
    'DEN': '#FB4F14',
    'DET': '#0076B6',
    'GB': '#203731',
    'HOU': '#03202F',
    'IND': '#002C5F',
    'JAX': '#006778',
    'KC': '#E31837',
    'LAC': '#0080C6',
    'LAR': '#866D4B',
    'MIA': '#008E97',
    'MIN': '#4F2683',
    'NE': '#002244',
    'NO': '#D3BC8D',
    'NYG': '#0B2265',
    'NYJ': '#125740',
    'OAK': '#A5ACAF',
    'PHI': '#004C54',
    'PIT': '#FFB612',
    'SEA': '#69BE28',
    'SF': '#AA0000',
    'TB': '#D50A0A',
    'TEN': '#0C2340',
    'WAS': '#773141',
}

# Kansas City color pair: kc_p is the same value as colors['KC'];
# kc_s is a second KC color that is not part of the table above.
kc_p = colors['KC']
kc_s = '#FFB81C'

In [None]:
# Read the play-by-play CSV (path is relative to the notebook's working
# directory) into a DataFrame.
pbp_csv_path = 'clean_pbp_2019_wk19.csv'
data = pd.read_csv(pbp_csv_path)

In [None]:
# Rows where Tennessee has the ball and the rusher is listed as 'D.Henry'
is_ten_possession = data['posteam'] == 'TEN'
is_henry_rusher = data['rusher_player_name'] == 'D.Henry'
henry_games = data.loc[is_ten_possession & is_henry_rusher]

# Distinct game ids among those rows, in order of first appearance
henry_game_list = henry_games['alt_game_id'].unique()

In [None]:
#Loop through the games, identifying which run of the game that run was
# Within each game the rows are numbered 0, 1, 2, ... in the order they appear
# in henry_games; that number is stored in 'run_attempt_count'
# (0 = the first Henry carry found for that game).
per_game_runs = []
for game in henry_game_list:
    df = henry_games.loc[henry_games['alt_game_id'] == game]
    # The first reset_index moves the original row label into an 'index'
    # column; the second adds the fresh 0..n-1 position as 'level_0', which is
    # then renamed to the per-game attempt counter. Using the non-inplace
    # forms avoids mutating a slice of henry_games (SettingWithCopyWarning).
    df = df.reset_index().reset_index()
    df = df.rename(columns={'level_0': 'run_attempt_count'})
    per_game_runs.append(df)

# Concatenate once rather than growing a DataFrame inside the loop; fall back
# to an empty frame when there were no games at all.
game_runs = pd.concat(per_game_runs) if per_game_runs else pd.DataFrame()

#group by run attempt numbers
# Columns are selected with a list ([[...]]): the bare tuple form
# ['epa','yards_gained','success'] on a GroupBy was deprecated in pandas 1.0
# and raises in pandas 2.0+.
runs_by_attempt = game_runs.groupby(by='run_attempt_count')
henry = runs_by_attempt[['epa', 'yards_gained', 'success']].mean()
# Number of non-null 'epa' rows at each attempt number
henry['n_games'] = runs_by_attempt['epa'].count()
henry

In [None]:
#Plot
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever name this install provides.
talk_style = (
    'seaborn-v0_8-talk'
    if 'seaborn-v0_8-talk' in plt.style.available
    else 'seaborn-talk'
)
plt.style.use(talk_style)
plt.style.use('fivethirtyeight')

# Per-attempt-number series taken from the `henry` summary table.
# y3 is defined here but not plotted in this cell.
x=henry.index
y=henry.epa
y2=henry.yards_gained
y3=henry.success
size=henry.n_games

# Marker area scales with the n_games count for each attempt number
plt.scatter(x,y,s=size*20, c=colors['TEN'], label='# of carries at that attempt number' )
plt.title('Does Derrick Henry get "in a rhythm"?')
plt.ylabel('Expected Points Added Per Carry')
plt.xlabel('Rush Attempt Number')
txt= "Data: nflscrapR | Chart: Ethan Douglas @chiefsanalytics"
# Credit line placed just below the bottom edge of the figure (y = -0.05)
plt.figtext(0.1, -0.05, txt, wrap=True, horizontalalignment='left', fontsize=12)
# Single legend call; a bare plt.legend() before it would only be replaced
plt.legend(fontsize=12)

In [None]:
# Yards gained per carry by attempt number; x, y2 and size come from the
# previous plotting cell. Marker area scales with `size`.
plt.scatter(x,y2,s=size*20, c='#4b92db', label='# of carries at that attempt number' )
plt.title('Does Derrick Henry get "in a rhythm"?')
plt.ylabel('Yards Per Carry')
plt.xlabel('Rush Attempt Number')
txt= "Data: nflscrapR | Chart: Ethan Douglas @chiefsanalytics"
# Credit line placed just below the bottom edge of the figure (y = -0.05)
plt.figtext(0.1, -0.05, txt, wrap=True, horizontalalignment='left', fontsize=12)
# Single legend call; a bare plt.legend() before it would only be replaced
plt.legend(fontsize=12)