In [176]:
import os
import requests
import numpy as np
import pandas as pd

from os.path import join
from scipy.stats import gaussian_kde
from random import shuffle

from bokeh.io import output_notebook, show, reset_output
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.palettes import Accent5, Category20, inferno
from bokeh.models import ColumnDataSource

pd.set_option('display.max_columns', None)

In [2]:
output_notebook()

In [3]:
def hist(data, attribute, group, name=None):
    source = ColumnDataSource(data)

    x = np.linspace(0, data[attribute].max(), data[attribute].max())
    pdf = gaussian_kde(data[attribute])

    plot = figure(
        title=name,
        x_axis_label=attribute
    )

    hist, edges = np.histogram(
        data[attribute], density=True, bins=12
    )
    plot.quad(
        top=hist,
        bottom=0,
        left=edges[:-1],
        right=edges[1:],
        alpha=0.4
    )

    plot.line(x, pdf(x))

    show(plot)
    reset_output()

In [4]:
def hbar(data, attribute, index, category, name=None):
    source = data.sort_values(by=attribute, ascending=True)
    cmap = factor_cmap(
        category,
        palette=Accent5,
        factors=sorted(source[category].unique())
    )

    plot = figure(
        y_range=source[index],
        title=name,
        x_axis_label=attribute,
        tooltips=[(attribute.replace('_', ' '), f'@{attribute}')]
    )
    plot.hbar(
        y=index, right=attribute, height=0.8, source=source,
        fill_color=cmap, line_color=cmap
    )

    show(plot)
    reset_output()

In [5]:
def vbar(data, attribute, index, category, name=None):
    source = ColumnDataSource(data)
    print(type(source))
#     cmap = factor_cmap(
#         category,
#         palette=Accent5,
#         factors=data[category].unique()
#     )

    plot = figure(
        x_range=data[index],
        title=name,
        y_axis_label=attribute
    )
    plot.vbar(
        x=index, top=attribute, width=1.0, source=source
    )

    show(plot)

In [6]:
espn = pd.read_csv('data/espn_qbr.csv')
number_of_weeks = range(1, espn['week'].max() + 1)
score_to_beat = 60

In [7]:
espn.loc[:, 'is_shit'] = espn.apply(
    lambda x: 'Decent' if x['raw_qbr'] >= score_to_beat
    else 'Dog Shit', axis=1
)

hbar(espn, 'raw_qbr', 'player', 'is_shit')

In [9]:
pfr = pd.read_csv('data/pfr_qbr.csv')
number_of_weeks = range(1, pfr['week'].max() + 1)

In [10]:
for week in number_of_weeks:
    reduced = pfr[pfr['week'] == week]
    reduced['is_shit'] = reduced.apply(
        lambda x: 'Decent' if x['passer_rating'] >= score_to_beat
        else 'Dog Shit', axis=1
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [11]:
for week in number_of_weeks:
    reduced = espn[espn['week'] == week]
    reduced['is_shit'] = reduced.apply(
        lambda x: 'Decent' if x['raw_qbr'] >= score_to_beat
        else 'Dog Shit', axis=1
    )
    distribution_name = f'ESPN Week {week} Distribution'
    hist(reduced, 'raw_qbr', 'is_shit', distribution_name)

    reduced['is_shit'][reduced['player'] == 'Andy_Dalton'] = 'Dalton'
    ratings_name = f'ESPN Week {week} Raw QBR'
    hbar(reduced, 'raw_qbr', 'player', 'is_shit', ratings_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [144]:
year = 2018

pfr_2018 = pfr[(pfr['date'] > f'{year}-06-01') & (pfr['date'] < f'{year + 1}-06-01')]
pfr_2018.shape

(646, 23)

In [165]:
player_pr = pfr_2018.sort_values(['player', 'week']).groupby('player').agg({'passer_rating': ['mean', 'max', 'min'], 'passes_attempted': ['mean', 'max', 'min', 'sum']})

top_players = player_pr.loc[player_pr['passes_attempted', 'sum'] > 100, :]
top_players.sort_values(('passer_rating', 'mean'), ascending=False, inplace=True)
top_players[:10]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,passer_rating,passer_rating,passer_rating,passes_attempted,passes_attempted,passes_attempted,passes_attempted
Unnamed: 0_level_1,mean,max,min,mean,max,min,sum
player,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Patrick Mahomes,112.111111,154.8,62.7,36.222222,53,24,652
Drew Brees,110.217647,153.2,69.1,33.352941,49,22,567
Russell Wilson,108.905882,158.3,37.9,26.705882,39,17,454
Matt Ryan,108.09375,148.1,57.4,38.0,52,26,608
Philip Rivers,102.944444,143.4,51.7,32.833333,51,20,591
Carson Wentz,102.109091,122.2,31.9,36.454545,50,28,401
Ryan Fitzpatrick,100.7125,156.2,47.2,30.75,50,15,246
Deshaun Watson,100.488235,156.0,61.6,32.588235,49,20,554
Kirk Cousins,100.23125,137.9,70.4,37.875,55,21,606
Andrew Luck,98.738889,143.8,66.8,39.277778,62,23,707


In [139]:
top_players.loc['Patrick Mahomes', ('passer_rating', 'mean')]

112.11111111111113

In [159]:
top_players.index.values

array(['Patrick Mahomes', 'Drew Brees', 'Russell Wilson', 'Matt Ryan',
       'Philip Rivers', 'Carson Wentz', 'Ryan Fitzpatrick',
       'Deshaun Watson', 'Kirk Cousins', 'Andrew Luck', 'Aaron Rodgers',
       'Baker Mayfield', 'Ben Roethlisberger', 'Jared Goff',
       'Eli Manning', 'Dak Prescott', 'Cam Newton', 'Tom Brady',
       'Ryan Tannehill', 'Derek Carr'], dtype=object)

In [169]:
max_pr = top_players.loc[:, ('passer_rating', 'mean')].max()
print(max_pr)

112.11111111111113


In [182]:
colors = Category20[20]
print(colors)

shuffle(colors)
print(colors)

['#ff7f0e', '#8c564b', '#98df8a', '#1f77b4', '#9edae5', '#ffbb78', '#d62728', '#2ca02c', '#dbdb8d', '#e377c2', '#f7b6d2', '#c5b0d5', '#aec7e8', '#17becf', '#7f7f7f', '#ff9896', '#c7c7c7', '#c49c94', '#9467bd', '#bcbd22']
['#d62728', '#2ca02c', '#dbdb8d', '#8c564b', '#e377c2', '#f7b6d2', '#c5b0d5', '#1f77b4', '#9467bd', '#7f7f7f', '#17becf', '#bcbd22', '#98df8a', '#ff9896', '#c7c7c7', '#ffbb78', '#c49c94', '#ff7f0e', '#aec7e8', '#9edae5']


In [175]:
options = []
options.extend(['solid'] * 5)
options.extend(['dashed'] * 5)
options.extend(['dotted'] * 5)
options.extend(['dotdash'] * 5)
options.extend(['dashdot'] * 5)
options

['solid',
 'solid',
 'solid',
 'solid',
 'solid',
 'dashed',
 'dashed',
 'dashed',
 'dashed',
 'dashed',
 'dotted',
 'dotted',
 'dotted',
 'dotted',
 'dotted',
 'dotdash',
 'dotdash',
 'dotdash',
 'dotdash',
 'dotdash',
 'dashdot',
 'dashdot',
 'dashdot',
 'dashdot',
 'dashdot']

In [103]:
TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
plot = figure(width=1200, height=900, y_axis_type='linear', x_axis_type='linear', tools=TOOLS)

players = set(pfr_2018.player)
player_set = zip(players, inferno(len(players)))
# player_set = zip(players, Category20)

for player, color in player_set:
    xs = pfr_2018.loc[pfr_2018['player'] == player, 'week']
    ys = pfr_2018.loc[pfr_2018['player'] == player, 'passer_rating']
    
    avg_pr = pfr_2018.loc[pfr_2018['player'] == player, 'passer_rating'].mean()
    
    if pfr_2018.loc[pfr_2018['player'] == player, 'passes_attempted'].sum() > 50:
        plot.line(xs, ys, legend=player, line_color=color, line_width=avg_pr/35.5, line_alpha=avg_pr/150)
    
plot.legend.location = 'top_left'
    
show(plot)
reset_output()

In [93]:
plot = figure(width=1200, height=900, x_axis_type='linear')

a = pfr_2018.loc[pfr_2018['player'] == 'Aaron Rodgers', 'passer_rating'].mean()
print(a)

b = pfr_2018.loc[pfr_2018['player'] == player, 'passes_attempted'].sum()
print(b)
plot.line(pfr_2018.loc[pfr_2018['player'] == 'Aaron Rodgers', 'week'], pfr_2018.loc[pfr_2018['player'] == 'Aaron Rodgers', 'passer_rating'], legend='Aaron Rodgers', line_color='#FF00FF', line_width=2, line_alpha=a/200)

show(plot)
reset_output()

97.6875
320
