In [40]:
import os
import requests
import numpy as np
import pandas as pd

from os.path import join
from scipy.stats import gaussian_kde

from bokeh.io import output_notebook, show, reset_output
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.palettes import Accent5, viridis
from bokeh.models import ColumnDataSource

pd.set_option('display.max_columns', None)

In [2]:
output_notebook()

In [3]:
def hist(data, attribute, group, name=None):
    source = ColumnDataSource(data)

    x = np.linspace(0, data[attribute].max(), data[attribute].max())
    pdf = gaussian_kde(data[attribute])

    plot = figure(
        title=name,
        x_axis_label=attribute
    )

    hist, edges = np.histogram(
        data[attribute], density=True, bins=12
    )
    plot.quad(
        top=hist,
        bottom=0,
        left=edges[:-1],
        right=edges[1:],
        alpha=0.4
    )

    plot.line(x, pdf(x))

    show(plot)
    reset_output()

In [4]:
def hbar(data, attribute, index, category, name=None):
    source = data.sort_values(by=attribute, ascending=True)
    cmap = factor_cmap(
        category,
        palette=Accent5,
        factors=sorted(source[category].unique())
    )

    plot = figure(
        y_range=source[index],
        title=name,
        x_axis_label=attribute,
        tooltips=[(attribute.replace('_', ' '), f'@{attribute}')]
    )
    plot.hbar(
        y=index, right=attribute, height=0.8, source=source,
        fill_color=cmap, line_color=cmap
    )

    show(plot)
    reset_output()

In [5]:
def vbar(data, attribute, index, category, name=None):
    source = ColumnDataSource(data)
    print(type(source))
#     cmap = factor_cmap(
#         category,
#         palette=Accent5,
#         factors=data[category].unique()
#     )

    plot = figure(
        x_range=data[index],
        title=name,
        y_axis_label=attribute
    )
    plot.vbar(
        x=index, top=attribute, width=1.0, source=source
    )

    show(plot)

In [6]:
espn = pd.read_csv('data/espn_qbr.csv')
number_of_weeks = range(1, espn['week'].max() + 1)
score_to_beat = 60

In [7]:
espn.loc[:, 'is_shit'] = espn.apply(
    lambda x: 'Decent' if x['raw_qbr'] >= score_to_beat
    else 'Dog Shit', axis=1
)

hbar(espn, 'raw_qbr', 'player', 'is_shit')

In [9]:
pfr = pd.read_csv('data/pfr_qbr.csv')
number_of_weeks = range(1, pfr['week'].max() + 1)

In [10]:
for week in number_of_weeks:
    reduced = pfr[pfr['week'] == week]
    reduced['is_shit'] = reduced.apply(
        lambda x: 'Decent' if x['passer_rating'] >= score_to_beat
        else 'Dog Shit', axis=1
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [11]:
for week in number_of_weeks:
    reduced = espn[espn['week'] == week]
    reduced['is_shit'] = reduced.apply(
        lambda x: 'Decent' if x['raw_qbr'] >= score_to_beat
        else 'Dog Shit', axis=1
    )
    distribution_name = f'ESPN Week {week} Distribution'
    hist(reduced, 'raw_qbr', 'is_shit', distribution_name)

    reduced['is_shit'][reduced['player'] == 'Andy_Dalton'] = 'Dalton'
    ratings_name = f'ESPN Week {week} Raw QBR'
    hbar(reduced, 'raw_qbr', 'player', 'is_shit', ratings_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [51]:
year = 2018

pfr_2018 = pfr[(pfr['date'] > f'{year}-06-01') & (pfr['date'] < f'{year + 1}-06-01')]
pfr_2018.shape

(646, 23)

In [38]:
pfr_2018.sort_values(by=['player', 'date'])

Unnamed: 0,player,pos,age,date,league,team,away,opponent,result,game_number,week,day,passes_completed,passes_attempted,completion_percentage,yards_from_passing,passing_touchdowns,interceptions,passer_rating,sacks_taken,yards_lost_to_sacks,yards_per_attempt,adjusted_yards_per_attempt
307,A.J. McCarron,QB,28.049,2018-11-01,NFL,OAK,@,SFO,L 3-34,8,9,Thu,1,3,33.33,8,0,0,42.4,1.0,7.0,2.67,2.67
1,Aaron Rodgers,QB,34.281,2018-09-09,NFL,GNB,,CHI,W 24-23,1,1,Sun,20,30,66.67,286,3,0,130.7,2.0,19.0,9.53,11.53
59,Aaron Rodgers,QB,34.288,2018-09-16,NFL,GNB,,MIN,T 29-29,2,2,Sun,30,42,71.43,281,1,0,97.4,4.0,28.0,6.69,7.17
89,Aaron Rodgers,QB,34.295,2018-09-23,NFL,GNB,@,WAS,L 17-31,3,3,Sun,27,44,61.36,265,2,0,93.5,4.0,25.0,6.02,6.93
133,Aaron Rodgers,QB,34.302,2018-09-30,NFL,GNB,,BUF,W 22-0,4,4,Sun,22,40,55.00,298,1,1,76.9,2.0,16.0,7.45,6.83
151,Aaron Rodgers,QB,34.309,2018-10-07,NFL,GNB,@,DET,L 23-31,5,5,Sun,32,52,61.54,442,3,0,108.0,4.0,19.0,8.50,9.65
190,Aaron Rodgers,QB,34.317,2018-10-15,NFL,GNB,,SFO,W 33-30,6,6,Mon,25,46,54.35,425,2,0,100.4,3.0,20.0,9.24,10.11
260,Aaron Rodgers,QB,34.330,2018-10-28,NFL,GNB,@,LAR,L 27-29,7,8,Sun,18,30,60.00,286,1,0,102.9,3.0,33.0,9.53,10.20
299,Aaron Rodgers,QB,34.337,2018-11-04,NFL,GNB,@,NWE,L 17-31,8,9,Sun,24,43,55.81,259,2,0,89.2,1.0,9.0,6.02,6.95
323,Aaron Rodgers,QB,34.344,2018-11-11,NFL,GNB,,MIA,W 31-12,9,10,Sun,19,28,67.86,199,2,0,112.1,2.0,17.0,7.11,8.54


In [53]:
TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
plot = figure(y_axis_type='linear', x_axis_type='datetime', tools=TOOLS)

players = set(pfr_2018.player)
player_set = zip(players, viridis(len(players)))

for player, color in player_set:
    xs = pfr_2018.loc[pfr_2018['player'] == player, 'date']
    ys = pfr_2018.loc[pfr_2018['player'] == player, 'passer_rating']
    
    plot.line(xs, ys, legend=player, line_color=color, line_width=1)
    
show(plot)


In [43]:
viridis(10)

['#440154',
 '#472777',
 '#3E4989',
 '#30678D',
 '#25828E',
 '#1E9C89',
 '#35B778',
 '#6BCD59',
 '#B2DD2C',
 '#FDE724']