In [152]:
from google.cloud import bigquery
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from collections import defaultdict
from scipy.spatial import distance

In [2]:
# Shared BigQuery client, bound to the "spurs-sp2018" project; every
# bqc.query(...) call in this notebook goes through this object.
bqc = bigquery.Client(project="spurs-sp2018")

# Graphing helpers

In [249]:
def draw_basketball_court(fig, line_color="black", line_width=2):
    """Overlay a full basketball court outline on a Plotly-style figure.

    The court is centred on mid-court and spans x in [-47, 47] and
    y in [-25, 25] (presumably feet, which matches a 94 x 50 court -- confirm
    against the tracking data units). Shapes are added in a fixed order:
    rectangles, straight lines, circles, then the three-point arcs.

    Parameters
    ----------
    fig
        Any object exposing ``add_shape(**kwargs)``; the figure is modified
        in place.
    line_color : str, optional
        Stroke colour used for every court marking. Defaults to ``"black"``.
    line_width : int or float, optional
        Stroke width used for every court marking. Defaults to ``2``.

    Returns
    -------
    None
    """
    min_x, max_x = -47, 47
    min_y, max_y = -25, 25
    mid_x, mid_y = 0, 0

    def add_boxes(shape_type, boxes):
        # Each box is (x0, y0, x1, y1); a fresh line dict is passed per shape
        # so no two shapes share a mutable style object.
        for x0, y0, x1, y1 in boxes:
            fig.add_shape(
                type=shape_type,
                x0=x0,
                y0=y0,
                x1=x1,
                y1=y1,
                line=dict(color=line_color, width=line_width),
            )

    key_width = 19
    key_half_height = 6
    corner_three_length = 14
    corner_three_inset = 3
    add_boxes("rect", [
        # sidelines and baselines
        (min_x, min_y, max_x, max_y),
        # key, left
        (min_x, -key_half_height, min_x + key_width, key_half_height),
        # key, right
        (max_x - key_width, -key_half_height, max_x, key_half_height),
        # The four corner three-point segments are zero-height rectangles
        # (y0 == y1), so they render as straight lines.
        # corner three, top left
        (min_x, max_y - corner_three_inset, min_x + corner_three_length, max_y - corner_three_inset),
        # corner three, bottom left
        (min_x, min_y + corner_three_inset, min_x + corner_three_length, min_y + corner_three_inset),
        # corner three, top right
        (max_x - corner_three_length, max_y - corner_three_inset, max_x, max_y - corner_three_inset),
        # corner three, bottom right
        (max_x - corner_three_length, min_y + corner_three_inset, max_x, min_y + corner_three_inset),
    ])

    backboard_x_dist_from_baseline = 4
    backboard_half_width = 3
    left_backboard_x = min_x + backboard_x_dist_from_baseline
    right_backboard_x = max_x - backboard_x_dist_from_baseline
    add_boxes("line", [
        # mid court line
        (mid_x, min_y, mid_x, max_y),
        # backboard, left
        (left_backboard_x, -backboard_half_width, left_backboard_x, backboard_half_width),
        # backboard, right
        (right_backboard_x, -backboard_half_width, right_backboard_x, backboard_half_width),
    ])

    hoop_radius = 0.375
    hoop_x_dist_from_baseline = 4.75
    center_circle_radius = 6

    def circle_box(center_x, center_y, radius):
        # Bounding box (x0, y0, x1, y1) of a circle.
        return (center_x - radius, center_y - radius, center_x + radius, center_y + radius)

    right_hoop_x = max_x - hoop_x_dist_from_baseline
    left_hoop_x = min_x + hoop_x_dist_from_baseline
    add_boxes("circle", [
        # center circle
        circle_box(mid_x, mid_y, center_circle_radius),
        # hoop, right (measured in from the max_x baseline); hoops are
        # centred on y = 0 literally, independent of mid_y
        (right_hoop_x - hoop_radius, -hoop_radius, right_hoop_x + hoop_radius, hoop_radius),
        # hoop, left (measured in from the min_x baseline)
        (left_hoop_x - hoop_radius, -hoop_radius, left_hoop_x + hoop_radius, hoop_radius),
        # free throw circle, left (reuses the center-circle radius)
        circle_box(min_x + key_width, mid_y, center_circle_radius),
        # free throw circle, right
        circle_box(max_x - key_width, mid_y, center_circle_radius),
    ])

    # Three-point arcs, drawn as quadratic Bezier curves:
    # (start_x, start_y, control_x, control_y, end_x, end_y).
    arcs = [
        # left
        (-33, -22, -5, 0, -33, 22),
        # right
        (33, -22, 5, 0, 33, 22),
    ]
    for sx, sy, cx, cy, ex, ey in arcs:
        fig.add_shape(
            type="path",
            path=f"M {sx},{sy} Q {cx},{cy} {ex},{ey}",
            line=dict(color=line_color, width=line_width),
        )



In [313]:
# One row per (player, team, game) membership; the player's display name is
# built as "firstName lastName".
players_sql = """--sql
    Select concat(pl.firstName, ' ', pl.lastName) as player_name, team_id, game_id
    from ss.player_team_games
    join ss.players pl on pl.id = player_team_games.player_id
"""
players_df = bqc.query(players_sql).to_dataframe()

# Lookup: (player_name, game_id) -> team_id.
# Built column-wise instead of with DataFrame.iterrows(), which creates a
# Series per row. If a key repeats, the last row wins, as before.
player_game_team_dict = dict(
    zip(
        zip(players_df['player_name'], players_df['game_id']),
        players_df['team_id'],
    )
)

In [315]:
import pickle

In [318]:
# Cache the (player_name, game_id) -> team_id lookup on disk.
# The path is relative to the kernel's working directory, and open() does not
# create the data/ directory, so it must already exist.
with open('data/player_game_team.pkl', 'wb') as f:
    pickle.dump(player_game_team_dict, f)

In [319]:
# Reload the cached lookup, replacing whatever player_game_team_dict currently holds.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load this file if it was written by this notebook or another trusted source.
with open('data/player_game_team.pkl', 'rb') as f:
    player_game_team_dict = pickle.load(f)

In [320]:
# Preview only the first few entries; displaying the whole lookup floods the
# cell output and bloats the saved notebook.
dict(list(player_game_team_dict.items())[:10])

{('Miles Plumlee',
  '1c93796f-5888-4702-9122-a218e063cae7'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Kevin Huerter',
  '0d2daece-1171-462d-959d-265bc068d095'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Kyle Korver',
  '14c2c6dc-89f0-11e6-828b-a45e60e298d3'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('DeMarre Carroll',
  '14c2c6dc-89f0-11e6-828b-a45e60e298d3'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ("DeAndre' Bembry",
  '0d2daece-1171-462d-959d-265bc068d095'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Tyler Dorsey',
  'e3a82108-ebc4-48ec-a925-84c828889589'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Kris Humphries',
  '902f24ab-8b51-11e6-bc8a-a45e60e298d3'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Dewayne Dedmon',
  '76e0fbcc-4fba-400d-b493-d6b134cfeed9'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Taurean Prince',
  'e3a82108-ebc4-48ec-a925-84c828889589'): 'feb3e09c-89ef-11e6-9f68-a45e60e298d3',
 ('Jaylen Morris',
  'e3a82108-ebc4-48ec-a925-84c828889589'): 'feb3e09c-89ef-1

In [311]:
def display_motion(graph_data, i=0):
    """Render an animated scatter plot of one entry of ``graph_data`` on a court.

    ``graph_data[i]`` must be a mapping with the keys ``'dataframe'``,
    ``'player_name'`` and ``'outcomes'``. The dataframe is read for the columns
    ``x``, ``y``, ``game_clock``, ``player_name``, ``home_team_id`` and
    ``game_id``; the home team and game id are taken from its first row.

    Marker colours:
      * blue  -- the module-level ``player_game_team_dict`` maps the player to
                 the home team for this game
      * red   -- the lookup maps the player to any other team
      * black -- the player has no entry in the lookup for this game
      * orange -- the ``'ball'`` pseudo-player
      * green -- the highlighted ``player_name`` (overrides the team colour)

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    entry = graph_data[i]
    tracking = entry['dataframe']
    player_name = entry['player_name']
    outcomes = entry['outcomes']

    home_team = tracking['home_team_id'].iloc[0]
    game_id = tracking['game_id'].iloc[0]

    def team_color(name):
        # Colour by team membership for this game; unknown players are black.
        key = (name, game_id)
        if key not in player_game_team_dict:
            return 'black'
        return 'blue' if player_game_team_dict[key] == home_team else 'red'

    color_map = {
        name: team_color(name)
        for name in set(tracking['player_name'].unique()) - {'ball'}
    }
    # Special cases go in last so they win over any team colour.
    color_map['ball'] = "orange"
    color_map[player_name] = "green"

    fig = px.scatter(
        tracking,
        x="x",
        y="y",
        animation_frame="game_clock",
        animation_group="player_name",
        hover_name="player_name",
        range_x=[-50, 50],
        range_y=[-25, 25],
        color='player_name',
        color_discrete_map=color_map,
    )

    # The first button of the first update menu carries the animation options:
    # frame duration 25 and transition duration 10.
    play_args = fig.layout.updatemenus[0].buttons[0].args[1]
    play_args["frame"]["duration"] = 25
    play_args["transition"]["duration"] = 10

    # Figure size and title.
    fig.update_layout(
        width=800,
        height=600,
        title=f"{player_name} Drive, outcomes: {outcomes}",
    )

    draw_basketball_court(fig)
    # Lock the x axis scale to the y axis so the court keeps a 1:1 aspect ratio.
    fig.update_xaxes(scaleanchor="y", scaleratio=1)

    fig.show()

# Get drives for a player

In [111]:
def _sql_string_literal(value):
    """Escape ``value`` for use inside a single-quoted SQL string literal.

    Backslashes are escaped first, then single quotes, using backslash escape
    sequences.
    """
    return str(value).replace("\\", "\\\\").replace("'", "\\'")


def get_drives_sql(season, player_name):
    """Build the SQL that selects every drive by one ball handler in a season.

    Parameters
    ----------
    season
        Season value; it is compared as a quoted string (``season = '...'``).
    player_name : str
        Full name in ``"firstName lastName"`` form, matched against
        ``concat(pl.firstName, ' ', pl.lastName)``.

    Returns
    -------
    str
        The query text. Both values are escaped before being embedded, so a
        name containing an apostrophe (e.g. ``DeAndre' Bembry``) no longer
        terminates the string literal early.
    """
    # NOTE(review): query parameters would be more robust still, but this
    # function's contract is to return plain SQL text.
    season_literal = _sql_string_literal(season)
    player_literal = _sql_string_literal(player_name)
    drives_sql = f"""--sql
        SELECT concat(pl.firstName, ' ', pl.lastName) as player_name, bhr_outcomes, end_type, start_game_clock, end_game_clock, period, x as start_x, y as start_y, end_x, end_y, game_id, season
        from ss.drives
        join ss.players pl on pl.id = drives.ball_handler_id
        where season = '{season_literal}'
        and concat(pl.firstName, ' ', pl.lastName) = '{player_literal}'
    """
    return drives_sql

In [112]:
def get_drives_dataframe(season, player_name):
    """Run the drives query for ``player_name`` in ``season`` and return a DataFrame.

    The SQL comes from ``get_drives_sql`` and is executed through the
    module-level BigQuery client ``bqc``.
    """
    query_job = bqc.query(get_drives_sql(season, player_name))
    return query_job.to_dataframe()

# Get location data given a game, a period, and a start/end game clock

In [306]:
def get_locations_sql(game_id, start, end, period):
    """Return the SQL that pulls tracking rows for one window of one game.

    The query keeps rows where the game clock is running, the period equals
    ``period`` and ``game_clock`` lies between ``end`` and ``start``
    (``end`` is the lower bound, ``start`` the upper bound). Seasons are
    restricted to 2018 and 2019. ``game_id`` is embedded as a quoted string;
    ``start``, ``end`` and ``period`` are embedded as-is.
    """
    return f"""--sql
        SELECT x, y, concat(pl.firstName, ' ', pl.lastName) as player_name, game_id, player_id, season, game_clock, period, ball_x, ball_y, home_team_id
        from ss.tracking
        join ss.players as pl on pl.id = tracking.player_id
        where season in (2018, 2019)
        and game_id = '{game_id}'
        and game_clock_stopped = false
        and game_clock between {end} and {start}
        and period = {period}
    """

In [93]:
def get_location_data(game_id, start, end, period):
    """Fetch the tracking rows for one game window as a DataFrame.

    The arguments are forwarded unchanged to ``get_locations_sql``; the
    resulting query is executed through the module-level BigQuery client ``bqc``.
    """
    query_job = bqc.query(get_locations_sql(game_id, start, end, period))
    return query_job.to_dataframe()

In [198]:
def process_location_data(all_data):
    """Reshape raw tracking rows into one long frame of player and ball positions.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Must contain the columns ``x``, ``y``, ``ball_x``, ``ball_y``,
        ``game_clock``, ``player_name``, ``period``, ``game_id`` and
        ``home_team_id``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x, y, game_clock, player_name, period, game_id,
        home_team_id``. There is one row per (game_clock, player_name, period)
        plus one row per (game_clock, period) for the ball, whose
        ``player_name`` is ``'ball'`` and whose ``x``/``y`` come from
        ``ball_x``/``ball_y``. Rows are sorted by period ascending, then
        game_clock descending, with a fresh 0..n-1 index.

        Missing values in non-numeric columns are replaced with
        ``"unknown"``. Numeric columns keep their NaN values: writing the
        string ``"unknown"`` into ``x``/``y`` would turn them into
        mixed-type object columns.
    """
    plot_columns = ['x', 'y', 'game_clock', 'player_name', 'period', 'game_id', 'home_team_id']
    sort_keys = ['period', 'game_clock']
    sort_directions = [True, False]

    player_locations = (
        all_data
        .drop_duplicates(['game_clock', 'player_name', 'period'])
        .sort_values(by=sort_keys, ascending=sort_directions)
        [plot_columns]
    )

    # Every player row of a given frame repeats the ball position, so keep one
    # row per (game_clock, period) and present it as a pseudo-player 'ball'.
    ball_locations = (
        all_data
        .drop_duplicates(['game_clock', 'period'])
        .drop(columns=['x', 'y'])
        .rename(columns={'ball_x': 'x', 'ball_y': 'y'})
        .assign(player_name='ball')
        [plot_columns]
    )

    all_locations = (
        pd.concat([player_locations, ball_locations], ignore_index=True)
        .sort_values(by=sort_keys, ascending=sort_directions)
        .reset_index(drop=True)
    )

    # Fill only the non-numeric (label) columns with the placeholder.
    label_columns = all_locations.select_dtypes(exclude='number').columns
    return all_locations.fillna({column: "unknown" for column in label_columns})

# Test

In [199]:
# Parameters for the test run: fetch every drive by this player in this season.
# get_drives_dataframe issues a BigQuery query through bqc.
season = 2018
player_name = 'Jayson Tatum'
drives_df = get_drives_dataframe(season, player_name)

In [280]:
# Reference court locations as (x, y). Only start_loc is used in this cell.
start_loc = (-28, 6)
end_loc = (-40, 0)
# Euclidean distance from each drive's start point to start_loc, computed
# column-wise rather than with a per-row apply; this also works when
# drives_df is empty.
drives_df['start_dist'] = np.hypot(
    drives_df['start_x'].astype(float) - start_loc[0],
    drives_df['start_y'].astype(float) - start_loc[1],
)
# Closest drives first, with a fresh 0..n-1 index.
drives_df = drives_df.sort_values(by=['start_dist'], ascending=True, ignore_index=True)

In [281]:
# Human-readable labels for the outcome codes found in a drive's bhr_outcomes.
# Lookups elsewhere use .get(code, code), so a code missing from this table is
# shown unchanged.
outcomes_dict = {
    'FGX': 'Field Goal Missed',
    'FGM': 'Field Goal Made',
    'TO': 'Turnover',
    'PASS': 'Pass',
    'BLK': 'Blocked',
}

In [307]:
# Number of drives to fetch tracking data for (drives_df is taken in its current row order).
n = 5
graph_data = {}
# Bound the loop by the number of available drives so a player with fewer
# than n drives does not raise IndexError on .iloc.
for i in range(min(n, len(drives_df))):
    row = drives_df.iloc[i]
    # Widen the window by 2 clock units on each side. The start is the upper
    # clock bound and the end the lower one (the locations query uses
    # `between end and start`).
    start_game_clock = row['start_game_clock'] + 2
    end_game_clock = row['end_game_clock'] - 2
    period = row['period']
    game_id = row['game_id']
    location_data = get_location_data(game_id, start_game_clock, end_game_clock, period)
    location_data = process_location_data(location_data)

    # Translate outcome codes to labels; unknown codes pass through unchanged.
    outcomes = [outcomes_dict.get(outcome, outcome) for outcome in row['bhr_outcomes']]

    graph_data[i] = {
        'dataframe': location_data,
        'player_name': player_name,
        'outcomes': outcomes,
    }

In [312]:
# Iterate over the entries that actually exist in graph_data rather than
# range(n): n may be stale or larger than the number of drives collected,
# which would raise KeyError inside display_motion.
for i in graph_data:
    display_motion(graph_data, i)