In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from comet_ml import Experiment
from ipywidgets import interact, IntSlider

from src.data.dataset import split_dataset, tidy_plays_df
from src.data.data_query import StorageEngine
from src.data.plays_model import game_json_to_plays_list
from src.features.features import basic_features, advanced_features, normalize_plays_coords
#from src.models.xgboost import train_xgb

In [None]:
#tidied = tidy_plays_df([2015, 2016, 2017, 2018, 2019], augment=True)
#tidied.to_csv("./data/processed/plays_2015-2020.csv", index=False)

In [2]:
season_plays_df = pd.read_csv("./data/processed/plays_2015-2020.csv", index_col=False)

In [3]:
advanced_df = advanced_features(season_plays_df)
advanced_df.head()

Unnamed: 0,seconds_elapsed,period_idx,x_coord,y_coord,x_coord_norm,y_coord_norm,dist_from_net,angle_from_net,Backhand,Deflected,...,SHOT,STOP,TAKEAWAY,previous_x_coord,previous_y_coord,seconds_from_previous,dist_from_previous,rebound,angle_change,speed
0,33.0,1,-83.0,13.0,83.0,-13.0,14.317821,-65.224859,0,0,...,0,0,0,0.0,0.0,33.0,84.011904,0,0.0,2.545815
1,96.0,1,-34.0,2.0,34.0,-2.0,55.036352,-2.082565,0,0,...,0,0,0,-69.0,-22.0,18.0,42.43819,0,0.0,2.357677
2,174.0,1,-57.0,-29.0,57.0,29.0,43.185646,42.184443,0,0,...,0,0,0,-97.0,-5.0,1.0,46.647615,0,0.0,46.647615
3,221.0,1,61.0,4.0,61.0,4.0,28.284271,8.130102,0,0,...,0,0,0,88.0,-39.0,14.0,50.774009,0,0.0,3.626715
4,240.0,1,64.0,7.0,64.0,7.0,25.96151,15.642246,0,0,...,1,0,0,61.0,4.0,19.0,4.242641,1,7.512144,0.223297


In [6]:
wpg = season_plays_df.loc[season_plays_df.gamePk==2017021065].copy()

In [7]:
wpg_features = advanced_features(wpg)

In [10]:
experiment = Experiment(project_name="hockey-all-star-analytics")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/zilto/hockey-all-star-analytics/9f3edb9e588a4b489acef509c1f6b30b



In [11]:
experiment.log_dataframe_profile(wpg_features, name="wpg_v_wsh_2017021065", workspace="q4.5_df", dataframe_format="csv")

  x = asanyarray(arr - arrmean)
Summarize dataset: 100%|██████████| 240/240 [00:20<00:00, 11.52it/s, Completed]
Generate report structure: 100%|██████████| 1/1 [00:06<00:00,  6.30s/it]
Render HTML: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]
COMET ERROR: dataframe conversion to 'csv' failed; ignored


{'profile': {'web': 'https://www.comet.ml/api/asset/download?assetId=20b07b259837414689e4450e744f8720&experimentKey=9f3edb9e588a4b489acef509c1f6b30b',
  'api': 'https://www.comet.ml/api/rest/v2/experiment/asset/get-asset?assetId=20b07b259837414689e4450e744f8720&experimentKey=9f3edb9e588a4b489acef509c1f6b30b',
  'assetId': '20b07b259837414689e4450e744f8720'},
 'dataframe': None}

In [12]:
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/zilto/hockey-all-star-analytics/9f3edb9e588a4b489acef509c1f6b30b
COMET INFO:   Uploads:
COMET INFO:     dataframe-profile        : 1 (6.20 MB)
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
COMET INFO:     git-patch (uncompressed) : 1 (41.19 KB)
COMET INFO:     installed packages       : 1
COMET INFO:     notebook                 : 1
COMET INFO:     source_code              : 1
COMET INFO: ---------------------------
COMET INFO: Uploading metrics, params, and assets to Comet before program termination (may take several seconds)
COMET INFO: The Python SDK has 3600 seconds to finish before aborting...
COMET INFO: Waiting for completion of the file uploads (may ta

In [None]:
@interact(row=IntSlider(options=advanced_df.shape[0]), show_prev=[True, False], show_norm=[True, False])
def view_features(row=0, show_prev=True, show_norm=True):
    example = advanced_df.iloc[row, :].copy()
    
    plt.figure()
    plt.xlim(-100, 100)
    plt.ylim(-43, 43)
    plt.scatter(100-11, 0, c="red", label="goal", marker="<") # net
    plt.scatter(-(100-11), 0, c="red", marker=">") # net
    plt.scatter(example.x_coord, example.y_coord, c="C0", marker="+", s=200, label="true_coords")
    if show_prev:
        plt.scatter(example.previous_x_coord, example.previous_y_coord, c="C0", marker="+", s=50, label="previous_coords")
    
    if show_norm & (example.x_coord != example.x_coord_norm):
        plt.scatter(example.x_coord_norm, example.y_coord_norm, c="C1", marker="+", s=200, label="normalized_coords")
    plt.legend()
    plt.show()
    
    # print("current_event: ", example.event_type)
    # print("previous_event: ", example.previous_event_type)
    print("\nangle: ", example.angle_from_net)
    print("prev_angle: ", example.prev_angle)
    print("angle_diff: ", example.angle_from_net - example.prev_angle)
    print("\ndist: ", example.dist_from_previous)
    print("seconds: ", example.seconds_from_previous)
    print("speed: ", example.speed)

In [None]:
advanced_df

# 5 - XGBoost 

In [None]:
from xgboost import XGBClassifier