In [None]:
# Importing correct packages
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# Machine Learning Libraries
import lightgbm as lgb
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Figures
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="whitegrid")

from sklearn.metrics import (
    confusion_matrix, 
    accuracy_score, 
    precision_recall_curve,
    average_precision_score,
    roc_curve,
    roc_auc_score,
    classification_report,
    plot_confusion_matrix
)

In [None]:
# Load the model-ready period-1 game stats and trim them to the rows used for
# modelling.
# NOTE(review): `path` is an absolute, machine-specific location -- consider a
# configurable data directory so the notebook can run elsewhere.
path = '/Users/AngelaChen/NHL-Game-II'
raw_stats = pd.read_csv(path + "/Period_1_Game_Stats_Final_ModelReady.csv")

# NOTE(review): the cut points below are undocumented magic numbers; confirm
# what the discarded rows represent.
trimmed_stats = raw_stats.iloc[:-9611]                  # discard the last 9611 rows
df = (
    trimmed_stats
    # Keyword form instead of the positional `axis` argument (rejected by
    # pandas 2), and no in-place mutation of a slice.
    .drop(index=trimmed_stats.index[18800:40871])       # discard positions 18800..40870
    .reset_index(drop=True)
)

# Null count per column (the original note states there are no null values)
df.isna().sum()

In [None]:
# Preview the first five rows of the prepared frame
df.head(n=5)

In [None]:
# Separate the target from the features without mutating `df`, so this cell can
# be re-run safely (df.pop would raise KeyError on a second execution).
y = df['won']
X = df.drop(columns='won')

# Hold out a third of the rows for testing; the seed keeps the split reproducible
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=seed
)

In [None]:
# Baseline LightGBM classifier. Log-loss is tracked on both the held-out and the
# training split so the learning curves can be plotted afterwards.
# NOTE(review): LightGBM documents max_depth <= 0 as "no limit", so -5 presumably
# behaves like the default -1 -- confirm this value is intentional.
evaluation_sets = [(X_test, y_test), (X_train, y_train)]
model = lgb.LGBMClassifier(
    learning_rate=0.09,
    max_depth=-5,
    random_state=42,
)
model.fit(
    X_train,
    y_train,
    eval_set=evaluation_sets,
    eval_metric='logloss',
    verbose=20,
)

In [None]:
# Mean accuracy of the baseline model on the training and held-out splits
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print('Training accuracy {:.4f}'.format(train_accuracy))
print('Testing accuracy {:.4f}'.format(test_accuracy))

In [None]:
# Accuracy, precision and recall of the baseline model on the held-out split.
# (Author's observation: interestingly, slightly better accuracy.)
from sklearn.metrics import precision_score, recall_score

y_pred = model.predict(X_test)
# Round every predicted value to an integer label before scoring
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print('Precision score: ', precision_score(y_test, predictions))
print('Recall score: ', recall_score(y_test, predictions))

In [None]:
# Plot the baseline model's feature importances.
# Uses the `lgb` and `plt` aliases imported at the top of the notebook rather
# than re-importing lightgbm / pyplot under second names mid-notebook.
lgb.plot_importance(model)
plt.show()

In [None]:
# Plot the metric recorded on the evaluation sets while the baseline model was fitted
lgb.plot_metric(booster=model)

In [None]:
# Confusion matrix of the baseline model on the held-out split.
# `metrics.plot_confusion_matrix` is deprecated in scikit-learn 1.0 and removed in
# 1.2; prefer ConfusionMatrixDisplay.from_estimator when it exists and only fall
# back to the old helper on older releases. The `or` short-circuits, so the
# removed attribute is never touched on new versions.
draw_confusion_matrix = (
    getattr(metrics.ConfusionMatrixDisplay, "from_estimator", None)
    or metrics.plot_confusion_matrix
)
draw_confusion_matrix(model, X_test, y_test, cmap='Blues_r')

In [None]:
# Per-class precision / recall / F1 of the baseline model on the held-out split
test_predictions = model.predict(X_test)
print(metrics.classification_report(y_test, test_predictions))

## Hyperparameter tuning (GridSearchCV)

In [None]:
from sklearn.model_selection import GridSearchCV, KFold

# Base estimator for the search. learning_rate and max_depth are supplied by the
# grid below for every candidate, so only the seed is fixed here.
clf_lgb_grid = lgb.LGBMClassifier(random_state=42)

# Pass the splitter object itself instead of the generator returned by .split().
# A generator can be consumed only once, so a second fit of the same search
# would find no folds; the seeded splitter yields the same 5 shuffled folds on
# every use.
gkf = KFold(n_splits=5, shuffle=True, random_state=42)

# 2 x 3 x 10 = 60 candidate settings, each evaluated on the 5 folds
param_grid = {
    'num_leaves': [31, 62],
    'max_depth': [-1, 10, 20],
    'learning_rate': [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]
    }
gsearch = GridSearchCV(estimator=clf_lgb_grid, param_grid=param_grid, cv=gkf)
lgb_model = gsearch.fit(X=X_train, y=y_train)

# Best parameter combination and its mean cross-validated score
print(lgb_model.best_params_, lgb_model.best_score_)

## Final model

In [None]:
# Fit the final model and report its accuracy on the held-out split.
# NOTE(review): these values are presumably the grid-search winners -- confirm
# they still match `lgb_model.best_params_`.
final_params = {
    'learning_rate': 0.04,
    'max_depth': 10,
    'num_leaves': 31,
    'random_state': 42,
}
final_model = lgb.LGBMClassifier(**final_params)
final_model.fit(X_train, y_train)
final_model.score(X_test, y_test)

In [None]:
# Define some helpful functions
def clf_score(clf, X_train, y_train, X_val, y_val, train=True):
    """Print (and, for validation, plot) an evaluation report for a fitted classifier.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict`` (and ideally ``predict_proba``
        or ``decision_function``).
    X_train, y_train : training features and labels; used when ``train`` is truthy.
    X_val, y_val : validation features and labels; used when ``train`` is falsy.
    train : bool, default True
        Truthy -> report accuracy, classification report, confusion matrix and
        10-fold cross-validated accuracy on the training data.
        Falsy  -> report accuracy, PR curve, ROC curve / AUC, classification
        report and confusion matrix (printed and plotted) on the validation data.

    Returns
    -------
    None. Everything is printed or plotted.

    Notes
    -----
    The PR and ROC curves are computed from continuous scores (positive-class
    probability when available). Hard 0/1 predictions would collapse both
    curves to a single operating point. The positive class is assumed to be
    the second column of ``predict_proba`` (binary problem).
    """
    if train:
        # Local import: cross_val_score is not among the notebook's top-level imports.
        from sklearn.model_selection import cross_val_score

        print("Train Result:\n")
        train_pred = clf.predict(X_train)  # predict once, reuse for every metric
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_train, train_pred)))
        print("Classification Report: \n {}\n".format(classification_report(y_train, train_pred)))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_train, train_pred)))

        res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
        print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
        print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))

    else:
        print("Validation Result:\n")
        val_pred = clf.predict(X_val)  # predict once, reuse for every metric
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_val, val_pred)))

        # Ranking metrics need scores, not labels; fall back gracefully for
        # estimators that expose neither probabilities nor a decision function.
        if hasattr(clf, "predict_proba"):
            val_score = clf.predict_proba(X_val)[:, 1]
        elif hasattr(clf, "decision_function"):
            val_score = clf.decision_function(X_val)
        else:
            val_score = val_pred

        precision, recall, _ = precision_recall_curve(y_val, val_score)
        average_precision = average_precision_score(y_val, val_score)
        plot_pr_curve(precision, recall, average_precision)

        fpr, tpr, _ = roc_curve(y_val, val_score)
        roc_auc = roc_auc_score(y_val, val_score)
        print("roc auc score: {}\n".format(roc_auc))
        plot_roc_curve(fpr, tpr, roc_auc)

        print("Classification Report: \n {}\n".format(classification_report(y_val, val_pred)))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_val, val_pred)))
        # Compare against the true labels; passing the predictions as y_true
        # always yields a perfectly diagonal matrix.
        plot_confusion_matrix(clf, X_val, y_val)
        print("End of validation Result\n")

def plot_pr_curve(precision, recall, average_precision):
    """Show a shaded step plot of precision against recall.

    ``precision`` and ``recall`` are the paired curve coordinates;
    ``average_precision`` is shown in the title with two decimals.
    """
    shading = {'color': 'b', 'alpha': 0.2}
    plt.step(recall, precision, where='post', **shading)
    plt.fill_between(recall, precision, step='post', **shading)

    plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])  # slight headroom above precision = 1
    plt.show()

def plot_roc_curve(fpr, tpr, roc_auc):
    """Show the ROC curve with the AUC in the legend and a dashed diagonal.

    ``fpr`` / ``tpr`` are the paired curve coordinates; ``roc_auc`` is
    shown in the legend with two decimals.
    """
    auc_label = 'AUC = %0.2f' % roc_auc
    plt.plot(fpr, tpr, 'b', label = auc_label)
    plt.plot([0, 1], [0, 1], 'r--')  # unlabeled reference diagonal
    plt.legend(loc = 'lower right')

    plt.title('Receiver Operating Characteristic')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.show()

## Streaming data using Sportradar API

In [None]:
# Validation report for the final model on the held-out split
clf_score(
    clf=final_model,
    X_train=X_train,
    y_train=y_train,
    X_val=X_test,
    y_val=y_test,
    train=False,
)

In [None]:
# make predictions for tonight data
def tonight_bet(shots, shots_against, goals, goals_against, takeaways, takeaways_against,
                hits, hits_against, blockedShots, blockedShots_against, giveaways,
                giveaways_against, missedShots, missedShots_against, penalties,
                penalties_against, Won_Faceoffs, Lost_Faceoffs, hoa_away, hoa_home):
    """Assemble one game's stats into a single-row feature frame.

    Each argument becomes one column, in signature order. ``Won_Faceoffs`` and
    ``Lost_Faceoffs`` are stored under the column names "#Won Faceoffs" and
    "#Lost Faceoffs".
    NOTE(review): the column names and order presumably mirror the training
    features -- confirm against the training CSV.

    Returns
    -------
    pandas.DataFrame
        One row (index label 0) and 20 columns.
    """
    features = {
        "shots": shots,
        "shots_against": shots_against,
        "goals": goals,
        "goals_against": goals_against,
        "takeaways": takeaways,
        "takeaways_against": takeaways_against,
        "hits": hits,
        "hits_against": hits_against,
        "blockedShots": blockedShots,
        "blockedShots_against": blockedShots_against,
        "giveaways": giveaways,
        "giveaways_against": giveaways_against,
        "missedShots": missedShots,
        "missedShots_against": missedShots_against,
        "penalties": penalties,
        "penalties_against": penalties_against,
        "#Won Faceoffs": Won_Faceoffs,
        "#Lost Faceoffs": Lost_Faceoffs,
        "hoa_away": hoa_away,
        "hoa_home": hoa_home,
    }
    # Build the single row directly. The previous version broadcast every value
    # across a 20-label index and then sliced off the first row.
    return pd.DataFrame({name: [value] for name, value in features.items()}, index=[0])

In [None]:
# Example feature row, spelled out by name so each value is identifiable
tonight_bet(
    shots=30, shots_against=5,
    goals=0, goals_against=0,
    takeaways=0, takeaways_against=0,
    hits=0, hits_against=0,
    blockedShots=0, blockedShots_against=0,
    giveaways=1, giveaways_against=0,
    missedShots=1, missedShots_against=0,
    penalties=0, penalties_against=0,
    Won_Faceoffs=0, Lost_Faceoffs=0,
    hoa_away=0, hoa_home=0,
)

In [None]:
# Score a single game: build its one-row feature frame, then predict.
# NOTE(review): this uses `model` (the baseline fit), not `final_model` --
# confirm that is intended.
X_Game_test = tonight_bet(30, 5, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0)

# Predicted label, rounded to an integer
Game_y_pred = model.predict(X_Game_test)
prediction = list(map(round, Game_y_pred))
print(prediction)

# Per-class probabilities for the same row
probability = model.predict_proba(X_Game_test)
print(probability)

# Sportradar API request

In [None]:
# API packages
import os

import requests
import json
from sportradar.api import API

# Read the Sportradar key from the environment instead of hard-coding it in the
# notebook, so the credential is never saved or shared with the file.
# NOTE(review): the key that was previously committed here should be treated as
# exposed and rotated.
api_key = os.environ.get("SPORTRADAR_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the SPORTRADAR_API_KEY environment variable before running this cell."
    )

# Confirm a key was loaded without echoing it in full into the cell output
print("API Key:", "*" * max(len(api_key) - 4, 0) + api_key[-4:])

## Today's Games

In [None]:
import re
from datetime import date, datetime, timedelta

today = date.today()
print("Today's date:", today)

# Zero-padded date parts used in the schedule endpoint path (YYYY/MM/DD)
Day = today.strftime("%d")
Month = today.strftime("%m")
Year = today.strftime("%Y")

# Get API info. The key is taken from the `api_key` variable defined earlier in
# the notebook and sent as a query parameter rather than being repeated inside
# the URL literal. The timeout keeps a stalled connection from hanging the cell.
schedule_url = "https://api.sportradar.us/nhl/trial/v7/en/games/{Year}/{Month}/{Day}/schedule.json".format(
    Day=Day, Month=Month, Year=Year
)
response = requests.get(schedule_url, params={"api_key": api_key}, timeout=30)

# Helper that renders a decoded API payload as readable JSON text
def jprint(obj):
    """Print ``obj`` as JSON with sorted keys and a 4-space indent.

    Always returns an empty string.
    """
    print(json.dumps(obj, indent=4, sort_keys=True))
    return ""

# Decode the schedule response body from JSON into Python objects
x = response.json()
# Turn the decoded schedule payload into a table of today's games
def getrs(obj, hours_behind_utc=4):
    """Tabulate the games found in a decoded schedule payload.

    Parameters
    ----------
    obj : dict
        Must contain a "games" list. From every game the function reads
        ``id``, ``home.name``, ``away.name`` and ``scheduled``.
    hours_behind_utc : int or float, default 4
        Hours subtracted from the time found in ``scheduled``. The default
        reproduces the offset that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns "Game ID", "Home Team", "Away Team", "Start Time", one row per
        game. "Start Time" is an ``HH:MM:SS`` string, or None when no time
        could be found between a 'T' and a 'Z' in ``scheduled``.

    Raises
    ------
    ValueError
        If the extracted time is not in ``HH:MM:SS`` form.
    """
    columns = ["Game ID", "Home Team", "Away Team", "Start Time"]
    time_format = "%H:%M:%S"

    rows = []
    for game in obj["games"]:
        game_id = game["id"]
        home_name = game['home']['name']
        away_name = game['away']['name']

        # Pull the clock part out of e.g. "2021-05-01T23:00:00Z"
        clock = re.search('T(.+?)Z', game['scheduled'])
        if clock:
            shifted = datetime.strptime(clock.group(1), time_format) - timedelta(hours=hours_behind_utc)
            start_time = shifted.strftime(time_format)
        else:
            # No match: record a missing value rather than reusing the
            # previous game's time (or failing on an unbound name).
            start_time = None

        rows.append([game_id, home_name, away_name, start_time])

    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.
    return pd.DataFrame(rows, columns=columns)
# Tabulate today's games and preview up to the first ten
data = getrs(obj=x)
data.head(n=10)

# Current Game Stats

In [None]:
def get_game_play_by_play(self, game_id):
    """Fetch the play-by-play feed for an NHL game and pretty-print it.

    Despite its name, ``self`` is the API key that gets interpolated into
    the request URL; ``game_id`` identifies the game. The decoded JSON is
    printed with sorted keys and a 4-space indent.

    Always returns an empty string.
    """
    url_template = "https://api.sportradar.us/nhl/trial/v7/en/games/{game_id}/pbp.json?api_key={your_api_key}"
    feed = requests.get(url_template.format(game_id=game_id, your_api_key=self)).json()
    print(json.dumps(feed, sort_keys=True, indent=4))
    return ""

In [None]:
# Fetch the play-by-play feed for one game.
# NOTE(review): the game id is hard-coded; presumably it should come from the
# "Game ID" column of today's schedule -- confirm.
game_id = "e0e9b4c3-2d01-458f-9094-331687b96cb3"

# The key comes from the `api_key` variable defined earlier in the notebook and
# is sent as a query parameter rather than embedded in the URL literal. The
# timeout keeps a stalled connection from hanging the cell.
response = requests.get(
    "https://api.sportradar.us/nhl/trial/v7/en/games/{game_id}/pbp.json".format(game_id=game_id),
    params={"api_key": api_key},
    timeout=30,
)

# `jprint` is already defined in the schedule cell, so it is not redefined here.

# Set response as json
x = response.json()
x

In [None]:
#j = len(x['periods'][0]['events'])
#for i in range(j):
#    t = x['periods'][0]['events'][i]['event_type']
#    if t == "goal":
#        print(t)
 #   else:
 #       pass

In [None]:
#def update_stats(obj):
 #   Home_Team = []
 #   Away_Team = []
  #  Home_Team = pd.DataFrame([],columns = ["shots","goals","takeaways","hits","BlockedShots","giveaway","misses","penalties","#Won Faceoffs","#Lost Faceoffs"])
    #Home_Team = pd.DataFrame([],columns = ["shots",'shots_against','goals','goals_against','takeaways','takeaways_against','hits','hits_against','blockedShots','blockedShots_against','giveaways','giveaways_against','missedShots','missedShots_against','penalties','penalties_against','#Won Faceoffs','#Lost Faceoffs'])
  #  Away_Team = pd.DataFrame([],columns = ["shots",'shots_against','goals','goals_against','takeaways','takeaways_against','hits','hits_against','blockedShots','blockedShots_against','giveaways','giveaways_against','missedShots','missedShots_against','penalties','penalties_against','#Won Faceoffs','#Lost Faceoffs'])
  #  ht = len(obj["home"]['players'])
  #  at = len(obj['away']['players'])   
  #  total_g = 0
  #  total_s = 0
  #  total_t = 0
  #  total_h = 0
  #  total_bs = 0
   # total_gi = 0
  #  total_m = 0
   # total_p = 0
   # total_fw = 0
  #  total_fl = 0
  #  I = None
  #  for i in range(ht):
    #        g = obj["home"]["players"][i]['statistics']['periods'][0]['total']['goals']
    #        s = obj["home"]["players"][i]['statistics']['periods'][0]['total']['shots']
    #        t = obj["home"]["players"][i]['statistics']['periods'][0]['total']['takeaways']
    #        h = obj["home"]["players"][i]['statistics']['periods'][0]['total']['hits']
     #       bs = obj["home"]["players"][i]['statistics']['periods'][0]['total']['blocked_shots']
     #       gi = obj["home"]["players"][i]['statistics']['periods'][0]['total']['giveaways']
      #      m = obj["home"]["players"][i]['statistics']['periods'][0]['total']['missed_shots']
    #        p = obj["home"]["players"][i]['statistics']['periods'][0]['total']['penalties']
     #       fw = obj["home"]["players"][i]['statistics']['periods'][0]['total']['faceoffs_won']
       #     fl = obj["home"]["players"][i]['statistics']['periods'][0]['total']['faceoffs_lost'] 
      #      if total_g < g and total_g + g != total_g:
      #          I = i
      #      else:
       #         pass
      #      if total_g == g and i != I:
      #          total_g = total_g + g
      #          I = i
      #      else:
      #          pass
      #      if total_s < s and total_s + s != total_s:
      #          total_s = total_s + 1
     #           I = i
     #       else:
      #          pass
      #      if total_s == s and i != I:
      #          total_s = total_s + 1
      #          I = i
      #      else:
      #          pass
      #      if total_t < t:
       #         total_t = total_t + (t - total_t)
       #     else:
        #        pass
     #       if total_h < h:
     #           total_h = total_h + (h - total_h)
     #       else:
    #            pass
    #        if total_bs < bs:
    #            total_bs = total_bs + (bs - total_bs)
    #        else:
   #             pass
   #         if total_gi < gi:
   #             total_gi = total_gi + (gi - total_gi)
   #         else:
   #             pass
    #        if total_m < m:
   #             total_m = total_m + (m - total_m)
    #        else:
    #            pass
   #         if total_p < p:
     #           total_p = total_p + (p - total_p)
    #        else:
    #            pass
    #        if total_fw < fw:
    #            total_fw = total_fw + (fw - total_fw)
    #        else:
    #            pass
   #         if total_fl < fl:
    #            total_fl = total_fl + (fl - total_fl)
 #           else:
 #               pass
 #   home_df = [total_s,total_g,total_t,total_h,total_bs,total_gi,total_m,total_p,total_fw,total_fl]
 #   h_series = pd.Series(home_df,index = Home_Team.columns)
 #   Home_Team = Home_Team.append(h_series, ignore_index=True)
 #   return Home_Team
#update_stats(x)

In [None]:
#for i in range(at):
    #        ga = obj["away"]["players"][i]['statistics']['periods'][0]['total']['goals']
    #        sa = obj["away"]["players"][i]['statistics']['periods'][0]['total']['shots']
    #        ta = obj["away"]["players"][i]['statistics']['periods'][0]['total']['takeaways']
    #        ha = obj["away"]["players"][i]['statistics']['periods'][0]['total']['hits']
   #         bsa = obj["away"]["players"][i]['statistics']['periods'][0]['total']['blocked_shots']
   #         gia = obj["away"]["players"][i]['statistics']['periods'][0]['total']['giveaways']
   #         ma = obj["away"]["players"][i]['statistics']['periods'][0]['total']['missed_shots']
   #         pa = obj["away"]["players"][i]['statistics']['periods'][0]['total']['penalties']
    #        fwa = obj["away"]["players"][i]['statistics']['periods'][0]['total']['faceoffs_won']
    #        fla = obj["away"]["players"][i]['statistics']['periods'][0]['total']['faceoffs_lost'] 
    #away_df = [sa,s,ga,g,ta,t,ha,h,bsa,bs,gia,gi,ma,m,pa,p,fwa,fla]
      #  a_series = pd.Series(away_df,index = Away_Team.columns)
         #   Away_Team = Away_Team.append(a_series, ignore_index=True)