In [1]:
#import comet_ml in the top of your file
from comet_ml import Experiment
import os
from copy import deepcopy
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
import xgboost as xgb
import pandas as pd
from ift6758.features.feature_engineering2 import SeasonDataSetTwo
from ift6758.metrics import plot_metrics
import random


In [2]:
def seed_everything(seed_value):
    """Seed the random number generators this notebook relies on.

    Seeds Python's ``random`` module and NumPy's legacy global generator with
    ``seed_value`` and exports it as the ``PYTHONHASHSEED`` environment variable.

    NOTE(review): ``PYTHONHASHSEED`` is read at interpreter start-up, so setting
    it here presumably only influences processes launched afterwards -- confirm
    that this is the intent.
    """
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    for seeder in (random.seed, np.random.seed):
        seeder(seed_value)


# Single seed used for the whole notebook.
seed = 42
seed_everything(seed)

In [None]:
import json

# Game feed for gamePk 2017021065, loaded as a nested dict for json_normalize below.
fname = "../ift6758/data/JSON/2017021065.json"
# Decode explicitly as UTF-8 instead of relying on the platform's default locale
# encoding, so non-ASCII text (e.g. player names) is read identically everywhere.
with open(fname, encoding="utf-8") as f:
    json_f = json.load(f)

In [14]:
# Flatten the per-period linescore into one row per period, carrying the game id
# and both team names along as metadata columns.
df_init = pd.json_normalize(
    json_f,
    record_path=[['liveData','linescore','periods']],
    meta=['gamePk',['gameData','teams','away','name'],['gameData','teams','home','name']],
)

# Column layout: period fields, then the per-side stats, then game id and team name.
period_columns = ['periodType', 'startTime', 'endTime', 'num', 'ordinalNum']
side_stats = ['goals', 'shotsOnGoal', 'rinkSide']
home_columns = period_columns + ['home.' + stat for stat in side_stats] + ['gamePk', 'gameData.teams.home.name']
away_columns = period_columns + ['away.' + stat for stat in side_stats] + ['gamePk', 'gameData.teams.away.name']
common_columns = period_columns + side_stats + ['gamePk', 'teamname']

# One frame per side, renamed to the common layout and tagged with the side.
df_home = (
    df_init[home_columns]
    .rename(columns=dict(zip(home_columns, common_columns)))
    .assign(isHomeTeam=True)
)
df_away = (
    df_init[away_columns]
    .rename(columns=dict(zip(away_columns, common_columns)))
    .assign(isHomeTeam=False)
)

# Stack both sides: one row per (period, team).
df_tot = pd.concat([df_home, df_away])

# rinkSide 'right' -> (-89, 0), 'left' -> (89, 0), anything else -> NaN.
# NOTE(review): presumably the coordinates of the net this team shoots at -- confirm.
goal_by_rink_side = {'right': (-89, 0), 'left': (89, 0)}
df_tot["goalCoordinates"] = df_tot['rinkSide'].map(
    lambda side: goal_by_rink_side.get(side, np.nan)
)
df_tot = df_tot.reset_index(drop=True)

# Rename to the column names used by the play-by-play frame so the two can be joined.
map_columns = {"periodType": "about.periodType", "num": "about.period","teamname":"team.name" }
join_columns = [*map_columns, "gamePk", "goalCoordinates"]
df_periods_to_join = df_tot[join_columns].rename(columns=map_columns)


In [16]:
df_tot

Unnamed: 0,periodType,startTime,endTime,num,ordinalNum,goals,shotsOnGoal,rinkSide,gamePk,teamname,isHomeTeam,goalCoordinates
0,REGULAR,2018-03-12T23:08:21Z,2018-03-12T23:48:49Z,1,1st,1,14,right,2017021065,Washington Capitals,True,"(-89, 0)"
1,REGULAR,2018-03-13T00:07:14Z,2018-03-13T00:49:49Z,2,2nd,1,13,left,2017021065,Washington Capitals,True,"(89, 0)"
2,REGULAR,2018-03-13T01:08:03Z,2018-03-13T01:41:45Z,3,3rd,0,9,right,2017021065,Washington Capitals,True,"(-89, 0)"
3,OVERTIME,2018-03-13T01:44:12Z,2018-03-13T01:51:32Z,4,OT,1,7,left,2017021065,Washington Capitals,True,"(89, 0)"
4,REGULAR,2018-03-12T23:08:21Z,2018-03-12T23:48:49Z,1,1st,1,8,left,2017021065,Winnipeg Jets,False,"(89, 0)"
5,REGULAR,2018-03-13T00:07:14Z,2018-03-13T00:49:49Z,2,2nd,0,10,right,2017021065,Winnipeg Jets,False,"(-89, 0)"
6,REGULAR,2018-03-13T01:08:03Z,2018-03-13T01:41:45Z,3,3rd,1,10,left,2017021065,Winnipeg Jets,False,"(89, 0)"
7,OVERTIME,2018-03-13T01:44:12Z,2018-03-13T01:51:32Z,4,OT,0,0,right,2017021065,Winnipeg Jets,False,"(-89, 0)"


In [15]:
df_periods_to_join

Unnamed: 0,about.periodType,about.period,team.name,gamePk,goalCoordinates
0,REGULAR,1,Washington Capitals,2017021065,"(-89, 0)"
1,REGULAR,2,Washington Capitals,2017021065,"(89, 0)"
2,REGULAR,3,Washington Capitals,2017021065,"(-89, 0)"
3,OVERTIME,4,Washington Capitals,2017021065,"(89, 0)"
4,REGULAR,1,Winnipeg Jets,2017021065,"(89, 0)"
5,REGULAR,2,Winnipeg Jets,2017021065,"(-89, 0)"
6,REGULAR,3,Winnipeg Jets,2017021065,"(89, 0)"
7,OVERTIME,4,Winnipeg Jets,2017021065,"(-89, 0)"


In [28]:


df = pd.json_normalize(json_f,record_path=[['liveData','plays','allPlays']],meta=['gamePk',['gameData','teams','home','name']])



In [29]:
pd.set_option('display.max_columns', None)


In [30]:
df

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals
2,Period Start,WSH8,PERIOD_START,Period Start,2,8,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3,9,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4,51,1,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0,0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,Faceoff,WSH781,FACEOFF,Nicklas Backstrom faceoff won against Bryan Li...,366,781,4,OVERTIME,OT,03:28,01:32,2018-03-13T01:49:39Z,2,2,"[{'player': {'id': 8473563, 'fullName': 'Nickl...",69.0,22.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367,782,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2,3,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals
368,Period End,WSH783,PERIOD_END,End of OT,368,783,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:32Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals
369,Period Official,WSH786,PERIOD_OFFICIAL,Period Official,369,786,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:33Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals


In [31]:
# 1 when the event's team is the game's home team, 0 otherwise (events without a team get 0).
df["isHome"] = (df["team.name"] == df["gameData.teams.home.name"]).astype("int64")

In [32]:
# 'mm:ss' period clock -> 'hh:mm:ss' string -> seconds elapsed in the period (float).
period_clock = '00:' + df['about.periodTime'].astype(str)
df['periodSeconds'] = pd.to_timedelta(period_clock).dt.total_seconds()


In [35]:
def time_played(row):
    """Return the game time of an event in seconds since the opening faceoff.

    ``row`` must provide ``'about.period'`` (1-based period number) and
    ``'periodSeconds'`` (seconds elapsed inside that period).

    Periods 1-3 are counted as 1200 seconds each. Every period after the third
    is counted as 300 seconds on top of the 3600 seconds of regulation.
    """
    period = row['about.period']
    elapsed_in_period = row['periodSeconds']
    if period <= 3:
        return (period - 1) * 1200 + elapsed_in_period
    # Overtime and anything after it: 5-minute blocks following regulation.
    return 3600 + (period - 4) * 300 + elapsed_in_period

df["gameSeconds"] = df[["periodSeconds","about.period"]].apply(lambda r: time_played(r),axis=1)

In [40]:
df["result.event"].value_counts()

Faceoff            69
Shot               66
Stoppage           55
Hit                53
Blocked Shot       43
Missed Shot        21
Takeaway           19
Giveaway           15
Penalty             7
Goal                5
Period Ready        4
Period Start        4
Period End          4
Period Official     4
Game Scheduled      1
Game End            1
Name: result.event, dtype: int64

In [41]:
# Keep only the goal and penalty events.
dfpenaltyGoals = df.loc[df["result.event"].isin(["Goal", "Penalty"])]

In [50]:
dfpenaltyGoals

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds
26,Penalty,WSH64,PENALTY,Bryan Little Hooking against Evgeny Kuznetsov,26,64,1,REGULAR,1st,03:18,16:42,2018-03-12T23:13:15Z,0,0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",-67.0,8.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Hooking,Minor,2.0,,,,,2017021065,Washington Capitals,0,198.0,198.0
29,Penalty,WSH66,PENALTY,Matt Hendricks Tripping against Nicklas Backstrom,29,66,1,REGULAR,1st,03:53,16:07,2018-03-12T23:14:33Z,0,0,"[{'player': {'id': 8468611, 'fullName': 'Matt ...",-29.0,-34.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Tripping,Minor,2.0,,,,,2017021065,Washington Capitals,0,233.0,233.0
39,Goal,WSH29,GOAL,"Alex Ovechkin (41) Wrist Shot, assists: John C...",39,29,1,REGULAR,1st,04:35,15:25,2018-03-12T23:16:38Z,0,1,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-57.0,-20.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,PPG,Power Play,False,False,2017021065,Washington Capitals,1,275.0,275.0
42,Penalty,WSH70,PENALTY,T.J. Oshie Slashing against Brandon Tanev,42,70,1,REGULAR,1st,05:10,14:50,2018-03-12T23:18:03Z,0,1,"[{'player': {'id': 8471698, 'fullName': 'T.J. ...",-88.0,28.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Slashing,Minor,2.0,,,,,2017021065,Washington Capitals,1,310.0,310.0
45,Goal,WSH32,GOAL,"Nikolaj Ehlers (27) Wrist Shot, assists: none",45,32,1,REGULAR,1st,05:33,14:27,2018-03-12T23:19:06Z,1,1,"[{'player': {'id': 8477940, 'fullName': 'Nikol...",77.0,-3.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,0,333.0,333.0
87,Penalty,WSH94,PENALTY,T.J. Oshie Hooking against Blake Wheeler,87,94,1,REGULAR,1st,14:44,05:16,2018-03-12T23:37:41Z,1,1,"[{'player': {'id': 8471698, 'fullName': 'T.J. ...",73.0,34.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Hooking,Minor,2.0,,,,,2017021065,Washington Capitals,1,884.0,884.0
103,Penalty,WSH254,PENALTY,Mathieu Perreault Tripping against Tom Wilson,103,254,1,REGULAR,1st,16:59,03:01,2018-03-12T23:44:28Z,1,1,"[{'player': {'id': 8473618, 'fullName': 'Mathi...",-31.0,-34.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Tripping,Minor,2.0,,,,,2017021065,Washington Capitals,0,1019.0,1019.0
151,Goal,WSH247,GOAL,"Alex Ovechkin (42) Wrist Shot, assists: Tom Wi...",151,247,2,REGULAR,2nd,03:53,16:07,2018-03-13T00:14:42Z,1,2,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",85.0,8.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,1,233.0,1433.0
211,Penalty,WSH299,PENALTY,Jay Beagle Slashing against Mathieu Perreault,211,299,2,REGULAR,2nd,12:24,07:36,2018-03-13T00:34:29Z,1,2,"[{'player': {'id': 8474291, 'fullName': 'Jay B...",-75.0,23.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Slashing,Minor,2.0,,,,,2017021065,Washington Capitals,1,744.0,1944.0
272,Goal,WSH623,GOAL,"Patrik Laine (41) Wrist Shot, assists: Paul St...",272,623,3,REGULAR,3rd,05:02,14:58,2018-03-13T01:13:54Z,2,2,"[{'player': {'id': 8479339, 'fullName': 'Patri...",80.0,16.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,0,302.0,2702.0


In [73]:
a=dfpenaltyGoals[["gamePk","result.event","about.periodType","isHome","gameSeconds","result.penaltyMinutes"]].groupby(["gamePk","result.event"]).agg(lambda x: list(x)).to_dict()
# apply(list)
# [["about.periodType","isHome","gameSeconds"]].apply(list)
# .to_dict()


In [74]:
a

{'about.periodType': {(2017021065, 'Goal'): ['REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR',
   'OVERTIME'],
  (2017021065, 'Penalty'): ['REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR',
   'OVERTIME']},
 'isHome': {(2017021065, 'Goal'): [1, 0, 1, 0, 1],
  (2017021065, 'Penalty'): [0, 0, 1, 1, 0, 1, 0]},
 'gameSeconds': {(2017021065, 'Goal'): [275.0, 333.0, 1433.0, 2702.0, 3851.0],
  (2017021065, 'Penalty'): [198.0,
   233.00000000000003,
   310.0,
   884.0,
   1019.0000000000001,
   1944.0,
   3619.0]},
 'result.penaltyMinutes': {(2017021065, 'Goal'): [nan, nan, nan, nan, nan],
  (2017021065, 'Penalty'): [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]}}

In [98]:
# Step 1: one row per (game, event type); every other column becomes the list of
# that event type's values, in event order.
event_columns = ["gamePk","result.event","about.periodType","isHome","gameSeconds","result.penaltyMinutes"]
dfgameevent_group = (
    dfpenaltyGoals[event_columns]
    .groupby(["gamePk","result.event"])
    .agg(lambda values: list(values))
    .reset_index()
)

# Step 2: one row per game; every column becomes a list with one entry per event type.
dfgame_group = (
    dfgameevent_group
    .groupby(["gamePk"])
    .agg(lambda values: list(values))
    .reset_index()
)

In [99]:
dfgameevent_group

Unnamed: 0,gamePk,result.event,about.periodType,isHome,gameSeconds,result.penaltyMinutes
0,2017021065,Goal,"[REGULAR, REGULAR, REGULAR, REGULAR, OVERTIME]","[1, 0, 1, 0, 1]","[275.0, 333.0, 1433.0, 2702.0, 3851.0]","[nan, nan, nan, nan, nan]"
1,2017021065,Penalty,"[REGULAR, REGULAR, REGULAR, REGULAR, REGULAR, ...","[0, 0, 1, 1, 0, 1, 0]","[198.0, 233.00000000000003, 310.0, 884.0, 1019...","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]"


In [100]:
dfgame_group

Unnamed: 0,gamePk,result.event,about.periodType,isHome,gameSeconds,result.penaltyMinutes
0,2017021065,"[Goal, Penalty]","[[REGULAR, REGULAR, REGULAR, REGULAR, OVERTIME...","[[1, 0, 1, 0, 1], [0, 0, 1, 1, 0, 1, 0]]","[[275.0, 333.0, 1433.0, 2702.0, 3851.0], [198....","[[nan, nan, nan, nan, nan], [2.0, 2.0, 2.0, 2...."


In [106]:
from collections import defaultdict

a=defaultdict(float)
a["l"]+=2


In [110]:
a.items()

dict_items([('l', 2.0)])

In [122]:
a=np.array([1,2])
b=np.array([1,0]).astype('bool')
a[np.logical_not(b)]
# b

array([2])

In [143]:
import collections

In [131]:
# penalty_addition_dict

defaultdict(<function __main__.<lambda>()>,
            {30: defaultdict(int, {'homeAddition': -1})})

In [195]:
def _apply_team_penalties(additions, minutes, period_types, start_seconds,
                          opponent_goal_seconds, own_key, opponent_key):
    """Record one team's penalties in ``additions`` as +1/-1 deltas.

    Parameters
    ----------
    additions : mapping ``game second -> {key: delta}`` that is updated in place
        (expected to create missing entries on access, like a nested defaultdict).
    minutes, period_types, start_seconds : parallel sequences describing the
        penalties of the penalised team, in chronological order.
    opponent_goal_seconds : game seconds of the goals scored by the other team,
        in chronological order.
    own_key, opponent_key : the ``additions`` keys ("homeAddition" /
        "awayAddition") of the penalised team and of its opponent.
    """
    next_goal = 0  # index of the first opponent goal that is still available
    goal_count = len(opponent_goal_seconds)

    for minute, period_type, start in zip(minutes, period_types, start_seconds):
        start = int(start)
        duration = int(minute * 60)
        end = start + duration

        if period_type == "OVERTIME":
            # In overtime the penalty is booked as +1 for the opponent from the
            # penalty until its scheduled end, instead of -1 for the offender.
            additions[start][opponent_key] += 1
            additions[end][opponent_key] -= 1
            continue

        additions[start][own_key] -= 1

        if duration == 120:
            # Only 2-minute penalties are cut short by an opponent goal.
            # Skip goals scored at or before the penalty started.
            while next_goal < goal_count and opponent_goal_seconds[next_goal] <= start:
                next_goal += 1
            if next_goal < goal_count and opponent_goal_seconds[next_goal] <= end:
                additions[int(opponent_goal_seconds[next_goal])][own_key] += 1
                # A goal ends at most one penalty, so consume it.
                next_goal += 1
                continue

        # Penalty runs its full length.
        additions[end][own_key] += 1


def penalty_time_dict(row):
    """Build the timeline of penalty-induced deltas for one game.

    ``row`` is one row of the per-game frame: ``row['result.event']`` lists the
    event types present ('Goal' and/or 'Penalty') and every other column holds
    one list per event type, in the same order.

    Returns an ``OrderedDict`` sorted by game second. Each value is a
    ``defaultdict(int)`` holding the ``"homeAddition"`` / ``"awayAddition"``
    deltas that take effect at that second:

    * a regulation penalty is -1 for the penalised team at its start and +1 at
      its end;
    * a 2-minute regulation penalty ends early at the first opponent goal scored
      inside it, and each goal ends at most one penalty;
    * an overtime penalty is +1 for the opponent at its start and -1 at its
      scheduled end.

    Games that contain only goals or only penalties are handled: a missing event
    type is treated as an empty list.
    """
    events = list(row['result.event'])

    def event_values(column, event):
        # Values of `column` for `event`, or an empty list when the game has none.
        if event not in events:
            return []
        return row[column][events.index(event)]

    ## Source for writing this default dict: https://stackoverflow.com/a/5029958
    penalty_addition_dict = defaultdict(lambda: defaultdict(int))  ## Contains time: {"awayAddition":+2,"homeAddition":-2}

    penalty_periodtypes = np.array(event_values('about.periodType', 'Penalty'))
    penalty_ishome = np.array(event_values('isHome', 'Penalty')).astype('bool')
    penalty_gameseconds = np.array(event_values('gameSeconds', 'Penalty')).astype(int)
    penalty_minutes = np.array(event_values('result.penaltyMinutes', 'Penalty')).astype(int)
    scoring_ishome = np.array(event_values('isHome', 'Goal')).astype('bool')
    scoring_gameseconds = np.array(event_values('gameSeconds', 'Goal')).astype(int)

    penalty_isaway = np.logical_not(penalty_ishome)
    scoring_gameseconds_home = scoring_gameseconds[scoring_ishome]
    scoring_gameseconds_away = scoring_gameseconds[np.logical_not(scoring_ishome)]

    # Home penalties can be ended early by away goals ...
    _apply_team_penalties(
        penalty_addition_dict,
        penalty_minutes[penalty_ishome],
        penalty_periodtypes[penalty_ishome],
        penalty_gameseconds[penalty_ishome],
        scoring_gameseconds_away,
        own_key="homeAddition",
        opponent_key="awayAddition",
    )
    # ... and away penalties by home goals.
    _apply_team_penalties(
        penalty_addition_dict,
        penalty_minutes[penalty_isaway],
        penalty_periodtypes[penalty_isaway],
        penalty_gameseconds[penalty_isaway],
        scoring_gameseconds_home,
        own_key="awayAddition",
        opponent_key="homeAddition",
    )

    ordered_penalties = collections.OrderedDict(sorted(penalty_addition_dict.items()))
    return ordered_penalties


def getPenaltyTimePeriods(row):
    """Collapse a timeline of penalty deltas into ``(start, end)`` spans.

    ``row`` maps a time to a mapping with ``'awayAddition'`` and
    ``'homeAddition'`` deltas and is walked in its iteration order. A span opens
    at the first time seen after the previous span closed and closes at the
    first time where both running sums are back to zero. A span that is still
    open when the timeline ends is not reported.

    Both keys are read with ``[]``, so a ``defaultdict`` entry gains the key it
    was missing (with value 0) as a side effect.
    """
    timeline = row
    spans = []
    away_balance = 0
    home_balance = 0
    span_start = None  # None means no span is currently open

    for moment, deltas in timeline.items():
        if span_start is None:
            span_start = moment
        away_balance += deltas['awayAddition']
        home_balance += deltas['homeAddition']

        if away_balance == 0 and home_balance == 0:
            spans.append((span_start, moment))
            span_start = None

    return spans


    
    


In [249]:

dfgame_group
# [["gamePk","penaltyAdditions"]].set_index(["gamePk"]).to_dict()["penaltyAdditions"]


Unnamed: 0,gamePk,result.event,about.periodType,isHome,gameSeconds,result.penaltyMinutes,penaltyAdditions,penaltyTimePeriods
0,2017021065,"[Goal, Penalty]","[[REGULAR, REGULAR, REGULAR, REGULAR, OVERTIME...","[[1, 0, 1, 0, 1], [0, 0, 1, 1, 0, 1, 0]]","[[275.0, 333.0, 1433.0, 2702.0, 3851.0], [198....","[[nan, nan, nan, nan, nan], [2.0, 2.0, 2.0, 2....","{198: {'awayAddition': -1, 'homeAddition': 0},...","[(198, 353), (884, 1004), (1019, 1139), (1944,..."


In [209]:
a=dfgame_group[["gamePk","penaltyAdditions"]].set_index(["gamePk"]).to_dict()["penaltyAdditions"]

In [210]:
a

{2017021065: OrderedDict([(198,
               defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (233, defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (275, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (310, defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (333, defaultdict(int, {'homeAddition': 1, 'awayAddition': 0})),
              (353, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (884, defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (1004, defaultdict(int, {'homeAddition': 1, 'awayAddition': 0})),
              (1019,
               defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (1139, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (1944,
               defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (2064, defaultdict(int, {'homeAddition': 1, 'awayA

In [211]:
a.items()

dict_items([(2017021065, OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (233, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (275, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (310, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (333, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (353, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (884, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3619, defaultdict(<class 'int'>, {'homeAddition': 

In [214]:
# Flatten {gamePk: {gameSeconds: deltas}} into one record per (gamePk, gameSeconds).
delta_records = {}
for game_pk, timeline in a.items():
    for game_second, deltas in timeline.items():
        delta_records[(game_pk, game_second)] = deltas

j = pd.DataFrame.from_dict(delta_records, orient='index')
# Name the two index levels so reset_index() yields proper merge keys.
j.index.names = ['gamePk', 'gameSeconds']



In [217]:
m=j.reset_index()

In [219]:
m

Unnamed: 0,gamePk,gameSeconds,awayAddition,homeAddition
0,2017021065,198,-1,0
1,2017021065,233,-1,0
2,2017021065,275,1,0
3,2017021065,310,0,-1
4,2017021065,333,0,1
5,2017021065,353,1,0
6,2017021065,884,0,-1
7,2017021065,1004,0,1
8,2017021065,1019,-1,0
9,2017021065,1139,1,0


In [220]:
df

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
2,Period Start,WSH8,PERIOD_START,Period Start,2,8,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3,9,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4,51,1,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0,0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1,17.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,Faceoff,WSH781,FACEOFF,Nicklas Backstrom faceoff won against Bryan Li...,366,781,4,OVERTIME,OT,03:28,01:32,2018-03-13T01:49:39Z,2,2,"[{'player': {'id': 8473563, 'fullName': 'Nickl...",69.0,22.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1,208.0,3808.0
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367,782,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2,3,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals,1,251.0,3851.0
368,Period End,WSH783,PERIOD_END,End of OT,368,783,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:32Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,251.0,3851.0
369,Period Official,WSH786,PERIOD_OFFICIAL,Period Official,369,786,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:33Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,251.0,3851.0


In [228]:
dfs = df.merge(m,on=["gamePk","gameSeconds"],how="outer")

In [224]:
m.dtypes

gamePk          int64
gameSeconds     int64
awayAddition    int64
homeAddition    int64
dtype: object

In [230]:
dfs.head()

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0.0,1.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1.0,5.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
2,Period Start,WSH8,PERIOD_START,Period Start,2.0,8.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3.0,9.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4.0,51.0,1.0,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0.0,0.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,17.0,17,,


In [245]:
# Order events chronologically within each game; rows that carry no penalty
# delta get 0 so the deltas can be accumulated.
dfsort = (
    dfs.sort_values(['gamePk','gameSeconds'])
    .fillna({"awayAddition": 0, "homeAddition": 0})
)

In [258]:
dfsort.loc[(dfsort['result.event']!="Goal")&(dfsort['result.event']!="Penalty")&(dfsort['result.event'].notna())&((dfsort['awayAddition']!=0)|(dfsort['homeAddition']!=0)),["awayAddition","homeAddition"]]
# =0

Unnamed: 0,awayAddition,homeAddition


In [260]:
dfsort.loc[(dfsort['awayAddition']!=0)]

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition
26,Penalty,WSH64,PENALTY,Bryan Little Hooking against Evgeny Kuznetsov,26.0,64.0,1.0,REGULAR,1st,03:18,16:42,2018-03-12T23:13:15Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",-67.0,8.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Hooking,Minor,2.0,,,,,2017021065,Washington Capitals,0.0,198.0,198,-1.0,0.0
29,Penalty,WSH66,PENALTY,Matt Hendricks Tripping against Nicklas Backstrom,29.0,66.0,1.0,REGULAR,1st,03:53,16:07,2018-03-12T23:14:33Z,0.0,0.0,"[{'player': {'id': 8468611, 'fullName': 'Matt ...",-29.0,-34.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Tripping,Minor,2.0,,,,,2017021065,Washington Capitals,0.0,233.0,233,-1.0,0.0
39,Goal,WSH29,GOAL,"Alex Ovechkin (41) Wrist Shot, assists: John C...",39.0,29.0,1.0,REGULAR,1st,04:35,15:25,2018-03-12T23:16:38Z,0.0,1.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-57.0,-20.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,PPG,Power Play,False,False,2017021065,Washington Capitals,1.0,275.0,275,1.0,0.0
371,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,353,1.0,0.0
103,Penalty,WSH254,PENALTY,Mathieu Perreault Tripping against Tom Wilson,103.0,254.0,1.0,REGULAR,1st,16:59,03:01,2018-03-12T23:44:28Z,1.0,1.0,"[{'player': {'id': 8473618, 'fullName': 'Mathi...",-31.0,-34.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Tripping,Minor,2.0,,,,,2017021065,Washington Capitals,0.0,1019.0,1019,-1.0,0.0


In [None]:
# dict_items([(2017021065, OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (233, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (275, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (310, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (333, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (353, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (884, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3619, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3739, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0}))]))])

In [257]:
dfsort.groupby(["gamePk"])[["awayAddition","homeAddition"]].cumsum()

Unnamed: 0,awayAddition,homeAddition
0,0.0,0.0
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
...,...,...
366,-1.0,0.0
367,-1.0,0.0
368,-1.0,0.0
369,-1.0,0.0


In [225]:
# Cast both merge keys to integers so they match the int64 keys of `m`.
for key_column in ("gameSeconds", "gamePk"):
    df[key_column] = df[key_column].astype(int)

In [227]:
df.dtypes

result.event                  object
result.eventCode              object
result.eventTypeId            object
result.description            object
about.eventIdx                 int64
about.eventId                  int64
about.period                   int64
about.periodType              object
about.ordinalNum              object
about.periodTime              object
about.periodTimeRemaining     object
about.dateTime                object
about.goals.away               int64
about.goals.home               int64
players                       object
coordinates.x                float64
coordinates.y                float64
team.id                      float64
team.name                     object
team.link                     object
team.triCode                  object
result.secondaryType          object
result.penaltySeverity        object
result.penaltyMinutes        float64
result.strength.code          object
result.strength.name          object
result.gameWinningGoal        object
r

In [196]:
# Per game: timeline of penalty deltas keyed by game second ...
dfgame_group["penaltyAdditions"] = dfgame_group.apply(penalty_time_dict, axis=1)

# ... collapsed into (start, end) spans where the running deltas are non-zero.
dfgame_group["penaltyTimePeriods"] = dfgame_group["penaltyAdditions"].apply(getPenaltyTimePeriods)

In [199]:
# dfgame_group[["gamePk","penaltyTimePeriods"]].to_dict()

{'gamePk': {0: 2017021065},
 'penaltyTimePeriods': {0: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [185]:
dc = dfgame_group["penaltyAdditions"].values[0]

In [None]:
# array([OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1})), (233, defaultdict(<class 'int'>, {'awayAddition': -1})), (275, defaultdict(<class 'int'>, {'awayAddition': 1})), (310, defaultdict(<class 'int'>, {'homeAddition': -1})), (333, defaultdict(<class 'int'>, {'homeAddition': 1})), (353, defaultdict(<class 'int'>, {'awayAddition': 1})), (884, defaultdict(<class 'int'>, {'homeAddition': -1})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1})), (3619, defaultdict(<class 'int'>, {'homeAddition': 0}))])],


In [186]:
dc

OrderedDict([(198, defaultdict(int, {'awayAddition': -1})),
             (233, defaultdict(int, {'awayAddition': -1})),
             (275, defaultdict(int, {'awayAddition': 1})),
             (310, defaultdict(int, {'homeAddition': -1})),
             (333, defaultdict(int, {'homeAddition': 1})),
             (353, defaultdict(int, {'awayAddition': 1})),
             (884, defaultdict(int, {'homeAddition': -1})),
             (1004, defaultdict(int, {'homeAddition': 1})),
             (1019, defaultdict(int, {'awayAddition': -1})),
             (1139, defaultdict(int, {'awayAddition': 1})),
             (1944, defaultdict(int, {'homeAddition': -1})),
             (2064, defaultdict(int, {'homeAddition': 1})),
             (3619, defaultdict(int, {'homeAddition': 1})),
             (3739, defaultdict(int, {'homeAddition': -1}))])

In [188]:
time_periods

[(198, 353), (884, 1004), (1019, 1139), (1944, 2064), (3619, 3739)]

In [187]:
# Scratch version of getPenaltyTimePeriods, run directly on `dc`:
# walk the timeline and emit a (start, end) span each time both running sums return to zero.
time_periods = []
awaySum = homeSum = 0
foundEnd = True
for time, additions in dc.items():
    if foundEnd:
        # Previous span is closed, so this entry opens a new one.
        startTime, foundEnd = time, False
    awayAddition = additions['awayAddition']
    homeAddition = additions['homeAddition']
    awaySum, homeSum = awaySum + awayAddition, homeSum + homeAddition

    foundEnd = (awaySum == 0 and homeSum == 0)
    if foundEnd:
        endTime = time
        time_periods.append((startTime, endTime))
