In [3]:
#import comet_ml in the top of your file
from comet_ml import Experiment
import os
from copy import deepcopy
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
import xgboost as xgb
import pandas as pd
from ift6758.features.feature_engineering2 import SeasonDataSetTwo
from ift6758.metrics import plot_metrics
import random


In [5]:
def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
seed = 42
seed_everything(seed)

In [6]:
import json
fname = "../ift6758/data/JSON/2017020003.json"
with open(fname) as f:
    json_f = json.load(f)

In [7]:
df_init = pd.json_normalize(json_f,record_path=[['liveData','linescore','periods']],meta=['gamePk',['gameData','teams','away','name'],['gameData','teams','home','name']])
home_columns= ['periodType', 'startTime', 'endTime', 'num', 'ordinalNum', 'home.goals','home.shotsOnGoal', 'home.rinkSide', 'gamePk', 'gameData.teams.home.name']
away_columns= ['periodType', 'startTime', 'endTime', 'num', 'ordinalNum', 'away.goals','away.shotsOnGoal', 'away.rinkSide', 'gamePk', 'gameData.teams.away.name']
common_columns = ['periodType', 'startTime', 'endTime', 'num', 'ordinalNum', 'goals', 'shotsOnGoal', 'rinkSide', 'gamePk', 'teamname']
df_home = df_init[home_columns].rename(columns=dict(zip(home_columns,common_columns)))
df_home["isHomeTeam"]=True

df_away = df_init[away_columns].rename(columns=dict(zip(away_columns,common_columns)))
df_away["isHomeTeam"]=False
df_tot = pd.concat([df_home,df_away])
df_tot["goalCoordinates"]=df_tot.apply(lambda r: (-89,0) if r['rinkSide']=='right' else ((89,0) if r['rinkSide']=='left'  else np.nan),axis=1)
df_tot = df_tot.reset_index(drop=True)
map_columns = {"periodType": "about.periodType", "num": "about.period","teamname":"team.name" }
df_periods_to_join = df_tot[list(map_columns.keys())+["gamePk","goalCoordinates"]].rename(columns=map_columns)


In [8]:
df_tot

Unnamed: 0,periodType,startTime,endTime,num,ordinalNum,goals,shotsOnGoal,rinkSide,gamePk,teamname,isHomeTeam,goalCoordinates
0,REGULAR,2017-10-05T02:16:40Z,2017-10-05T02:55:58Z,1,1st,1,16,left,2017020003,Edmonton Oilers,True,"(89, 0)"
1,REGULAR,2017-10-05T03:14:27Z,2017-10-05T03:52:47Z,2,2nd,0,13,right,2017020003,Edmonton Oilers,True,"(-89, 0)"
2,REGULAR,2017-10-05T04:11:11Z,2017-10-05T04:50:35Z,3,3rd,2,16,left,2017020003,Edmonton Oilers,True,"(89, 0)"
3,REGULAR,2017-10-05T02:16:40Z,2017-10-05T02:55:58Z,1,1st,0,13,right,2017020003,Calgary Flames,False,"(-89, 0)"
4,REGULAR,2017-10-05T03:14:27Z,2017-10-05T03:52:47Z,2,2nd,0,9,left,2017020003,Calgary Flames,False,"(89, 0)"
5,REGULAR,2017-10-05T04:11:11Z,2017-10-05T04:50:35Z,3,3rd,0,5,right,2017020003,Calgary Flames,False,"(-89, 0)"


In [9]:
df_periods_to_join

Unnamed: 0,about.periodType,about.period,team.name,gamePk,goalCoordinates
0,REGULAR,1,Edmonton Oilers,2017020003,"(89, 0)"
1,REGULAR,2,Edmonton Oilers,2017020003,"(-89, 0)"
2,REGULAR,3,Edmonton Oilers,2017020003,"(89, 0)"
3,REGULAR,1,Calgary Flames,2017020003,"(-89, 0)"
4,REGULAR,2,Calgary Flames,2017020003,"(89, 0)"
5,REGULAR,3,Calgary Flames,2017020003,"(-89, 0)"


In [10]:


df = pd.json_normalize(json_f,record_path=[['liveData','plays','allPlays']],meta=['gamePk',['gameData','teams','home','name']])



In [11]:
pd.set_option('display.max_columns', None)


In [12]:
df

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,result.penaltySeverity,result.penaltyMinutes,gamePk,gameData.teams.home.name
0,Game Scheduled,EDM1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2017-10-05T00:13:29Z,0,0,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
1,Period Ready,EDM5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2017-10-05T02:14:57Z,0,0,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
2,Period Start,EDM51,PERIOD_START,Period Start,2,51,1,REGULAR,1st,00:00,20:00,2017-10-05T02:16:40Z,0,0,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
3,Faceoff,EDM52,FACEOFF,Ryan Nugent-Hopkins faceoff won against Sam Be...,3,52,1,REGULAR,1st,00:00,20:00,2017-10-05T02:16:40Z,0,0,"[{'player': {'id': 8476454, 'fullName': 'Ryan ...",0.0,0.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,,,,,,,,2017020003,Edmonton Oilers
4,Stoppage,EDM8,STOP,Icing,4,8,1,REGULAR,1st,00:08,19:52,2017-10-05T02:17:36Z,0,0,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
393,Faceoff,EDM811,FACEOFF,Ryan Nugent-Hopkins faceoff won against Mikael...,393,811,3,REGULAR,3rd,19:42,00:18,2017-10-05T04:50:07Z,0,3,"[{'player': {'id': 8476454, 'fullName': 'Ryan ...",-20.0,-22.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,,,,,,,,2017020003,Edmonton Oilers
394,Period End,EDM758,PERIOD_END,End of 3rd Period,394,758,3,REGULAR,3rd,20:00,00:00,2017-10-05T04:50:35Z,0,3,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
395,Period Official,EDM761,PERIOD_OFFICIAL,Period Official,395,761,3,REGULAR,3rd,20:00,00:00,2017-10-05T04:50:37Z,0,3,,,,,,,,,,,,,,,2017020003,Edmonton Oilers
396,Game End,EDM762,GAME_END,Game End,396,762,3,REGULAR,3rd,20:00,00:00,2017-10-05T04:50:39Z,0,3,,,,,,,,,,,,,,,2017020003,Edmonton Oilers


In [13]:
df["isHome"] = df[["team.name","gameData.teams.home.name"]].apply(lambda r: 1 if r["team.name"]==r["gameData.teams.home.name"] else 0,axis=1)

In [14]:
df['periodSeconds'] = pd.to_timedelta('00:' + df['about.periodTime'].astype(str)) #concat '00:' to have the format 'hh:mm:ss'
df['periodSeconds'] = df['periodSeconds'].dt.total_seconds()


In [15]:
def time_played(row):
    """
    return time in seconds
    """
    if row['about.period']>3:
        ## Overtime is 5 mins and It can go till Shootouts
        time_secs = 3600 + (row['about.period']-4)*300 + row['periodSeconds']
        return time_secs
    else:
        time_secs =  (row['about.period']-1)*1200 + row['periodSeconds']
        return time_secs

df["gameSeconds"] = df[["periodSeconds","about.period"]].apply(lambda r: time_played(r),axis=1)

In [16]:
df["result.event"].value_counts()

Shot               69
Faceoff            63
Hit                60
Stoppage           54
Blocked Shot       35
Giveaway           34
Missed Shot        31
Takeaway           29
Penalty             5
Period Start        3
Period Ready        3
Goal                3
Period End          3
Period Official     3
Game End            1
Game Scheduled      1
Game Official       1
Name: result.event, dtype: int64

In [17]:
dfpenaltyGoals = df.loc[(df["result.event"]=="Goal")|(df["result.event"]=="Penalty")]

In [18]:
dfpenaltyGoals

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,result.penaltySeverity,result.penaltyMinutes,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds
83,Goal,EDM97,GOAL,"Connor McDavid (1) Wrist Shot, assists: Leon D...",83,97,1,REGULAR,1st,11:01,08:59,2017-10-05T02:36:41Z,0,1,"[{'player': {'id': 8478402, 'fullName': 'Conno...",80.0,12.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,Wrist Shot,EVEN,Even,True,False,,,2017020003,Edmonton Oilers,1,661.0,661.0
93,Penalty,EDM201,PENALTY,Kris Russell Holding the stick against Micheal...,93,201,1,REGULAR,1st,12:12,07:48,2017-10-05T02:40:49Z,0,1,"[{'player': {'id': 8471729, 'fullName': 'Kris ...",69.0,32.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,Holding the stick,,,,,Minor,2.0,2017020003,Edmonton Oilers,1,732.0,732.0
117,Penalty,EDM213,PENALTY,Zack Kassian Fighting against Tanner Glass,117,213,1,REGULAR,1st,15:43,04:17,2017-10-05T02:48:07Z,0,1,"[{'player': {'id': 8475178, 'fullName': 'Zack ...",-8.0,8.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,Fighting,,,,,Major,5.0,2017020003,Edmonton Oilers,1,943.0,943.0
118,Penalty,EDM215,PENALTY,Tanner Glass Fighting against Zack Kassian,118,215,1,REGULAR,1st,15:43,04:17,2017-10-05T02:48:28Z,0,1,"[{'player': {'id': 8470854, 'fullName': 'Tanne...",-8.0,16.0,20.0,Calgary Flames,/api/v1/teams/20,CGY,Fighting,,,,,Major,5.0,2017020003,Edmonton Oilers,0,943.0,943.0
129,Penalty,EDM223,PENALTY,Matthew Tkachuk Holding the stick against Adam...,129,223,1,REGULAR,1st,17:16,02:44,2017-10-05T02:51:28Z,0,1,"[{'player': {'id': 8479314, 'fullName': 'Matth...",-84.0,9.0,20.0,Calgary Flames,/api/v1/teams/20,CGY,Holding the stick,,,,,Minor,2.0,2017020003,Edmonton Oilers,0,1036.0,1036.0
194,Penalty,EDM406,PENALTY,Dougie Hamilton Hooking against Leon Draisaitl,194,406,2,REGULAR,2nd,06:23,13:37,2017-10-05T03:24:44Z,0,1,"[{'player': {'id': 8476462, 'fullName': 'Dougi...",-84.0,-7.0,20.0,Calgary Flames,/api/v1/teams/20,CGY,Hooking,,,,,Minor,2.0,2017020003,Edmonton Oilers,0,383.0,1583.0
327,Goal,EDM623,GOAL,"Connor McDavid (2) Wrist Shot, assists: none",327,623,3,REGULAR,3rd,08:07,11:53,2017-10-05T04:25:37Z,0,2,"[{'player': {'id': 8478402, 'fullName': 'Conno...",77.0,-5.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,Wrist Shot,EVEN,Even,False,False,,,2017020003,Edmonton Oilers,1,487.0,2887.0
389,Goal,EDM809,GOAL,"Connor McDavid (3) Wrist Shot, assists: Leon D...",389,809,3,REGULAR,3rd,19:01,00:59,2017-10-05T04:46:23Z,0,3,"[{'player': {'id': 8478402, 'fullName': 'Conno...",68.0,1.0,22.0,Edmonton Oilers,/api/v1/teams/22,EDM,Wrist Shot,EVEN,Even,False,True,,,2017020003,Edmonton Oilers,1,1141.0,3541.0


In [19]:
a=dfpenaltyGoals[["gamePk","result.event","about.periodType","isHome","gameSeconds","result.penaltyMinutes"]].groupby(["gamePk","result.event"]).agg(lambda x: list(x)).to_dict()
# apply(list)
# [["about.periodType","isHome","gameSeconds"]].apply(list)
# .to_dict()


In [20]:
a

{'about.periodType': {(2017020003, 'Goal'): ['REGULAR', 'REGULAR', 'REGULAR'],
  (2017020003, 'Penalty'): ['REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR',
   'REGULAR']},
 'isHome': {(2017020003, 'Goal'): [1, 1, 1],
  (2017020003, 'Penalty'): [1, 1, 0, 0, 0]},
 'gameSeconds': {(2017020003, 'Goal'): [661.0, 2887.0, 3541.0],
  (2017020003, 'Penalty'): [732.0,
   943.0000000000001,
   943.0000000000001,
   1036.0,
   1583.0]},
 'result.penaltyMinutes': {(2017020003, 'Goal'): [nan, nan, nan],
  (2017020003, 'Penalty'): [2.0, 5.0, 5.0, 2.0, 2.0]}}

In [21]:
dfgameevent_group=dfpenaltyGoals[["gamePk","result.event","about.periodType","isHome","gameSeconds","result.penaltyMinutes"]].groupby(["gamePk","result.event"]).agg(lambda x: list(x)).reset_index()
dfgame_group = dfgameevent_group.groupby(["gamePk"]).agg(lambda x: list(x)).reset_index()

In [22]:
dfgameevent_group

Unnamed: 0,gamePk,result.event,about.periodType,isHome,gameSeconds,result.penaltyMinutes
0,2017020003,Goal,"[REGULAR, REGULAR, REGULAR]","[1, 1, 1]","[661.0, 2887.0, 3541.0]","[nan, nan, nan]"
1,2017020003,Penalty,"[REGULAR, REGULAR, REGULAR, REGULAR, REGULAR]","[1, 1, 0, 0, 0]","[732.0, 943.0000000000001, 943.0000000000001, ...","[2.0, 5.0, 5.0, 2.0, 2.0]"


In [23]:
dfgame_group

Unnamed: 0,gamePk,result.event,about.periodType,isHome,gameSeconds,result.penaltyMinutes
0,2017020003,"[Goal, Penalty]","[[REGULAR, REGULAR, REGULAR], [REGULAR, REGULA...","[[1, 1, 1], [1, 1, 0, 0, 0]]","[[661.0, 2887.0, 3541.0], [732.0, 943.00000000...","[[nan, nan, nan], [2.0, 5.0, 5.0, 2.0, 2.0]]"


In [24]:
from collections import defaultdict

a=defaultdict(float)
a["l"]+=2


In [25]:
a.items()

dict_items([('l', 2.0)])

In [26]:
a=np.array([1,2])
b=np.array([1,0]).astype('bool')
a[np.logical_not(b)]
# b

array([2])

In [27]:
import collections

In [28]:
# penalty_addition_dict

In [40]:
l=[2]
if l:
    print(2)

2


In [58]:
def penalty_time_dict(row):
    events = row['result.event']
    if events[0]=='Penalty':
        penalty_index=0
        scoring_index=1
    else:
        penalty_index=1
        scoring_index=0

    ## Source for writing this default dict: https://stackoverflow.com/a/5029958
    penalty_addition_dict = defaultdict(lambda: defaultdict(int))  ## Contains time: {"awayAddition":+2,"homeAddition":-2}
        
    penalty_periodtypes = np.array(row['about.periodType'][penalty_index])
    penalty_ishome = np.array(row['isHome'][penalty_index]).astype('bool')
    scoring_ishome = np.array(row['isHome'][scoring_index]).astype('bool')
    penalty_gameseconds= np.array(row['gameSeconds'][penalty_index]).astype(int)
    scoring_gameseconds= np.array(row['gameSeconds'][scoring_index]).astype(int)
    penalty_minutes = np.array(row['result.penaltyMinutes'][penalty_index]).astype(int)

    #home
    penalty_periodtypes_home = penalty_periodtypes[penalty_ishome]
    penalty_gameseconds_home = penalty_gameseconds[penalty_ishome]
    scoring_gameseconds_home = scoring_gameseconds[scoring_ishome]
    penalty_minutes_home = penalty_minutes[penalty_ishome]

    #away
    penalty_isaway = np.logical_not(penalty_ishome)
    penalty_periodtypes_away = penalty_periodtypes[penalty_isaway]
    penalty_gameseconds_away= penalty_gameseconds[penalty_isaway]
    scoring_gameseconds_away = scoring_gameseconds[np.logical_not(scoring_ishome)]
    penalty_minutes_away = penalty_minutes[penalty_isaway]
   

    # Home Penalties
    scoring_index = 0
    for (penalty_minute,penalty_periodtype,penalty_gamesecond) in zip(penalty_minutes_home,penalty_periodtypes_home,penalty_gameseconds_home):
        penalty_seconds = int(penalty_minute*60)
        estimated_penalty_end_time= int(penalty_gamesecond+penalty_seconds)
        if penalty_periodtype!="OVERTIME":
            penalty_addition_dict[penalty_gamesecond]["homeAddition"] -= 1
        else:
            penalty_addition_dict[penalty_gamesecond]["awayAddition"] += 1
        if penalty_seconds==120.0 and penalty_periodtype!="OVERTIME":
            while scoring_index<len(scoring_gameseconds_away)-1 and scoring_gameseconds_away[scoring_index]<=penalty_gamesecond:
                scoring_index+=1
            if scoring_index<=len(scoring_gameseconds_away)-1: ## No Goals by away
                nearest_next_goalsecond = scoring_gameseconds_away[scoring_index]
            else:
                nearest_next_goalsecond=-1

            if nearest_next_goalsecond>penalty_gamesecond and nearest_next_goalsecond<=estimated_penalty_end_time:
                penalty_addition_dict[nearest_next_goalsecond]["homeAddition"]+=1
                if scoring_index<len(scoring_gameseconds_home)-1:
                    scoring_index+=1

            else:
                penalty_addition_dict[estimated_penalty_end_time]["homeAddition"]+=1
        else:
            if penalty_periodtype!="OVERTIME":
                penalty_addition_dict[estimated_penalty_end_time]["homeAddition"]+=1
            else:
                penalty_addition_dict[penalty_gamesecond]["awayAddition"] -= 1

    # Away Penalties (TODO: Form a function for both away and home, if you get time)
   
    # Away Penalties
    scoring_index = 0
    for (penalty_minute,penalty_periodtype,penalty_gamesecond) in zip(penalty_minutes_away,penalty_periodtypes_away,penalty_gameseconds_away):
        penalty_seconds = int(penalty_minute*60)
        estimated_penalty_end_time= int(penalty_gamesecond+penalty_seconds)
        if penalty_periodtype!="OVERTIME":
            penalty_addition_dict[penalty_gamesecond]["awayAddition"] -= 1
        else:

            penalty_addition_dict[penalty_gamesecond]["homeAddition"] += 1

        if penalty_seconds==120.0 and penalty_periodtype!="OVERTIME":
            while  scoring_index<len(scoring_gameseconds_home)-1 and scoring_gameseconds_home[scoring_index]<=penalty_gamesecond :
                scoring_index+=1
            if scoring_index<=len(scoring_gameseconds_home)-1:
                nearest_next_goalsecond = scoring_gameseconds_home[scoring_index]
            else:
                nearest_next_goalsecond=-1
            if nearest_next_goalsecond>penalty_gamesecond and nearest_next_goalsecond<=estimated_penalty_end_time:
                penalty_addition_dict[nearest_next_goalsecond]["awayAddition"]+=1                 
                if scoring_index<len(scoring_gameseconds_home)-1:
                    scoring_index+=1
            else:
                penalty_addition_dict[estimated_penalty_end_time]["awayAddition"]+=1
        else:
            if penalty_periodtype!="OVERTIME":
                penalty_addition_dict[estimated_penalty_end_time]["awayAddition"]+=1
            else:
                penalty_addition_dict[estimated_penalty_end_time]["homeAddition"] -= 1
    ordered_penalties = collections.OrderedDict(sorted(penalty_addition_dict.items()))

    

    return ordered_penalties


def getPenaltyTimePeriods(row):
    dc=row
    penalty_time_periods = []
    awaySum = 0
    homeSum = 0
    foundEnd=True
    for time in dc:
        if foundEnd:
            startTime =time
            foundEnd=False
        awayAddition = dc[time]['awayAddition']
        homeAddition = dc[time]['homeAddition']
        awaySum += awayAddition
        homeSum += homeAddition

        if (awaySum==0 and homeSum==0):
            foundEnd = True
            endTime = time
            penalty_time_periods.append((startTime,endTime))
    return penalty_time_periods


    
    


In [59]:
# dfgame_group["penaltyAdditions"] = 

In [60]:

dfgame_group["gameSeconds"].to_numpy()
# [["gamePk","penaltyAdditions"]].set_index(["gamePk"]).to_dict()["penaltyAdditions"]


array([list([[661.0, 2887.0, 3541.0], [732.0, 943.0000000000001, 943.0000000000001, 1036.0, 1583.0]])],
      dtype=object)

In [61]:
dfgame_group["penaltyAdditions"]=dfgame_group.apply(lambda row: penalty_time_dict(row),axis=1 )

dfgame_group["penaltyTimePeriods"] = dfgame_group["penaltyAdditions"].apply(lambda row: getPenaltyTimePeriods(row))

  if penalty_seconds==120.0 and penalty_periodtype!="OVERTIME" and scoring_gameseconds_away:


In [62]:
dfgame_group["penaltyAdditions"].to_numpy()

array([OrderedDict([(732, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (852, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (943, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': -1})), (1036, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1156, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (1243, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 1})), (1583, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1703, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0}))])],
      dtype=object)

In [31]:
a=dfgame_group[["gamePk","penaltyAdditions"]].set_index(["gamePk"]).to_dict()["penaltyAdditions"]

In [32]:
a

{2017021065: OrderedDict([(198,
               defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (233, defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (275, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (310, defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (333, defaultdict(int, {'homeAddition': 1, 'awayAddition': 0})),
              (353, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (884, defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (1004, defaultdict(int, {'homeAddition': 1, 'awayAddition': 0})),
              (1019,
               defaultdict(int, {'awayAddition': -1, 'homeAddition': 0})),
              (1139, defaultdict(int, {'awayAddition': 1, 'homeAddition': 0})),
              (1944,
               defaultdict(int, {'homeAddition': -1, 'awayAddition': 0})),
              (2064, defaultdict(int, {'homeAddition': 1, 'awayA

In [33]:
a.items()

dict_items([(2017021065, OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (233, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (275, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (310, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (333, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (353, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (884, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3619, defaultdict(<class 'int'>, {'homeAddition': 

In [34]:
j = pd.DataFrame.from_dict({(i,j): a[i][j] 
                           for i in a.keys() 
                           for j in a[i].keys()},
                       orient='index')
                       
j.index = j.index.set_names(['gamePk','gameSeconds'])



In [35]:
m=j.reset_index()

In [36]:
m

Unnamed: 0,gamePk,gameSeconds,awayAddition,homeAddition
0,2017021065,198,-1,0
1,2017021065,233,-1,0
2,2017021065,275,1,0
3,2017021065,310,0,-1
4,2017021065,333,0,1
5,2017021065,353,1,0
6,2017021065,884,0,-1
7,2017021065,1004,0,1
8,2017021065,1019,-1,0
9,2017021065,1139,1,0


In [37]:
df

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0,1,1,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1,5,1,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
2,Period Start,WSH8,PERIOD_START,Period Start,2,8,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3,9,1,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0,0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0,0.0,0.0
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4,51,1,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0,0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1,17.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,Faceoff,WSH781,FACEOFF,Nicklas Backstrom faceoff won against Bryan Li...,366,781,4,OVERTIME,OT,03:28,01:32,2018-03-13T01:49:39Z,2,2,"[{'player': {'id': 8473563, 'fullName': 'Nickl...",69.0,22.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1,208.0,3808.0
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367,782,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2,3,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals,1,251.0,3851.0
368,Period End,WSH783,PERIOD_END,End of OT,368,783,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:32Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,251.0,3851.0
369,Period Official,WSH786,PERIOD_OFFICIAL,Period Official,369,786,4,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:33Z,2,3,,,,,,,,,,,,,,,2017021065,Washington Capitals,0,251.0,3851.0


In [38]:
dfs = pd.concat([df,m], axis=0, ignore_index=True)

# f.concat(m,on=["gamePk","gameSeconds"],how="outer")d

In [39]:
dfs

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0.0,1.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,,
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1.0,5.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,,
2,Period Start,WSH8,PERIOD_START,Period Start,2.0,8.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,,
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3.0,9.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,,
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4.0,51.0,1.0,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0.0,0.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,17.0,17.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1139.0,1.0,0.0
381,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1944.0,0.0,-1.0
382,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,2064.0,0.0,1.0
383,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,3619.0,0.0,1.0


In [224]:
m.dtypes

gamePk          int64
gameSeconds     int64
awayAddition    int64
homeAddition    int64
dtype: object

In [230]:
dfs.head()

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0.0,1.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1.0,5.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
2,Period Start,WSH8,PERIOD_START,Period Start,2.0,8.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3.0,9.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0,,
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4.0,51.0,1.0,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0.0,0.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,17.0,17,,


In [40]:
dfsort = dfs.sort_values(['gamePk','gameSeconds'])
dfsort.loc[:,["awayAddition","homeAddition"]]=dfsort.loc[:,["awayAddition","homeAddition"]].fillna(0)
# dfsort.loc[:,["awayAddition","homeAddition"]]
# .fillna(0,inplace=True)

In [41]:
dfsort.loc[(dfsort['result.event']!="Goal")&(dfsort['result.event']!="Penalty")&(dfsort['result.event'].notna())&((dfsort['awayAddition']!=0)|(dfsort['homeAddition']!=0)),["awayAddition","homeAddition"]]
# =0

Unnamed: 0,awayAddition,homeAddition


In [42]:
dfsort.loc[(dfsort['awayAddition']!=0)]

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition
371,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,198.0,-1.0,0.0
372,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,233.0,-1.0,0.0
373,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,275.0,1.0,0.0
376,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,353.0,1.0,0.0
379,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1019.0,-1.0,0.0
380,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1139.0,1.0,0.0


In [None]:
# dict_items([(2017021065, OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (233, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (275, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (310, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (333, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (353, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (884, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1, 'homeAddition': 0})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1, 'homeAddition': 0})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3619, defaultdict(<class 'int'>, {'homeAddition': 1, 'awayAddition': 0})), (3739, defaultdict(<class 'int'>, {'homeAddition': -1, 'awayAddition': 0}))]))])

In [46]:
dfsort[["awayCum","homeCum"]]= dfsort.groupby(["gamePk"])[["awayAddition","homeAddition"]].cumsum()

In [51]:
dfsort

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition,awayCum,homeCum
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0.0,1.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1.0,5.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Period Start,WSH8,PERIOD_START,Period Start,2.0,8.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3.0,9.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4.0,51.0,1.0,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0.0,0.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,17.0,17.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,Faceoff,WSH781,FACEOFF,Nicklas Backstrom faceoff won against Bryan Li...,366.0,781.0,4.0,OVERTIME,OT,03:28,01:32,2018-03-13T01:49:39Z,2.0,2.0,"[{'player': {'id': 8473563, 'fullName': 'Nickl...",69.0,22.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,208.0,3808.0,0.0,0.0,0.0,0.0
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367.0,782.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2.0,3.0,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals,1.0,251.0,3851.0,0.0,0.0,0.0,0.0
368,Period End,WSH783,PERIOD_END,End of OT,368.0,783.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:32Z,2.0,3.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,251.0,3851.0,0.0,0.0,0.0,0.0
369,Period Official,WSH786,PERIOD_OFFICIAL,Period Official,369.0,786.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:33Z,2.0,3.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,251.0,3851.0,0.0,0.0,0.0,0.0


In [60]:
dfsort["homePlayers"] = dfsort[["homeCum","about.periodType"]].apply(lambda r: 5+r["homeCum"] if r["about.periodType"]=="REGULAR" else 3+r["homeCum"],axis=1)
dfsort["awayPlayers"] = dfsort[["awayCum","about.periodType"]].apply(lambda r: 5+r["awayCum"] if r["about.periodType"]=="REGULAR" else 3+r["awayCum"],axis=1)

In [82]:
dfsort[dfsort]

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition,awayCum,homeCum,homePlayers,awayPlayers,penaltyTime
0,Game Scheduled,WSH1,GAME_SCHEDULED,Game Scheduled,0.0,1.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T22:07:35Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
1,Period Ready,WSH5,PERIOD_READY,Period Ready,1.0,5.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:07:16Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
2,Period Start,WSH8,PERIOD_START,Period Start,2.0,8.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
3,Faceoff,WSH9,FACEOFF,Bryan Little faceoff won against Evgeny Kuznetsov,3.0,9.0,1.0,REGULAR,1st,00:00,20:00,2018-03-12T23:08:21Z,0.0,0.0,"[{'player': {'id': 8473412, 'fullName': 'Bryan...",0.0,0.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,,,,,,,,2017021065,Washington Capitals,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
4,Hit,WSH51,HIT,Alex Ovechkin hit Dustin Byfuglien,4.0,51.0,1.0,REGULAR,1st,00:17,19:43,2018-03-12T23:09:00Z,0.0,0.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-94.0,35.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,17.0,17.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,Faceoff,WSH781,FACEOFF,Nicklas Backstrom faceoff won against Bryan Li...,366.0,781.0,4.0,OVERTIME,OT,03:28,01:32,2018-03-13T01:49:39Z,2.0,2.0,"[{'player': {'id': 8473563, 'fullName': 'Nickl...",69.0,22.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,,,,,,,,2017021065,Washington Capitals,1.0,208.0,3808.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367.0,782.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2.0,3.0,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals,1.0,251.0,3851.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0
368,Period End,WSH783,PERIOD_END,End of OT,368.0,783.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:32Z,2.0,3.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,251.0,3851.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0
369,Period Official,WSH786,PERIOD_OFFICIAL,Period Official,369.0,786.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:51:33Z,2.0,3.0,,,,,,,,,,,,,,,2017021065,Washington Capitals,0.0,251.0,3851.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0


In [84]:
a

{'penaltyTimePeriods': {2017021065: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [83]:
dfsort[dfsort["result.strength.name"].notna()]

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition,awayCum,homeCum,homePlayers,awayPlayers,penaltyTime
39,Goal,WSH29,GOAL,"Alex Ovechkin (41) Wrist Shot, assists: John C...",39.0,29.0,1.0,REGULAR,1st,04:35,15:25,2018-03-12T23:16:38Z,0.0,1.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",-57.0,-20.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,PPG,Power Play,False,False,2017021065,Washington Capitals,1.0,275.0,275.0,0.0,0.0,-2.0,0.0,5.0,3.0,77.0
45,Goal,WSH32,GOAL,"Nikolaj Ehlers (27) Wrist Shot, assists: none",45.0,32.0,1.0,REGULAR,1st,05:33,14:27,2018-03-12T23:19:06Z,1.0,1.0,"[{'player': {'id': 8477940, 'fullName': 'Nikol...",77.0,-3.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,0.0,333.0,333.0,0.0,0.0,-1.0,-1.0,4.0,4.0,135.0
151,Goal,WSH247,GOAL,"Alex Ovechkin (42) Wrist Shot, assists: Tom Wi...",151.0,247.0,2.0,REGULAR,2nd,03:53,16:07,2018-03-13T00:14:42Z,1.0,2.0,"[{'player': {'id': 8471214, 'fullName': 'Alex ...",85.0,8.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,1.0,233.0,1433.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
272,Goal,WSH623,GOAL,"Patrik Laine (41) Wrist Shot, assists: Paul St...",272.0,623.0,3.0,REGULAR,3rd,05:02,14:58,2018-03-13T01:13:54Z,2.0,2.0,"[{'player': {'id': 8479339, 'fullName': 'Patri...",80.0,16.0,52.0,Winnipeg Jets,/api/v1/teams/52,WPG,Wrist Shot,,,EVEN,Even,False,False,2017021065,Washington Capitals,0.0,302.0,2702.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
367,Goal,WSH782,GOAL,"Evgeny Kuznetsov (21) Wrist Shot, assists: Cha...",367.0,782.0,4.0,OVERTIME,OT,04:11,00:49,2018-03-13T01:50:50Z,2.0,3.0,"[{'player': {'id': 8475744, 'fullName': 'Evgen...",74.0,1.0,15.0,Washington Capitals,/api/v1/teams/15,WSH,Wrist Shot,,,EVEN,Even,True,False,2017021065,Washington Capitals,1.0,251.0,3851.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0


In [45]:
dfsort.groupby(["gamePk"])[["awayAddition","homeAddition"]].cumsum().to_csv("dummy.csv")

In [225]:
df["gameSeconds"] =df["gameSeconds"].astype(int)
df["gamePk"] =df["gamePk"].astype(int)

In [227]:
df.dtypes

result.event                  object
result.eventCode              object
result.eventTypeId            object
result.description            object
about.eventIdx                 int64
about.eventId                  int64
about.period                   int64
about.periodType              object
about.ordinalNum              object
about.periodTime              object
about.periodTimeRemaining     object
about.dateTime                object
about.goals.away               int64
about.goals.home               int64
players                       object
coordinates.x                float64
coordinates.y                float64
team.id                      float64
team.name                     object
team.link                     object
team.triCode                  object
result.secondaryType          object
result.penaltySeverity        object
result.penaltyMinutes        float64
result.strength.code          object
result.strength.name          object
result.gameWinningGoal        object
r

In [64]:
dfgame_group.columns

Index(['gamePk', 'result.event', 'about.periodType', 'isHome', 'gameSeconds',
       'result.penaltyMinutes', 'penaltyAdditions', 'penaltyTimePeriods'],
      dtype='object')

In [66]:
dfsort.shape

(385, 39)

In [68]:
dfgame_group[["gamePk","penaltyTimePeriods"]].to_dict()

{'gamePk': {0: 2017021065},
 'penaltyTimePeriods': {0: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [69]:
a=dfgame_group[["gamePk","penaltyTimePeriods"]].set_index(["gamePk"]).to_dict()
# ["penaltyAdditions"]
a

{'penaltyTimePeriods': {2017021065: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [76]:
# dfsort.merge(dfgame_group[["gamePk","penaltyTimePeriods"]],how="inner",on="gamePk")

In [73]:
def getPenaltyTime(row):
    gameSec = row["gameSeconds"]
    gamePk = row["gamePk"]
    penalty_periods = a['penaltyTimePeriods'][gamePk]
    for period in penalty_periods:
        if gameSec>period[0] and gameSec<=period[1]:
            return gameSec-period[0]
        elif gameSec < period[0]:
            return 0
    return 0

In [74]:
a

{'penaltyTimePeriods': {2017021065: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [79]:
dfsort[dfsort["about.period"].isna()]

Unnamed: 0,result.event,result.eventCode,result.eventTypeId,result.description,about.eventIdx,about.eventId,about.period,about.periodType,about.ordinalNum,about.periodTime,about.periodTimeRemaining,about.dateTime,about.goals.away,about.goals.home,players,coordinates.x,coordinates.y,team.id,team.name,team.link,team.triCode,result.secondaryType,result.penaltySeverity,result.penaltyMinutes,result.strength.code,result.strength.name,result.gameWinningGoal,result.emptyNet,gamePk,gameData.teams.home.name,isHome,periodSeconds,gameSeconds,awayAddition,homeAddition,awayCum,homeCum,homePlayers,awayPlayers,penaltyTime
371,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,198.0,-1.0,0.0,-1.0,0.0,3.0,2.0,0.0
372,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,233.0,-1.0,0.0,-2.0,0.0,3.0,1.0,35.0
373,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,275.0,1.0,0.0,-1.0,0.0,3.0,2.0,77.0
374,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,310.0,0.0,-1.0,-1.0,-1.0,2.0,2.0,112.0
375,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,333.0,0.0,1.0,-1.0,0.0,3.0,2.0,135.0
376,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,353.0,1.0,0.0,0.0,0.0,3.0,3.0,155.0
377,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,884.0,0.0,-1.0,0.0,-1.0,2.0,3.0,0.0
378,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1004.0,0.0,1.0,0.0,0.0,3.0,3.0,120.0
379,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1019.0,-1.0,0.0,-1.0,0.0,3.0,2.0,0.0
380,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2017021065,,,,1139.0,1.0,0.0,0.0,0.0,3.0,3.0,120.0


In [75]:
dfsort["penaltyTime"] = dfsort.apply(lambda r: getPenaltyTime(r),axis=1)

In [80]:
dfsort[]

Index(['result.event', 'result.eventCode', 'result.eventTypeId',
       'result.description', 'about.eventIdx', 'about.eventId', 'about.period',
       'about.periodType', 'about.ordinalNum', 'about.periodTime',
       'about.periodTimeRemaining', 'about.dateTime', 'about.goals.away',
       'about.goals.home', 'players', 'coordinates.x', 'coordinates.y',
       'team.id', 'team.name', 'team.link', 'team.triCode',
       'result.secondaryType', 'result.penaltySeverity',
       'result.penaltyMinutes', 'result.strength.code', 'result.strength.name',
       'result.gameWinningGoal', 'result.emptyNet', 'gamePk',
       'gameData.teams.home.name', 'isHome', 'periodSeconds', 'gameSeconds',
       'awayAddition', 'homeAddition', 'awayCum', 'homeCum', 'homePlayers',
       'awayPlayers', 'penaltyTime'],
      dtype='object')

In [199]:
# dfgame_group[["gamePk","penaltyTimePeriods"]].to_ict()

{'gamePk': {0: 2017021065},
 'penaltyTimePeriods': {0: [(198, 353),
   (884, 1004),
   (1019, 1139),
   (1944, 2064),
   (3619, 3739)]}}

In [185]:
dc = dfgame_group["penaltyAdditions"].values[0]

In [None]:
# array([OrderedDict([(198, defaultdict(<class 'int'>, {'awayAddition': -1})), (233, defaultdict(<class 'int'>, {'awayAddition': -1})), (275, defaultdict(<class 'int'>, {'awayAddition': 1})), (310, defaultdict(<class 'int'>, {'homeAddition': -1})), (333, defaultdict(<class 'int'>, {'homeAddition': 1})), (353, defaultdict(<class 'int'>, {'awayAddition': 1})), (884, defaultdict(<class 'int'>, {'homeAddition': -1})), (1004, defaultdict(<class 'int'>, {'homeAddition': 1})), (1019, defaultdict(<class 'int'>, {'awayAddition': -1})), (1139, defaultdict(<class 'int'>, {'awayAddition': 1})), (1944, defaultdict(<class 'int'>, {'homeAddition': -1})), (2064, defaultdict(<class 'int'>, {'homeAddition': 1})), (3619, defaultdict(<class 'int'>, {'homeAddition': 0}))])],


In [186]:
dc

OrderedDict([(198, defaultdict(int, {'awayAddition': -1})),
             (233, defaultdict(int, {'awayAddition': -1})),
             (275, defaultdict(int, {'awayAddition': 1})),
             (310, defaultdict(int, {'homeAddition': -1})),
             (333, defaultdict(int, {'homeAddition': 1})),
             (353, defaultdict(int, {'awayAddition': 1})),
             (884, defaultdict(int, {'homeAddition': -1})),
             (1004, defaultdict(int, {'homeAddition': 1})),
             (1019, defaultdict(int, {'awayAddition': -1})),
             (1139, defaultdict(int, {'awayAddition': 1})),
             (1944, defaultdict(int, {'homeAddition': -1})),
             (2064, defaultdict(int, {'homeAddition': 1})),
             (3619, defaultdict(int, {'homeAddition': 1})),
             (3739, defaultdict(int, {'homeAddition': -1}))])

In [188]:
time_periods

[(198, 353), (884, 1004), (1019, 1139), (1944, 2064), (3619, 3739)]

In [187]:
time_periods = []
awaySum = 0
homeSum = 0
foundEnd=True
for time in dc:
    if foundEnd:
        startTime =time
        foundEnd=False
    awayAddition = dc[time]['awayAddition']
    homeAddition = dc[time]['homeAddition']
    awaySum += awayAddition
    homeSum += homeAddition

    if (awaySum==0 and homeSum==0):
        foundEnd = True
        endTime = time
        time_periods.append((startTime,endTime))
