# Ingénierie des caractéristiques II (15%)

In [2]:
import numpy as np
import pandas as pd
import datetime

In [None]:
#Helper Function
def role(z):
    """Split the players involved in one play by their role.

    Args:
        z: iterable of player entries; each entry is indexed with
            ``'playerType'`` and ``'player'`` (which in turn provides
            ``'fullName'`` and ``'id'``).

    Returns:
        A three-item list ``[shooters, assists, goalie]``. ``shooters`` and
        ``assists`` are lists of ``(fullName, id)`` tuples. ``goalie`` is the
        first ``(fullName, id)`` tuple whose type is ``'Goalie'``, or
        ``("Empty", 0)`` when the play lists no goalie.
    """
    shooter, assist, guardian = [], [], []

    # "Shooter" and "Scorer" both feed the shooter list.
    buckets = {
        "Shooter": shooter,
        "Scorer": shooter,
        "Assist": assist,
        "Goalie": guardian,
    }

    for player in z:
        bucket = buckets.get(player['playerType'])
        if bucket is None:
            # Unrecognised types are reported and otherwise ignored.
            print("new player type: "+ player['playerType'])
            continue
        bucket.append((player['player']['fullName'], player['player']['id']))

    guardian = guardian[0] if guardian else ("Empty", 0)
    return [shooter, assist, guardian]


def stringCoordx(strCoord):
    """Parse the first coordinate out of a stringified coordinate pair.

    The value is the text following the last space of the first
    comma-separated field, converted to ``float``.
    """
    first_field = strCoord.split(",")[0]
    token = first_field.rsplit(" ", 1)[-1]
    return float(token)

def stringCoordy(strCoord):
    """Parse the second coordinate out of a stringified coordinate pair.

    The value is the text following the last space of the second
    comma-separated field, with its final character (the closing
    delimiter) removed, converted to ``float``.
    """
    second_field = strCoord.split(",")[1]
    token = second_field.rsplit(" ", 1)[-1]
    return float(token[:-1])


def clean_with_PreviousEvent(json_path):
    """Build the shot-level feature table (with previous-event features).

    Reads the JSON file, flattens ``liveData -> plays -> allPlays`` into one
    row per play, attaches information about the preceding play, keeps the
    ``Shot`` and ``Goal`` events, derives the features and writes them to
    ``<json_path without ".json">_previous.csv`` (without the index).

    Args:
        json_path: path to the JSON file. Characters ``[-9:-5]`` of the path
            must be the season year (e.g. ``nhl_data_2017.json``).

    Returns:
        The DataFrame that was written to disk. Its index is the position of
        the game in the input file, repeated for every shot of that game.
    """
    df = pd.read_json(json_path)

    # One row per play; the index keeps the position of the game in the file.
    df = df['liveData'].apply(pd.Series)['plays'].apply(pd.Series)['allPlays']
    df = pd.DataFrame(df)
    df = df.explode("allPlays")
    df = df.rename(columns={'allPlays': "play"})

    # Flatten the nested 'result' and 'about' dictionaries into columns.
    df = df['play'].apply(pd.Series)
    df = pd.concat([df, df['result'].apply(pd.Series)], axis=1)
    df = df.drop('result', axis=1)
    df = pd.concat([df, df['about'].apply(pd.Series)], axis=1)
    df = df.drop('about', axis=1)

    # Previous-event columns. Rows with eventId == 1 are blanked so that a
    # game does not inherit the last play of the game stored before it.
    is_first_event = df['eventId'] == 1
    previous_columns = {
        'eventTypeId': 'previousEventTypeId',
        'coordinates': 'previousCoordinates',
        'periodTime': 'previousPeriodTime',
    }
    for source, target in previous_columns.items():
        df[target] = df[source].shift(1)
        df.loc[is_first_event, target] = None

    # Keep only shots and goals, and only the columns the features need.
    # .copy() makes the slice an independent frame so the column assignments
    # below do not trigger SettingWithCopyWarning.
    shots = df.loc[
        df['event'].isin(['Shot', 'Goal']),
        ['period', 'periodTime', 'eventTypeId', 'secondaryType', 'coordinates',
         'previousEventTypeId', 'previousCoordinates', 'previousPeriodTime'],
    ].copy()
    shots = shots.rename(columns={'secondaryType': 'typeDeTir'})

    # Expand the coordinate dictionaries; plays without coordinates give NaN.
    xy = shots['coordinates'].apply(
        lambda c: pd.Series(c, dtype=np.float64)).reindex(columns=['x', 'y'])
    previous_xy = shots['previousCoordinates'].apply(
        lambda c: pd.Series(c, dtype=np.float64)).reindex(columns=['x', 'y'])
    previous_xy.columns = ['previousX', 'previousY']
    shots = pd.concat([shots, xy, previous_xy], axis=1)
    shots = shots.drop(['coordinates', 'previousCoordinates'], axis=1)

    # Shots without a location cannot be used.
    shots = shots.dropna(subset=['x', 'y'])

    # Coordinates of the left and right nets.
    coord_camp_gauche = (-90, 0)
    coord_camp_droit = (90, 0)

    # Distance to the closest of the two nets.
    shots['distanceToNet'] = np.sqrt(np.minimum(
        (shots['x'] - coord_camp_gauche[0])**2 + (shots['y'] - coord_camp_gauche[1])**2,
        (shots['x'] - coord_camp_droit[0])**2 + (shots['y'] - coord_camp_droit[1])**2,
    ))

    # Angle relative to the LEFT net.
    # NOTE(review): distanceToNet uses the nearest net while both angles are
    # always measured from the left one - confirm this is intended.
    shots['relativeAngleToNet'] = np.degrees(
        np.arctan2(shots['y'], shots['x'] - coord_camp_gauche[0]))
    shots['previousRelativeAngleToNet'] = np.degrees(
        np.arctan2(shots['previousY'], shots['previousX'] - coord_camp_gauche[0]))

    # Distance from the previous event.
    shots['distanceFromPrevious'] = np.sqrt(
        (shots['x'] - shots['previousX'])**2
        + (shots['y'] - shots['previousY'])**2).round(2)

    # Period clock in seconds and time elapsed since the previous event.
    # NOTE(review): only the period clocks are compared, so a previous event
    # from another period gives a negative timeDiff - confirm.
    time_format = "%M:%S"
    current_time = pd.to_datetime(shots['periodTime'], format=time_format)
    previous_time = pd.to_datetime(shots['previousPeriodTime'], format=time_format)
    shots['periodTimeInSeconds'] = current_time.dt.minute * 60 + current_time.dt.second
    shots['timeDiff'] = (current_time - previous_time).dt.total_seconds().astype(int)

    # A rebound is a shot whose previous event was also a shot.
    shots['rebond'] = shots['previousEventTypeId'] == 'SHOT'

    # Speed; a zero timeDiff yields NaN instead of +/-inf.
    elapsed = shots['timeDiff'].astype(float).where(shots['timeDiff'] != 0)
    shots['vitesse'] = shots['distanceFromPrevious'] / elapsed

    # The angle change is only meaningful for rebounds.
    shots['angleChange'] = np.abs(
        shots['relativeAngleToNet'] - shots['previousRelativeAngleToNet'])
    shots.loc[~shots['rebond'], 'angleChange'] = 0.0

    shots['isGoal'] = (shots['eventTypeId'] == "GOAL")*1

    # Final table; .copy() so that adding 'season' does not write to a slice.
    final = shots[['period', 'periodTimeInSeconds', 'isGoal', 'typeDeTir', 'x', 'y',
                   'distanceToNet', 'relativeAngleToNet', 'previousEventTypeId',
                   'previousX', 'previousY', 'distanceFromPrevious', 'timeDiff',
                   'rebond', 'angleChange', 'vitesse']].copy()

    final['season'] = int(json_path[-9:-5])

    final.to_csv(json_path[:-5]+"_previous.csv", index=False)

    # Returned so callers can keep working with the table in memory.
    return final

In [None]:
# Build the cleaned per-season CSV for every raw season file.
p = [
    "nhl_data_2016.json",
    "nhl_data_2017.json",
    "nhl_data_2018.json",
    "nhl_data_2019.json",
    "nhl_data_2020.json",
]
for file_path in p:
    # Each call writes "<season file>_previous.csv" next to its input.
    clean_with_PreviousEvent(file_path)

In [None]:
# Merge the cleaned per-season tables into a single CSV.
p_previous = ["nhl_data_2016_previous.csv","nhl_data_2017_previous.csv", "nhl_data_2018_previous.csv", "nhl_data_2019_previous.csv"]
df_list = []
count = 0
for path in p_previous:
    df = pd.read_csv(path)
    # 'Unnamed: 0' only exists in files that were saved together with their
    # index; clean_with_PreviousEvent saves with index=False, so indexing the
    # column unconditionally raises KeyError. Offset it only when present.
    # NOTE(review): the offset is the last label of the previous file, not
    # last + 1 - confirm labels cannot collide across seasons.
    if 'Unnamed: 0' in df.columns and not df.empty:
        df['Unnamed: 0'] = df['Unnamed: 0']+count
        count = df['Unnamed: 0'].values[-1]
    df_list.append(df)
df = pd.concat(df_list, axis=0, ignore_index=True)
df.to_csv('nhl_2016to2019_previous.csv', index=False)

### Enregistrement des données filtrées pour un jeu spécifique

In [None]:
from comet_ml import Experiment
import os

In [None]:
# Rebuild the 2017 feature table, then select the rows labelled 1064.
# NOTE(review): the call below discards its return value, so it does not bind
# `final` in this scope; unless `final` already exists here, the next line
# raises NameError - confirm where `final` is meant to come from.
clean_with_PreviousEvent('/content/drive/MyDrive/NHL2/nhl_data_2017.json')
# presumably label 1064 is the game logged below as ID 2017021065 - verify
WWdf = final.loc[1064]

In [None]:
# Log the selected game's rows to Comet as a dataframe profile.
# os.environ.get expects the NAME of the variable as a string; COMET_API_KEY
# is the environment variable Comet documents for the API key.
experience = Experiment(
    api_key = os.environ.get('COMET_API_KEY'),
    project_name = 'milestone2',
    workspace = 'ift6758-a02',
)
experience.set_name("Enregistrement des données spécifique due match avec ID=2017021065")


experience.log_dataframe_profile(
    dataframe = WWdf,
    name = 'wpg_v_wsh_2017021065', 
    dataframe_format = 'csv'
)

# Close the experiment so the logged data is flushed.
experience.end()