# Fetch data 

In [68]:
import requests
import json 
import pandas as pd
import numpy as np
def read_key(dir):
    f = open(dir,"r")
    return f.readline()[:-1]

def fetch_Data(url , key=read_key('../.key_subscription')):
    
    headers = {"Ocp-Apim-Subscription-Key": key,
            'Content-Type': 'application/json'}
    res = requests.get(url, headers=headers)


    response = json.loads(res.text)
    return response

## News_by_date

In [69]:
url = "https://api.sportsdata.io/v3/nhl/scores/json/NewsByDate/2023-11-01"
data_2 = fetch_Data(url)
data_2

[{'NewsID': 27140,
  'PlayerID': 30003147,
  'TeamID': 1,
  'Team': 'BOS',
  'Title': 'Charlie McAvoy Suspended Four Games',
  'Content': "Boston Bruins defenseman Charlie McAvoy has received a four-game suspension for an illegal check to the head of Florida Panthers defenseman Oliver Ekman-Larsson. The incident resulted in McAvoy getting a match penalty in Monday's game, and he'll now have plenty of time to think about his actions. McAvoy scored in a second consecutive game Monday and has been on fire with eight points in nine games. The Bruins also lost Matt Grzelcyk (upper body) to injury in Monday's game, meaning the entire first defensive pairing got wiped out in one night. Hampus Lindholm and Brandon Carlo should receive a nice boost in playing time now, while the rest of the blue line will need to put in extra shifts as well. McAvoy will be eligible to return to action on Nov. 11 against Montreal.",
  'Url': 'https://www.rotoballer.com/player-news/charlie-mcavoy-suspended-four-g

In [3]:
df_2 = pd.DataFrame(data_2)
df_2.columns

Index(['NewsID', 'PlayerID', 'TeamID', 'Team', 'Title', 'Content', 'Url',
       'Source', 'TermsOfUse', 'Updated'],
      dtype='object')

In [4]:
df_2["Updated"].value_counts(dropna=False
                             )

2023-11-01T14:41:02    1
Name: Updated, dtype: int64

## Make_dataset

In [39]:
from datetime import datetime , timedelta
now = datetime.now() 
now.strftime("%Y/%m/%d") , now

('2023/11/16', datetime.datetime(2023, 11, 16, 1, 49, 43, 72363))

In [45]:
df_Game = []
df_news = []
days = 30
for i in range(1,days):
    date_Game = (now - timedelta(days=i)).strftime("%Y-%m-%d")
    date_News = (now - timedelta(days=i+1)).strftime("%Y-%m-%d")
    url = f"https://api.sportsdata.io/v3/nhl/scores/json/GamesByDate/{date_Game}"
    data_Game = fetch_Data(url)
    url = f"https://api.sportsdata.io/v3/nhl/scores/json/NewsByDate/{date_News}"
    data_News = fetch_Data(url)
    # if len(data_Game) and len(data_News): # if there is no game , we dont want news and if there is no news we dont want game stat !
    df_Game.append(pd.DataFrame(data_Game)[["GameID","DateTime","IsClosed","AwayTeamID","HomeTeamID","AwayTeamScore","HomeTeamScore"]])
    for dic in data_News : # add the date to the data
        dic["DateTime"] = datetime.strptime(date_News, '%Y-%m-%d')
    df_news.append(pd.DataFrame(data_News)[["TeamID" , "NewsID" , "Content" , "DateTime" ,"Url","Source"]])

df_Game = pd.concat(df_Game,ignore_index=True)
df_news = pd.concat(df_news,ignore_index=True)


df_Game["DateTime"] = df_Game["DateTime"].apply(lambda x : datetime.strptime(x.split("T")[0], '%Y-%m-%d'))

In [46]:
df_news.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   TeamID    28 non-null     object        
 1   NewsID    29 non-null     int64         
 2   Content   29 non-null     object        
 3   DateTime  29 non-null     datetime64[ns]
 4   Url       29 non-null     object        
 5   Source    29 non-null     object        
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 1.5+ KB


In [47]:
df_Game.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   GameID         193 non-null    int64         
 1   DateTime       193 non-null    datetime64[ns]
 2   IsClosed       193 non-null    bool          
 3   AwayTeamID     193 non-null    int64         
 4   HomeTeamID     193 non-null    int64         
 5   AwayTeamScore  189 non-null    object        
 6   HomeTeamScore  189 non-null    object        
dtypes: bool(1), datetime64[ns](1), int64(3), object(2)
memory usage: 9.4+ KB


map away team `game` to away team `news` and home team `game` to home team `news`

In [48]:
Away_df = pd.merge(df_Game , df_news ,how="inner" , left_on="AwayTeamID" , right_on="TeamID")
Home_df = pd.merge(df_Game , df_news ,how="inner" , left_on="HomeTeamID" , right_on="TeamID")

Determine an `Interval`

In [49]:
pivot_0 = pd.Timedelta(days=0)
pivot_1 = pd.Timedelta(days=7)

In [50]:
mask_Away = (pivot_0 < (Away_df["DateTime_x"] - Away_df["DateTime_y"])) & ((Away_df["DateTime_x"] - Away_df["DateTime_y"]) < pivot_1)
mask_home = (pivot_0 < (Home_df["DateTime_x"] - Home_df["DateTime_y"])) & ((Home_df["DateTime_x"] - Home_df["DateTime_y"]) < pivot_1)

Make DataFrame

In [51]:
final_Away_df = Away_df[mask_Away].sort_values(by="DateTime_x",ascending=False).reset_index()

In [52]:
final_Home_df = Home_df[mask_home].sort_values(by="DateTime_x",ascending=False).reset_index()

Map *News* to `Nearest` *game* 

In [53]:
final_Away_df["DiffrenceInDay"] = final_Away_df["DateTime_x"] - final_Away_df["DateTime_y"]
final_Home_df["DiffrenceInDay"] = final_Home_df["DateTime_x"] - final_Home_df["DateTime_y"]
dup_Away = final_Away_df.sort_values(by="DiffrenceInDay")["NewsID"].duplicated(keep="first")
dup_Home = final_Home_df.sort_values(by="DiffrenceInDay")["NewsID"].duplicated(keep="first")
final_Away_df = final_Away_df[dup_Away.apply(lambda x : not x)]
final_Home_df = final_Home_df[dup_Home.apply(lambda x : not x)]

  final_Away_df = final_Away_df[dup_Away.apply(lambda x : not x)]
  final_Home_df = final_Home_df[dup_Home.apply(lambda x : not x)]


In [54]:
final_Away_df

Unnamed: 0,index,GameID,DateTime_x,IsClosed,AwayTeamID,HomeTeamID,AwayTeamScore,HomeTeamScore,TeamID,NewsID,Content,DateTime_y,Url,Source,DiffrenceInDay
0,72,20858,2023-11-14,True,8,27,1,0,8,27337,Florida Panthers center/right wing Sam Reinhar...,2023-11-13,https://www.rotoballer.com/player-news/sam-rei...,RotoBaller,1 days
2,53,20855,2023-11-14,True,6,20,0,1,6,27255,Tampa Bay Lightning goaltender Matt Tomkins co...,2023-11-08,https://www.rotoballer.com/player-news/matt-to...,RotoBaller,6 days
4,48,20839,2023-11-11,True,29,21,11,8,29,27290,Arizona Coyotes forward Jason Zucker (upper bo...,2023-11-10,https://www.rotoballer.com/player-news/jason-z...,RotoBaller,1 days
5,23,20827,2023-11-10,True,24,7,1,1,24,27224,Calgary Flames winger Andrew Mangiapane has be...,2023-11-06,https://www.rotoballer.com/player-news/andrew-...,RotoBaller,4 days
9,62,20795,2023-11-05,True,10,17,6,3,10,27196,New Jersey Devils forward Jack Hughes sustaine...,2023-11-04,https://www.rotoballer.com/player-news/jack-hu...,RotoBaller,1 days
10,29,20794,2023-11-04,True,24,36,1,1,24,27102,Calgary Flames center Adam Ruzicka (shoulder) ...,2023-10-29,https://www.rotoballer.com/player-news/adam-ru...,RotoBaller,6 days
12,28,20794,2023-11-04,True,24,36,1,1,24,27108,Calgary Flames defenseman MacKenzie Weegar tal...,2023-10-30,https://www.rotoballer.com/player-news/mackenz...,RotoBaller,5 days
13,144,20790,2023-11-04,True,12,23,1,1,12,27173,New York Rangers defenseman Adam Fox left Thur...,2023-11-03,https://www.rotoballer.com/player-news/adam-fo...,RotoBaller,1 days
14,10,20782,2023-11-04,True,1,3,1,1,1,27140,Boston Bruins defenseman Charlie McAvoy has re...,2023-11-01,https://www.rotoballer.com/player-news/charlie...,RotoBaller,3 days
15,4,20778,2023-11-03,True,13,2,8,2,13,27159,Philadelphia Flyers goaltender Carter Hart had...,2023-11-02,https://www.rotoballer.com/player-news/carter-...,RotoBaller,1 days


In [55]:
final_Home_df

Unnamed: 0,index,GameID,DateTime_x,IsClosed,AwayTeamID,HomeTeamID,AwayTeamScore,HomeTeamScore,TeamID,NewsID,Content,DateTime_y,Url,Source,DiffrenceInDay
0,4,20860,2023-11-15,False,36,25,,,25,27356,Edmonton Oilers center/left wing Leon Draisait...,2023-11-14,https://www.rotoballer.com/player-news/leon-dr...,RotoBaller,1 days
4,124,20826,2023-11-10,True,15,10,1.0,0.0,10,27196,New Jersey Devils forward Jack Hughes sustaine...,2023-11-04,https://www.rotoballer.com/player-news/jack-hu...,RotoBaller,6 days
7,94,20818,2023-11-09,True,17,6,6.0,4.0,6,27255,Tampa Bay Lightning goaltender Matt Tomkins co...,2023-11-08,https://www.rotoballer.com/player-news/matt-to...,RotoBaller,1 days
10,110,20811,2023-11-08,True,5,7,1.0,0.0,7,27239,Toronto Maple Leafs center Auston Matthews con...,2023-11-07,https://www.rotoballer.com/player-news/auston-...,RotoBaller,1 days
11,83,20804,2023-11-07,True,3,12,4.0,7.0,12,27173,New York Rangers defenseman Adam Fox left Thur...,2023-11-03,https://www.rotoballer.com/player-news/adam-fo...,RotoBaller,4 days
12,137,20806,2023-11-07,True,21,24,3.0,5.0,24,27224,Calgary Flames winger Andrew Mangiapane has be...,2023-11-06,https://www.rotoballer.com/player-news/andrew-...,RotoBaller,1 days
13,115,20797,2023-11-06,True,6,7,1.0,1.0,7,27211,Toronto Maple Leafs center Auston Matthews pot...,2023-11-05,https://www.rotoballer.com/player-news/auston-...,RotoBaller,1 days
14,72,20798,2023-11-06,True,16,8,1.0,1.0,8,27126,Florida Panthers center Sam Bennett was limite...,2023-10-31,https://www.rotoballer.com/player-news/sam-ben...,RotoBaller,6 days
15,159,20784,2023-11-04,True,26,13,1.0,0.0,13,27159,Philadelphia Flyers goaltender Carter Hart had...,2023-11-02,https://www.rotoballer.com/player-news/carter-...,RotoBaller,2 days
17,133,20766,2023-11-02,True,7,1,1.0,1.0,1,27140,Boston Bruins defenseman Charlie McAvoy has re...,2023-11-01,https://www.rotoballer.com/player-news/charlie...,RotoBaller,1 days


## Save data

In [56]:
final_Away_df.to_csv("./Away_data.csv",index=False)
final_Home_df.to_csv("./Home_data.csv",index=False)

In [59]:
final_Away_df["NewsID"].duplicated()

0     False
2     False
4     False
5     False
9     False
10    False
12    False
13    False
14    False
15    False
16    False
18    False
19    False
20    False
21    False
23    False
25    False
26    False
27    False
28    False
Name: NewsID, dtype: bool