In [1]:
import pandas as pd
import numpy as np
import requests, copy
from pprint import pprint
from bs4 import BeautifulSoup
from bball_ref_scraper import BBRefScraper

### Create Initial DataFrame from scraping http://www.prosportstransactions.com/

In [2]:
# Load the injury records previously saved from prosportstransactions.com.
# `df_json` stays untouched as the raw frame; `df` is the working copy that
# later cells reshape.
df_json = pd.read_json("../data/pst_nba_injuries_2020_2021.txt")
df = df_json.copy(deep=True)
df.head(n=5)

Unnamed: 0,Date,Team,Healed,Injured,Notes
0,2020-03-11,76ers,Joel Embiid,,activated from IL
1,2020-03-11,Bucks,Eric Bledsoe,,activated from IL
2,2020-03-11,Bucks,George Hill,,activated from IL
3,2020-03-11,Bulls,,Chandler Hutchison,surgery on right shoulder (out for season)
4,2020-03-11,Heat,,Jimmy Butler,placed on IL with left big toe injury


### Reformat the DataFrame

In [3]:

# Build the tidy frame from the untouched raw frame (`df_json`) instead of
# mutating `df` in place, so this cell can be re-run without a KeyError on the
# already-dropped 'Healed' / 'Injured' columns.
# In the rows displayed above, exactly one of 'Healed' / 'Injured' holds a name
# and the other is an empty string, so concatenating them yields the player.
# NOTE(review): assumes blanks are '' (not NaN) in every row -- confirm.
df = df_json.assign(
    Player=df_json['Healed'] + df_json['Injured'],
    # An empty 'Healed' entry marks the row as an injury event.
    Status=np.where(df_json['Healed'] == '', 'Injured', 'Healed'),
)[['Date', 'Team', 'Player', 'Status', 'Notes']]
df.head(10)

Unnamed: 0,Date,Team,Player,Status,Notes
0,2020-03-11,76ers,Joel Embiid,Healed,activated from IL
1,2020-03-11,Bucks,Eric Bledsoe,Healed,activated from IL
2,2020-03-11,Bucks,George Hill,Healed,activated from IL
3,2020-03-11,Bulls,Chandler Hutchison,Injured,surgery on right shoulder (out for season)
4,2020-03-11,Heat,Jimmy Butler,Injured,placed on IL with left big toe injury
5,2020-03-11,Heat,Jae Crowder,Healed,returned to lineup
6,2020-03-11,Heat,Tyler Herro,Healed,activated from IL
7,2020-03-11,Hornets,Terry Rozier,Injured,placed on IL with illness
8,2020-03-11,Jazz,Emmanuel Mudiay,Injured,illness (DTD)
9,2020-03-11,Jazz,Rudy Gobert,Injured,COVID-19 coronavirus (out indefinitely)


### Extract players out for COVID or "Health and Safety Protocols"

In [4]:
# Keep rows whose note mentions "COVID" or the league's health-and-safety
# "protocol(s)" (both matches are case-sensitive, as before).
# One regex alternation replaces the two separate `str.contains` calls, and
# `na=False` treats a missing note as "no match" instead of putting NaN into
# the boolean mask, which would make the row selection fail.
df_cv19 = df[df['Notes'].str.contains('COVID|protocol', na=False)]
df_cv19

Unnamed: 0,Date,Team,Player,Status,Notes
9,2020-03-11,Jazz,Rudy Gobert,Injured,COVID-19 coronavirus (out indefinitely)
19,2020-03-12,Jazz,Donovan Mitchell,Injured,COVID-19 coronavirus (DTD)
21,2020-03-14,Pistons,Christian Wood,Injured,COVID-19 coronavirus (DTD)
22,2020-03-17,Nets,Kevin Durant,Injured,NBA health and safety protocols (DTD)
24,2020-03-19,Celtics,Marcus Smart,Injured,COVID-19 coronavirus (out indefinitely)
...,...,...,...,...,...
2846,2021-05-09,Pistons,Hamidou Diallo,Injured,placed on IL with NBA health and safety protocols
2957,2021-05-14,Thunder,Josh Hall,Injured,placed on IL with NBA health and safety protoc...
2969,2021-05-15,Heat,Omer Yurtseven,Injured,placed on IL with NBA health and safety protocols
3031,2021-05-16,Spurs,DaQuan Jeffries,Injured,placed on IL with NBA health and safety protoc...


240 instances of players being held out due to COVID protocols since 03/11/2020 (the day Rudy Gobert tested positive and the league shut down).

### Just COVID data for 2020-2021 NBA Season

In [5]:
# Restrict the COVID rows to the 2020-2021 season using an unambiguous ISO
# cut-off date (1 December 2020). The previous literal '12-01-2020' depended on
# implicit month-first parsing of the string during the comparison.
# pd.to_datetime leaves an already-datetime 'Date' column unchanged and keeps
# the comparison chronological if the column ever arrives as strings.
df_cv19_2021 = df_cv19[pd.to_datetime(df_cv19['Date']) >= pd.Timestamp('2020-12-01')]
df_cv19_2021

Unnamed: 0,Date,Team,Player,Status,Notes
415,2020-12-22,Blazers,Nassir Little,Injured,COVID-19 (DTD)
423,2020-12-22,Hawks,Rajon Rondo,Injured,placed on IL with NBA health and safety protocols
468,2020-12-23,Rockets,Kenyon Martin Jr. / K.J. Martin,Injured,placed on IL with NBA health and safety protocols
472,2020-12-25,Bucks,Mamadi Diakite,Injured,placed on IL with NBA health and safety protoc...
475,2020-12-25,Rockets,Ben McLemore,Injured,COVID-19 protocols
...,...,...,...,...,...
2846,2021-05-09,Pistons,Hamidou Diallo,Injured,placed on IL with NBA health and safety protocols
2957,2021-05-14,Thunder,Josh Hall,Injured,placed on IL with NBA health and safety protoc...
2969,2021-05-15,Heat,Omer Yurtseven,Injured,placed on IL with NBA health and safety protocols
3031,2021-05-16,Spurs,DaQuan Jeffries,Injured,placed on IL with NBA health and safety protoc...


230 instances of players being held out due to COVID protocols during the 2020-2021 NBA season.

### Unique Players

In [6]:
# Distinct player names appearing in the 2020-2021 COVID rows;
# the cell displays how many there are.
players = df_cv19_2021.loc[:, 'Player'].unique()
len(players)

182

182 unique players were held out due to COVID protocols during the 2020-2021 NBA season.

### Generate basketball-reference.com IDs

In [8]:
from bball_ref_id_generator import id_generator


In [14]:
# Look up a basketball-reference.com ID for every player name in `players`.
# `id_generator` comes from the project module `bball_ref_id_generator`.
# NOTE(review): a tuple return value is treated as a failed lookup here --
# confirm that contract against id_generator itself.
bbref_id_list = []      # [player, bbref_id] pairs for successful lookups
bbref_id_failures = []  # (player, returned tuple) for lookups that failed
for player in players:
    bbref_id = id_generator(player)
    if isinstance(bbref_id, tuple):
        # Record and name the player: a skipped player has no row in the ID
        # table, so a later left merge on 'Player' leaves its ID missing.
        bbref_id_failures.append((player, bbref_id))
        print(f'Something messed up: {player!r} -> {bbref_id!r}')
    else:
        bbref_id_list.append([player, bbref_id])



  soup = BeautifulSoup(r.content, 'html')


  soup = BeautifulSoup(r.content, 'html')


In [17]:
# Two-column lookup table built from the [player, bbref_id] pairs in
# `bbref_id_list`: one row per player with a resolved ID.
df_bbref_id = pd.DataFrame(columns=['Player', 'BBRef_ID'], data=bbref_id_list)
df_bbref_id

Unnamed: 0,Player,BBRef_ID
0,Nassir Little,littlna01
1,Rajon Rondo,rondora01
2,Kenyon Martin Jr. / K.J. Martin,martike01
3,Mamadi Diakite,diakima01
4,Ben McLemore,mclembe01
...,...,...
177,Luca Vildoza,vildolu01
178,Hamidou Diallo,diallha01
179,Omer Yurtseven,yurtsom01
180,DaQuan Jeffries,jeffrda01


### Merging BBRef IDs with DataFrame

In [18]:
# Attach each player's basketball-reference ID to the 2020-2021 COVID rows.
# Both frames share the 'Player' column, so merging with on='Player' produces
# the final columns directly -- no synthetic 'key_0' / 'Player_x' / 'Player_y'
# to drop or rename, and no in-place mutation after the merge.
# how='left' keeps every COVID row; players without a resolved ID get NaN.
# validate='many_to_one' raises if a player appears more than once in the ID
# table, which would otherwise silently duplicate COVID rows.
df_main = df_cv19_2021.merge(
    df_bbref_id,
    how='left',
    on='Player',
    validate='many_to_one',
)
df_main

Unnamed: 0,Date,Team,Player,Status,Notes,BBRef_ID
0,2020-12-22,Blazers,Nassir Little,Injured,COVID-19 (DTD),littlna01
1,2020-12-22,Hawks,Rajon Rondo,Injured,placed on IL with NBA health and safety protocols,rondora01
2,2020-12-23,Rockets,Kenyon Martin Jr. / K.J. Martin,Injured,placed on IL with NBA health and safety protocols,martike01
3,2020-12-25,Bucks,Mamadi Diakite,Injured,placed on IL with NBA health and safety protoc...,diakima01
4,2020-12-25,Rockets,Ben McLemore,Injured,COVID-19 protocols,mclembe01
...,...,...,...,...,...,...
225,2021-05-09,Pistons,Hamidou Diallo,Injured,placed on IL with NBA health and safety protocols,diallha01
226,2021-05-14,Thunder,Josh Hall,Injured,placed on IL with NBA health and safety protoc...,halljo01
227,2021-05-15,Heat,Omer Yurtseven,Injured,placed on IL with NBA health and safety protocols,yurtsom01
228,2021-05-16,Spurs,DaQuan Jeffries,Injured,placed on IL with NBA health and safety protoc...,jeffrda01


### Scrape 

In [19]:
for bbr_id in bbref_id_list:
    

182