In [2]:
# Import libraries
import pandas as pd
import numpy as np
import gzip as gz
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from zipfile import ZipFile

In [3]:
# Load and explore dataset
nba = 'NBAInjuries.zip'
with ZipFile(nba, 'r') as zip:
    zip.extractall('nba_unzipped')
    
df = pd.read_csv('nba_unzipped/injuries_2010-2020.csv')
df.head()

Unnamed: 0,Date,Team,Acquired,Relinquished,Notes
0,2010-10-03,Bulls,,Carlos Boozer,fractured bone in right pinky finger (out inde...
1,2010-10-06,Pistons,,Jonas Jerebko,torn right Achilles tendon (out indefinitely)
2,2010-10-06,Pistons,,Terrico White,broken fifth metatarsal in right foot (out ind...
3,2010-10-08,Blazers,,Jeff Ayres,torn ACL in right knee (out indefinitely)
4,2010-10-08,Nets,,Troy Murphy,strained lower back (out indefinitely)


In [4]:
# Drop Acquired column, null values, and rename Relinquished to Name
df.drop(['Acquired'], axis=1, inplace=True)
df.dropna(inplace=True)
df.rename(columns={'Relinquished':'Name'}, inplace=True)

In [5]:
pd.set_option('display.max_colwidth', None)
df

Unnamed: 0,Date,Team,Name,Notes
0,2010-10-03,Bulls,Carlos Boozer,fractured bone in right pinky finger (out indefinitely)
1,2010-10-06,Pistons,Jonas Jerebko,torn right Achilles tendon (out indefinitely)
2,2010-10-06,Pistons,Terrico White,broken fifth metatarsal in right foot (out indefinitely)
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely)
4,2010-10-08,Nets,Troy Murphy,strained lower back (out indefinitely)
...,...,...,...,...
27097,2020-09-22,Celtics,Romeo Langford,surgery on right wrist (out for season)
27098,2020-09-23,Heat,Gabe Vincent,sore right knee (DTD)
27099,2020-09-30,Heat,Bam Adebayo,strained left shoulder (DTD)
27101,2020-10-02,Heat,Bam Adebayo,strained neck (DTD)


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17558 entries, 0 to 27102
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    17558 non-null  object
 1   Team    17558 non-null  object
 2   Name    17558 non-null  object
 3   Notes   17558 non-null  object
dtypes: object(4)
memory usage: 685.9+ KB


In [7]:
s1 = df.Notes

In [8]:
s1

0         fractured bone in right pinky finger (out indefinitely)
1                   torn right Achilles tendon (out indefinitely)
2        broken fifth metatarsal in right foot (out indefinitely)
3                       torn ACL in right knee (out indefinitely)
4                          strained lower back (out indefinitely)
                                   ...                           
27097                     surgery on right wrist (out for season)
27098                                       sore right knee (DTD)
27099                                strained left shoulder (DTD)
27101                                         strained neck (DTD)
27102          placed on IL with torn plantar fascia in left foot
Name: Notes, Length: 17558, dtype: object

In [9]:
ACL = s1.str.contains('acl', case=False)
ACL.value_counts()

False    17414
True       144
Name: Notes, dtype: int64

In [10]:
Achilles = s1.str.contains('achilles', case=False)
Achilles.value_counts()

False    17295
True       263
Name: Notes, dtype: int64

In [11]:
patella_tendon = s1.str.contains('patella tendon', case=False)
patella_tendon.value_counts()

False    17533
True        25
Name: Notes, dtype: int64

In [12]:
out_for_season = s1.str.contains('out for season', case=False)
out_for_season.value_counts()

False    16433
True      1125
Name: Notes, dtype: int64

In [13]:
df = df.assign(ACL=pd.Series(ACL).values)
df = df.assign(Achilles=pd.Series(Achilles).values)
df = df.assign(out_for_season=pd.Series(out_for_season).values)
df = df*1
df

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
0,2010-10-03,Bulls,Carlos Boozer,fractured bone in right pinky finger (out indefinitely),0,0,0
1,2010-10-06,Pistons,Jonas Jerebko,torn right Achilles tendon (out indefinitely),0,1,0
2,2010-10-06,Pistons,Terrico White,broken fifth metatarsal in right foot (out indefinitely),0,0,0
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely),1,0,0
4,2010-10-08,Nets,Troy Murphy,strained lower back (out indefinitely),0,0,0
...,...,...,...,...,...,...,...
27097,2020-09-22,Celtics,Romeo Langford,surgery on right wrist (out for season),0,0,1
27098,2020-09-23,Heat,Gabe Vincent,sore right knee (DTD),0,0,0
27099,2020-09-30,Heat,Bam Adebayo,strained left shoulder (DTD),0,0,0
27101,2020-10-02,Heat,Bam Adebayo,strained neck (DTD),0,0,0


In [14]:
# Dataset of injuries that resulted in player being 'out for season'
ofs = df[(df['Notes'].str.contains('out for season', case=False))]
ofs.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1125 entries, 8 to 27097
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Date            1125 non-null   object
 1   Team            1125 non-null   object
 2   Name            1125 non-null   object
 3   Notes           1125 non-null   object
 4   ACL             1125 non-null   int64 
 5   Achilles        1125 non-null   int64 
 6   out_for_season  1125 non-null   int64 
dtypes: int64(3), object(4)
memory usage: 70.3+ KB


In [15]:
ofs

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
8,2010-10-12,Bucks,Darington Hobson,surgery on left hip (out for season),0,0,1
17,2010-10-26,Blazers,Greg Oden,placed on IL with left knee injury (out for season),0,0,1
75,2010-10-27,Pistons,Jonas Jerebko,placed on IL recovering from surgery to repair torn right Achilles tendon (out for season),0,1,1
76,2010-10-27,Pistons,Terrico White,placed on IL recovering from surgery on right foot to repair broken fifth metatarsal (out for season),0,0,1
175,2010-11-04,Blazers,Elliot Williams,"dislocated patella in right knee (surgery, out for season)",0,0,1
...,...,...,...,...,...,...,...
27077,2020-08-29,Blazers,Wenyen Gabriel,right quadricep injury (out for season),0,0,1
27080,2020-08-30,Mavericks,Courtney Lee,placed on IL with left calf injury (out for season),0,0,1
27095,2020-09-12,Rockets,Tyson Chandler,placed on IL with neck spasms (out for season),0,0,1
27096,2020-09-18,Celtics,Romeo Langford,placed on IL with strained right adductor (out for season),0,0,1


In [16]:
ofs_10 = ofs[(ofs['Date'] < '2011-07-01')]
ofs_11 = ofs[(ofs['Date'].between('2011-07-01','2012-07-01'))]
ofs_12 = ofs[(ofs['Date'].between('2012-07-01','2013-07-01'))]
ofs_13 = ofs[(ofs['Date'].between('2013-07-01','2014-07-01'))]
ofs_14 = ofs[(ofs['Date'].between('2014-07-01','2015-07-01'))]
ofs_15 = ofs[(ofs['Date'].between('2015-07-01','2016-07-01'))]
ofs_16 = ofs[(ofs['Date'].between('2016-07-01','2017-07-01'))]
ofs_17 = ofs[(ofs['Date'].between('2017-07-01','2018-07-01'))]
ofs_18 = ofs[(ofs['Date'].between('2018-07-01','2019-07-01'))]

In [17]:
ofs_10

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
8,2010-10-12,Bucks,Darington Hobson,surgery on left hip (out for season),0,0,1
17,2010-10-26,Blazers,Greg Oden,placed on IL with left knee injury (out for season),0,0,1
75,2010-10-27,Pistons,Jonas Jerebko,placed on IL recovering from surgery to repair torn right Achilles tendon (out for season),0,1,1
76,2010-10-27,Pistons,Terrico White,placed on IL recovering from surgery on right foot to repair broken fifth metatarsal (out for season),0,0,1
175,2010-11-04,Blazers,Elliot Williams,"dislocated patella in right knee (surgery, out for season)",0,0,1
...,...,...,...,...,...,...,...
2599,2011-05-11,Celtics,Shaquille O'Neal,placed on IL (out for season),0,0,1
2605,2011-05-25,Bulls,Omer Asik,stress fracture in left fibula (out for season),0,0,1
2607,2011-05-26,Bulls,Omer Asik,placed on IL with stress fracture in left fibula (out for season),0,0,1
2617,2011-06-09,Mavericks,Brendan Haywood,placed on IL with strained right hip flexor (out for season),0,0,1


In [18]:
# Function to drop duplicates
def DropDuplicates(dataset):
    return dataset.drop_duplicates(subset=['Name'], inplace=True)

In [19]:
# Test function
DropDuplicates(ofs_10)
ofs_10

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return dataset.drop_duplicates(subset=['Name'], inplace=True)


Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
8,2010-10-12,Bucks,Darington Hobson,surgery on left hip (out for season),0,0,1
17,2010-10-26,Blazers,Greg Oden,placed on IL with left knee injury (out for season),0,0,1
75,2010-10-27,Pistons,Jonas Jerebko,placed on IL recovering from surgery to repair torn right Achilles tendon (out for season),0,1,1
76,2010-10-27,Pistons,Terrico White,placed on IL recovering from surgery on right foot to repair broken fifth metatarsal (out for season),0,0,1
175,2010-11-04,Blazers,Elliot Williams,"dislocated patella in right knee (surgery, out for season)",0,0,1
...,...,...,...,...,...,...,...
2589,2011-05-02,Hawks,Kirk Hinrich,placed on IL with strained right hamstring (out for season),0,0,1
2599,2011-05-11,Celtics,Shaquille O'Neal,placed on IL (out for season),0,0,1
2605,2011-05-25,Bulls,Omer Asik,stress fracture in left fibula (out for season),0,0,1
2617,2011-06-09,Mavericks,Brendan Haywood,placed on IL with strained right hip flexor (out for season),0,0,1


In [20]:
# Make list of season datasets
list_ofs = [ofs_10, ofs_11, ofs_12, ofs_13, ofs_14, ofs_15,
            ofs_16, ofs_17, ofs_18]

In [21]:
# Apply function
[DropDuplicates(season) for season in list_ofs]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return dataset.drop_duplicates(subset=['Name'], inplace=True)


[None, None, None, None, None, None, None, None, None]

Import NBA player stats

In [122]:
# Import player stats
p09 = pd.read_excel('player_stats/2009-2010.xlsx')
p10 = pd.read_excel('player_stats/2010-2011.xlsx')
p11 = pd.read_excel('player_stats/2011-2012.xlsx')
p12 = pd.read_excel('player_stats/2012-2013.xlsx')
p13 = pd.read_excel('player_stats/2013-2014.xlsx')
p14 = pd.read_excel('player_stats/2014-2015.xlsx')
p15 = pd.read_excel('player_stats/2015-2016.xlsx')
p16 = pd.read_excel('player_stats/2016-2017.xlsx')
p17 = pd.read_excel('player_stats/2017-2018.xlsx')

In [123]:
# Merge stats dataset with injury dataset
s09 = pd.merge(p09,ofs_10, how='left', on='Name')
s10 = pd.merge(p10,ofs_11, how='left', on='Name')
s11 = pd.merge(p11,ofs_12, how='left', on='Name')
s12 = pd.merge(p12,ofs_13, how='left', on='Name')
s13 = pd.merge(p13,ofs_14, how='left', on='Name')
s14 = pd.merge(p14,ofs_14, how='left', on='Name')
s15 = pd.merge(p15,ofs_14, how='left', on='Name')
s16 = pd.merge(p16,ofs_14, how='left', on='Name')
s17 = pd.merge(p17,ofs_14, how='left', on='Name')

In [124]:
# Check number of entries
s15.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 476 entries, 0 to 475
Data columns (total 63 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            476 non-null    object 
 1   TEAM            476 non-null    object 
 2   AGE             476 non-null    int64  
 3   HEIGHT          476 non-null    object 
 4   WEIGHT          476 non-null    int64  
 5   COLLEGE         476 non-null    object 
 6   COUNTRY         476 non-null    object 
 7   DRAFT YEAR      476 non-null    object 
 8   DRAFT ROUND     476 non-null    object 
 9   DRAFT NUMBER    476 non-null    object 
 10  GP              476 non-null    int64  
 11  NETRTG          476 non-null    float64
 12  TS%             476 non-null    float64
 13  W               476 non-null    int64  
 14  L               476 non-null    int64  
 15  MIN             476 non-null    float64
 16  PTS             476 non-null    float64
 17  FGM             476 non-null    flo

In [126]:
df_con = pd.concat([s09,s10,s11,s12,s13,s14,s15,s16,s17], axis=0)

In [127]:
df_con.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4317 entries, 0 to 539
Data columns (total 63 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            4317 non-null   object 
 1   TEAM            4317 non-null   object 
 2   AGE             4317 non-null   int64  
 3   HEIGHT          4317 non-null   object 
 4   WEIGHT          4317 non-null   int64  
 5   COLLEGE         4317 non-null   object 
 6   COUNTRY         4317 non-null   object 
 7   DRAFT YEAR      4317 non-null   object 
 8   DRAFT ROUND     4317 non-null   object 
 9   DRAFT NUMBER    4317 non-null   object 
 10  GP              4317 non-null   int64  
 11  NETRTG          4317 non-null   float64
 12  TS%             4317 non-null   float64
 13  W               4317 non-null   int64  
 14  L               4317 non-null   int64  
 15  MIN             4317 non-null   float64
 16  PTS             4317 non-null   float64
 17  FGM             4317 non-null   fl

In [26]:
injury_df = df[(df['Notes'].str.contains('tear|torn|rupture', case=False))]
injury_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 642 entries, 1 to 27102
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Date            642 non-null    object
 1   Team            642 non-null    object
 2   Name            642 non-null    object
 3   Notes           642 non-null    object
 4   ACL             642 non-null    int64 
 5   Achilles        642 non-null    int64 
 6   out_for_season  642 non-null    int64 
dtypes: int64(3), object(4)
memory usage: 40.1+ KB


In [27]:
injury_df.drop_duplicates(subset=['Name'], inplace=True)
injury_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  injury_df.drop_duplicates(subset=['Name'], inplace=True)


Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
1,2010-10-06,Pistons,Jonas Jerebko,torn right Achilles tendon (out indefinitely),0,1,0
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely),1,0,0
7,2010-10-09,Nuggets,Al Harrington,partially torn plantar fascia in left foot (out indefinitely),0,0,0
18,2010-10-26,Blazers,Joel Przybilla,placed on IL placed on IL recovering from surgery on right knee to repair ruptured patella tendon,0,0,0
20,2010-10-26,Celtics,Kendrick Perkins,placed on IL recovering from surgery on right knee to repair torn ACL,1,0,0
...,...,...,...,...,...,...,...
26496,2020-02-24,Pacers,Jeremy Lamb,torn ACL in left knee (out for season),1,0,1
26832,2020-08-02,Magic,Jonathan Isaac,torn ACL in left knee (out for season),1,0,1
26846,2020-08-04,Grizzlies,Jaren Jackson Jr.,torn meniscus in left knee (out indefinitely),0,0,0
27070,2020-08-25,Celtics,Javonte Green,arthroscopic surgery on right knee to repair torn meniscus (out for season),0,0,1


In [28]:
inj_10 = injury_df[(injury_df['Date'] < '2012-07-01')]
inj_11 = injury_df[(injury_df['Date'].between('2012-07-01','2013-07-01'))]

In [29]:
inj_10

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
1,2010-10-06,Pistons,Jonas Jerebko,torn right Achilles tendon (out indefinitely),0,1,0
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely),1,0,0
7,2010-10-09,Nuggets,Al Harrington,partially torn plantar fascia in left foot (out indefinitely),0,0,0
18,2010-10-26,Blazers,Joel Przybilla,placed on IL placed on IL recovering from surgery on right knee to repair ruptured patella tendon,0,0,0
20,2010-10-26,Celtics,Kendrick Perkins,placed on IL recovering from surgery on right knee to repair torn ACL,1,0,0
26,2010-10-26,Lakers,Andrew Bynum,placed on IL recovering from surgery on right knee to repair torn ligaments,0,0,0
41,2010-10-27,Bucks,Michael Redd,placed on IL recovering from surgery on left knee to repair torn ACL,1,0,0
57,2010-10-27,Jazz,Mehmet Okur,placed on IL recovering from surgery to repair torn left Achilles tendon,0,1,0
69,2010-10-27,Nuggets,Chris Andersen,placed on IL recovering from surgery on right knee to repair partially torn patella tendon,0,0,0
77,2010-10-27,Raptors,Ed Davis,placed on IL recovering from arthroscopic surgery on right kene to repair torn meniscus,0,0,0


In [30]:
ofs = injury_df[(injury_df['out_for_season'] ==1)]
ach = injury_df[(injury_df['Achilles'] ==1)]
acl = injury_df[(injury_df['ACL'] ==1)]

Multiple entries for same player and injury are noted. The duplicates need to be removed.

In [31]:
acl.Name.value_counts()

Leandro Barbosa       1
Jabari Parker         1
David West            1
Baron Davis           1
Iman Shumpert         1
Michael Redd          1
Jonathan Isaac        1
Derrick Rose          1
Nate Robinson         1
Dejounte Murray       1
Brandon Knight        1
Kendrick Perkins      1
Josh Howard           1
Kendall Marshall      1
Max Strus             1
Rajon Rondo           1
Gani Lawal            1
Jeff Ayres            1
Ricky Rubio           1
Danilo Gallinari      1
Dante Exum            1
O.G. Anunoby          1
Kristaps Porzingis    1
Jeremy Lamb           1
Klay Thompson         1
J.J. Hickson          1
Brandon Rush          1
Zach LaVine           1
Lou Williams          1
Jarrett Jack          1
Tony Wroten Jr.       1
Eric Maynor           1
Name: Name, dtype: int64

In [32]:
len(acl.Name.value_counts())

32

In [33]:
# Drop duplicate entries for same player ACL rupture
acl.drop_duplicates(subset=['Name'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acl.drop_duplicates(subset=['Name'], inplace=True)


In [34]:
acl.Name.value_counts()

Leandro Barbosa       1
Jabari Parker         1
David West            1
Baron Davis           1
Iman Shumpert         1
Michael Redd          1
Jonathan Isaac        1
Derrick Rose          1
Nate Robinson         1
Dejounte Murray       1
Brandon Knight        1
Kendrick Perkins      1
Josh Howard           1
Kendall Marshall      1
Max Strus             1
Rajon Rondo           1
Gani Lawal            1
Jeff Ayres            1
Ricky Rubio           1
Danilo Gallinari      1
Dante Exum            1
O.G. Anunoby          1
Kristaps Porzingis    1
Jeremy Lamb           1
Klay Thompson         1
J.J. Hickson          1
Brandon Rush          1
Zach LaVine           1
Lou Williams          1
Jarrett Jack          1
Tony Wroten Jr.       1
Eric Maynor           1
Name: Name, dtype: int64

In [35]:
len(acl.Name.value_counts())

32

In [36]:
acl

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely),1,0,0
20,2010-10-26,Celtics,Kendrick Perkins,placed on IL recovering from surgery on right knee to repair torn ACL,1,0,0
41,2010-10-27,Bucks,Michael Redd,placed on IL recovering from surgery on left knee to repair torn ACL,1,0,0
99,2010-10-28,Wizards,Josh Howard,placed on IL with torn ACL in left knee,1,0,0
1053,2011-01-06,Suns,Gani Lawal,torn ACL and partially torn MCL in right knee (out for season),1,0,1
2120,2011-03-25,Hornets,David West,placed on IL with torn ACL in left knee (out for season),1,0,1
2877,2012-01-08,Thunder,Eric Maynor,placed on IL with torn ACL in right knee (out for season),1,0,1
3903,2012-03-09,Timberwolves,Ricky Rubio,torn ACL in left knee (out for season),1,0,1
4946,2012-04-28,Bulls,Derrick Rose,torn ACL in left knee (out for season),1,0,1
4951,2012-04-28,Knicks,Iman Shumpert,torn lateral meniscus / ACL (CBC) in left knee (out for season),1,0,1


In [37]:
acl.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 3 to 26832
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Date            32 non-null     object
 1   Team            32 non-null     object
 2   Name            32 non-null     object
 3   Notes           32 non-null     object
 4   ACL             32 non-null     int64 
 5   Achilles        32 non-null     int64 
 6   out_for_season  32 non-null     int64 
dtypes: int64(3), object(4)
memory usage: 2.0+ KB


In [38]:
acl_10 = acl[(acl['Date'] < '2012-01-08')]

In [39]:
acl_10

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
3,2010-10-08,Blazers,Jeff Ayres,torn ACL in right knee (out indefinitely),1,0,0
20,2010-10-26,Celtics,Kendrick Perkins,placed on IL recovering from surgery on right knee to repair torn ACL,1,0,0
41,2010-10-27,Bucks,Michael Redd,placed on IL recovering from surgery on left knee to repair torn ACL,1,0,0
99,2010-10-28,Wizards,Josh Howard,placed on IL with torn ACL in left knee,1,0,0
1053,2011-01-06,Suns,Gani Lawal,torn ACL and partially torn MCL in right knee (out for season),1,0,1
2120,2011-03-25,Hornets,David West,placed on IL with torn ACL in left knee (out for season),1,0,1


In [40]:
ach.Name.value_counts()

Darrell Arthur      1
Kevin Durant        1
C.J. Wilcox         1
DeMarcus Cousins    1
Brandon Jennings    1
Elliot Williams     1
Jeff Taylor         1
Mehmet Okur         1
Jonas Jerebko       1
Darius Miller       1
Kobe Bryant         1
Mario Chalmers      1
DeSagana Diop       1
Dwight Powell       1
Rudy Gay            1
Sheldon Mac         1
Rodney Hood         1
John Wall           1
Wes Matthews Jr.    1
David Nwaba         1
Name: Name, dtype: int64

In [41]:
# Drop duplicate entries for same player Achilles rupture
ach.drop_duplicates(subset=['Name'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ach.drop_duplicates(subset=['Name'], inplace=True)


In [42]:
len(ach.Name.value_counts())

20

In [43]:
ach

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
1,2010-10-06,Pistons,Jonas Jerebko,torn right Achilles tendon (out indefinitely),0,1,0
57,2010-10-27,Jazz,Mehmet Okur,placed on IL recovering from surgery to repair torn left Achilles tendon,0,1,0
1003,2011-01-03,Bobcats,DeSagana Diop,placed on IL with ruptured right Achilles tendon (out for season),0,1,1
2630,2011-12-20,Grizzlies,Darrell Arthur,ruptured right Achilles tendon (out for season),0,1,1
5026,2012-09-11,Blazers,Elliot Williams,torn left Achilles tendon (out indefinitely),0,1,0
7213,2013-04-12,Lakers,Kobe Bryant,torn left Achilles tendon (out indefinitely),0,1,0
8384,2013-12-21,Bobcats,Jeff Taylor,torn right Achilles tendon (DNP),0,1,0
11690,2015-01-24,Pistons,Brandon Jennings,torn left Achilles (out for season),0,1,1
12185,2015-03-06,Blazers,Wes Matthews Jr.,torn left Achilles tendon (out for season),0,1,1
15018,2016-03-09,Grizzlies,Mario Chalmers,ruptured right Achilles tendon (out for season),0,1,1


In [44]:
ofs.Name.value_counts()

Udonis Haslem         1
Kristaps Porzingis    1
Robert Covington      1
Jeremy Evans          1
Danilo Gallinari      1
                     ..
Jeremy Lin            1
Nene Hilario          1
Anderson Varejao      1
Leandro Barbosa       1
Isaiah Canaan         1
Name: Name, Length: 68, dtype: int64

In [45]:
len(ofs.Name.value_counts())

68

In [46]:
# Drop duplicate entries for same player Achilles rupture
ofs.drop_duplicates(subset=['Name'], inplace=True)
ofs.Name.value_counts()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ofs.drop_duplicates(subset=['Name'], inplace=True)


Udonis Haslem         1
Kristaps Porzingis    1
Robert Covington      1
Jeremy Evans          1
Danilo Gallinari      1
                     ..
Jeremy Lin            1
Nene Hilario          1
Anderson Varejao      1
Leandro Barbosa       1
Isaiah Canaan         1
Name: Name, Length: 68, dtype: int64

In [47]:
len(ofs.Name.value_counts())

68

In [48]:
ofs

Unnamed: 0,Date,Team,Name,Notes,ACL,Achilles,out_for_season
402,2010-11-22,Heat,Udonis Haslem,placed on IL with torn ligament in left foot (out for season),0,0,1
995,2011-01-02,Mavericks,Caron Butler,placed on IL with torn patella tendon in right knee (out for season),0,0,1
1003,2011-01-03,Bobcats,DeSagana Diop,placed on IL with ruptured right Achilles tendon (out for season),0,1,1
1053,2011-01-06,Suns,Gani Lawal,torn ACL and partially torn MCL in right knee (out for season),1,0,1
1058,2011-01-07,Cavaliers,Anderson Varejao,torn tendon in right ankle (out for season),0,0,1
...,...,...,...,...,...,...,...
26080,2020-01-22,Mavericks,Dwight Powell,torn right Achilles tendon (out for season),0,1,1
26494,2020-02-24,Mavericks,Jalen Brunson,placed on IL with torn labrum in right shoulder (out for season),0,0,1
26496,2020-02-24,Pacers,Jeremy Lamb,torn ACL in left knee (out for season),1,0,1
26832,2020-08-02,Magic,Jonathan Isaac,torn ACL in left knee (out for season),1,0,1
