In [1]:
# Initial imports
import pandas as pd

In [2]:
# Read the raw NBA player list CSV from disk and preview its first rows.
file_path = "../SourceData/NBA-playerlist.csv"
playerlist_full_df = pd.read_csv(filepath_or_buffer=file_path)
playerlist_full_df.head(5)

Unnamed: 0.1,Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,GAMES_PLAYED_FLAG,OTHERLEAGUE_EXPERIENCE_CH,PERSON_ID,PLAYERCODE,ROSTERSTATUS,TEAM_ABBREVIATION,TEAM_CITY,TEAM_CODE,TEAM_ID,TEAM_NAME,TO_YEAR
0,0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,Y,0,76001,HISTADD_alaa_abdelnaby,0,,,,0,,1994
1,1,Zaid Abdul-Aziz,"Abdul-Aziz, Zaid",1968,Y,0,76002,HISTADD_zaid_abdul-aziz,0,,,,0,,1977
2,2,Kareem Abdul-Jabbar,"Abdul-Jabbar, Kareem",1969,Y,0,76003,HISTADD_kareem_abdul-jabbar,0,,,,0,,1988
3,3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,Y,0,51,mahmoud_abdul-rauf,0,,,,0,,2000
4,4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,Y,0,1505,tariq_abdul-wahad,0,,,,0,,2003


In [3]:
# Build an independent working copy restricted to the two player-name columns
# (both are kept, just in case) plus the FROM_YEAR and TO_YEAR fields.
career_columns = ['DISPLAY_FIRST_LAST', 'DISPLAY_LAST_COMMA_FIRST', 'FROM_YEAR', 'TO_YEAR']
career_duration_df = playerlist_full_df.loc[:, career_columns].copy()
career_duration_df.head()

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994
1,Zaid Abdul-Aziz,"Abdul-Aziz, Zaid",1968,1977
2,Kareem Abdul-Jabbar,"Abdul-Jabbar, Kareem",1969,1988
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003


In [4]:
# Keep only players whose FROM_YEAR (treated in this notebook as the draft
# year) is 1980 or later.
from_1980_onward = career_duration_df['FROM_YEAR'].ge(1980)
career_duration_df = career_duration_df[from_1980_onward]
career_duration_df.head()

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003
5,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007
9,Alex Abrines,"Abrines, Alex",2016,2018


In [5]:
# Count the non-null values in each column; a column whose count is lower than
# the others has missing entries.
career_duration_df.notna().sum()

DISPLAY_FIRST_LAST          2809
DISPLAY_LAST_COMMA_FIRST    2809
FROM_YEAR                   2809
TO_YEAR                     2809
dtype: int64

In [6]:
# Check datatypes: show the dtype of every column (the year columns are used
# in arithmetic by later cells, the name column in string concatenation).
career_duration_df.dtypes

DISPLAY_FIRST_LAST          object
DISPLAY_LAST_COMMA_FIRST    object
FROM_YEAR                    int64
TO_YEAR                      int64
dtype: object

In [7]:
# Cast the DISPLAY_FIRST_LAST (player name) column to str so it can be
# concatenated with other text later on.
player_names = career_duration_df['DISPLAY_FIRST_LAST']
career_duration_df['DISPLAY_FIRST_LAST'] = player_names.astype(str)
career_duration_df.head()

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003
5,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007
9,Alex Abrines,"Abrines, Alex",2016,2018


In [8]:
# Create Unique Identifier Field (GUID): player name concatenated with
# FROM_YEAR (used here as the draft year), with all punctuation and whitespace
# stripped and letters upper-cased,
# e.g. "B.J. Armstrong" + 1989 -> "BJARMSTRONG1989".
#
# The whole transformation is one chained expression assigned once, instead of
# mutating the column through `career_duration_df['GUID'].replace(..., inplace=True)`:
# that chained in-place form operates on a temporary object and is silently a
# no-op when pandas Copy-on-Write is active.
# NOTE(review): uniqueness of the result is not verified here; consider
# checking career_duration_df['GUID'].is_unique before using it as a key.
career_duration_df['GUID'] = (
    (career_duration_df['DISPLAY_FIRST_LAST'] + career_duration_df['FROM_YEAR'].astype(str))
    # Strip out apostrophes, commas, periods, hyphens and any other punctuation.
    # regex=True must be explicit: since pandas 2.0 Series.str.replace treats
    # the pattern as a literal string by default, which would strip nothing.
    .str.replace(r'[^\w\s]+', '', regex=True)
    # Strip out whitespace (raw string avoids the invalid '\s' escape warning).
    .str.replace(r'\s+', '', regex=True)
    # Convert to upper case.
    .str.upper()
)

career_duration_df.head(100)

  


Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR,GUID
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994,ALAAABDELNABY1990
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000,MAHMOUDABDULRAUF1990
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003,TARIQABDULWAHAD1997
5,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007,SHAREEFABDURRAHIM1996
9,Alex Abrines,"Abrines, Alex",2016,2018,ALEXABRINES2016
...,...,...,...,...,...
120,Trevor Ariza,"Ariza, Trevor",2004,2018,TREVORARIZA2004
122,Joe Arlauckas,"Arlauckas, Joe",1987,1987,JOEARLAUCKAS1987
123,B.J. Armstrong,"Armstrong, B.J.",1989,1999,BJARMSTRONG1989
125,Brandon Armstrong,"Armstrong, Brandon",2001,2003,BRANDONARMSTRONG2001


In [9]:
# Calculate career duration in seasons, counted inclusively:
# Years_Played = TO_YEAR - FROM_YEAR + 1.
# Author's note: a number of players with a YEARS_PLAYED of 0 were
# cross-referenced and do not appear to have actually played in any games.
year_span = career_duration_df['TO_YEAR'].sub(career_duration_df['FROM_YEAR'])
career_duration_df['Years_Played'] = year_span.add(1)
career_duration_df.head(50)

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR,GUID,Years_Played
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994,ALAAABDELNABY1990,5
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000,MAHMOUDABDULRAUF1990,11
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003,TARIQABDULWAHAD1997,7
5,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007,SHAREEFABDURRAHIM1996,12
9,Alex Abrines,"Abrines, Alex",2016,2018,ALEXABRINES2016,3
10,Alex Acker,"Acker, Alex",2005,2008,ALEXACKER2005,4
12,Mark Acres,"Acres, Mark",1987,1992,MARKACRES1987,6
14,Quincy Acy,"Acy, Quincy",2012,2018,QUINCYACY2012,7
17,Hassan Adams,"Adams, Hassan",2006,2008,HASSANADAMS2006,3
18,Jaylen Adams,"Adams, Jaylen",2018,2018,JAYLENADAMS2018,1


In [10]:
# Calculate the Hall of Fame eligibility year as TO_YEAR + 5.
# Per the author's reading of the rule, a player must have been retired for
# 4 full seasons and becomes eligible beginning with the 5th season after
# retirement. (The column name keeps its existing spelling.)
final_year = career_duration_df['TO_YEAR']
career_duration_df['HOF_Elgibility_Year'] = final_year.add(5)
career_duration_df.head(50)

Unnamed: 0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR,GUID,Years_Played,HOF_Elgibility_Year
0,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994,ALAAABDELNABY1990,5,1999
3,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000,MAHMOUDABDULRAUF1990,11,2005
4,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003,TARIQABDULWAHAD1997,7,2008
5,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007,SHAREEFABDURRAHIM1996,12,2012
9,Alex Abrines,"Abrines, Alex",2016,2018,ALEXABRINES2016,3,2023
10,Alex Acker,"Acker, Alex",2005,2008,ALEXACKER2005,4,2013
12,Mark Acres,"Acres, Mark",1987,1992,MARKACRES1987,6,1997
14,Quincy Acy,"Acy, Quincy",2012,2018,QUINCYACY2012,7,2023
17,Hassan Adams,"Adams, Hassan",2006,2008,HASSANADAMS2006,3,2013
18,Jaylen Adams,"Adams, Jaylen",2018,2018,JAYLENADAMS2018,1,2023


In [11]:
# Set GUID as the new index, replacing the existing integer index.
# DataFrame.set_index returns a new DataFrame rather than modifying the frame
# in place, so the result has to be assigned back; otherwise the index change
# is only displayed and then discarded, and later cells still see the old
# integer index. The guard keeps the cell re-runnable once GUID is already
# the index (a second set_index('GUID') would raise KeyError).
if career_duration_df.index.name != 'GUID':
    career_duration_df = career_duration_df.set_index('GUID')
career_duration_df

Unnamed: 0_level_0,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,FROM_YEAR,TO_YEAR,Years_Played,HOF_Elgibility_Year
GUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALAAABDELNABY1990,Alaa Abdelnaby,"Abdelnaby, Alaa",1990,1994,5,1999
MAHMOUDABDULRAUF1990,Mahmoud Abdul-Rauf,"Abdul-Rauf, Mahmoud",1990,2000,11,2005
TARIQABDULWAHAD1997,Tariq Abdul-Wahad,"Abdul-Wahad, Tariq",1997,2003,7,2008
SHAREEFABDURRAHIM1996,Shareef Abdur-Rahim,"Abdur-Rahim, Shareef",1996,2007,12,2012
ALEXABRINES2016,Alex Abrines,"Abrines, Alex",2016,2018,3,2023
...,...,...,...,...,...,...
STEPHENZIMMERMAN2016,Stephen Zimmerman,"Zimmerman, Stephen",2016,2016,1,2021
PAULZIPSER2016,Paul Zipser,"Zipser, Paul",2016,2017,2,2022
ANTEZIZIC2017,Ante Zizic,"Zizic, Ante",2017,2018,2,2023
JIMZOET1982,Jim Zoet,"Zoet, Jim",1982,1982,1,1987


In [12]:
# Export to CSV
#export_path = "Resources/draft_position.csv"
#career_duration_df.to_csv(export_path)

In [13]:
# import the necessary packages
import os
from getpass import getpass
from urllib.parse import quote_plus

import psycopg2  # DBAPI driver required by the 'postgresql+psycopg2' dialect
from sqlalchemy import create_engine

# Connection settings.
# The password is deliberately NOT stored in the notebook: it is read from the
# NBA_HOF_DB_PASSWORD environment variable, falling back to an interactive
# prompt. Credentials must never be hard-coded in a shared notebook; any
# password that was previously committed with this file should be rotated.
db_user = os.environ.get('NBA_HOF_DB_USER', 'postgres')
db_password = os.environ.get('NBA_HOF_DB_PASSWORD') or getpass('PostgreSQL password: ')
db_host = os.environ.get(
    'NBA_HOF_DB_HOST', 'nba-hof-project.cxpeww6dbftb.us-east-2.rds.amazonaws.com')
db_port = os.environ.get('NBA_HOF_DB_PORT', '5432')
db_name = os.environ.get('NBA_HOF_DB_NAME', 'postgres')

# Create the engine to connect to the database.
# quote_plus escapes characters such as '@', ':' or '/' in the password so
# they cannot corrupt the URL.
engine = create_engine(
    f'postgresql+psycopg2://{db_user}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}')

# Write data into the table in database.
# to_sql uses its default if_exists='fail', so re-running this cell raises
# ValueError when the 'new_career_duration' table already exists.
career_duration_df.to_sql('new_career_duration', engine)