In [1]:
# Import Dependencies
import pandas as pd
import sqlite3 as sql
import os

In [2]:
## Based on an example found at https://www.kaggle.com/code/agilesifaka/historic-nba-drafting-game-and-player-analysis

# Connect to SQL database
db_path = 'Basketball_Dataset/basketball.sqlite'
connection = sql.connect(db_path) # create connection object to database
print("SQL database connected")
table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", connection)
print(table)

SQL database connected
                     name
0                  Player
1                    Team
2         Team_Attributes
3            Team_History
4       Player_Attributes
5          Game_Officials
6   Game_Inactive_Players
7             Team_Salary
8           Player_Salary
9                   Draft
10          Draft_Combine
11          Player_Photos
12            Player_Bios
13                   Game
14                   News
15           News_Missing


In [3]:
# Query the Draft table
query = """
    SELECT 
        ID,
        DISPLAY_FIRST_LAST,
        BIRTHDATE,
        POSITION,
        DRAFT_YEAR
    FROM Player_Attributes
    WHERE DRAFT_YEAR >= 1980 AND DRAFT_YEAR != 'Undrafted';
"""
basketball_db_Player_Attributes_df = pd.read_sql(query, connection)
basketball_db_Player_Attributes_df

Unnamed: 0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR
0,76001,Alaa Abdelnaby,1968-06-24T00:00:00,Forward,1990
1,51,Mahmoud Abdul-Rauf,1969-03-09T00:00:00,Guard,1990
2,1505,Tariq Abdul-Wahad,1974-11-03T00:00:00,Forward-Guard,1997
3,949,Shareef Abdur-Rahim,1976-12-11T00:00:00,Forward,1996
4,203518,Alex Abrines,1993-08-01T00:00:00,Guard,2013
...,...,...,...,...,...
2126,2583,Derrick Zimmerman,1981-12-02T00:00:00,Guard,2003
2127,1627757,Stephen Zimmerman,1996-09-09T00:00:00,Center,2016
2128,1627835,Paul Zipser,1994-02-18T00:00:00,Forward,2016
2129,1627790,Ante Zizic,1997-01-04T00:00:00,Center,2016


In [4]:
basketball_db_Player_Attributes_df.dtypes

ID                    object
DISPLAY_FIRST_LAST    object
BIRTHDATE             object
POSITION              object
DRAFT_YEAR            object
dtype: object

In [5]:
# Force numeric values to integers
basketball_db_Player_Attributes_df['ID'] = basketball_db_Player_Attributes_df['ID'].astype(int)
basketball_db_Player_Attributes_df['DISPLAY_FIRST_LAST'] = basketball_db_Player_Attributes_df['DISPLAY_FIRST_LAST'].astype(str)
#basketball_db_Player_Attributes_df['BIRTHDATE'] = basketball_db_Player_Attributes_df['DISPLAY_FIRST_LAST'].astype('datetime64[ns]')
basketball_db_Player_Attributes_df['BIRTHDATE'] = pd.to_datetime(basketball_db_Player_Attributes_df['BIRTHDATE'], infer_datetime_format=True)
basketball_db_Player_Attributes_df['POSITION'] = basketball_db_Player_Attributes_df['POSITION'].astype(str)
basketball_db_Player_Attributes_df['DRAFT_YEAR'] = basketball_db_Player_Attributes_df['DRAFT_YEAR'].astype(int)
basketball_db_Player_Attributes_df.dtypes

ID                             int32
DISPLAY_FIRST_LAST            object
BIRTHDATE             datetime64[ns]
POSITION                      object
DRAFT_YEAR                     int32
dtype: object

In [6]:
basketball_db_Player_Attributes_df.head()

Unnamed: 0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR
0,76001,Alaa Abdelnaby,1968-06-24,Forward,1990
1,51,Mahmoud Abdul-Rauf,1969-03-09,Guard,1990
2,1505,Tariq Abdul-Wahad,1974-11-03,Forward-Guard,1997
3,949,Shareef Abdur-Rahim,1976-12-11,Forward,1996
4,203518,Alex Abrines,1993-08-01,Guard,2013


In [7]:
# Create Unique Identifier Field - Concatination of Name and Draft Year
# Converted to All Upper Case, All Punctuation and Spaces Stripped
basketball_db_Player_Attributes_df['GUID'] = basketball_db_Player_Attributes_df['DISPLAY_FIRST_LAST'] + basketball_db_Player_Attributes_df['DRAFT_YEAR'].astype(str)

# Strip out Single Quote/Apostrophe Characters, Commas, and Periods
basketball_db_Player_Attributes_df['GUID'] = basketball_db_Player_Attributes_df['GUID'].str.replace(r'[^\w\s]+', '')

# Strip out spaces
basketball_db_Player_Attributes_df['GUID'].replace('\s+', '', regex=True,inplace=True)

#Convert to Upper Case
basketball_db_Player_Attributes_df['GUID'] = basketball_db_Player_Attributes_df['GUID'].str.upper()

basketball_db_Player_Attributes_df.head(100)

  


Unnamed: 0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR,GUID
0,76001,Alaa Abdelnaby,1968-06-24,Forward,1990,ALAAABDELNABY1990
1,51,Mahmoud Abdul-Rauf,1969-03-09,Guard,1990,MAHMOUDABDULRAUF1990
2,1505,Tariq Abdul-Wahad,1974-11-03,Forward-Guard,1997,TARIQABDULWAHAD1997
3,949,Shareef Abdur-Rahim,1976-12-11,Forward,1996,SHAREEFABDURRAHIM1996
4,203518,Alex Abrines,1993-08-01,Guard,2013,ALEXABRINES2013
...,...,...,...,...,...,...
95,76084,Carl Bailey,1958-04-23,Center,1980,CARLBAILEY1980
96,1847,Thurl Bailey,1961-04-07,Forward,1983,THURLBAILEY1983
97,1753,Toby Bailey,1975-11-19,Guard,1998,TOBYBAILEY1998
98,203946,Cameron Bairstow,1990-12-07,Forward-Center,2014,CAMERONBAIRSTOW2014


In [8]:
# Calculate Age at October 15 of Rookie Season

# Concatenate Draft Year and October 15 Constant
basketball_db_Player_Attributes_df['APROX_START_ROOKIE_SEASON'] = basketball_db_Player_Attributes_df['DRAFT_YEAR'].astype(str)+'-10-15' 

# Convert to Date-Time Data Type
basketball_db_Player_Attributes_df['APROX_START_ROOKIE_SEASON'] = pd.to_datetime(basketball_db_Player_Attributes_df['APROX_START_ROOKIE_SEASON'], infer_datetime_format=True)

# Calculate Age
basketball_db_Player_Attributes_df['AGE_ROOKIE_SEASON'] = (basketball_db_Player_Attributes_df['APROX_START_ROOKIE_SEASON'] - basketball_db_Player_Attributes_df['BIRTHDATE'])
basketball_db_Player_Attributes_df['AGE_ROOKIE_SEASON'] = basketball_db_Player_Attributes_df['AGE_ROOKIE_SEASON'].astype('<m8[Y]')
basketball_db_Player_Attributes_df['AGE_ROOKIE_SEASON'] = basketball_db_Player_Attributes_df['AGE_ROOKIE_SEASON'].astype(int)
basketball_db_Player_Attributes_df.head(100)

Unnamed: 0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR,GUID,APROX_START_ROOKIE_SEASON,AGE_ROOKIE_SEASON
0,76001,Alaa Abdelnaby,1968-06-24,Forward,1990,ALAAABDELNABY1990,1990-10-15,22
1,51,Mahmoud Abdul-Rauf,1969-03-09,Guard,1990,MAHMOUDABDULRAUF1990,1990-10-15,21
2,1505,Tariq Abdul-Wahad,1974-11-03,Forward-Guard,1997,TARIQABDULWAHAD1997,1997-10-15,22
3,949,Shareef Abdur-Rahim,1976-12-11,Forward,1996,SHAREEFABDURRAHIM1996,1996-10-15,19
4,203518,Alex Abrines,1993-08-01,Guard,2013,ALEXABRINES2013,2013-10-15,20
...,...,...,...,...,...,...,...,...
95,76084,Carl Bailey,1958-04-23,Center,1980,CARLBAILEY1980,1980-10-15,22
96,1847,Thurl Bailey,1961-04-07,Forward,1983,THURLBAILEY1983,1983-10-15,22
97,1753,Toby Bailey,1975-11-19,Guard,1998,TOBYBAILEY1998,1998-10-15,22
98,203946,Cameron Bairstow,1990-12-07,Forward-Center,2014,CAMERONBAIRSTOW2014,2014-10-15,23


In [9]:
# Create bins in which to group players based on Draft Year
bins = [1979, 1989, 1999, 2009, 2019]

# Create labels for these bins
group_labels = ["1980s","1990s","2000s","2010s"]

# Slice the data and place it into bins
pd.cut(basketball_db_Player_Attributes_df["DRAFT_YEAR"], bins, labels=group_labels).head()

# Place the data series into a new column inside of the DataFrame
basketball_db_Player_Attributes_df["Draft_Decade"] = pd.cut(basketball_db_Player_Attributes_df["DRAFT_YEAR"], bins, labels=group_labels)
basketball_db_Player_Attributes_df.head(50)

Unnamed: 0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR,GUID,APROX_START_ROOKIE_SEASON,AGE_ROOKIE_SEASON,Draft_Decade
0,76001,Alaa Abdelnaby,1968-06-24,Forward,1990,ALAAABDELNABY1990,1990-10-15,22,1990s
1,51,Mahmoud Abdul-Rauf,1969-03-09,Guard,1990,MAHMOUDABDULRAUF1990,1990-10-15,21,1990s
2,1505,Tariq Abdul-Wahad,1974-11-03,Forward-Guard,1997,TARIQABDULWAHAD1997,1997-10-15,22,1990s
3,949,Shareef Abdur-Rahim,1976-12-11,Forward,1996,SHAREEFABDURRAHIM1996,1996-10-15,19,1990s
4,203518,Alex Abrines,1993-08-01,Guard,2013,ALEXABRINES2013,2013-10-15,20,2010s
5,101165,Alex Acker,1983-01-21,Guard,2005,ALEXACKER2005,2005-10-15,22,2000s
6,76009,Mark Acres,1962-11-15,Center,1985,MARKACRES1985,1985-10-15,22,1980s
7,203112,Quincy Acy,1990-10-06,Forward,2012,QUINCYACY2012,2012-10-15,22,2010s
8,200801,Hassan Adams,1984-06-20,Forward,2006,HASSANADAMS2006,2006-10-15,22,2000s
9,203919,Jordan Adams,1994-07-08,Guard,2014,JORDANADAMS2014,2014-10-15,20,2010s


In [10]:
# Set new index and drop existing index
basketball_db_Player_Attributes_df.set_index('GUID')

Unnamed: 0_level_0,ID,DISPLAY_FIRST_LAST,BIRTHDATE,POSITION,DRAFT_YEAR,APROX_START_ROOKIE_SEASON,AGE_ROOKIE_SEASON,Draft_Decade
GUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ALAAABDELNABY1990,76001,Alaa Abdelnaby,1968-06-24,Forward,1990,1990-10-15,22,1990s
MAHMOUDABDULRAUF1990,51,Mahmoud Abdul-Rauf,1969-03-09,Guard,1990,1990-10-15,21,1990s
TARIQABDULWAHAD1997,1505,Tariq Abdul-Wahad,1974-11-03,Forward-Guard,1997,1997-10-15,22,1990s
SHAREEFABDURRAHIM1996,949,Shareef Abdur-Rahim,1976-12-11,Forward,1996,1996-10-15,19,1990s
ALEXABRINES2013,203518,Alex Abrines,1993-08-01,Guard,2013,2013-10-15,20,2010s
...,...,...,...,...,...,...,...,...
DERRICKZIMMERMAN2003,2583,Derrick Zimmerman,1981-12-02,Guard,2003,2003-10-15,21,2000s
STEPHENZIMMERMAN2016,1627757,Stephen Zimmerman,1996-09-09,Center,2016,2016-10-15,20,2010s
PAULZIPSER2016,1627835,Paul Zipser,1994-02-18,Forward,2016,2016-10-15,22,2010s
ANTEZIZIC2016,1627790,Ante Zizic,1997-01-04,Center,2016,2016-10-15,19,2010s


In [11]:
# Check datatypes
basketball_db_Player_Attributes_df.dtypes

ID                                    int32
DISPLAY_FIRST_LAST                   object
BIRTHDATE                    datetime64[ns]
POSITION                             object
DRAFT_YEAR                            int32
GUID                                 object
APROX_START_ROOKIE_SEASON    datetime64[ns]
AGE_ROOKIE_SEASON                     int32
Draft_Decade                       category
dtype: object

In [12]:
# Identify incomplete rows
basketball_db_Player_Attributes_df.count()

ID                           2131
DISPLAY_FIRST_LAST           2131
BIRTHDATE                    2131
POSITION                     2131
DRAFT_YEAR                   2131
GUID                         2131
APROX_START_ROOKIE_SEASON    2131
AGE_ROOKIE_SEASON            2131
Draft_Decade                 2131
dtype: int64

In [13]:
# import the necessary packages
import psycopg2
from sqlalchemy import create_engine
  
# Create the engine to connect to the database 
engine = create_engine(
    '')

# Write data into the table in database
basketball_db_Player_Attributes_df.to_sql('basketball_db_Player_Attributes', engine)