In [1]:
# Initial imports
import pandas as pd

In [2]:
# Read the complete 1947-2018 NBA draft history from the source CSV into a
# DataFrame, then preview the first rows to confirm the load.
file_path = "../SourceData/NBA_Full_Draft_1947-2018.csv"
full_draft_df = pd.read_csv(filepath_or_buffer=file_path)
full_draft_df.head()

Unnamed: 0.1,Unnamed: 0,Team,Player,College,Play_Yrs,Games,Year,Pick
0,1,PIT,Clifton McNeeley,University of Texas at El Paso,,,1947,1
1,2,TRH,Glen Selbo,University of Wisconsin,1.0,13.0,1947,2
2,3,BOS,Bulbs Ehlers,Purdue University,2.0,99.0,1947,3
3,4,PRO,Walt Dropo,University of Connecticut,,,1947,4
4,5,NYK,Dick Holub,Long Island University,1.0,48.0,1947,5


In [3]:
# Build a working DataFrame that holds only the Player, Year, Pick, Team,
# and College columns. .copy() gives draft_df its own data, independent of
# full_draft_df.
draft_columns = ['Player', 'Year', 'Pick', 'Team', 'College']
draft_df = full_draft_df[draft_columns].copy()
draft_df.head()

Unnamed: 0,Player,Year,Pick,Team,College
0,Clifton McNeeley,1947,1,PIT,University of Texas at El Paso
1,Glen Selbo,1947,2,TRH,University of Wisconsin
2,Bulbs Ehlers,1947,3,BOS,Purdue University
3,Walt Dropo,1947,4,PRO,University of Connecticut
4,Dick Holub,1947,5,NYK,Long Island University


In [8]:
# Restrict the data to players drafted in 1980 or later.
# The boolean mask is named so the selection criterion reads on its own line.
drafted_since_1980 = draft_df['Year'] >= 1980
draft_df = draft_df.loc[drafted_since_1980]
draft_df.head(100)

Unnamed: 0,Player,Year,Pick,Team,College
4545,Joe Barry Carroll,1980,1,GSW,Purdue University
4546,Darrell Griffith,1980,2,UTA,University of Louisville
4547,Kevin McHale,1980,3,BOS,University of Minnesota
4548,Kelvin Ransey,1980,4,CHI,Ohio State University
4549,James Ray,1980,5,DEN,Jacksonville University
...,...,...,...,...,...
4640,Mike Campbell,1980,96,CHI,Northwestern University
4641,James Patrick,1980,97,DEN,Texas State University
4642,Aaron Curry,1980,98,NJN,University of Oklahoma
4643,Wally Rank,1980,99,SDC,San Jose State University


In [9]:
# Identify incomplete rows
# count() reports the number of non-null values in each column, so a column
# whose count is lower than the others contains missing values.
draft_df.count()

Player     3418
Year       3418
Pick       3418
Team       3418
College    3086
dtype: int64

In [10]:
# Check datatypes
# Shows the dtype of every column; text columns are reported as `object`.
draft_df.dtypes

Player     object
Year        int64
Pick        int64
Team       object
College    object
dtype: object

In [11]:
# Make sure every Player value is a Python string before it is used to
# build identifiers; the other columns keep their existing dtypes.
draft_df = draft_df.astype({'Player': str})
draft_df.head()

Unnamed: 0,Player,Year,Pick,Team,College
4545,Joe Barry Carroll,1980,1,GSW,Purdue University
4546,Darrell Griffith,1980,2,UTA,University of Louisville
4547,Kevin McHale,1980,3,BOS,University of Minnesota
4548,Kelvin Ransey,1980,4,CHI,Ohio State University
4549,James Ray,1980,5,DEN,Jacksonville University


In [12]:
# Create Unique Identifier Field - Concatenation of Name and Draft Year,
# converted to all upper case with all punctuation and whitespace stripped
# (e.g. "Joe Barry Carroll" drafted in 1980 -> "JOEBARRYCARROLL1980").
#
# The cleanup is one method chain that produces a new Series:
#   * regex=True is passed explicitly. Since pandas 2.0, Series.str.replace
#     treats the pattern as a literal string by default, which would leave
#     the punctuation in place.
#   * No inplace=True on a column selection: that is a chained in-place
#     update, which pandas deprecates and which has no effect under
#     Copy-on-Write.
#   * Raw strings are used for the patterns so "\s" and "\w" are not read as
#     (invalid) Python string escapes.
guid = (
    (draft_df['Player'] + draft_df['Year'].astype(str))
    # Strip out single quote/apostrophe characters, commas, periods and any
    # other character that is neither a word character nor whitespace.
    .str.replace(r'[^\w\s]+', '', regex=True)
    # Strip out spaces (and any other whitespace).
    .str.replace(r'\s+', '', regex=True)
    # Convert to upper case.
    .str.upper()
)

# assign() returns a new DataFrame with the GUID column added.
draft_df = draft_df.assign(GUID=guid)

draft_df.head(100)

  


Unnamed: 0,Player,Year,Pick,Team,College,GUID
4545,Joe Barry Carroll,1980,1,GSW,Purdue University,JOEBARRYCARROLL1980
4546,Darrell Griffith,1980,2,UTA,University of Louisville,DARRELLGRIFFITH1980
4547,Kevin McHale,1980,3,BOS,University of Minnesota,KEVINMCHALE1980
4548,Kelvin Ransey,1980,4,CHI,Ohio State University,KELVINRANSEY1980
4549,James Ray,1980,5,DEN,Jacksonville University,JAMESRAY1980
...,...,...,...,...,...,...
4640,Mike Campbell,1980,96,CHI,Northwestern University,MIKECAMPBELL1980
4641,James Patrick,1980,97,DEN,Texas State University,JAMESPATRICK1980
4642,Aaron Curry,1980,98,NJN,University of Oklahoma,AARONCURRY1980
4643,Wally Rank,1980,99,SDC,San Jose State University,WALLYRANK1980


In [13]:
# Set new index and drop existing index
# set_index() returns a new DataFrame instead of modifying draft_df, so the
# result must be assigned back; otherwise draft_df keeps its original integer
# index in every later cell. The guard keeps the cell safe to re-run once
# GUID has already become the index (it is then no longer a column).
if draft_df.index.name != 'GUID':
    draft_df = draft_df.set_index('GUID')
draft_df

Unnamed: 0_level_0,Player,Year,Pick,Team,College
GUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
JOEBARRYCARROLL1980,Joe Barry Carroll,1980,1,GSW,Purdue University
DARRELLGRIFFITH1980,Darrell Griffith,1980,2,UTA,University of Louisville
KEVINMCHALE1980,Kevin McHale,1980,3,BOS,University of Minnesota
KELVINRANSEY1980,Kelvin Ransey,1980,4,CHI,Ohio State University
JAMESRAY1980,James Ray,1980,5,DEN,Jacksonville University
...,...,...,...,...,...
RAYSPALDING2018,Ray Spalding,2018,56,PHI,University of Louisville
KEVINHERVEY2018,Kevin Hervey,2018,57,OKC,University of Texas at Arlington
THOMASWELSH2018,Thomas Welsh,2018,58,DEN,"University of California, Los Angeles"
GEORGEKING2018,George King,2018,59,PHO,University of Colorado


In [14]:
# Export to CSV
#export_path = "Resources/draft_position.csv"
#draft_df.to_csv(export_path)

In [15]:
# import the necessary packages
import os
from urllib.parse import quote

import psycopg2
from sqlalchemy import create_engine

# Read the connection settings from the environment so that no credentials
# are stored in the notebook. The variable names follow the standard
# PostgreSQL (libpq) environment variables. PGPASSWORD and PGDATABASE are
# required and raise KeyError when unset; the others fall back to defaults.
db_user = quote(os.environ.get('PGUSER', 'postgres'), safe='')
# Percent-encode the user and password so characters such as "@", "/" or ":"
# cannot break the structure of the connection URL.
db_password = quote(os.environ['PGPASSWORD'], safe='')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')
db_name = os.environ['PGDATABASE']

# Create the engine to connect to the database
engine = create_engine(
    f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

# Write data into the table in database.
# NOTE: to_sql uses if_exists='fail' by default, so this raises ValueError if
# the 'new_draft' table already exists (for example when the cell is re-run).
draft_df.to_sql('new_draft', engine)