# NBA Historical View

# Imports

In [9]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PowerTransformer


import psycopg2
from dotenv import load_dotenv

# Constants

In [3]:
# Location of the dotenv file that holds the database credentials.
# It can be overridden with the NBA_DOTENV_PATH environment variable so the
# notebook is not tied to a single machine; when that variable is unset the
# original local path below is used.
dotenv_path = os.getenv(
    "NBA_DOTENV_PATH",
    "/Users/maukanmir/Documents/Machine-Learning/NBA Projects/NBA-History-Analysis/dot.env",
)
load_dotenv(dotenv_path)

# Database connection settings, read from the environment after load_dotenv
# has run. os.getenv returns None for any variable that is not set.
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Table queried when the data is loaded.
TABLE_NAME = "salaries_stats_heights"

In [6]:
# Open a PostgreSQL connection using the settings from the constants cell.
conn = psycopg2.connect(
    host=DB_HOST,
    database=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
    port=DB_PORT
)

# TABLE_NAME is a notebook-level constant rather than external input, so it is
# interpolated directly into the statement.
query = f"SELECT * FROM {TABLE_NAME}"
try:
    # Read the whole table into a DataFrame.
    df = pd.read_sql_query(query, conn)
finally:
    # Release the connection even when the query raises, so a failed read
    # does not leave a connection open on the server.
    conn.close()



In [7]:
# Show the loaded DataFrame as this cell's output.
df

Unnamed: 0,pos,height,weight,age,nationality,college-team,draft-status,salary,Name,Team,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,season
0,SG,6-6,215,27,United States,North Carolina,1984 Rnd 1 Pick 3,2500000.0,Michael Jordan,CHI,...,8.2,0.851,1.4,4.6,6.0,5.5,2.7,1.0,2.5,1990-1991
1,PF,6-9,265,26,United States,Louisiana Tech,1985 Rnd 1 Pick 13,2260000.0,Karl Malone,UTA,...,10.8,0.770,2.9,8.9,11.8,3.3,1.1,1.0,3.0,1990-1991
2,SF,6-7,205,33,United States,Tennessee,1977 Rnd 1 Pick 7,1600000.0,Bernard King,WAS,...,7.6,0.790,1.8,3.2,5.0,4.6,0.9,0.3,4.0,1990-1991
3,PF,6-6,252,27,United States,Auburn,1984 Rnd 1 Pick 5,2900000.0,Charles Barkley,PHI,...,9.8,0.722,3.9,6.3,10.1,4.2,1.6,0.5,3.1,1990-1991
4,C,7-0,240,27,United StatesJamaica,Georgetown,1985 Rnd 1 Pick 1,4250000.0,Patrick Ewing,NYK,...,7.7,0.745,2.4,8.8,11.2,3.0,1.0,3.2,3.6,1990-1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7159,PF,6-7,210,24,Canada,Syracuse,"2019 NBA Draft, Undrafted",2165000.0,Oshae Brissett,BOS,...,1.5,0.602,1.1,1.8,2.9,0.8,0.3,0.1,0.4,2023-2024
7160,G,6-0,185,26,United States,USC,"2018 NBA Draft, Undrafted",2420000.0,Jordan McLaughlin,MIN,...,0.3,0.722,0.3,1.0,1.3,2.0,0.6,0.1,0.3,2023-2024
7161,PF,6-10,240,31,Canada,Stanford,2014 Rnd 2 Pick 15,4000000.0,Dwight Powell,DAL,...,1.4,0.708,1.5,1.9,3.4,1.3,0.4,0.3,0.5,2023-2024
7162,PF,6-9,240,22,United States,Arizona,2020 Rnd 1 Pick 22,4306281.0,Zeke Nnaji,DEN,...,1.1,0.677,1.1,1.1,2.2,0.6,0.3,0.7,0.5,2023-2024


# Functions

In [None]:
def check_for_nulls_dupes(df):
  """Print a null-value report and a duplicate-row count for ``df``.

  Args:
    df: Object providing ``isna()`` and ``duplicated()`` whose results
      support ``.sum()`` (used here with a pandas DataFrame).

  Returns:
    None. Both reports are written to standard output.
  """
  # Per-column count of missing values; printed as-is.
  null_counts = df.isna().sum()
  print(f"The amount of Null Values: {null_counts}")

  # Number of rows flagged by duplicated().
  duplicate_count = df.duplicated().sum()
  print(f"The amount of Duplicated Values {duplicate_count}")

def create_sklearn_pipeline(model, kbest=None):
  """Build a Pipeline that scales, power-transforms and then applies ``model``.

  Steps, in order: "Scaler" (MinMaxScaler), "PowerTransformer"
  (PowerTransformer), optionally "Feature Selection" (``kbest``), and
  finally "model".

  Args:
    model: Estimator placed as the last step of the pipeline.
    kbest: Optional step placed immediately before the model. ``None``
      (the default) leaves the step out.

  Returns:
    A ``Pipeline`` built from the steps above. Nothing is fitted here.
  """
  steps = [
    ("Scaler", MinMaxScaler()),
    ("PowerTransformer", PowerTransformer()),
  ]

  # Compare against None explicitly: truth-testing an object calls its
  # __len__ when it defines one, which can evaluate as False or raise and
  # would then skip or break the optional step.
  if kbest is not None:
    steps.append(("Feature Selection", kbest))

  # The model always goes last, after any optional step.
  steps.append(("model", model))

  return Pipeline(steps=steps)