## Libraries

In [23]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Dependencies and Setup
import time
from pprint import pprint
import requests
from datetime import date, timedelta, datetime
import json
from pprint import pprint
from tqdm import tqdm
from tqdm import tqdm_notebook
# sqlite Dependencies
# ----------------------------------
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine
# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Text, DateTime, Float, Boolean, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

In [24]:
from nba_api.stats.endpoints import playercareerstats, drafthistory, commonplayerinfo, playerawards

In [3]:
# Not used
# cumestatsplayer,draftcombinedrillresults,playerdashboardbyteamperformance, leagueleaders, leaguedashplayerstats, draftcombinenonstationaryshooting, draftcombinestats, commonallplayers,

-------------

## Open Saved JSON

In [46]:
with open('./_players_all_data.json') as json_file:
    data = json.load(json_file)

In [47]:
#  Player positions
player_positions = pd.read_csv('../../datasets/nba_positions_no_team.csv') 

--------------

#### Key: 

* GP: Games Played
* MIN: Minutes Played
* FGM: Field Goals Made
* FGA: Field Goals Attempted
* FG%: Field Goal Percentage
* 3PM: 3 Point Field Goals Made
* 3PA: 3 Point Field Goals Attempted
* 3P%: 3 Point Field Goals Percentage
* FTM: Free Throws Made
* FTA: Free Throws Attempted
* FT%: Free Throw Percentage
* OREB: Offensive Rebounds
* DREB: Defensive Rebounds
* REB: Rebounds
* AST: Assists
* TOV: Turnovers
* STL: Steals
* BLK: Blocks
* PF: Personal Fouls
* DD2: Double Doubles
* TD3: Trible Doubles
* PTS: Points
* YIL: Year in League




## Clean df and add columns

add Points Per Games Played (PPGP), add position

In [48]:
# df1.merge(df2, left_on='lkey', right_on='rkey',

#           suffixes=('_left', '_right'))

In [49]:
five_year_all_players = pd.read_json(data)
twenty_years_all_players_raw = five_year_all_players[five_year_all_players['SEASON_ID'] > '1999-00']
twenty_years_all_players_raw = twenty_years_all_players_raw.merge(player_positions, left_on='PLAYER_NAME', right_on='PLAYER_NAME',suffixes=('_left', '_right')).drop_duplicates( keep='first').reset_index(drop=True).dropna(how='any')
twenty_years_all_players_raw['PPGP'] = round(twenty_years_all_players_raw['PTS'] / twenty_years_all_players_raw['GP'], 2)
twenty_years_all_players_raw['POSITION'] = twenty_years_all_players_raw['POSITION'].str.strip()

In [52]:
# Create seperate dataframes for each position
twenty_years_all_players_raw['POSITION'].value_counts()

SG    985
C     985
SF    858
PF    856
PG    824
G     449
F     360
GF      5
NA      5
Name: POSITION, dtype: int64

## Save combined data to json

In [53]:
twenty_years_all_players_raw=twenty_years_all_players_raw.to_json(orient='records')

# SAVE: Player_position
with open(f'./clean_combined_data_for_ml.json', 'w') as fp:
    json.dump(twenty_years_all_players_raw, fp)

## Open combined data from json

In [25]:
with open('./clean_combined_data_for_ml.json') as json_file:
    data = json.load(json_file)

In [26]:
# Default DF. Do not touch.
default = pd.read_json(data).copy()

## Dataframes for all positions 

#### Key: 

* GP: Games Played
* MIN: Minutes Played
* FGM: Field Goals Made
* FGA: Field Goals Attempted
* FG_PCT: Field Goal Percentage
* 3PM: 3 Point Field Goals Made
* 3PA: 3 Point Field Goals Attempted
* FG3_PCT: 3 Point Field Goals Percentage
* FTM: Free Throws Made
* FTA: Free Throws Attempted
* FT_PCT: Free Throw Percentage
* OREB: Offensive Rebounds
* DREB: Defensive Rebounds
* REB: Rebounds
* AST: Assists
* TOV: Turnovers
* STL: Steals
* BLK: Blocks
* PF: Personal Fouls
* DD2: Double Doubles
* TD3: Trible Doubles
* PTS: Points
* YIL: Year in League


In [27]:
# SG
sg = default[(default['POSITION'] == 'SG') & (default['GP'] > 40)]
sg = sg.drop(columns=['PLAYER_NAME','POSITION'])

In [28]:
sg = sg[['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'YIL', 'PPGP','FG3_PCT']] .sort_values(by='FG3_PCT', ascending=False).reset_index(drop=True)
sg = sg.drop(sg.index[0])
sg.copy()

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,REB,AST,STL,BLK,TOV,PF,PTS,YIL,PPGP,FG3_PCT
1,2574,2006-07,0,1610612748,MIA,26,67,35,1767,278,...,180,81,38,2,70,135,730,4,10.90,0.514
2,2574,2007-08,0,1610612761,TOR,27,81,7,1530,243,...,120,65,34,2,58,131,580,5,7.16,0.483
3,2574,2003-04,0,1610612739,CLE,23,41,3,427,52,...,55,14,13,2,21,41,145,1,3.54,0.477
4,200789,2009-10,0,1610612739,CLE,24,56,10,1068,125,...,73,70,25,7,36,79,355,4,6.34,0.477
5,1629744,2019-20,0,1610612761,TOR,25,41,1,440,73,...,61,22,10,1,16,38,202,1,4.93,0.475
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,201603,2009-10,0,1610612761,TOR,23,69,19,1368,236,...,192,101,39,25,65,126,518,2,7.51,0.133
560,2774,2005-06,0,1610612766,CHA,25,66,18,1303,157,...,219,77,81,10,74,141,422,2,6.39,0.095
561,2052,2001-02,0,1610612762,UTA,21,67,23,1131,143,...,131,116,29,24,68,82,325,2,4.85,0.080
562,2208,2002-03,0,1610612738,BOS,22,51,5,667,61,...,140,20,34,13,23,68,145,2,2.84,0.077


In [8]:
# C
c = default[(default['POSITION'] == 'C') & (default['GP'] > 40)]
c = c.drop(columns=['PLAYER_NAME','POSITION'])

In [37]:
# sg.iloc[:, 5:-1].values

In [9]:
# SF
sf = default[default['POSITION'] == 'SF']
# PF
pf = default[default['POSITION'] == 'PF']
# PG
pg = default[default['POSITION'] == 'PG']
# G
g = default[default['POSITION'] == 'G']
# F
f = default[default['POSITION'] == 'F']

------------

In [10]:
# dataset = position name
dataset = sg

## Multiple Linear Regressions
X = dataset.iloc[:, 5:-1].values
y = dataset.iloc[:, -1].values

In [11]:
y

array([0.514, 0.483, 0.477, 0.477, 0.475, 0.467, 0.465, 0.461, 0.456,
       0.455, 0.452, 0.45 , 0.446, 0.444, 0.444, 0.441, 0.441, 0.44 ,
       0.44 , 0.439, 0.438, 0.438, 0.437, 0.437, 0.436, 0.435, 0.433,
       0.431, 0.43 , 0.429, 0.427, 0.426, 0.425, 0.423, 0.423, 0.422,
       0.422, 0.421, 0.421, 0.42 , 0.42 , 0.419, 0.418, 0.417, 0.417,
       0.417, 0.417, 0.415, 0.415, 0.415, 0.415, 0.414, 0.414, 0.414,
       0.413, 0.412, 0.412, 0.412, 0.412, 0.411, 0.41 , 0.41 , 0.41 ,
       0.409, 0.409, 0.408, 0.407, 0.407, 0.407, 0.407, 0.407, 0.406,
       0.405, 0.405, 0.405, 0.405, 0.405, 0.404, 0.404, 0.404, 0.404,
       0.403, 0.403, 0.403, 0.402, 0.402, 0.402, 0.402, 0.402, 0.401,
       0.401, 0.4  , 0.4  , 0.4  , 0.399, 0.399, 0.399, 0.398, 0.398,
       0.398, 0.397, 0.397, 0.397, 0.397, 0.397, 0.396, 0.396, 0.396,
       0.395, 0.395, 0.395, 0.395, 0.395, 0.394, 0.393, 0.393, 0.393,
       0.392, 0.391, 0.391, 0.391, 0.391, 0.391, 0.39 , 0.39 , 0.39 ,
       0.39 , 0.39 ,

In [19]:
## Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [20]:
## Training the Multiple Linear Regression model on the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

In [21]:
## Predicting the Test set results
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0.3  0.27]
 [0.3  0.33]
 [0.42 0.41]
 [0.3  0.28]
 [0.32 0.33]
 [0.35 0.36]
 [0.28 0.27]
 [0.34 0.35]
 [0.37 0.37]
 [0.3  0.27]
 [0.34 0.37]
 [0.32 0.32]
 [0.41 0.4 ]
 [0.44 0.42]
 [0.42 0.4 ]
 [0.32 0.35]
 [0.37 0.38]
 [0.35 0.37]
 [0.28 0.26]
 [0.35 0.36]
 [0.37 0.36]
 [0.35 0.36]
 [0.39 0.38]
 [0.36 0.38]
 [0.34 0.38]
 [0.34 0.36]
 [0.4  0.4 ]
 [0.32 0.33]
 [0.48 0.41]
 [0.33 0.27]
 [0.3  0.34]
 [0.36 0.45]
 [0.37 0.38]
 [0.32 0.32]
 [0.32 0.23]
 [0.29 0.27]
 [0.34 0.31]
 [0.32 0.33]
 [0.31 0.13]
 [0.38 0.4 ]
 [0.32 0.36]
 [0.28 0.31]
 [0.33 0.28]
 [0.34 0.36]
 [0.31 0.14]
 [0.32 0.32]
 [0.3  0.33]
 [0.32 0.36]
 [0.33 0.33]
 [0.3  0.28]
 [0.45 0.44]
 [0.34 0.37]
 [0.34 0.34]
 [0.35 0.35]
 [0.34 0.36]
 [0.52 0.44]
 [0.31 0.35]
 [0.3  0.28]
 [0.32 0.27]
 [0.34 0.3 ]
 [0.32 0.32]
 [0.39 0.42]
 [0.3  0.31]
 [0.31 0.26]
 [0.37 0.38]
 [0.28 0.3 ]
 [0.32 0.25]
 [0.26 0.29]
 [0.33 0.33]
 [0.44 0.4 ]
 [0.32 0.33]
 [0.45 0.42]
 [0.42 0.39]
 [0.34 0.28]
 [0.45 0.44]
 [0.38 0.39]
 [0.39 0.4 ]

Multiple linear aggression is well adapted to the dataset. 

In [22]:
## R-Square Coefficient 
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.4332583153207932

## Linear Regression 

In [None]:
# Assign the data to X and y

X = twenty_years_all_players[["MIN", "GP"]]
y = twenty_years_all_players["PPGP"].values.reshape(-1, 1)
print(X.shape, y.shape)

In [None]:
# Use train_test_split to create training and testing data

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Create the model using LinearRegression

from sklearn.linear_model import LinearRegression
model = LinearRegression()

In [None]:
# Fit the model to the training data and calculate the scores for the training and testing data

model.fit(X_train, y_train)
training_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)



print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

In [None]:
# Plot the Residuals for the Training and Testing data

# Use `model.predict()` to get a prediction array from X_train and X_test
y_train_prediction = model.predict(X_train)
y_test_prediction = model.predict(X_test)

# Plot the residual
plt.scatter(y_train_prediction, y_train_prediction - y_train, c="blue", label="Training Data (Points)")
plt.scatter(y_test_prediction, y_test_prediction - y_test, c="orange", label="Testing Data (MIN, GP)")
plt.legend()
plt.hlines(y=0, xmin=y.min(), xmax=y.max())
plt.title("Residual Plot")


--------------------------

## Logistic Regression
* Assiging 'dummies'.        
Creating columns for logistic regression out of categorical data in specific columns. ex: positon of player

In [None]:
# twenty_years_all_players

In [None]:
ml_test = twenty_years_all_players.drop(columns=['PLAYER_NAME','TEAM_ABBREVIATION','SEASON_ID'])

In [None]:
ml_test2 = pd.get_dummies(ml_test)
ml_test2.columns = ml_test2.columns.str.replace(' ','')
ml_test2.head()

OREB: Offensive Rebounds  
DREB: Defensive Rebounds   
REB: Rebounds  
AST: Assists  
TOV: Turnovers  
STL: Steals  
BLK: Blocks  

In [None]:
X = ml_test2[["MIN", "GP"]]
# Select the 'FEV' column for y, and then use values.reshape() to reshape it to a 2d array
y = ml_test2["POSITION_C"].values.reshape(-1, 1)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.33,)



In [None]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier

In [None]:
classifier.fit(X_train, y_train)

In [None]:
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

In [None]:
# Generate a new data point (the red circle)
import numpy as np
new_data = np.array([[800, 36]])
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y) 
plt.scatter(new_data[0, 0], new_data[0, 1], c="r", marker="o", s=100)

In [None]:
# Predict the class (purple or yellow) of the new data point
predictions = classifier.predict(new_data)
print("Classes are either 0 (purple) or 1 (yellow)")
print(f"The new point was classified as: {predictions}")

In [None]:
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

----------

## TEST: DNN for regression    
https://towardsdatascience.com/deep-neural-networks-for-regression-problems-81321897ca33

In [None]:
X = ml_test2[["MIN", "GP"]]
# Select the 'FEV' column for y, and then use values.reshape() to reshape it to a 2d array
y = ml_test2["POSITION_C"].values.reshape(-1, 1)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=101, test_size=0.33)

In [None]:
def get_cols_with_no_nans(df,col_type):
    '''
    Arguments :
    df : The dataframe to process
    col_type : 
          num : to only get numerical columns with no nans
          no_num : to only get nun-numerical columns with no nans
          all : to get any columns with no nans    
    '''
    if (col_type == 'num'):
        predictors = df.select_dtypes(exclude=['object'])
    elif (col_type == 'no_num'):
        predictors = df.select_dtypes(include=['object'])
    elif (col_type == 'all'):
        predictors = df
    else :
        print('Error : choose a type (num, no_num, all)')
        return 0
    cols_with_no_nans = []
    for col in predictors.columns:
        if not df[col].isnull().any():
            cols_with_no_nans.append(col)
    return cols_with_no_nans

In [None]:
# define a function to get the columns that don’t have any missing values
num_cols = get_cols_with_no_nans(ml_test2 , 'num')
cat_cols = get_cols_with_no_nans(ml_test2 , 'no_num')

In [None]:
print ('Number of numerical columns with no nan values :',len(num_cols))
print ('Number of nun-numerical columns with no nan values :',len(cat_cols))

In [None]:
combined = ml_test2[num_cols + cat_cols]
combined.hist(figsize = (12,10))
plt.show()

In [None]:
import seaborn as sb

In [None]:
train_data = combined[num_cols + cat_cols]
train_data['POSITION_C'] = y 

C_mat = train_data.corr()
fig = plt.figure(figsize = (15,15))

sb.heatmap(C_mat, vmax = .8, square = True)
plt.show()

In [None]:
def oneHotEncode(df,colNames):
    for col in colNames:
        if( df[col].dtype == np.dtype('object')):
            dummies = pd.get_dummies(df[col],prefix=col)
            df = pd.concat([df,dummies],axis=1)

            #drop the encoded column
            df.drop([col],axis = 1 , inplace=True)
    return df
    

print('There were {} columns before encoding categorical features'.format(combined.shape[1]))
combined = oneHotEncode(combined, cat_cols)
print('There are {} columns after encoding categorical features'.format(combined.shape[1]))

In [None]:
def split_combined():
    global combined
    train = combined[:1460]
    test = combined[1460:]

    return train , test 
  
train, test = split_combined()

-----------

## TEST: DNN.
Sequential model, dense layers, relu as activation function for hidden layers, normal initializer as kernel_initializer. Mean absolute error as loss function. Linear as activation function for output

In [None]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from tensorflow.keras.utils import to_categorical

In [None]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)

In [None]:
# Transform the training and testing data using the X_scaler

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [None]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=2, name='hidden'))
model.add(Dense(units=2, activation='softmax', name='output'))

In [None]:
model.summary()

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)

In [None]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    validation_data=(X_test_scaled, y_test_categorical),
    epochs=60,
    shuffle=True,
    verbose=2
)

### TEST: Deep Learning

In [None]:
deep_model = Sequential(name='deep_model')
deep_model.add(Dense(units=6, activation='relu', input_dim=2, name='hidden'))
deep_model.add(Dense(units=6, activation='relu', name='hidden_2'))
deep_model.add(Dense(units=2, activation='softmax', name='output'))

In [None]:
deep_model.summary()

In [None]:
# deep_model.compile(optimizer='adam',
#                    loss='categorical_crossentropy',
#                    metrics=['accuracy'])

# deep_model.compile(optimizer='adam',
#                    loss='mean_absolute_error',
#                    metrics=['mean_absolute_error'])
# deep_model.fit(
#     X_train_scaled,
#     y_train_categorical,
#     validation_data=(X_test_scaled, y_test_categorical),
#     epochs=100,
#     shuffle=True,
#     verbose=2
# )

In [None]:
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    validation_data=(X_test_scaled, y_test_categorical),
    epochs=100,
    shuffle=True,
    verbose=2
)

### Compare the models below

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

-----------------

In [None]:
import tensorflow.keras as keras

In [None]:
checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5' 
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')
callbacks_list = [checkpoint]

--------

### Mean Square Error

In [None]:
# Used X_test_scaled, y_test_scaled, and model.predict(X_test_scaled) to calculate MSE and R2

from sklearn.metrics import mean_squared_error

MSE = mean_squared_error(y_test_scaled, y_test_prediction)
r2 = model.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

In [None]:
# LASSO model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import Lasso

lasso = Lasso(alpha=.01)

lasso.fit(X_train_scaled, y_train_scaled)

predictions = lasso.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = lasso.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

In [None]:
# Ridge model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import Ridge

### BEGIN SOLUTION
ridge = Ridge(alpha=.01)
ridge.fit(X_train_scaled, y_train_scaled)

predictions = ridge.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = ridge.score(X_test_scaled, y_test_scaled)
### END SOLUTION

print(f"MSE: {MSE}, R2: {r2}")

In [None]:
# ElasticNet model
# Note: Use an alpha of .01 when creating the model for this activity
from sklearn.linear_model import ElasticNet

### BEGIN SOLUTION
elasticnet = ElasticNet(alpha=.01)
elasticnet.fit(X_train_scaled, y_train_scaled)

predictions = elasticnet.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = elasticnet.score(X_test_scaled, y_test_scaled)
### END SOLUTION

print(f"MSE: {MSE}, R2: {r2}")


---------------

## YIL variables

In [None]:
### DF for each year. decending
first_year = twenty_years_all_players[twenty_years_all_players['YIL'] == 1]
second_year = twenty_years_all_players[twenty_years_all_players['YIL'] == 2]
third_year = twenty_years_all_players[twenty_years_all_players['YIL'] == 3]
fourth_year = twenty_years_all_players[twenty_years_all_players['YIL'] == 4]
fifth_year = twenty_years_all_players[twenty_years_all_players['YIL'] == 5]

## Plot YIL to Total Points

First Year Total Points per Position

In [None]:
import plotly.express as px

fig = px.scatter(twenty_years_all_players, x="MIN", y="PTS", log_x=True, hover_name="PLAYER_NAME", hover_data=["PTS", 'SEASON_ID', 'TEAM_ABBREVIATION'], color="YIL",
                 size='PTS', title="First Year Total Points per Position")
fig.show()

Second Year Total Points per Position

In [None]:
import plotly.express as px

fig = px.scatter(second_year, x="PTS", y="POSITION", log_x=True, hover_name="PLAYER_NAME", hover_data=["PTS"], color="PTS",
                 size='PTS', title="Second Year Total Points per Position")
fig.show()

In [None]:
test = list(first_year.columns.values)

In [None]:
fig = px.scatter_polar(first_year, r="PTS", theta="POSITION", color="PTS",
                    template="plotly_dark", title="First Year Total Points per Position")
fig.show()

In [None]:
twenty_years_all_players[['YIL','PTS','STL','BLK','REB','MIN','AST','TOV']].columns

https://plotly.com/python/polar-chart/

In [None]:
# import plotly.express as px
# df = px.data.wind()
# fig = px.line_polar(twenty_years_all_players, r="PTS", theta="POSITION", color="YIL", line_close=True,
#                     color_discrete_sequence=px.colors.sequential.Plasma_r,
#                     template="plotly_dark",)
# fig.show()

In [None]:
# df = px.data.wind()
# df