## Libraries

In [None]:
%matplotlib inline
# Dependencies and Setup
# Standard library
import json
import time
from datetime import date, timedelta, datetime
from io import StringIO
from pprint import pprint

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm
from tqdm import tqdm_notebook

# sqlite Dependencies
# ----------------------------------
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine
# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Text, DateTime, Float, Boolean, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

In [None]:
from nba_api.stats.endpoints import playercareerstats, drafthistory, commonplayerinfo, playerawards

In [None]:
# Not used:
# cumestatsplayer, draftcombinedrillresults, playerdashboardbyteamperformance, leagueleaders, leaguedashplayerstats, draftcombinenonstationaryshooting, draftcombinestats, commonallplayers

-------------

## Open Saved JSON

In [None]:
# Load the previously saved player payload from disk into `data`.
# NOTE: `data` is rebound later by the "Open combined data from json" cell,
# so re-run this cell before re-running the cleaning cells.
with open('./_players_all_data.json', 'r') as raw_players_file:
    data = json.load(raw_players_file)

In [None]:
# Salary table: player salaries by year and inflation; also carries each
# player's position (hence the variable name).
player_positions = pd.read_csv(filepath_or_buffer='../../datasets/nba_salaries.csv')

--------------

#### Key: 

* GP: Games Played
* MIN: Minutes Played
* FGM: Field Goals Made
* FGA: Field Goals Attempted
* FG%: Field Goal Percentage
* 3PM: 3 Point Field Goals Made
* 3PA: 3 Point Field Goals Attempted
* 3P%: 3 Point Field Goals Percentage
* FTM: Free Throws Made
* FTA: Free Throws Attempted
* FT%: Free Throw Percentage
* OREB: Offensive Rebounds
* DREB: Defensive Rebounds
* REB: Rebounds
* AST: Assists
* TOV: Turnovers
* STL: Steals
* BLK: Blocks
* PF: Personal Fouls
* DD2: Double Doubles
* TD3: Triple Doubles
* PTS: Points
* YIL: Year in League




## Clean df and add columns

Add Points Per Game Played (PPGP) and player position.

In [None]:
# Reference: DataFrame.merge call pattern
# df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))

In [None]:
# Align the salary table's player-name column with the stats frame's
# 'PLAYER_NAME' so the two can be merged on it. Re-running is a no-op once
# the column has been renamed.
player_positions = player_positions.rename({'Player': 'PLAYER_NAME'}, axis='columns')

In [None]:
# `data` holds JSON *text* (the result of json.load on the saved file), not a
# path. Passing literal JSON straight to pd.read_json is deprecated in recent
# pandas, so wrap it in a file-like StringIO buffer.
# The league/team identifier columns are dropped here.
five_year_all_players = (
    pd.read_json(StringIO(data))
    .drop(columns=['LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION'])
)

In [None]:
# Build the cleaned, position-annotated frame in a single pipeline.
#
# Steps, in order:
#   1. keep rows whose SEASON_ID compares greater than '1999-00' (string
#      comparison, so the 1999-00 season itself is excluded);
#   2. inner-join the salary/position table on PLAYER_NAME;
#   3. drop exact duplicate rows, then any row with a missing value;
#   4. add PPGP = PTS / GP rounded to 2 decimals;
#   5. upper-case every column name and strip whitespace from POSITION;
#   6. discard the 'GF' and 'NA' position labels;
#   7. keep only the columns listed, in this order.
#
# NOTE(review): the join key is PLAYER_NAME alone. If the salary table has one
# row per player per YEAR, every season row is paired with every salary year
# for that player -- confirm this is intended.
twenty_years_all_players_raw = (
    five_year_all_players
    .loc[five_year_all_players['SEASON_ID'] > '1999-00']
    .merge(player_positions, on='PLAYER_NAME', suffixes=('_left', '_right'))
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
    .dropna(how='any')
    .assign(PPGP=lambda stats: (stats['PTS'] / stats['GP']).round(2))
    .rename(columns=str.upper)
    .assign(POSITION=lambda stats: stats['POSITION'].str.strip())
    .loc[lambda stats: ~stats['POSITION'].isin(['GF', 'NA'])]
    .loc[:, [
        'YEAR', 'PLAYER_ID', 'POSITION', 'PLAYER_NAME', 'SEASON_ID', 'YIL',
        'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA',
        'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
        'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PPGP',
        'INFLATION',
    ]]
)

## Save combined data to json

In [None]:
# Serialise the cleaned frame to a records-oriented JSON string under its own
# name. The DataFrame stays bound to `twenty_years_all_players_raw`, so this
# cell can be re-run safely (rebinding the frame to a str made a second run
# fail because str has no .to_json).
combined_records_json = twenty_years_all_players_raw.to_json(orient='records')

# SAVE: Player_position
# NOTE: json.dump on an already-serialised string writes a JSON string literal
# (i.e. the payload is double-encoded). This is kept deliberately: the reload
# cell runs json.load, which yields the JSON text back, and then parses that
# text with pd.read_json.
with open('./clean_combined_data_for_ml.json', 'w') as fp:
    json.dump(combined_records_json, fp)

## Open combined data from json

In [None]:
# Read the cleaned, combined dataset back from disk. The file stores a JSON
# string literal, so json.load returns JSON text (a str) rather than a list.
# NOTE: this rebinds `data`, replacing the raw payload loaded at the top.
with open('./clean_combined_data_for_ml.json', 'r') as combined_file:
    data = json.load(combined_file)

In [None]:
# Default DF. Do not touch.
# `data` is JSON text, not a path: wrap it in StringIO because passing literal
# JSON directly to pd.read_json is deprecated in recent pandas. read_json
# already returns a fresh DataFrame, so no extra .copy() is needed.
default = pd.read_json(StringIO(data))

## Dataframes for all positions 

In [None]:
# SG (shooting guard): rows of `default` whose POSITION equals 'SG'.
sg = default.loc[default['POSITION'].eq('SG')]

In [None]:
# C (center): rows of `default` whose POSITION equals 'C'.
c = default.loc[default['POSITION'].eq('C')]

In [None]:
# SF (small forward): rows of `default` whose POSITION equals 'SF'.
sf = default.loc[default['POSITION'].eq('SF')]

In [None]:
# PF (power forward): rows of `default` whose POSITION equals 'PF'.
# Not to be confused with the 'PF' stat column (personal fouls).
pf = default.loc[default['POSITION'].eq('PF')]

In [None]:
# PG (point guard): rows of `default` whose POSITION equals 'PG'.
pg = default.loc[default['POSITION'].eq('PG')]

In [None]:
# G (guard): rows of `default` whose POSITION equals 'G'.
g = default.loc[default['POSITION'].eq('G')]

In [None]:
# F (forward): rows of `default` whose POSITION equals 'F'.
f = default.loc[default['POSITION'].eq('F')]

------------