## Libraries

In [None]:
%matplotlib inline

# Dependencies and Setup
# Standard library
import json
import time
from datetime import date, timedelta, datetime
from io import StringIO
from pprint import pprint

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm
from tqdm import tqdm_notebook

# sqlite Dependencies
# ----------------------------------
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine
# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Text, DateTime, Float, Boolean, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

# Local configuration
from config import APIKEY

In [None]:
from nba_api.stats.endpoints import playercareerstats, drafthistory, commonplayerinfo, playerawards

In [None]:
# Not used
# cumestatsplayer,draftcombinedrillresults,playerdashboardbyteamperformance, leagueleaders, leaguedashplayerstats, draftcombinenonstationaryshooting, draftcombinestats, commonallplayers,

-------------

In [None]:
# Display all columns
pd.set_option('display.max_columns', 500)

# Fetch the draft history table through nba_api (first frame of the response).
draft_data = drafthistory.DraftHistory()
draft_history_filtered = draft_data.get_data_frames()[0]
draft_history_filtered['SEASON'] = draft_history_filtered['SEASON'].astype(int)

# Keep drafts from season 2000 onward and only the three columns used later.
# .copy() makes the result an independent frame, so the fillna assignments
# below write to it directly instead of to a slice of the API frame
# (which would raise SettingWithCopyWarning).
draft_history_filtered = draft_history_filtered.loc[
    draft_history_filtered['SEASON'] >= 2000,
    ['PERSON_ID', 'ROUND_NUMBER', 'OVERALL_PICK'],
].copy()

# Missing round / pick values are replaced with 2 and 61.
# NOTE(review): presumably "last round" / "one past the last pick"
# placeholders for players without a recorded pick — confirm.
draft_history_filtered['ROUND_NUMBER'] = draft_history_filtered['ROUND_NUMBER'].fillna(2)
draft_history_filtered['OVERALL_PICK'] = draft_history_filtered['OVERALL_PICK'].fillna(61)

In [None]:
# Inspect the filtered draft table (PERSON_ID, ROUND_NUMBER, OVERALL_PICK).
draft_history_filtered

In [None]:
# Fill missing rounds with 2 and add an ALLSTAR column.
# NOTE(review): ALLSTAR is populated from OVERALL_PICK (missing -> 61), so it
# carries pick numbers rather than an independent all-star value — confirm
# whether a real all-star source was meant to be used here.
draft_history_filtered = draft_history_filtered.assign(
    ROUND_NUMBER=draft_history_filtered['ROUND_NUMBER'].fillna(2),
    ALLSTAR=draft_history_filtered['OVERALL_PICK'].fillna(61),
)

### Injury

In [None]:
# Injury clean: load the injury log and reduce it to Date / PLAYER_NAME / Notes.
injuries = (
    pd.read_csv('../../datasets/injuries_2010_2020.csv')
    # PLAYER_NAME takes 'Relinquished' and falls back to 'Acquired' where
    # 'Relinquished' is missing.
    .assign(PLAYER_NAME=lambda log: log['Relinquished'].combine_first(log['Acquired']))
    .drop(columns=['Acquired', 'Relinquished', 'Team'])
    .loc[:, ['Date', 'PLAYER_NAME', 'Notes']]
)
# Show the cleaned table.
injuries.copy()

## Open Saved JSON

In [None]:
# Read the saved player data; `data` is whatever json.load returns for this
# file and is handed to pd.read_json further down.
with open('./_players_all_data.json') as players_file:
    data = json.load(players_file)

In [None]:
# Player positions lookup; merged onto the stats frame by PLAYER_NAME below.
player_positions = pd.read_csv('../../datasets/nba_positions_no_team.csv') 

In [None]:
# Player salary table; its columns are renamed to upper case and merged on
# YEAR / PLAYER_NAME further down.
nba_salaries = pd.read_csv('../../datasets/nba_salaries.csv') 

--------------

#### Key: 

* GP: Games Played
* MIN: Minutes Played
* FGM: Field Goals Made
* FGA: Field Goals Attempted
* FG%: Field Goal Percentage
* 3PM: 3 Point Field Goals Made
* 3PA: 3 Point Field Goals Attempted
* 3P%: 3 Point Field Goals Percentage
* FTM: Free Throws Made
* FTA: Free Throws Attempted
* FT%: Free Throw Percentage
* OREB: Offensive Rebounds
* DREB: Defensive Rebounds
* REB: Rebounds
* AST: Assists
* TOV: Turnovers
* STL: Steals
* BLK: Blocks
* PF: Personal Fouls
* DD2: Double Doubles
* TD3: Triple Doubles
* PTS: Points
* YIL: Year in League




## Clean df and add columns

Add points per game played (PPGP) and each player's position.

In [None]:
# df1.merge(df2, left_on='lkey', right_on='rkey',

#           suffixes=('_left', '_right'))

In [None]:
# `data` is JSON text loaded from the saved file. Wrap it in StringIO:
# passing a literal JSON string to pd.read_json is deprecated since pandas 2.1.
five_year_all_players = pd.read_json(StringIO(data))

# Keep seasons after '1999-00' (SEASON_ID is compared as a string).
twenty_years_all_players_raw = five_year_all_players[five_year_all_players['SEASON_ID'] > '1999-00']

# Attach positions by player name; the inner join drops players that have no
# row in player_positions. Exact duplicate rows are removed afterwards.
twenty_years_all_players_raw = (
    twenty_years_all_players_raw
    .merge(player_positions, how="inner", on='PLAYER_NAME', suffixes=('_left', '_right'))
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
)

# Points per game played, rounded to two decimals.
twenty_years_all_players_raw['PPGP'] = round(twenty_years_all_players_raw['PTS'] / twenty_years_all_players_raw['GP'], 2)
twenty_years_all_players_raw['POSITION'] = twenty_years_all_players_raw['POSITION'].str.strip()

# Drop rows whose position is 'GF' or 'NA'. .copy() keeps the result an
# independent frame so later cells can add columns to it without triggering
# SettingWithCopyWarning.
twenty_years_all_players_raw = twenty_years_all_players_raw[
    (twenty_years_all_players_raw['POSITION'] != 'GF')
    & (twenty_years_all_players_raw['POSITION'] != 'NA')
].copy()

In [None]:
# Spot-check one player's rows after the position merge.
twenty_years_all_players_raw.loc[twenty_years_all_players_raw['PLAYER_NAME'].eq('Alex Caruso')]

In [None]:
# Add YEAR: the calendar year in which each season ends.
# SEASON_ID has the form 'YYYY-YY' (e.g. '2019-20' -> 2020), so the end year is
# the four-digit start year plus one. Computing it from the start year, rather
# than prefixing the two-digit suffix with "20", stays correct for any century
# and avoids a row-by-row Python loop.
# .assign returns a new frame, so nothing is written into a filtered slice.
twenty_years_all_players_raw = twenty_years_all_players_raw.assign(
    YEAR=lambda seasons: seasons['SEASON_ID'].str[:4].astype('int64') + 1
)
twenty_years_all_players_raw

### NBA Salary add

In [None]:
# Align the salary column names with the upper-case names used by the stats frame.
salary_column_names = {
    "Year": "YEAR",
    "Player": "PLAYER_NAME",
    "Position": "POSITION",
    "Salary": "SALARY",
    "Inflation": "INFLATION",
}
nba_salaries = nba_salaries.rename(columns=salary_column_names)

In [None]:
# Spot-check one player's salary rows.
nba_salaries.loc[nba_salaries['PLAYER_NAME'].eq('Alex Caruso')]

In [None]:
# Merge salaries onto the stats frame and tidy the result.
twenty_years_all_players_raw = (
    twenty_years_all_players_raw
    # Outer join on season year and player name keeps rows found on either side.
    .merge(nba_salaries, how="outer", on=['YEAR', 'PLAYER_NAME'], suffixes=('_left', '_right'))
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
    # Both inputs carry POSITION: keep the stats-side value and fall back to the
    # salary-side value where it is missing, then drop the salary-side column.
    .rename(columns={"POSITION_left": "POSITION"})
    .assign(POSITION=lambda merged: merged['POSITION'].fillna(merged['POSITION_right']))
    .drop(columns=['POSITION_right'])
    # Keep rows with at least 5 non-missing values; fill what is left with 0.
    .dropna(thresh=5)
    .fillna(0)
)
twenty_years_all_players_raw

In [None]:
# twenty_years_all_players_raw2 = twenty_years_all_players_raw.copy()
# twenty_years_all_players_raw2

In [None]:
# names = twenty_years_all_players_raw2.loc[:, 'PLAYER_NAME']

In [None]:
# for test in names:
# #     twenty_years_all_players_raw2.loc[:, 'PLAYER_NAME':'SALARY']
#     print(test)

In [None]:
# for player in twenty_years_all_players_raw2['PLAYER_NAME']:
#     lowest_sal = twenty_years_all_players_raw2[twenty_years_all_players_raw2['PLAYER_NAME'] == player]['SALARY'].min()
#     twenty_years_all_players_raw2.loc[:,-6] == player]['SALARY'] = twenty_years_all_players_raw2[twenty_years_all_players_raw2['PLAYER_NAME'] == player]['SALARY'].fillna(lowest_sal)

# twenty_years_all_players_raw2
# twenty_years_all_players_raw2[twenty_years_all_players_raw2['PLAYER_NAME'] == 'Alex Caruso']

In [None]:
# Attach draft information (round, overall pick, ALLSTAR) to every player row.
draft_history_pass = (
    twenty_years_all_players_raw
    .merge(
        draft_history_filtered,
        how="outer",
        left_on='PLAYER_ID',
        right_on='PERSON_ID',
        suffixes=('_left', '_right'),
    )
    .drop_duplicates(keep='first')
    # Rows with fewer than 5 non-missing values are discarded.
    .dropna(thresh=5)
    .reset_index(drop=True)
    .drop(columns="PERSON_ID")
    # Rows without a draft match get round 2, pick 61 and ALLSTAR 0.
    .assign(
        ROUND_NUMBER=lambda merged: merged['ROUND_NUMBER'].fillna(2),
        OVERALL_PICK=lambda merged: merged['OVERALL_PICK'].fillna(61),
        ALLSTAR=lambda merged: merged['ALLSTAR'].fillna(0),
    )
)
draft_history_pass

## Save combined data to json

In [None]:
# Serialise to a records-oriented JSON string under its own name, so that
# draft_history_pass stays a DataFrame and this cell can be re-run safely
# (rebinding it to a str made a second run fail on .to_json).
draft_history_json = draft_history_pass.to_json(orient='records')

# SAVE: combined, cleaned data for ML.
# json.dump on a str writes that text as one JSON-encoded string; the cell that
# reopens this file passes the json.load result to pd.read_json, so the on-disk
# format is kept as it was.
with open('./clean_combined_data_for_ml.json', 'w') as fp:
    json.dump(draft_history_json, fp)

## Open combined data from json

In [None]:
# Reload the combined data written above; `data` is the json.load result and
# is passed to pd.read_json in the next cell.
with open('./clean_combined_data_for_ml.json') as combined_file:
    data = json.load(combined_file)

In [None]:
# Default DF. Do not touch.
# `data` is JSON text, so wrap it in StringIO: passing a literal JSON string to
# pd.read_json is deprecated since pandas 2.1. read_json already returns a
# fresh frame, so no extra .copy() is needed.
default = pd.read_json(StringIO(data))
default[default['SEASON_ID'] == '2019-20']

In [None]:
# Spot-check one player's rows in the reloaded frame.
default.loc[default['PLAYER_NAME'].eq('Nikola Jokic')]

-------------

### Clean injuries

In [None]:
# NOTE: this rebinds the name `datetime` from the class imported in the first
# cell to the module itself; it is kept so that `datetime.datetime` keeps
# resolving the same way for any code that runs after this cell.
import datetime

# Every injury row except the "returned to lineup" entries. .copy() makes this
# an independent frame so the 'Date' column can be overwritten in the next
# cell without SettingWithCopyWarning.
dnp = injuries[injuries['Notes'] != 'returned to lineup'].copy()

# Year of each entry; 'Date' is parsed as month/day/year text.
new_dates = [
    datetime.datetime.strptime(raw_date, '%m/%d/%Y').year
    for raw_date in dnp['Date']
]

In [None]:
# Replace the full date with the year extracted in the previous cell.
dnp = dnp.assign(Date=new_dates)
dnp

In [None]:

# Count the non-missing entries per player and year, ordered by year.
dnp_clean = (
    dnp
    .groupby(['PLAYER_NAME', 'Date'])
    .count()
    .sort_values("Date")
    .reset_index()
)

In [None]:
# only_year = date_time_obj.year

In [None]:
# Serialise to a records-oriented JSON string under its own name, so that
# dnp_clean stays a DataFrame and this cell can be re-run safely
# (rebinding it to a str made a second run fail on .to_json).
dnp_clean_json = dnp_clean.to_json(orient='records')

# SAVE: per-player, per-year injury counts.
# json.dump on a str writes that text as one JSON-encoded string, which is the
# same on-disk format this cell produced before.
with open('./injury_formatted.json', 'w') as fp:
    json.dump(dnp_clean_json, fp)

------------