In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import time
from pprint import pprint
import requests
from datetime import date, timedelta, datetime
import json
from pprint import pprint
from tqdm import tqdm
from tqdm import tqdm_notebook
# sqlite Dependencies
# ----------------------------------
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine
# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Text, DateTime, Float, Boolean, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

In [2]:
from nba_api.stats.endpoints import playercareerstats, drafthistory, commonplayerinfo, playerawards

In [3]:
# cumestatsplayer,draftcombinedrillresults,playerdashboardbyteamperformance, leagueleaders, leaguedashplayerstats, draftcombinenonstationaryshooting, draftcombinestats, commonallplayers,

In [4]:
# pd.show_versions()

## Static Examples

In [2]:
import cudf, io, requests
from io import StringIO


CUDARuntimeError: cudaErrorInsufficientDriver: CUDA driver version is insufficient for CUDA runtime version

In [None]:
import cudf, io, requests
from io import StringIO

url = "https://github.com/plotly/datasets/raw/master/tips.csv"
content = requests.get(url).content.decode('utf-8')

tips_df = cudf.read_csv(StringIO(content))
tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100

# display average tip by dining party size
print(tips_df.groupby('size').tip_percentage.mean())

In [5]:
from nba_api.stats.static import players
# Find players by full name.
players.find_players_by_full_name('james')

# Find players by first name.
players.find_players_by_first_name('lebron')

# Find players by last name.
players.find_players_by_last_name('^(james|love)$')

# Get all players.
# players.get_players()

[{'id': 77115,
  'full_name': 'Aaron James',
  'first_name': 'Aaron',
  'last_name': 'James',
  'is_active': False},
 {'id': 203108,
  'full_name': 'Bernard James',
  'first_name': 'Bernard',
  'last_name': 'James',
  'is_active': False},
 {'id': 202345,
  'full_name': 'Damion James',
  'first_name': 'Damion',
  'last_name': 'James',
  'is_active': False},
 {'id': 77116,
  'full_name': 'Gene James',
  'first_name': 'Gene',
  'last_name': 'James',
  'is_active': False},
 {'id': 1080,
  'full_name': 'Henry James',
  'first_name': 'Henry',
  'last_name': 'James',
  'is_active': False},
 {'id': 1744,
  'full_name': 'Jerome James',
  'first_name': 'Jerome',
  'last_name': 'James',
  'is_active': False},
 {'id': 1629713,
  'full_name': 'Justin James',
  'first_name': 'Justin',
  'last_name': 'James',
  'is_active': True},
 {'id': 2544,
  'full_name': 'LeBron James',
  'first_name': 'LeBron',
  'last_name': 'James',
  'is_active': True},
 {'id': 2229,
  'full_name': 'Mike James',
  'first_nam

## Begin Database Connection (sqlite)

In [20]:
# Base = declarative_base()
# engine = create_engine('sqlite:///db.sqlite', echo=False)
# conn = engine.connect()
# Create (if not already in existence) the tables associated with our classes.
# Base.metadata.create_all(engine)
# # Create a Session Object to Connect to DB
# # ----------------------------------
# session = Session(bind=engine)

In [21]:
# Use this to clear out the db
# ----------------------------------
# Session.rollback(self)
# Base.metadata.drop_all(engine)
# session.commit()

## List of all players
find specific player

In [6]:
from nba_api.stats.static import players
# get_players returns a list of dictionaries, each representing a player.
nba_players = players.get_players()
print('Number of players fetched: {}'.format(len(nba_players)))
nba_players[:]
all_players = pd.DataFrame(nba_players)

Number of players fetched: 4501


### Find specific player

In [7]:
## loop to find player
player_name = [player for player in nba_players
                   if player['full_name'] == 'Tim Duncan'][0]
player_name

{'id': 1495,
 'full_name': 'Tim Duncan',
 'first_name': 'Tim',
 'last_name': 'Duncan',
 'is_active': False}

## Get Career Stats by player id number
Get player_id number from get_players above

#### Key: 

* GP: Games Played
* MIN: Minutes Played
* FGM: Field Goals Made
* FGA: Field Goals Attempted
* FG%: Field Goal Percentage
* 3PM: 3 Point Field Goals Made
* 3PA: 3 Point Field Goals Attempted
* 3P%: 3 Point Field Goals Percentage
* FTM: Free Throws Made
* FTA: Free Throws Attempted
* FT%: Free Throw Percentage
* OREB: Offensive Rebounds
* DREB: Defensive Rebounds
* REB: Rebounds
* AST: Assists
* TOV: Turnovers
* STL: Steals
* BLK: Blocks
* PF: Personal Fouls
* DD2: Double Doubles
* TD3: Trible Doubles
* PTS: Points




example

In [8]:
# Display all columns
pd.set_option('display.max_columns', 500)
# Anthony Davis
career = playercareerstats.PlayerCareerStats(player_id='203076')
career_df = career.get_data_frames()[0]

career_dict = career_df.to_dict('records')
first_5_years = career_df[0:5]

In [9]:
career_df.columns

Index(['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')

-----------

#### PySpark databases 

In [None]:
# import os
# import findspark
# findspark.init()

In [None]:
# # Start Spark session
# from pyspark.sql import SparkSession
# from pyspark import SparkFiles
# spark = SparkSession.builder.appName("nbaStats").getOrCreate()

In [None]:
# with open("sample.json", "w") as outfile:  
#     json.dump(career_dict, outfile) 

In [None]:
# sc = spark.sparkContext
# path = "./sample.json"
# players_json_spark = spark.read.json(path)

In [None]:
# players_json_spark.printSchema()

In [None]:
# Creates a temporary view using the DataFrame
# players_json_spark.createOrReplaceTempView("people")

In [None]:
# players_json_spark.show()

In [None]:
# # playersDF_spark = spark.range(3).toDF("myCol")
# newRow = players_json_spark
# appended = players_json_spark.union(newRow)
# display(appended)

In [None]:
# appended.count()

In [None]:
# def customFunction(row):

#    return (row)

# sample2 = appended.rdd.map(customFunction)
# sample2

In [None]:
# for ids in all_players_ids: 
#     player_to_pass = ids

In [None]:
# for f in appended.collect(): 
#         print (f)

In [None]:
# playersDF_spark.withColumn('newprice', dataframe]).show()

In [None]:
# otherPeople = spark.read.json(path)
# otherPeople.show()
# otherPeople.select(otherPeople["AST"]).show()

In [None]:
# Create DataFrame manually
# dataframe = spark.createDataFrame(path, schema=main_df_columns)
# dataframe.show()

------------

## Get first 5 years of each player id.

In [10]:
all_players_ids = list(all_players['id'].values.astype(str))
len(all_players_ids)

4501

In [11]:
all_players_col = ['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS']
len(all_players_col)

27

In [12]:
main_df = pd.DataFrame(columns=all_players_col)

pbar = tqdm(total=len(all_players_ids))
try: 
    for ids in all_players_ids: 
        player_to_pass = ids

        career = playercareerstats.PlayerCareerStats(player_id=player_to_pass)
        career_df = career.get_data_frames()[0]
        first_5_years = career_df[0:5]
        main_df = main_df.append(first_5_years, ignore_index = True)
        pbar.update(1)
    pbar.close()
except:
    print("something happened")

 97%|███████████████████████████████████████████████████████████████████████████▍  | 4352/4501 [36:48<01:17,  1.91it/s]

something happened


In [None]:
all_players_ids

In [44]:
five_year_all_players = main_df.sort_values(by='SEASON_ID', ascending=False).copy()
five_year_all_players = five_year_all_players.reset_index(drop=True)
five_year_all_players

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1629010,2019-20,00,1610612746,LAC,23.0,42,1,476,46,136,0.338,19,67,0.284,11,19,0.579,5,55,60,46,13,8,26,55,122
1,1629640,2019-20,00,1610612759,SAS,20.0,17,1,301,53,89,0.596,13,22,0.591,35,44,0.795,10,47,57,15,14,2,13,28,154
2,1628413,2019-20,00,0,TOT,24.0,7,0,47,4,11,0.364,0,4,0,2,4,0.5,3,6,9,0,3,2,2,10,10
3,1628984,2019-20,00,1610612766,CHA,25.0,63,53,2211,368,963,0.382,218,585,0.373,191,233,0.82,42,170,212,471,62,15,181,115,1145
4,1629648,2019-20,00,1610612765,DET,22.0,10,0,53,5,20,0.25,2,10,0.2,0,0,0,0,4,4,8,1,0,2,6,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14637,77642,1946-47,00,1610610032,PRO,24.0,11,,,4,13,0.308,,,,7,12,0.583,,,,3,,,,3,15
14638,78230,1946-47,00,1610610032,PRO,24.0,4,,,0,7,0,,,,1,2,0.5,,,,0,,,,3,1
14639,78115,1946-47,00,1610612744,PHW,25.0,58,,,142,531,0.267,,,,82,124,0.661,,,,34,,,,83,366
14640,77660,1946-47,00,1610612752,NYK,24.0,26,,0,27,104,0.26,,,,22,34,0.647,,,0,18,,,,32,76


In [45]:
five_year_all_players_to_json = five_year_all_players.to_json(orient='records')

In [46]:
# SAVE: Player_position
with open(f'./_players_all_data.json', 'w') as fp:
    json.dump(five_year_all_players_to_json, fp)

-------------

In [62]:
five_year_all_players.set_index('SEASON_ID')

Unnamed: 0_level_0,PLAYER_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
SEASON_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
2019-20,1629010,00,1610612746,LAC,23.0,42,1,476,46,136,0.338,19,67,0.284,11,19,0.579,5,55,60,46,13,8,26,55,122
2019-20,1629640,00,1610612759,SAS,20.0,17,1,301,53,89,0.596,13,22,0.591,35,44,0.795,10,47,57,15,14,2,13,28,154
2019-20,1628413,00,0,TOT,24.0,7,0,47,4,11,0.364,0,4,0,2,4,0.5,3,6,9,0,3,2,2,10,10
2019-20,1628984,00,1610612766,CHA,25.0,63,53,2211,368,963,0.382,218,585,0.373,191,233,0.82,42,170,212,471,62,15,181,115,1145
2019-20,1629648,00,1610612765,DET,22.0,10,0,53,5,20,0.25,2,10,0.2,0,0,0,0,4,4,8,1,0,2,6,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1946-47,77642,00,1610610032,PRO,24.0,11,,,4,13,0.308,,,,7,12,0.583,,,,3,,,,3,15
1946-47,78230,00,1610610032,PRO,24.0,4,,,0,7,0,,,,1,2,0.5,,,,0,,,,3,1
1946-47,78115,00,1610612744,PHW,25.0,58,,,142,531,0.267,,,,82,124,0.661,,,,34,,,,83,366
1946-47,77660,00,1610612752,NYK,24.0,26,,0,27,104,0.26,,,,22,34,0.647,,,0,18,,,,32,76


In [None]:
## Save all players data to sqlite db 'all_players'
# all_players.to_sql('all_players', con=engine)

## Retreive all players from sqlite db
# engine.execute("SELECT * FROM all_players").fetchall()

## Draft History

In [None]:
draft_history = drafthistory.DraftHistory()
draft_history.get_data_frames()[0]

## Player Awards

In [None]:
player_awards = playerawards.PlayerAwards(player_id='203076')
player_awards.get_data_frames()[0]

## common_player_info

In [None]:
common_player_info = commonplayerinfo.CommonPlayerInfo(player_id='203076')
common_player_info.get_data_frames()[0]

## common_all_players

In [None]:
common_all_players = commonallplayers.CommonAllPlayers()
common_all_players.get_data_frames()[0]

------------

Not so useful 

## league_dash_player_stats

In [None]:
league_dash_player_bio_stats = leaguedashplayerbiostats.LeagueDashPlayerBioStats()
league_dash_player_bio_stats_df = league_dash_player_bio_stats.get_data_frames()[0]
league_dash_player_bio_stats_df

In [None]:
league_dash_player_bio_stats_df.columns

## League Leaders

In [None]:
league_leaders_df.columns

In [None]:
league_leaders = leagueleaders.LeagueLeaders()
league_leaders_df = league_leaders.get_data_frames()[0]
league_leaders_df

## player_dashboard_by_team_performance
requires player_id number

In [None]:
player_dashboard_by_team_performance = playerdashboardbyteamperformance.PlayerDashboardByTeamPerformance(player_id='203076')
player_dashboard_by_team_performance.get_data_frames()[0]

In [None]:
boxscoreadvancedv22 = drafthistory.DraftHistory()
boxscoreadvancedv22.get_data_frames()[0]

## Draft Combine Drill Results

In [None]:
## Draft Combine Drill Results
draft_combine_drill = draftcombinedrillresults.DraftCombineDrillResults()
draft_combine_drill.get_data_frames()[0]

## Draft Combine Non Stationary Shooting

In [None]:
draft_combine_non_stationary_shooting = draftcombinenonstationaryshooting.DraftCombineNonStationaryShooting()
draft_combine_non_stationary_shooting.get_data_frames()[0]

## Draft Combine Stats

In [None]:
draft_combine_stats = draftcombinestats.DraftCombineStats()
draft_combine_stats.get_data_frames()[0]