In [4]:
import pandas as pd
from basketball_reference_web_scraper import client

# Player season data

In [42]:
# Inclusive range of season *end* years to download; a season ending in year Y
# is tagged downstream with start_year = Y - 1.
FROM_YEAR, TO_YEAR = 2009, 2019

## Get data from API

In [23]:
def get_player_seasons_over_years(first_year, last_year):
    """Fetch player season totals for every season ending in first_year..last_year.

    One call to ``client.players_season_totals`` is made per season. Each
    season's rows are tagged with ``start_year`` (end year minus one) and
    ``end_year`` as the two leftmost columns, and the seasons are stacked in
    chronological order.

    Args:
        first_year: End year of the first season to fetch (inclusive, integer).
        last_year: End year of the last season to fetch (inclusive, integer).

    Returns:
        A DataFrame with the rows of every fetched season. Row labels of the
        per-season frames are kept as-is (no re-indexing), so labels may repeat
        across seasons. An empty DataFrame is returned when
        ``first_year > last_year``.
    """
    season_frames = []

    for end_year in range(first_year, last_year + 1):
        season_df = pd.DataFrame(client.players_season_totals(season_end_year=end_year))
        # Insert end_year first so that start_year ends up as the leftmost column.
        season_df.insert(0, 'end_year', end_year)
        season_df.insert(0, 'start_year', end_year - 1)
        season_frames.append(season_df)

    # pd.concat raises on an empty list, so keep the "no seasons requested" case explicit.
    if not season_frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of re-copying the accumulated frame
    # on every loop iteration.
    return pd.concat(season_frames)

In [24]:
# Download one frame of season totals per year in the configured range.
player_seasons_df = get_player_seasons_over_years(
    first_year=FROM_YEAR,
    last_year=TO_YEAR,
)

## Clean data

### Rename columns

In [25]:
# Map the verbose column names to the short stat abbreviations used by the
# cells below.
column_aliases = {
    'slug': 'player_id',
    'made_field_goals': 'fgm',
    'attempted_field_goals': 'fga',
    'made_three_point_field_goals': '3pm',
    'attempted_three_point_field_goals': '3pa',
    'made_free_throws': 'ftm',
    'attempted_free_throws': 'fta',
    'offensive_rebounds': 'orb',
    'defensive_rebounds': 'drb',
    'assists': 'ast',
    'steals': 'stl',
    'blocks': 'blk',
    'turnovers': 'tvr',
    'personal_fouls': 'pf',
}
player_seasons_df = player_seasons_df.rename(columns=column_aliases)

### Add season total fantasy points

In [26]:
def get_fantasy_points(row):
    """Return the fantasy-point total for one set of counting stats.

    Args:
        row: Any mapping-like object (e.g. a DataFrame row) exposing the keys
            '3pm', 'fgm', 'ftm', 'orb', 'drb', 'ast', 'blk' and 'stl'.

    Returns:
        3 per '3pm' + 2 per 'fgm' + 1 per 'ftm', plus 1.2 per rebound
        (offensive and defensive), 1.5 per assist, 3.0 per block and
        3.0 per steal.
    """
    # NOTE(review): if 'fgm' already includes made three-pointers, each three
    # is counted in both the 3x and the 2x term -- confirm this is intended.
    scoring = 3 * row['3pm'] + 2 * row['fgm'] + row['ftm']
    boards = 1.2 * (row['orb'] + row['drb'])
    dimes = 1.5 * row['ast']
    swats = 3.0 * row['blk']
    thefts = 3.0 * row['stl']

    # Accumulate left to right: scoring, rebounds, assists, blocks, steals.
    total = scoring
    for component in (boards, dimes, swats, thefts):
        total = total + component
    return total

In [27]:
# get_fantasy_points only does column lookups and arithmetic, so it can be
# evaluated on the whole frame at once instead of row by row through apply().
player_seasons_df['fp'] = get_fantasy_points(player_seasons_df)

## Consolidate players who switched teams mid-season

In [38]:
# Counting stats that are summed per player-season and later averaged per game.
stats = [
    'fgm', 'fga',   # field goals made / attempted
    '3pm', '3pa',   # three-pointers made / attempted
    'ftm', 'fta',   # free throws made / attempted
    'orb', 'drb',   # offensive / defensive rebounds
    'ast', 'stl', 'blk',
    'tvr', 'pf',    # turnovers, personal fouls
    'fp',           # fantasy points
]

In [39]:
# Columns to total: every counting stat plus the number of games played.
agg_stats = [*stats, 'games_played']

# A player can have several rows for the same season (per the section heading,
# after a mid-season team switch); summing per (end_year, player_id) collapses
# them into a single row.
unique_player_seasons = (
    player_seasons_df
    .groupby(by=['end_year', 'player_id'])[agg_stats]
    .sum()
    .reset_index()
)

## Calculate average season stats (per game)

In [41]:
# Names of the per-game average columns, in the same order as `stats`.
seas_avg_stats = [stat + "_seas_avg" for stat in stats]

games_played = unique_player_seasons['games_played']
has_games = games_played > 0

# Vectorized per-game average: divide whole columns at once rather than running
# a row-wise apply() for every stat. Rows with no games played (or a missing
# games count) get 0 instead of the inf/NaN produced by the division.
for stat, avg_col in zip(stats, seas_avg_stats):
    unique_player_seasons[avg_col] = (
        unique_player_seasons[stat] / games_played
    ).where(has_games, 0)

In [43]:
# Output file is named after the inclusive year range, e.g. "<from>_<to>_player_seasons_unique.csv".
year_span = "_".join([str(FROM_YEAR), str(TO_YEAR)])
path = "../data/" + year_span + "_player_seasons_unique.csv"

# Write one row per player-season; the positional index is not part of the data.
unique_player_seasons.to_csv(path, index=False)