In [1]:
import pandas as pd
from datetime import datetime
from datetime import timedelta
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import Location, Outcome
import json

# Player matches data

In [29]:
# Window size (in games) passed to .rolling() when building the "last-n-games" columns.
ROLLING_GAMES_WINDOW: int = 5

## Grab all of the daily data

In [8]:
def get_player_matches_over_dates(start_date, end_date):
    """Collect player box scores for every day in ``[start_date, end_date)``.

    Args:
        start_date: First day to fetch (``datetime``). Only its year, month and
            day are forwarded to the scraper client.
        end_date: Day to stop at (``datetime``). Exclusive: the loop runs while
            the current day is strictly before it.

    Returns:
        A DataFrame with one row per record returned by
        ``client.player_box_scores``, with ``date``, ``year``, ``month`` and
        ``day`` columns inserted in front of the scraped fields. Because
        ``reset_index()`` is called without ``drop=True``, each row's position
        within its day is preserved in a leading ``index`` column. When no day
        yields any record the returned frame has no rows.
    """
    daily_frames = []
    curr_date = start_date

    while curr_date < end_date:
        day = curr_date.day
        month = curr_date.month
        year = curr_date.year

        daily_matches = client.player_box_scores(day=day, month=month, year=year)
        daily_df = pd.DataFrame(daily_matches)

        # Days without any record are skipped entirely.
        if len(daily_df) > 0:
            daily_df.insert(0, 'date', datetime(year, month, day))
            daily_df.insert(1, 'year', year)
            daily_df.insert(2, 'month', month)
            daily_df.insert(3, 'day', day)
            daily_frames.append(daily_df)

        curr_date = curr_date + timedelta(days=1)

    # Concatenate once at the end; concatenating inside the loop re-copies all
    # previously collected rows on every iteration (quadratic in total rows).
    # pd.concat raises on an empty list, so fall back to an empty frame.
    player_match_df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

    return player_match_df.reset_index()

In [9]:
# Fetch window: start_date is included, end_date is excluded by the fetch loop.
# Alternative start noted by the author: datetime(2010, 1, 1)
start_date, end_date = datetime(2019, 11, 15), datetime(2019, 11, 18)

player_match_df = get_player_matches_over_dates(start_date=start_date, end_date=end_date)

## Clean up the data

In [11]:
# Lookup table used below to turn a team enum's .value into a team key.
with open('team_name_mapping.json', 'r') as mapping_file:
    team_name_mapping = json.load(mapping_file)

### Clean up enums and rename columns

In [12]:
def _to_team_key(team):
    """Translate a team enum member into its key via team_name_mapping."""
    return team_name_mapping[team.value]


# Verbose scraper column names -> compact stat abbreviations.
column_abbreviations = {
    'slug': 'player_id',
    'seconds_played': 'secs',
    'made_field_goals': 'fgm',
    'attempted_field_goals': 'fga',
    'made_three_point_field_goals': '3pm',
    'attempted_three_point_field_goals': '3pa',
    'made_free_throws': 'ftm',
    'attempted_free_throws': 'fta',
    'offensive_rebounds': 'orb',
    'defensive_rebounds': 'drb',
    'assists': 'ast',
    'steals': 'stl',
    'blocks': 'blk',
    'turnovers': 'tvr',
    'personal_fouls': 'pf',
}

# Enum-valued columns that are replaced by the derived key/boolean columns.
enum_columns = ['team', 'opponent', 'location', 'outcome']

clean_pm_df = (
    player_match_df
    .assign(
        team_key=lambda df: df['team'].map(_to_team_key),
        opponent_key=lambda df: df['opponent'].map(_to_team_key),
        home_game=lambda df: df['location'].map(lambda loc: loc == Location.HOME),
        win=lambda df: df['outcome'].map(lambda outcome: outcome == Outcome.WIN),
    )
    .rename(columns=column_abbreviations)
    .drop(columns=enum_columns)
)

### Add game ID, season start year

In [24]:
def format_date(date):
    """Render a date as a compact ``YYYYMMDD`` string, e.g. ``20191115``."""
    return date.strftime("%Y%m%d")

In [25]:
def get_game_id(date, home, away):
    """Build a game identifier of the form ``<YYYYMMDD>_<home>_<away>``.

    ``home`` and ``away`` must already be strings; the date is rendered with
    ``format_date``.
    """
    return "_".join([format_date(date), home, away])

In [14]:
def get_game_id_for_row(row):
    """Build the game ID for one player-match row.

    The row's own team is the home side when ``row['home_game']`` is truthy;
    otherwise the opponent is. Both players in the same game therefore map to
    the same (home, away) ordering.
    """
    team, opponent = row['team_key'], row['opponent_key']

    if row['home_game']:
        home, away = team, opponent
    else:
        home, away = opponent, team

    return get_game_id(row['date'], home, away)

In [15]:
def get_season_start_year_for_row(row):
    """Return the calendar year in which the row's season started.

    Games played in months 1-6 are attributed to a season that began the
    previous calendar year; games in months 7-12 to the current year.
    """
    return row['year'] - 1 if row['month'] < 7 else row['year']

In [16]:
# Derive one game ID per row, then place it as the first column.
game_ids = clean_pm_df.apply(get_game_id_for_row, axis=1)
clean_pm_df.insert(0, 'game_id', game_ids)

In [17]:
# Derive the season start year per row, then insert it at column position 5.
season_start_years = clean_pm_df.apply(get_season_start_year_for_row, axis=1)
clean_pm_df.insert(5, 'season_start_year', season_start_years)

## Add fantasy points

In [18]:
def get_fantasy_points(row):
    """Score a single box-score row as a fantasy point total.

    Weights: 3 per '3pm', 2 per 'fgm', 1 per 'ftm', 1.2 per rebound
    ('orb' + 'drb'), 1.5 per 'ast', 3.0 per 'blk' and 3.0 per 'stl'.
    'tvr' and 'pf' do not contribute.

    NOTE(review): if 'fgm' already counts made three-pointers, each three is
    worth 5 here (3 + 2) -- confirm this is the intended scoring.
    """
    scoring = 3 * row['3pm'] + 2 * row['fgm'] + row['ftm']
    boards = 1.2 * (row['orb'] + row['drb'])

    # Terms are added in a fixed left-to-right order so float results are stable.
    return scoring + boards + 1.5 * row['ast'] + 3.0 * row['blk'] + 3.0 * row['stl']

In [19]:
def calculate_fantasy_points_for_df(df):
    """Apply ``get_fantasy_points`` to every row of ``df``.

    Returns a Series of fantasy point totals sharing ``df``'s index.
    """
    return df.apply(get_fantasy_points, axis=1)

In [20]:
# Store each row's fantasy point total in a new 'fp' column (modifies clean_pm_df in place).
clean_pm_df['fp'] = calculate_fantasy_points_for_df(clean_pm_df)

## Add "last-n-games" data

In [21]:
# Per-game columns that each get a rolling "last-n-games" average column.
stats = [
    'secs',
    'fgm', 'fga',
    '3pm', '3pa',
    'ftm', 'fta',
    'orb', 'drb',
    'ast', 'stl', 'blk',
    'tvr', 'pf',
    'fp',
]

In [23]:
# For every stat, add a "<stat>_l<window>" column holding the player's mean over
# up to ROLLING_GAMES_WINDOW games preceding the current row. The shift(1)
# excludes the current game from its own average, and a player's first row
# (no prior games) is filled with 0.
# Assumes each player's rows appear in chronological order -- TODO confirm if
# the frame is ever re-sorted before this cell.
for stat in stats:
    # The window constant defines the suffix; the previously referenced
    # `numPastGames` is not defined anywhere in this notebook.
    rolling_col = stat + "_l" + str(ROLLING_GAMES_WINDOW)

    # transform() returns a Series aligned with clean_pm_df's index, so values
    # land on the correct rows regardless of the order in which groups are
    # processed (assigning list(...) of a groupby.apply result is positional
    # and can misplace values when the result is ordered by group).
    clean_pm_df[rolling_col] = clean_pm_df.groupby('player_id')[stat].transform(
        lambda games: games.rolling(ROLLING_GAMES_WINDOW, min_periods=1).mean().shift(1).fillna(0)
    )

## Save to CSV

In [28]:
# Output file is named after the fetch window: <start>_<end>_player_matches.csv
file_name = "_".join([format_date(start_date), format_date(end_date), "player_matches.csv"])
path = "../data/" + file_name

clean_pm_df.to_csv(path, index=False)