In [None]:
import pandas as pd, time

# Make jupyter notebook cells wider.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
# Weekly schedule for the 2020-2021 NBA season: number of games per week,
# with one column per team.
sched = pd.read_excel(
    'csv/weekly_schedule.xlsx',
)

In [None]:
# Team abbreviations are every column after the three shared leading columns.
nba_teams = sched.columns.to_list()[3:]

# Select the playoff weeks (week 19 onward) once, instead of re-running the
# same query for every team inside the loop.
playoff_weeks = sched.query("Week >= 19")

# List of lists that contain team abbreviation, and number of games for
# playoffs week 1, 2, 3
playoff_games_by_team_week = [
    [team] + playoff_weeks[team].to_list()
    for team in nba_teams
]

# The column names below assume exactly three playoff weeks in the schedule;
# the DataFrame constructor raises if a row has a different width.
playoff_games_weekly = pd.DataFrame(
    playoff_games_by_team_week,
    columns=['TEAM', 'Week1', 'Week2', 'Week3'],
)

# Combined game counts for each pair of playoff weeks.
playoff_games_weekly['Week1/2'] = playoff_games_weekly['Week1'] + playoff_games_weekly['Week2']
playoff_games_weekly['Week2/3'] = playoff_games_weekly['Week2'] + playoff_games_weekly['Week3']
playoff_games_weekly['Week1/3'] = playoff_games_weekly['Week1'] + playoff_games_weekly['Week3']

# Preview the first five teams.
playoff_games_by_team_week[:5]

In [None]:
# hashtagbasketball uses its own team abbreviations. This maps each of those
# (key) to the abbreviation used everywhere else in this notebook (value).
team_name_map = {
    'BRO': 'BKN',
    'PHX': 'PHO',
    'NOP': 'NOR',
    'OKL': 'OKC',
}

# Team (city) name -> team abbreviation, for all 30 teams.
team_name_to_abbv = {
    'Atlanta': 'ATL',
    'Boston': 'BOS',
    'Brooklyn': 'BKN',
    'Charlotte': 'CHA',
    'Chicago': 'CHI',
    'Cleveland': 'CLE',
    'Dallas': 'DAL',
    'Denver': 'DEN',
    'Detroit': 'DET',
    'Golden State': 'GSW',
    'Houston': 'HOU',
    'Indiana': 'IND',
    'LA Clippers': 'LAC',
    'LA Lakers': 'LAL',
    'Memphis': 'MEM',
    'Miami': 'MIA',
    'Milwaukee': 'MIL',
    'Minnesota': 'MIN',
    'New Orleans': 'NOR',
    'New York': 'NYK',
    'Oklahoma City': 'OKC',
    'Orlando': 'ORL',
    'Philadelphia': 'PHI',
    'Phoenix': 'PHO',
    'Portland': 'POR',
    'Sacramento': 'SAC',
    'San Antonio': 'SAS',
    'Toronto': 'TOR',
    'Utah': 'UTA',
    'Washington': 'WAS',
}

In [None]:
# Preview the three leading columns, which are shared by all teams.
sched.iloc[:, 0:3].head(n=5)

In [None]:
"""Extract these common columns. Will be used tranpose the data."""
# First three schedule columns (later renamed Week_Type, Week and Date).
base_sched = sched.iloc[:, 0:3]

In [None]:
# Collects one long-format schedule frame per team.
schedule_by_team = []


def process_schedule(df, team):
    """Build one team's long-format schedule and append it to ``schedule_by_team``.

    Parameters
    ----------
    df : pandas.Series
        The team's games-per-week column taken from ``sched``.
    team : str
        Team abbreviation stored in the new ``Team`` column.

    Returns
    -------
    None
        The resulting frame is appended to the module-level list.
    """
    # Put the shared week columns side by side with this team's game counts.
    combined = pd.concat([base_sched, df], axis=1)
    combined.columns = ['Week_Type', 'Week', 'Date', 'Number_of_Games']
    # Tag every row with the team it belongs to.
    combined['Team'] = team
    schedule_by_team.append(combined)


for team in nba_teams:
    process_schedule(sched[team], team)

len(schedule_by_team) == 30 # Should be 30 entries

In [None]:
"""Vertically append all teams schedules into one dataframe. Now
the data looks more relational instead of like a pivot table.
"""
# Stack the per-team frames row-wise. Each team's original row labels are
# kept, because ignore_index is not set.
final_schedule = pd.concat(objs=schedule_by_team, axis=0)

In [None]:
# Save the long-format schedule without the row index.
final_schedule.to_csv(
    path_or_buf='weekly_schedule_transposed.csv',
    index=False,
)

### Maximum number of games

#### Teams and weeks with maximum number of games

In [None]:
# Largest number of games any team plays in a single week.
# Series.max() is vectorized and skips missing values, whereas the builtin
# max() iterates in Python and gives order-dependent results if NaN is present.
max_games = final_schedule['Number_of_Games'].max()

max_games

In [None]:
# Every team/week row that reaches the overall maximum.
final_schedule[final_schedule['Number_of_Games'] == max_games]

#### Teams and weeks with maximum number of playoff games

In [None]:
# Largest number of games any team plays in a single playoff week.
# Series.max() is vectorized and skips missing values. If no row is tagged
# 'playoffs' it yields NaN instead of the builtin max()'s ValueError.
max_playoff_games = final_schedule.query("Week_Type == 'playoffs'")['Number_of_Games'].max()

max_playoff_games

In [None]:
# Playoff team/week rows that reach the playoff maximum.
final_schedule[
    (final_schedule['Number_of_Games'] == max_playoff_games)
    & (final_schedule['Week_Type'] == 'playoffs')
]

In [None]:
column_renames = {'Number_of_Games': 'Total Playoff Games', 'Team': 'TEAM'}

# Sum each team's rows from week 19 onward. The summed 'Week' column is kept
# in the result alongside the game total.
total_playoff_games_by_team = (
    final_schedule
    .query("Week >= 19")
    [['Team', 'Week', 'Number_of_Games']]
    .groupby(by=['Team'])
    .sum()
    .reset_index()
    .rename(columns=column_renames)
)

# Total and per-week playoff games for every team, best schedules first,
# shaded with a red-yellow-green gradient.
(
    total_playoff_games_by_team[['TEAM', 'Total Playoff Games']]
    .set_index('TEAM')
    .join(playoff_games_weekly.set_index('TEAM'))
    .reset_index()
    .sort_values(by=['Total Playoff Games', 'Week1', 'Week2', 'Week3'], ascending=False)
    .style.background_gradient(cmap='RdYlGn')
)

### Player projections for 2020-2021 season

In [None]:
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

def html_table_to_df(url, css_id):
    """Download a web page and parse one element's HTML tables with pandas.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    css_id : str
        ``id`` attribute of the element to extract.

    Returns
    -------
    list of pandas.DataFrame
        The result of ``pd.read_html`` on the element's markup; callers index
        ``[0]`` to get the table itself.

    Raises
    ------
    ValueError
        If the page contains no element with the given ``id`` (``pd.read_html``
        also raises ``ValueError`` when it finds no table in the markup).
    """
    from io import StringIO  # local import keeps this cell self-contained

    # Send a browser-like User-Agent header with the request.
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(request) as response:
        page_bytes = response.read()
    # Fixed pause after every download (kept from the original behaviour).
    time.sleep(5)

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(page_bytes, 'html.parser')

    html_table = soup.find(id=css_id)
    if html_table is None:
        raise ValueError(f"No element with id {css_id!r} found at {url}")

    # pd.read_html expects a file-like object; passing literal HTML as a
    # plain string is deprecated.
    return pd.read_html(StringIO(html_table.decode()))
    
# Scrape the points-league rankings table from hashtagbasketball.
player_projections = html_table_to_df(
    url='https://hashtagbasketball.com/fantasy-basketball-points-league-rankings',
    css_id='ContentPlaceHolder1_GridView1',
)

In [None]:
# Friendlier names for the scraped column headers.
cols_to_rename = {'R#': 'Per Game Rank',
        'NAME': 'Player',
        'TOTAL': 'Per Game Avg',
        'Total_Playoff_Games': 'Total Playoff Games',
        'POS': 'Position'}

def fix_team_name(team):
    """Return the standardized abbreviation for ``team``.

    Abbreviations listed in ``team_name_map`` are translated; any other value
    is returned unchanged.
    """
    return team_name_map.get(team, team)

# Standardize team names (updates the scraped table in place).
player_projections[0]['TEAM'] = player_projections[0]['TEAM'].apply(fix_team_name)

players_projection_games = (
    player_projections[0]
    .set_index('TEAM')
    .join(total_playoff_games_by_team.set_index('TEAM'))
    .reset_index()
    .rename(columns=cols_to_rename)
    .drop(columns=['Week'])
    # Filter out the filler rows that hashtagbasketball puts in their tables
    .query("TEAM != 'TEAM'")
    # Take an explicit copy so the column assignment further down does not
    # trigger SettingWithCopyWarning on the filtered result.
    .copy()
)

# Exported before the numeric conversion below, exactly as scraped.
players_projection_games.to_csv('players_projection_games.csv', index=False)

convert_to_num_cols = ['Per Game Avg', 'GP', 'PTS', 'TREB', 'AST', 'STL', 'BLK', 'TO']
column_order = ['TEAM', 'Player', 'Position', 'Per Game Rank']\
               + convert_to_num_cols + ['Total Playoff Games', 'Week1', 'Week2', 'Week3', 'Week1/2', 'Week2/3', 'Week1/3']

# pd.to_numeric raises if any of these columns holds a non-numeric value.
players_projection_games[convert_to_num_cols] = players_projection_games[convert_to_num_cols].apply(pd.to_numeric)

# Preview one team. This sits last in the cell so the notebook actually
# displays it; a bare expression in the middle of a cell is discarded.
players_projection_games.query("TEAM == 'ATL'").head()

### Number of back to backs by team

In [None]:
# Scrape the rest-days analysis table from nbastuffer.
b2b_by_team = html_table_to_df(
    url='https://www.nbastuffer.com/2020-2021-nba-schedule-rest-days-analysis/',
    css_id='tablepress-61',
)

In [None]:
# Move the existing index into a column and drop the site's RANK column.
b2bs_by_team = (
    b2b_by_team[0]
    .reset_index()
    .drop(columns=['RANK'])
)
# Translate team names to abbreviations. A name missing from
# team_name_to_abbv raises KeyError.
b2bs_by_team['TEAMS'] = b2bs_by_team['TEAMS'].apply(
    lambda full_name: team_name_to_abbv[full_name]
)

b2bs_by_team.head()

### Interactive table

In [None]:
from itables import show

# Attach the per-week playoff game counts to every player row, joined on TEAM.
final = (
    players_projection_games
    .set_index('TEAM')
    .join(playoff_games_weekly.set_index('TEAM'))
)

# Render the joined table with itables, columns in presentation order.
show(final.reset_index()[column_order])

### Full dataframe of joined player projections and playoff games data

In [None]:
# Named sort presets; only the commented-out chain at the end of this cell
# refers to them.
sorting_config = {
    'a': {
        'by': ['Per Game Avg'],
        'ascending': [False],
    },
}

# Names of the columns whose dtype is float64.
float_cols = [
    col_name
    for col_name, col_dtype in dict(final.dtypes).items()
    if str(col_dtype) == "float64"
]

# Round the numeric stat columns to two decimals before exporting.
final[convert_to_num_cols] = final[convert_to_num_cols].round(2)

# NOTE(review): unlike the other CSV exports in this notebook, this one also
# writes the row index as a column -- confirm that is intended.
final.reset_index()[column_order].to_csv('final.csv')
# Optional display variant, left disabled:
#.sort_values(**sorting_config['a'])\
#.style.background_gradient(cmap='RdYlGn')