In [6]:
import pandas as pd
import time
from datetime import datetime, timedelta

# Three-letter team codes; each one is used both as the CSV file stem under
# ./data/ and as the path segment of the scraped team URLs.
teams = [
    "ATL", "BOS", "BRK", "CHO", "CHI",
    "CLE", "DAL", "DEN", "DET", "GSW",
    "HOU", "IND", "LAC", "LAL", "MEM",
    "MIA", "MIL", "MIN", "NOP", "NYK",
    "OKC", "ORL", "PHI", "PHO", "POR",
    "SAC", "SAS", "TOR", "UTA", "WAS",
]

# Column layout of every per-team frame: the first 22 names are filled from
# the regular game log, the remaining 18 from the advanced game log.
column_headers = [
    # game identification and result
    "Date", "H/A", "Opp", "W/L", "P", "OppP",
    # shooting
    "FG", "FGA", "FG%", "3P", "3PA", "3P%", "FT", "FTA", "FT%",
    # counting stats
    "ORB", "TRB", "AST", "STL", "BLK", "TOV", "PF",
] + [
    # ratings and rates
    "ORtg", "DRtg", "Pace", "FTr", "3PAr", "TS%",
    "TRB%", "AST%", "STL%", "BLK%",
    # "O"-prefixed four-factor columns
    "OeFG%", "OTOV%", "OORB%", "OFT/FGA",
    # "D"-prefixed four-factor columns
    "DeFG%", "DTOV%", "DDRB%", "DFT/FGA",
]


import requests
from bs4 import BeautifulSoup

# ---------------------------------------------------------------------------
# Incrementally update ./data/<TEAM>.csv: for every team, scrape the regular
# and the advanced game log and append the games that are newer than the last
# row already stored in the CSV.
# ---------------------------------------------------------------------------

# Season segment of the game-log URLs.
SEASON = '2024'

# Pause before EVERY request so we do not get rate limited (each team needs
# two requests: regular + advanced game log).
REQUEST_DELAY_SECONDS = 5

# Columns filled from the regular game log, in page order.
BASIC_COLUMNS = ['Date', 'H/A', 'Opp', 'W/L', 'P', 'OppP', 'FG', 'FGA', 'FG%',
                 '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'TRB', 'AST',
                 'STL', 'BLK', 'TOV', 'PF']

# Columns filled from the advanced game log, in page order.
ADVANCED_COLUMNS = ['ORtg', 'DRtg', 'Pace', 'FTr', '3PAr', 'TS%', 'TRB%',
                    'AST%', 'STL%', 'BLK%', 'OeFG%', 'OTOV%', 'OORB%',
                    'OFT/FGA', 'DeFG%', 'DTOV%', 'DDRB%', 'DFT/FGA']


def _fetch_gamelog_rows(url):
    """Download a game-log page and return the cell texts of its data rows.

    Parameters
    ----------
    url : str
        Page to download.

    Returns
    -------
    (rows, retry_after) : tuple
        ``rows`` is a list with one entry per ``<tr>`` of the first table's
        ``<tbody>`` that contains at least one ``<td>``; each entry is the
        list of stripped ``<td>`` texts. Rows without any ``<td>`` are
        dropped (presumably repeated in-table header rows - TODO confirm),
        because indexing into their empty cell list would raise IndexError.
        ``retry_after`` is the value of the response's ``Retry-After``
        header, or ``None``. When the header is present ``rows`` is ``None``.

    Raises
    ------
    RuntimeError
        If the page contains no ``<table>`` with a ``<tbody>``.
    """
    time.sleep(REQUEST_DELAY_SECONDS)
    page = requests.get(url)

    retry_after = page.headers.get("Retry-After")
    if retry_after is not None:
        return None, retry_after

    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("table")
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        raise RuntimeError("No game log table found at " + url)

    rows = []
    for row in table_body.find_all('tr'):
        cells = [ele.text.strip() for ele in row.find_all('td')]
        if cells:
            rows.append(cells)
    return rows, None


def _is_new_game(cells, most_recent_date_object):
    """Return True when the row's date (second <td>) is after the given date."""
    game_date = datetime.strptime(cells[1], '%Y-%m-%d').date()
    return game_date > most_recent_date_object


for team in teams:
    print(team)

    #GET current df; the first CSV column is the index written by to_csv below
    df = pd.read_csv('./data/' + team + '.csv')
    df = df.drop(df.columns[[0]], axis=1)

    #get date of most recent entry in the .csv (an empty file means every
    #scraped game is new)
    if df.empty:
        most_recent_date_object = datetime.min.date()
    else:
        most_recent_date_object = datetime.strptime(df.iloc[-1]['Date'], '%Y-%m-%d').date()

    #GET both pages up front so a rate limit on either one is noticed before
    #anything is written
    base_url = 'https://www.basketball-reference.com/teams/' + team + '/' + SEASON
    basic_rows, delay = _fetch_gamelog_rows(base_url + '/gamelog/')
    if delay is None:
        advanced_rows, delay = _fetch_gamelog_rows(base_url + '/gamelog-advanced/')

    if delay is not None:
        # NOTE(review): assumes Retry-After is a number of seconds, as the
        # original code did; an HTTP-date value would make int() raise.
        d = datetime.now() + timedelta(seconds=int(delay))
        print("Try again at : ")
        print(d)
        break

    #collect the new games keyed by date string, in page order
    new_games = {}

    for cells in basic_rows:
        # `continue`, not `break`: stopping at the first already-stored game
        # would discard every newer game listed after it, so skipping works
        # for either row order.
        if not _is_new_game(cells, most_recent_date_object):
            continue

        # Keep <td> cells 1..22 (Date .. PF): drops the leading cell and
        # everything after the 22 basic columns.
        values = cells[1:23]
        if len(values) != len(BASIC_COLUMNS):
            raise ValueError("Unexpected game log row layout for " + team + ": " + repr(cells))
        values[1] = 'A' if values[1] == '@' else 'H'

        new_games[cells[1]] = dict(zip(BASIC_COLUMNS, values))

    for cells in advanced_rows:
        if not _is_new_game(cells, most_recent_date_object):
            continue

        # Keep <td> cells 7-16, 18-21 and 23-26: skips the seven leading
        # game-info cells and cells 17 and 22 (presumably empty spacer
        # columns - TODO confirm against the page).
        values = cells[7:17] + cells[18:22] + cells[23:27]
        if len(values) != len(ADVANCED_COLUMNS):
            raise ValueError("Unexpected advanced game log row layout for " + team + ": " + repr(cells))

        # Advanced rows whose date has no regular-log counterpart are ignored.
        if cells[1] in new_games:
            new_games[cells[1]].update(zip(ADVANCED_COLUMNS, values))

    #nothing new for this team: leave its .csv untouched
    if not new_games:
        continue

    #build the new rows once, then append them to the stored games
    new_df = pd.DataFrame(list(new_games.values()), columns=column_headers)
    df = pd.concat([df, new_df], ignore_index=True)

    #save to .csv (the index is written as the first column and dropped
    #again on the next read)
    df.to_csv("./data/" + team + ".csv")




ATL
BOS
BRK
CHO
CHI
CLE
DAL
DEN
DET
GSW
HOU
IND
LAC
LAL
MEM
MIA
MIL
MIN
NOP
NYK
OKC
ORL
PHI
PHO
POR
SAC
SAS
TOR
UTA
WAS


In [7]:
#Calculates current_team_data.csv

# Sizes of the trailing-game windows summarised for every team.
WINDOW_SIZES = (3, 10, 50)

# Position of the first column that is averaged. NOTE(review): assumes the
# CSV layout is <index>, Date, H/A, Opp, W/L, then numeric columns - confirm
# against the files in ./data/.
FIRST_STAT_COLUMN = 5


def _window_summary(games, window):
    """Summarise the last ``window`` rows of one team's game frame.

    Parameters
    ----------
    games : pandas.DataFrame
        Frame read from ./data/<TEAM>.csv; must contain 'W/L' and 'H/A'.
    window : int
        Number of trailing rows to summarise (fewer rows are used when the
        frame is shorter).

    Returns
    -------
    pandas.Series
        Mean of every column from position FIRST_STAT_COLUMN onwards, rounded
        to two decimals, plus 'W' (count of 'W' in 'W/L') and 'H' (count of
        'H' in 'H/A'); every label is prefixed with ``last_<window>_``.
    """
    recent = games.iloc[-window:]
    summary = recent.iloc[:, FIRST_STAT_COLUMN:].mean().apply(
        lambda x: float("{:.2f}".format(x))
    )
    summary['W'] = recent['W/L'].value_counts().get('W', 0)
    summary['H'] = recent['H/A'].value_counts().get('H', 0)
    summary.index = ["last_{}_{}".format(window, name) for name in summary.index.to_list()]
    return summary


# One single-row frame per team; concatenated once after the loop instead of
# growing dataset_df inside it.
team_frames = []

for team in teams:
    df = pd.read_csv('./data/' + team + '.csv')

    #add all window summaries to one series, led by the team code
    stats = pd.concat(
        [pd.Series([team], index=["Team"])]
        + [_window_summary(df, window) for window in WINDOW_SIZES]
    )

    team_frames.append(stats.to_frame().T)

# pd.concat raises on an empty list, so fall back to an empty frame.
dataset_df = pd.concat(team_frames) if team_frames else pd.DataFrame()

#save data
dataset_df.to_csv('./data/current_team_data.csv')


        
    
