In [1]:
from time import strftime,localtime
import pandas as pd
from os.path import exists

In [10]:
class Team_standings:
    
    def __init__(self):
        self.REG = 0
        self.PRE = 1
        self.labels =("regular_season", "pre_season")
        team_abbrvs = pd.read_csv("team_names.csv",usecols=["prefix_1"])
        self.team_names = "|".join(list(team_abbrvs["prefix_1"].str.upper()))
        del team_abbrvs


        
    def correct_teams(self, df):
        teams = df
        first = teams.columns[0]
        row_0 = pd.DataFrame([first],columns=["Team"])
        teams = teams.rename(columns={first:"Team"})
        teams = pd.concat([row_0,teams],ignore_index=True)

        return teams

    def build_table(self, url):
        dfs = pd.read_html(url,match=".+ | \n")
        stats = dfs[1] 
        teams = self.correct_teams(dfs[0]) # Needed to correct empty table header on espn site

        return teams.join(stats)

    def gen_key(self, year, season_type):
        return year+"_"+season_type
    
    def preprocess(self,df, complete_league = True):
        
        if(complete_league):
            df["Team"] = df["Team"].str.extract(r'(^.+--)(.+)', expand = True)[1]
            
        processed = df["Team"].str.extract(fr'({self.team_names})(.+)', expand = True)
        
        df.insert(0,"ID",processed[0])
        df["Team"] = processed[1]
        
        return df
    
    def save_standings(self,filepath,year,preseason=False):
        if(exists(filepath)):
            return
        
        
        root = "https://www.espn.com/nba/standings/_"
        tail = "/group/league"
        
        if(preseason):
            preseason_root = root + "/seasontype/pre/season"
            season = self.build_url(preseason_root,year,tail)
            table = self.build_table(season)
            self.preprocess(table,False)
        else:
            reg_season_root = root + "/season"
            season = self.build_url(reg_season_root,year,tail)
            table = self.build_table(season)
            self.preprocess(table)
            
        table.to_csv(filepath,index = False)
        
        return

    def update_team_standings(self):
        this_year = strftime("%Y",localtime())

        root = "https://www.espn.com/nba/standings/_"
        tail = "/group/league"
        
        filename = self.gen_key(this_year,self.labels[self.REG]) + ".csv"
        filepath = "./team_standings/reg_season/" + filename
        
        reg_season = root + tail
        table = self.build_table(reg_season)
        self.preprocess(table,False)
        table.to_csv(filepath,index = False)
        
        filename = self.gen_key(this_year,self.labels[self.PRE]) + ".csv"
        filepath = "./team_standings/pre_season/" + filename
        
        pre_season = root + "/seasontype/pre" + tail
        table = self.build_table(pre_season)
        
        self.preprocess(table,False)
        table.to_csv(filepath,index = False)
        
        
    
    def get_team_standings(self,last_n_years, include_preseason = False):
        this_year = strftime("%Y",localtime())
        
        year = int(this_year)

        for y in range(year,year-last_n_years,-1):
            filename = self.gen_key(str(y-1),self.labels[self.REG]) + ".csv"
            filepath = "./team_standings/reg_season/" + filename
            
            self.save_standings(filepath,year = str(y-1))
            
            filename = self.gen_key(str(y-1),self.labels[self.PRE]) + ".csv"
            filepath = "./team_standings/pre_season/" + filename
            
            self.save_standings(filepath,year = str(y-1))
        
        return
    
    def build_url(self, root,year_n, tail,preseason = False):
        if(not preseason):
            return root +  "/" + year_n + tail
        else:
            return root + "/" + year_n + tail
        

In [13]:
obj = Team_standings()
obj.get_team_standings(5)

In [12]:
obj.update_team_standings()

In [14]:
df = pd.read_csv("./team_standings/reg_season/2022_regular_season.csv")

In [15]:
df

Unnamed: 0,ID,Team,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,PHX,Phoenix Suns,30,8,0.789,-,17-4,13-4,5-4,20-7,112.4,104.5,7.9,W3,7-3
1,GS,Golden State Warriors,29,9,0.763,1,17-3,12-6,7-1,16-6,110.2,101.4,8.8,L2,6-4
2,UTA,HUtah Jazz,28,10,0.737,2,14-7,14-3,9-0,18-4,116.0,106.2,9.8,W2,8-2
3,CHI,Chicago Bulls,25,10,0.714,3.5,13-4,12-6,4-2,16-7,110.7,107.0,3.7,W8,8-2
4,BKN,Brooklyn Nets,24,12,0.667,5,10-9,14-3,6-1,18-7,110.8,107.8,3.0,W1,6-4
5,MEM,Memphis Grizzlies,26,14,0.65,5,14-8,12-6,4-2,19-9,111.4,107.6,3.8,W7,7-3
6,MIL,Milwaukee Bucks,25,15,0.625,6,13-7,12-8,6-2,17-11,112.1,108.0,4.1,L2,6-4
7,MIA,Miami Heat,24,15,0.615,6.5,12-4,12-11,6-1,16-7,108.1,104.3,3.8,W1,7-3
8,PHI,Philadelphia 76ers,21,16,0.568,8.5,7-8,14-8,3-6,14-10,107.1,106.1,1.0,W5,6-4
9,CLE,Cleveland Cavaliers,21,17,0.553,9,11-9,10-8,4-1,14-9,107.7,102.6,5.1,L1,6-4
