In [1]:
from os import makedirs
from os.path import dirname, exists
from time import localtime, strftime

import pandas as pd

In [2]:
# Scrape every HTML table on ESPN's NBA player-stats page (season 2021, sorted by
# average points, descending). read_html returns a list of DataFrames, one per table.
df = pd.read_html("https://www.espn.com/nba/stats/player/_/season/2021/seasontype/2/table/offensive/sort/avgPoints/dir/desc")

In [3]:
# Stitch the first two scraped tables together side by side (join aligns on the
# row index) so each row holds one player's identity plus their stat columns.
dfs = df[0].join(df[1])

In [4]:
# Split ESPN's fused "<player name><TEAM>" cell into separate Name and Team columns.
# The team group also accepts slash-separated abbreviations (e.g. "BKN/HOU", which is
# how ESPN appears to list players who changed teams mid-season) so the whole suffix
# lands in Team instead of leaving "BKN/" glued to the end of the player's name.
# NOTE: not idempotent - re-running this cell on already-split names yields NaN in
# both columns, so re-run the cells above first.
dfs[['Name','Team']] = dfs['Name'].str.extract('^(.*?)([A-Z]+(?:/[A-Z]+)*)$', expand=True)

In [5]:
# Preview the first rows to confirm Name and Team were separated correctly.
dfs.head()

Unnamed: 0,RK,Name,POS,GP,MIN,PTS,FGM,FGA,FG%,3PM,...,FT%,REB,AST,STL,BLK,TO,DD2,TD3,PER,Team
0,1,Stephen Curry,PG,63,34.2,32.0,10.4,21.7,48.2,5.3,...,91.6,5.5,5.8,1.2,0.1,3.4,8,0,26.37,GS
1,2,Bradley Beal,SG,60,35.8,31.3,11.2,23.0,48.5,2.2,...,88.9,4.7,4.4,1.2,0.4,3.1,4,0,22.81,WSH
2,3,Damian Lillard,PG,67,35.8,28.8,9.0,19.9,45.1,4.1,...,92.8,4.2,7.5,0.9,0.3,3.0,16,0,25.65,POR
3,4,Joel Embiid,C,51,31.1,28.5,9.0,17.6,51.3,1.1,...,85.9,10.6,2.8,1.0,1.4,3.1,31,0,30.32,PHI
4,5,Giannis Antetokounmpo,PF,61,33.0,28.1,10.3,18.0,56.9,1.1,...,68.5,11.0,5.9,1.2,1.2,3.4,41,7,29.24,MIL


In [6]:
def get_team_stats(teams, last_n_years, include_postseason = False):
    """Unfinished stub for a team-stats scraper.

    At present the function only works out the current calendar year and then
    falls through; ``teams``, ``last_n_years`` and ``include_postseason`` are
    accepted but not used yet.

    Returns:
        None
    """
    current_time = localtime()
    # Four-digit year of the local clock; computed but not consumed yet.
    this_year = int(strftime("%Y", current_time))
    return None

In [100]:
class Team_standings:
    """Download NBA team standings from ESPN and store them as CSV files.

    Regular-season files are written under ``./team_standings/reg_season/`` and
    pre-season files under ``./team_standings/pre_season/``. Every file is named
    ``<year>_<season label>.csv`` (see ``gen_key``).
    """

    # URL pieces shared by every ESPN standings request.
    _ROOT = "https://www.espn.com/nba/standings/_"
    _TAIL = "/group/league"

    # Season labels used in file names, and the directory each one is stored in.
    _REG_LABEL = "regular_season"
    _PRE_LABEL = "pre_season"
    _REG_DIR = "./team_standings/reg_season/"
    _PRE_DIR = "./team_standings/pre_season/"

    def __init__(self):
        """Load the team abbreviations from ``team_names.csv`` (column ``prefix_1``).

        The abbreviations are stored in ``self.team_names`` as a regex alternation
        (``"AAA|BB|..."``) that ``preprocess`` uses to split ESPN's fused
        "<ABBR><Full Name>" cells.
        """
        team_abbrvs = pd.read_csv("team_names.csv", usecols=["prefix_1"])
        self.team_names = self._abbreviation_pattern(team_abbrvs["prefix_1"])

    @staticmethod
    def _abbreviation_pattern(abbreviations):
        """Turn a Series of abbreviations into an upper-cased regex alternation.

        Missing values are dropped. Longer abbreviations are placed first because
        regex alternation takes the first alternative that matches: with "SA"
        ahead of "SAC", the cell "SACSacramento Kings" would be split as
        "SA" + "CSacramento Kings".
        """
        upper = abbreviations.dropna().astype(str).str.upper()
        # sorted() is stable, so abbreviations of equal length keep their CSV order.
        return "|".join(sorted(upper, key=len, reverse=True))

    def build_table(self, url):
        """Scrape ``url`` and return the team column joined with the stats columns.

        The page is expected to yield at least two tables: index 0 holding the
        team cells (single unnamed column ``0``, renamed to ``"Team"``) and
        index 1 holding the statistics. They are joined on the row index.
        """
        dfs = pd.read_html(url)
        stats = dfs[1]
        teams = dfs[0].rename(columns={0: "Team"})

        return teams.join(stats)

    def gen_key(self, year, season_type):
        """Return ``"<year>_<season_type>"``; ``year`` may be a str or an int."""
        return f"{year}_{season_type}"

    def build_url(self, root, year_n, tail, preseason = False):
        """Return ``root + "/" + year_n + tail``.

        ``preseason`` is kept only for backward compatibility and has no effect;
        callers select pre-season through ``root`` (see ``save_standings``).
        """
        return f"{root}/{year_n}{tail}"

    def _download(self, url, filepath, complete_league):
        """Scrape ``url``, clean the table and write it to ``filepath`` as CSV."""
        table = self.build_table(url)
        self.preprocess(table, complete_league)

        # to_csv does not create missing directories, so make sure the target exists.
        folder = dirname(filepath)
        if folder:
            makedirs(folder, exist_ok=True)
        table.to_csv(filepath, index = False)

    def update_team_standings(self):
        """Re-download this year's regular- and pre-season standings.

        Existing CSV files for the current calendar year are overwritten.

        NOTE(review): files are labelled with the calendar year of the local
        clock, while the unparameterised ESPN URL presumably serves whichever
        season is current - confirm the label is right when a season spans
        two calendar years.
        """
        this_year = strftime("%Y", localtime())

        targets = (
            (self._REG_LABEL, self._REG_DIR, self._ROOT + self._TAIL),
            (self._PRE_LABEL, self._PRE_DIR, self._ROOT + "/seasontype/pre" + self._TAIL),
        )
        for label, folder, url in targets:
            filepath = folder + self.gen_key(this_year, label) + ".csv"
            self._download(url, filepath, complete_league=False)

        return

    def preprocess(self, df, complete_league = True):
        """Split the fused ``Team`` column into ``ID`` and ``Team`` in place.

        Args:
            df: table from ``build_table``; must have a ``"Team"`` column and no
                ``"ID"`` column yet (``DataFrame.insert`` raises ValueError on a
                duplicate column).
            complete_league: when True, first strip a leading "<marker> --"
                prefix from each team cell. Cells without such a prefix are
                left as they are.

        Returns:
            The same ``df`` object, with ``ID`` inserted as the first column and
            ``Team`` reduced to the text following the abbreviation. Cells in
            which no known abbreviation is found become NaN in both columns.
        """
        if complete_league:
            stripped = df["Team"].str.extract(r'(^.+--)(.+)', expand = True)[1]
            # extract() gives NaN where the prefix is absent; keep the original text there.
            df["Team"] = stripped.fillna(df["Team"])

        processed = df["Team"].str.extract(fr'({self.team_names})(.+)', expand=True)

        df.insert(0, "ID", processed[0])
        df["Team"] = processed[1]

        return df

    def save_standings(self, filepath, year, preseason = False):
        """Download one season's standings to ``filepath`` unless already saved.

        Args:
            filepath: destination CSV. If it already exists nothing is fetched,
                so each past season is downloaded only once.
            year: season year inserted into the ESPN URL.
            preseason: fetch the pre-season table instead of the regular season.
        """
        if exists(filepath):
            return

        if preseason:
            season_root = self._ROOT + "/seasontype/pre/season"
        else:
            season_root = self._ROOT + "/season"

        season = self.build_url(season_root, year, self._TAIL)
        # Only regular-season tables get the "<marker> --" prefix stripped,
        # matching the original call pattern.
        self._download(season, filepath, complete_league=not preseason)

        return

    def get_team_standings(self, last_n_years, include_preseason = False):
        """Save standings for the ``last_n_years`` years before the current one.

        Args:
            last_n_years: how many years to go back, starting from last year.
            include_preseason: also save each year's pre-season standings.
        """
        current_year = int(strftime("%Y", localtime()))

        for y in range(current_year, current_year - last_n_years, -1):
            season = str(y - 1)

            filepath = self._REG_DIR + self.gen_key(season, self._REG_LABEL) + ".csv"
            self.save_standings(filepath, year = season)

            if include_preseason:
                filepath = self._PRE_DIR + self.gen_key(season, self._PRE_LABEL) + ".csv"
                self.save_standings(filepath, year = season, preseason = True)

        return

In [90]:
# Create the scraper; the constructor reads team_names.csv from the working directory.
standings_scraper = Team_standings()

In [91]:
# Request standings going back one year from the current one, pre-season included.
standings_scraper.get_team_standings(1, True)

In [101]:
# Re-scrape this year's regular- and pre-season standings and write them to
# ./team_standings/ (files for the current year are overwritten).
standings_scraper.update_team_standings()

In [98]:
# Load a saved regular-season standings file to inspect the result.
# NOTE: the year in the file name is hardcoded, while update_team_standings names
# its files after the current calendar year - adjust when running in another year.
# NOTE: this rebinds `df`, which earlier held the list returned by pd.read_html.
df = pd.read_csv("team_standings/reg_season/2022_regular_season.csv")

In [99]:
# Display the loaded standings (ID and Team should be cleanly separated columns).
df

Unnamed: 0,ID,Team,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,GS,Golden State Warriors,29,8,0.784,-,17-3,12-5,7-1,16-5,110.5,101.4,9.1,L1,7-3
1,PHX,Phoenix Suns,29,8,0.784,-,16-4,13-4,4-4,19-7,112.6,104.9,7.7,W2,7-3
2,UTAH,Utah Jazz,28,10,0.737,1.5,14-7,14-3,9-0,18-4,116.0,106.2,9.8,W2,8-2
3,CHI,Chicago Bulls,25,10,0.714,3,13-4,12-6,4-2,16-7,110.7,107.0,3.7,W8,8-2
4,BKN,Brooklyn Nets,24,12,0.667,4.5,10-9,14-3,6-1,18-7,110.8,107.8,3.0,W1,6-4
5,MEM,Memphis Grizzlies,25,14,0.641,5,13-8,12-6,4-2,19-9,111.3,108.1,3.2,W6,7-3
6,MIL,Milwaukee Bucks,25,15,0.625,5.5,13-7,12-8,6-2,17-11,112.1,108.0,4.1,L2,6-4
7,MIA,Miami Heat,24,15,0.615,6,12-4,12-11,6-1,16-7,108.1,104.3,3.8,W1,7-3
8,PHI,Philadelphia 76ers,21,16,0.568,8,7-8,14-8,3-6,14-10,107.1,106.1,1.0,W5,6-4
9,CLE,Cleveland Cavaliers,21,17,0.553,8.5,11-9,10-8,4-1,14-9,107.7,102.6,5.1,L1,6-4
