In [1]:
import os
from os.path import exists
from time import strftime,localtime

import pandas as pd

In [2]:
# Fetch ESPN's 2021 player-stats page (sorted by average points, descending).
# pd.read_html returns a list of DataFrames, one per HTML table it finds on the
# page; the following cells index into that list.
df = pd.read_html("https://www.espn.com/nba/stats/player/_/season/2021/seasontype/2/table/offensive/sort/avgPoints/dir/desc")

In [3]:
# Stitch the first two parsed tables together column-wise (DataFrame.join aligns
# on the row index). Presumably table 0 holds rank/name and table 1 the stat
# columns -- verify against the page if its layout changes.
dfs = df[0].join(df[1])

In [4]:
# Split the fused "<player name><TEAM>" text into separate Name / Team columns.
# The team part is one or more upper-case codes, optionally joined by "/"
# (e.g. "BKN/HOU" for a player listed with several teams); matching only the
# last code would leave "BKN/" glued to the end of the player name.
# NOTE: this overwrites Name in place, so re-running the cell on already-split
# data yields NaN -- re-run the cells above first.
dfs[['Name','Team']] = dfs['Name'].str.extract(r'^(.*?)([A-Z]+(?:/[A-Z]+)*)$', expand=True)

In [5]:
# Preview the first rows to sanity-check the Name / Team split above.
dfs.head()

Unnamed: 0,RK,Name,POS,GP,MIN,PTS,FGM,FGA,FG%,3PM,...,FT%,REB,AST,STL,BLK,TO,DD2,TD3,PER,Team
0,1,Stephen Curry,PG,63,34.2,32.0,10.4,21.7,48.2,5.3,...,91.6,5.5,5.8,1.2,0.1,3.4,8,0,26.37,GS
1,2,Bradley Beal,SG,60,35.8,31.3,11.2,23.0,48.5,2.2,...,88.9,4.7,4.4,1.2,0.4,3.1,4,0,22.81,WSH
2,3,Damian Lillard,PG,67,35.8,28.8,9.0,19.9,45.1,4.1,...,92.8,4.2,7.5,0.9,0.3,3.0,16,0,25.65,POR
3,4,Joel Embiid,C,51,31.1,28.5,9.0,17.6,51.3,1.1,...,85.9,10.6,2.8,1.0,1.4,3.1,31,0,30.32,PHI
4,5,Giannis Antetokounmpo,PF,61,33.0,28.1,10.3,18.0,56.9,1.1,...,68.5,11.0,5.9,1.2,1.2,3.4,41,7,29.24,MIL


In [6]:
def get_team_stats(teams, last_n_years, include_postseason = False):
    """Unfinished stub: works out the current calendar year and returns None.

    None of the parameters (``teams``, ``last_n_years``,
    ``include_postseason``) is used yet.
    """
    # Local time's year as an int; nothing consumes it so far.
    this_year = localtime().tm_year

In [49]:
class Team_standings:
    """Scrape NBA league standings tables from ESPN and store them as CSV files.

    Regular-season files go to ``./team_standings/reg_season/`` and pre-season
    files to ``./team_standings/pre_season/``; each file is named
    ``<year>_<season label>.csv``.

    Attributes:
        team_names: upper-cased team abbreviations joined with ``"|"`` (read
            from the ``prefix_1`` column of ``team_names.csv``); used as a
            regex alternation by :meth:`preprocess`.
    """

    # URL pieces and output locations shared by every standings request.
    _ROOT = "https://www.espn.com/nba/standings/_"
    _TAIL = "/group/league"
    _LABELS = ("regular_season", "pre_season")
    _REG_DIR = "./team_standings/reg_season/"
    _PRE_DIR = "./team_standings/pre_season/"

    def __init__(self):
        """Load the team abbreviations from ``team_names.csv`` (working directory)."""
        team_abbrvs = pd.read_csv("team_names.csv", usecols=["prefix_1"])
        # Drop blank cells first: a NaN would make "|".join() raise TypeError.
        prefixes = team_abbrvs["prefix_1"].dropna().astype(str).str.upper()
        self.team_names = "|".join(prefixes)

    def build_table(self, url):
        """Download ``url`` and merge its first two HTML tables into one frame.

        The first table supplies the team column (its column ``0`` is renamed
        to ``"Team"``) and the second table supplies the statistics; the two
        are joined on the row index.

        Raises:
            IndexError: if the page contains fewer than two tables.
        """
        tables = pd.read_html(url)
        stats = tables[1]
        teams = tables[0].rename(columns={0: "Team"})

        return teams.join(stats)

    def gen_key(self, year, season_type):
        """Return the file-name stem ``"<year>_<season_type>"``.

        ``year`` may be a string or an int.
        """
        return f"{year}_{season_type}"

    def build_url(self, root, year_n, tail, preseason = False):
        """Return ``root + "/" + year_n + tail``.

        ``preseason`` is accepted for backward compatibility only: the caller
        already encodes the season type in ``root``, so both cases build the
        same URL.
        """
        return f"{root}/{year_n}{tail}"

    def _save_table(self, url, filepath):
        """Fetch the standings at ``url``, clean them and write them to ``filepath``."""
        table = self.build_table(url)
        self.preprocess(table)

        # to_csv does not create missing folders, so make sure the target exists.
        folder = os.path.dirname(filepath)
        if folder:
            os.makedirs(folder, exist_ok=True)
        table.to_csv(filepath, index = False)

    def update_team_standings(self):
        """Re-download the current regular-season and pre-season standings.

        Unlike :meth:`save_standings`, existing files are overwritten. The
        files are keyed by the current calendar year.
        NOTE(review): confirm that calendar-year keys line up with the season
        numbers used by :meth:`get_team_standings`.
        """
        this_year = strftime("%Y", localtime())
        reg_label, pre_label = self._LABELS

        reg_path = self._REG_DIR + self.gen_key(this_year, reg_label) + ".csv"
        self._save_table(self._ROOT + self._TAIL, reg_path)

        pre_path = self._PRE_DIR + self.gen_key(this_year, pre_label) + ".csv"
        self._save_table(self._ROOT + "/seasontype/pre" + self._TAIL, pre_path)

        return

    def preprocess(self, df):
        """Split the scraped ``Team`` text into an ``ID`` column and a clean name.

        The scraped cell looks like ``"<marker> --<ABBR><Full Name>"`` where the
        ``"<marker> --"`` prefix is optional. The frame is modified in place
        (``ID`` is inserted as the first column) and also returned.

        Rows whose text starts with none of the known abbreviations get a NaN
        ``ID`` and keep their prefix-stripped text as ``Team``.

        Raises:
            ValueError: if ``df`` already has an ``ID`` column (e.g. when the
                same frame is preprocessed twice).
        """
        # The "--" prefix is optional: a mandatory group would turn every row
        # without such a marker into NaN.
        names = df["Team"].str.extract(r'^(?:.+--)?(.+)$', expand=False).str.strip()

        # Try longer abbreviations first so that e.g. "SAC" is not cut short by
        # "SA", and anchor the match at the start of the text.
        abbrvs = sorted(self.team_names.split("|"), key=len, reverse=True)
        pattern = "|".join(abbrvs)
        processed = names.str.extract(fr'^({pattern})(.+)$', expand=True)

        df.insert(0, "ID", processed[0])
        df["Team"] = processed[1].fillna(names)

        return df

    def save_standings(self, filepath, year, preseason = False):
        """Download one season's standings to ``filepath`` unless it already exists.

        Args:
            filepath: destination CSV path; an existing file is left untouched
                and nothing is downloaded.
            year: season number used in the ESPN URL (string or int).
            preseason: fetch the pre-season table instead of the regular season.
        """
        if exists(filepath):
            return

        season_segment = "/seasontype/pre/season" if preseason else "/season"
        url = self.build_url(self._ROOT + season_segment, year, self._TAIL)
        self._save_table(url, filepath)

        print(filepath + " saved")

        return

    def get_team_standings(self, last_n_years, include_preseason = False):
        """Save standings for the ``last_n_years`` seasons before the current year.

        Seasons ``year-1, year-2, ...`` are fetched, where ``year`` is the
        current calendar year. Pre-season tables are downloaded only when
        ``include_preseason`` is true. Files that already exist are skipped.
        """
        reg_label, pre_label = self._LABELS
        year = int(strftime("%Y", localtime()))

        for season in range(year - 1, year - 1 - last_n_years, -1):
            season = str(season)

            reg_path = self._REG_DIR + self.gen_key(season, reg_label) + ".csv"
            self.save_standings(reg_path, year = season)

            # Honour the flag; previously pre-season was always downloaded.
            if include_preseason:
                pre_path = self._PRE_DIR + self.gen_key(season, pre_label) + ".csv"
                self.save_standings(pre_path, year = season, preseason = True)

        return

In [50]:
# Create the scraper; its constructor reads team_names.csv from the working directory.
standings_scraper = Team_standings()

In [53]:
# Save regular-season and pre-season standings for the two most recent past
# seasons; files that already exist on disk are skipped.
standings_scraper.get_team_standings(2, True)

In [47]:
# Load one of the saved regular-season files back to inspect the result.
# Note: this rebinds `df`, which earlier held the list returned by pd.read_html.
df = pd.read_csv("team_standings/reg_season/2021_regular_season.csv")

In [48]:
# Display the loaded standings table.
df

Unnamed: 0,ID,Team,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,UTAH,Utah Jazz,52,20,0.722,-,31-5,21-15,7-5,28-14,116.4,107.2,9.2,W2,7-3
1,PHX,Phoenix Suns,51,21,0.708,1,27-9,24-12,7-5,30-12,115.3,109.5,5.8,W3,7-3
2,PHI,Philadelphia 76ers,49,23,0.681,3,29-7,20-16,10-2,31-11,113.6,108.1,5.5,W2,8-2
3,BKN,Brooklyn Nets,48,24,0.667,4,28-8,20-16,8-4,26-16,118.6,114.1,4.5,W5,6-4
4,DEN,Denver Nuggets,47,25,0.653,5,25-11,22-14,9-3,26-16,115.1,110.1,5.0,L1,6-4
5,LAC,LA Clippers,47,25,0.653,5,26-10,21-15,9-3,27-15,114.0,107.8,6.2,L2,4-6
6,MIL,Milwaukee Bucks,46,26,0.639,6,26-10,20-16,11-1,30-12,120.1,114.2,5.9,L1,8-2
7,DAL,Dallas Mavericks,42,30,0.583,10,21-15,21-15,7-5,21-21,112.4,110.2,2.2,L1,8-2
8,LAL,Los Angeles Lakers,42,30,0.583,10,21-15,21-15,4-8,25-17,109.5,106.8,2.7,W5,6-4
9,POR,Portland Trail Blazers,42,30,0.583,10,20-16,22-14,6-6,23-19,116.1,114.3,1.8,W1,8-2
