In [1]:
import os
from os.path import exists
from time import strftime,localtime

import pandas as pd

In [4]:
class Team_standings:
    """Download NBA league standings from ESPN and cache them as CSV files.

    Regular-season files are written under ``./team_standings/reg_season/``
    and preseason files under ``./team_standings/pre_season/``. Each file is
    named ``<year>_<label>.csv`` where ``<label>`` is an entry of ``labels``.

    Attributes:
        REG, PRE: indices into ``labels`` (regular season / preseason).
        labels: season-type names used when building file names.
        team_names: regex alternation (``"A|B|C"``) of upper-cased team
            abbreviations read from the ``prefix_1`` column of
            ``team_names.csv``; used by ``preprocess`` to split the scraped
            ``Team`` cell into an ID and a name.
    """

    # Indices into ``labels``; kept as attributes so ``instance.REG`` /
    # ``instance.PRE`` / ``instance.labels`` keep working for existing callers.
    REG = 0
    PRE = 1
    labels = ("regular_season", "pre_season")

    # Shared pieces of the ESPN standings URL and the output folders
    # (indexed by REG / PRE).
    _ROOT = "https://www.espn.com/nba/standings/_"
    _TAIL = "/group/league"
    _OUTPUT_DIRS = ("./team_standings/reg_season/", "./team_standings/pre_season/")

    def __init__(self):
        """Load the team abbreviations from ``team_names.csv`` (current directory)."""
        team_abbrvs = pd.read_csv("team_names.csv", usecols=["prefix_1"])
        self.team_names = self._build_prefix_pattern(team_abbrvs["prefix_1"])

    @staticmethod
    def _build_prefix_pattern(abbreviations):
        """Join abbreviations into an upper-cased regex alternation.

        Regex alternation is ordered, so a short abbreviation listed first
        would shadow a longer one that starts with it (e.g. ``SA`` vs ``SAC``).
        Longer abbreviations are therefore placed first; the sort is stable,
        so abbreviations of equal length keep their original order.
        Missing values are dropped.
        """
        upper = pd.Series(abbreviations).dropna().astype(str).str.upper()
        return "|".join(sorted(upper, key=len, reverse=True))

    def build_table(self, url):
        """Fetch ``url`` and combine its first two HTML tables into one frame.

        The first table is used as the team column (its column ``0`` is
        renamed to ``"Team"``) and the second as the statistics columns; the
        two are joined on their row index.

        Returns:
            pandas.DataFrame with ``Team`` followed by the statistics columns.
        """
        dfs = pd.read_html(url)
        stats = dfs[1]
        teams = dfs[0].rename(columns={0: "Team"})
        return teams.join(stats)

    def gen_key(self, year, season_type):
        """Return ``"<year>_<season_type>"``; both arguments must be strings."""
        return year + "_" + season_type

    def build_url(self, root, year_n, tail, preseason=False):
        """Return ``root + "/" + year_n + tail``.

        ``preseason`` is accepted for backward compatibility only: the
        original implementation produced the same URL for both values, and
        callers encode the season type in ``root`` instead.
        """
        return root + "/" + year_n + tail

    def _season_path(self, year, season_index):
        """Return the CSV path for ``year`` (str) and season index REG or PRE."""
        filename = self.gen_key(year, self.labels[season_index]) + ".csv"
        return self._OUTPUT_DIRS[season_index] + filename

    def _write_csv(self, table, filepath):
        """Write ``table`` to ``filepath`` without the index column.

        The parent directory is created first; ``to_csv`` raises ``OSError``
        when asked to write into a directory that does not exist.
        """
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)
        table.to_csv(filepath, index=False)

    def update_team_standings(self):
        """Re-download the current regular-season and preseason standings.

        Both files are keyed by the current calendar year and are always
        overwritten.

        NOTE(review): ``get_team_standings`` keys its files by the year that
        appears in ESPN's ``/season/<year>`` URL, while this method uses the
        calendar year for ESPN's default (current) page - confirm both refer
        to the same season.
        """
        this_year = strftime("%Y", localtime())

        reg_table = self.build_table(self._ROOT + self._TAIL)
        self.preprocess(reg_table, False)
        self._write_csv(reg_table, self._season_path(this_year, self.REG))

        pre_table = self.build_table(self._ROOT + "/seasontype/pre" + self._TAIL)
        self.preprocess(pre_table, False)
        self._write_csv(pre_table, self._season_path(this_year, self.PRE))

        return

    def preprocess(self, df, complete_league=True):
        """Split the scraped ``Team`` column into ``ID`` and ``Team`` in place.

        The scraped cell holds the abbreviation immediately followed by the
        full name (e.g. ``"GSGolden State Warriors"``). The first abbreviation
        from ``self.team_names`` found in the cell becomes ``ID`` and the text
        after it becomes ``Team``; rows with no known abbreviation get NaN in
        both columns.

        Args:
            df: frame with a string ``Team`` column and no ``ID`` column
                (``DataFrame.insert`` raises ``ValueError`` if ``ID`` exists).
            complete_league: when true, first drop everything up to and
                including the last ``--`` in the cell (presumably a
                clinch/elimination marker - confirm against the ESPN page).

        Returns:
            The same ``df`` object, with ``ID`` inserted as the first column.
        """
        if complete_league:
            # Strip the marker only where it is present. The previous
            # ``str.extract`` turned every row WITHOUT a marker into NaN,
            # silently dropping those teams. The lookahead keeps the result
            # identical to the old pattern for rows that do have a marker.
            df["Team"] = df["Team"].str.replace(r'^.+--(?=.)', '', regex=True)

        processed = df["Team"].str.extract(fr'({self.team_names})(.+)', expand=True)

        df.insert(0, "ID", processed[0])
        df["Team"] = processed[1]

        return df

    def save_standings(self, filepath, year, preseason=False):
        """Download one season's standings to ``filepath`` unless it exists.

        Args:
            filepath: destination CSV; if it already exists nothing is
                downloaded or written.
            year: season year as a string, inserted into the ESPN URL.
            preseason: fetch the preseason table instead of the regular
                season. Regular-season tables additionally get the ``--``
                marker stripped in ``preprocess``.
        """
        if exists(filepath):
            return

        if preseason:
            season_root = self._ROOT + "/seasontype/pre/season"
        else:
            season_root = self._ROOT + "/season"

        table = self.build_table(self.build_url(season_root, year, self._TAIL))
        self.preprocess(table, complete_league=not preseason)
        self._write_csv(table, filepath)

        return

    def get_team_standings(self, last_n_years):
        """Cache regular-season and preseason standings for past years.

        Covers the ``last_n_years`` years ending with last calendar year
        (``this_year - 1`` down to ``this_year - last_n_years``). Files that
        already exist are left untouched (see ``save_standings``).
        """
        this_year = int(strftime("%Y", localtime()))

        for y in range(this_year, this_year - last_n_years, -1):
            season = str(y - 1)
            self.save_standings(self._season_path(season, self.REG), year=season)
            self.save_standings(
                self._season_path(season, self.PRE), year=season, preseason=True
            )

        return

In [5]:
# Create the scraper; its constructor reads team abbreviations from team_names.csv.
standings_scraper = Team_standings()

In [6]:
# Cache regular-season and preseason standings for 5 past years; existing files are skipped.
standings_scraper.get_team_standings(5)

In [7]:
# Re-download and overwrite the current year's regular-season and preseason files.
standings_scraper.update_team_standings()

In [8]:
# Load one cached regular-season file to spot-check the scraper output.
df = pd.read_csv("team_standings/reg_season/2016_regular_season.csv")

In [9]:
# Display the loaded standings table.
df

Unnamed: 0,ID,Team,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,GS,Golden State Warriors,73,9,0.89,-,39-2,34-7,15-1,46-6,114.9,104.1,10.8,W4,8-2
1,SA,San Antonio Spurs,67,15,0.817,6,40-1,27-14,14-2,43-9,103.5,92.9,10.6,W2,6-4
2,CLE,Cleveland Cavaliers,57,25,0.695,16,33-8,24-17,8-8,35-17,104.3,98.3,6.0,L1,6-4
3,TOR,Toronto Raptors,56,26,0.683,17,32-9,24-17,14-2,39-13,102.7,98.2,4.5,W4,7-3
4,OKC,Oklahoma City Thunder,55,27,0.671,18,32-9,23-18,13-3,37-15,110.2,102.9,7.3,L1,5-5
5,LAC,LA Clippers,53,29,0.646,20,29-12,24-17,9-7,31-21,104.5,100.2,4.3,L1,8-2
6,MIA,Miami Heat,48,34,0.585,25,28-13,20-21,10-6,31-21,100.0,98.4,1.6,L1,6-4
7,ATL,Atlanta Hawks,48,34,0.585,25,27-14,21-20,8-8,29-23,102.8,99.2,3.6,L2,6-4
8,BOS,Boston Celtics,48,34,0.585,25,28-13,20-21,10-6,31-21,105.7,102.5,3.2,W1,6-4
9,CHA,Charlotte Hornets,48,34,0.585,25,30-11,18-23,8-8,33-19,103.4,100.7,2.7,W2,7-3
