In [2]:
from time import strftime,localtime
import pandas as pd
from os.path import exists

In [269]:
class Web_scraper:
    """Groups the two ESPN NBA scrapers used by this notebook.

    Attributes:
        teams: ``Teams`` helper that builds standings URLs and parses the page.
        players: ``Players`` helper that builds per-team stats URLs and parses the page.

    Constructing either helper reads the ``prefix_1`` column of
    ``team_names.csv`` from the current working directory.
    """

    def __init__(self):
        self.teams = self.Teams()
        self.players = self.Players()

    @staticmethod
    def _alternation(prefixes):
        """Join team prefixes into a regex alternation, longest prefix first.

        Regex alternation is ordered, so a short prefix listed before a longer
        one that starts with it (``SA`` before ``SAC``) would win and split the
        text in the wrong place. Sorting by length removes that dependency on
        the order of rows in the CSV.
        """
        return "|".join(sorted(prefixes, key=len, reverse=True))

    @staticmethod
    def _team_pattern(csv_path="team_names.csv"):
        """Read the ``prefix_1`` column of ``csv_path`` and return the upper-cased
        prefixes as a regex alternation (see ``_alternation``)."""
        team_abbrvs = pd.read_csv(csv_path, usecols=["prefix_1"])
        return Web_scraper._alternation(team_abbrvs["prefix_1"].str.upper())

    class Teams:
        """Builds ESPN standings URLs and turns the scraped tables into one frame."""

        def __init__(self):
            self.root = "https://www.espn.com/nba/standings/_"
            self.tail = "/group/league"
            # Alternation of upper-cased team prefixes, used by preprocess().
            self.team_names = Web_scraper._team_pattern()

        def build_url(self, year_n, current=True, preseason=False):
            """Return the standings URL for the requested season.

            Args:
                year_n: Season year placed in the URL; only used when
                    ``current`` is False. Strings and ints are both accepted.
                current: When True the URL carries no ``/season/<year>`` segment.
                preseason: When True the ``/seasontype/pre`` segment is added.
            """
            parts = [self.root]
            if preseason:
                parts.append("/seasontype/pre")
            if not current:
                parts.append("/season/" + str(year_n))
            parts.append(self.tail)
            return "".join(parts)

        def correct_teams(self, df):
            """Move the header of a one-column team table back into the data.

            The single column label of ``df`` becomes row 0 of a column named
            ``Team`` and the existing rows follow it with a fresh 0..n index.
            """
            first = df.columns[0]
            row_0 = pd.DataFrame([first], columns=["Team"])
            renamed = df.rename(columns={first: "Team"})
            return pd.concat([row_0, renamed], ignore_index=True)

        def preprocess(self, df, complete_league=True):
            """Split the combined ``Team`` text into an ``ID`` and a ``Team`` column.

            Args:
                df: Frame with a ``Team`` column; it is modified in place.
                complete_league: When True, everything up to and including the
                    last ``--`` in each value is removed first. Values without
                    ``--`` are kept unchanged instead of being blanked.

            Returns:
                The same ``df`` with ``ID`` inserted as the first column and
                ``Team`` reduced to the text that follows the prefix. Rows in
                which no known prefix is found get missing values in both.
            """
            if complete_league:
                stripped = df["Team"].str.extract(r'(.+--)(.+)', expand=True)[1]
                # Fall back to the raw text for rows that carry no "--" marker.
                df["Team"] = stripped.fillna(df["Team"])

            processed = df["Team"].str.extract(fr'({self.team_names})(.+)', expand=True)

            df.insert(0, "ID", processed[0])
            df["Team"] = processed[1]

            return df

        def build_table(self, url, complete_league=True):
            """Scrape ``url`` and return the standings as one preprocessed frame.

            The first matched table holds the team names and the second the
            statistics; they are joined on the row index.
            """
            dfs = pd.read_html(url, match=".+ | \n")
            stats = dfs[1]
            teams = self.correct_teams(dfs[0])  # Needed to correct empty table header on espn site
            table = teams.join(stats)

            return self.preprocess(table, complete_league)

    class Players:
        """Builds ESPN per-team player-stats URLs and parses the resulting tables."""

        def __init__(self):
            self.REG = "2"
            self.POST = "3"
            self.root = "https://www.espn.com/nba/team/stats/_/name"  # [teamname -prefix_1]
            self.mid = "/season"  # [year] YYYY
            self.tail = "/seasontype"  # [2|3] 2 = regular season, 3 = postseason
            self.team_names = Web_scraper._team_pattern()

        def build_url(self, year, team, postseason=False):
            """Return the stats URL for ``team`` in ``year``.

            Args:
                year: Season year (string or int).
                team: Team prefix as used in the URL path.
                postseason: Selects season type ``POST`` instead of ``REG``.
            """
            season_type = self.POST if postseason else self.REG
            return f"{self.root}/{team}{self.mid}/{year}{self.tail}/{season_type}"

        def preprocess(self, df, shooting=True):
            """Split the ``Name`` column into player name and position.

            Args:
                df: Frame with a ``Name`` column; it is modified in place.
                shooting: When True a ``POS`` column is inserted at position 1.

            Returns:
                The same ``df`` with ``Name`` replaced by the captured name.

            NOTE(review): the last row is dropped from the extraction result,
            not from ``df``, so that row stays in ``df`` with a missing name
            (and position). The captured name also keeps the space that
            precedes the position code. Both are left as they were; confirm
            whether the last row should be removed from ``df`` instead.
            """
            processed = df["Name"].str.extract(r'(.+ | Total)([A-Z]+\**)', expand=True)

            # Assumes a default 0..n-1 index, so this label is the last row.
            processed = processed.drop([len(processed) - 1])

            if shooting:
                df.insert(1, "POS", processed[1])

            df["Name"] = processed[0]

            return df

        def build_table(self, url, complete_league=True):
            """Scrape ``url`` and return shooting and per-game stats in one frame.

            Tables 0+1 and 2+3 are joined on the row index, preprocessed, and
            then joined on ``Name``. ``complete_league`` is accepted for
            signature parity with ``Teams.build_table`` and is not used.
            """
            dfs = pd.read_html(url, match=".+ | \n")

            players = dfs[0].join(dfs[1])
            shooting = dfs[2].join(dfs[3])

            # POS is added to the shooting frame only, so the join has no
            # overlapping column besides the key.
            players = self.preprocess(players, False)
            shooting = self.preprocess(shooting)

            return shooting.join(players.set_index("Name"), on="Name")

In [None]:
class Player_stats:
    """Collects per-team player statistics through a ``Players`` scraper.

    Attributes:
        players: Object exposing ``build_url(year, team, postseason)`` and
            ``build_table(url)`` (e.g. ``Web_scraper().players``).
        teams: Team prefixes read from the ``prefix_1`` column of
            ``team_names.csv``, in file order.
    """

    def __init__(self, players):
        self.players = players
        # list(DataFrame) would yield the column labels, so select the column
        # first to get the actual prefixes.
        team_abbrvs = pd.read_csv("team_names.csv", usecols=["prefix_1"])
        self.teams = list(team_abbrvs["prefix_1"])

    def get_player_stats(self, last_n_years, postseason=False):
        """Scrape player stats for every team over the most recent years.

        NOTE(review): the loop body was missing in the notebook; returning the
        tables keyed by ``(year, team)`` is an assumption about the intended
        result. Confirm before relying on it. The prefixes are passed to the
        URL builder exactly as stored in the CSV.

        Args:
            last_n_years: Number of years to cover, counting back from the
                current calendar year (inclusive).
            postseason: Forwarded to ``players.build_url``.

        Returns:
            Dict mapping ``(year, team)`` to whatever ``players.build_table``
            returns for that page. Empty when ``last_n_years`` is 0 or less.
        """
        year = int(strftime("%Y", localtime()))

        tables = {}
        for y in range(year, year - last_n_years, -1):
            for team in self.teams:
                url = self.players.build_url(str(y), team, postseason)
                tables[(y, team)] = self.players.build_table(url)

        return tables

In [270]:
class Team_standings:
    """Saves league standings scraped through a ``Teams`` helper as CSV files.

    Attributes:
        REG, PRE: Indexes into ``labels`` for regular season and pre-season.
        labels: File-name suffixes, indexed by ``REG`` / ``PRE``.
        teams: Object exposing ``build_url(year, current, preseason)`` and
            ``build_table(url, complete_league)`` (e.g. ``Web_scraper().teams``).
        reg_fp, pre_fp: Directory prefixes (with trailing slash) for the
            regular-season and pre-season files.
    """

    def __init__(self, teams):
        self.REG = 0
        self.PRE = 1
        self.labels = ("regular_season", "pre_season")
        self.teams = teams
        self.reg_fp = "./team_standings/reg_season/"
        self.pre_fp = "./team_standings/pre_season/"

    def gen_key(self, year, season_type):
        """Return ``"<year>_<season_type>"``; ``year`` may be a string or an int."""
        return f"{year}_{season_type}"

    def _season_path(self, year, preseason):
        """Return the CSV path for ``year`` in the regular- or pre-season folder."""
        folder = self.pre_fp if preseason else self.reg_fp
        label = self.labels[self.PRE if preseason else self.REG]
        return folder + self.gen_key(year, label) + ".csv"

    @staticmethod
    def _write_csv(table, filepath):
        """Write ``table`` to ``filepath`` without the index, creating the folder if needed."""
        from os import makedirs
        from os.path import dirname

        folder = dirname(filepath)
        if folder:
            # to_csv does not create missing directories.
            makedirs(folder, exist_ok=True)
        table.to_csv(filepath, index=False)

    def save_standings(self, filepath, year, preseason=False):
        """Scrape one past season and save it, unless ``filepath`` already exists.

        Args:
            filepath: Destination CSV path.
            year: Season year used in the URL.
            preseason: Selects the pre-season page. Pre-season tables are
                parsed with ``complete_league=False``, regular-season tables
                with ``complete_league=True``.
        """
        if exists(filepath):
            return

        # current=False: the URL must name the season explicitly.
        season = self.teams.build_url(str(year), False, preseason)
        table = self.teams.build_table(season, not preseason)
        self._write_csv(table, filepath)

    def update_team_standings(self):
        """Re-scrape the current calendar year's regular- and pre-season pages.

        Existing files are overwritten. Both tables are parsed with
        ``complete_league=False``.
        """
        this_year = strftime("%Y", localtime())

        for preseason in (False, True):
            url = self.teams.build_url(this_year, preseason=preseason)
            table = self.teams.build_table(url, False)
            self._write_csv(table, self._season_path(this_year, preseason))

    def get_team_standings(self, last_n_years):
        """Save standings for the ``last_n_years`` years before the current one.

        For each year, a regular-season and a pre-season file are written via
        ``save_standings``; files that already exist are left alone.
        """
        year = int(strftime("%Y", localtime()))

        for y in range(year, year - last_n_years, -1):
            season = str(y - 1)
            for preseason in (False, True):
                self.save_standings(self._season_path(season, preseason), season, preseason)

In [271]:
# Build the scraper bundle (reads team_names.csv from the working directory)
# and hand its standings helper to Team_standings.
wbs = Web_scraper()
obj = Team_standings(wbs.teams)

In [272]:
# Save standings for the five years before the current one (existing files are
# skipped), then rewrite the current year's regular- and pre-season files.
obj.get_team_standings(5)
obj.update_team_standings()

In [273]:
# Load one saved regular-season standings file for inspection.
df = pd.read_csv("./team_standings/reg_season/2022_regular_season.csv")

In [274]:
# Display the loaded standings.
df

Unnamed: 0,ID,Team,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,PHX,Phoenix Suns,30,8,0.789,-,17-4,13-4,5-4,20-7,112.4,104.5,7.9,W3,7-3
1,GS,Golden State Warriors,29,9,0.763,1,17-3,12-6,7-1,16-6,110.2,101.4,8.8,L2,6-4
2,UTAH,Utah Jazz,28,10,0.737,2,14-7,14-3,9-0,18-4,116.0,106.2,9.8,W2,8-2
3,CHI,Chicago Bulls,25,10,0.714,3.5,13-4,12-6,4-2,16-7,110.7,107.0,3.7,W8,8-2
4,BKN,Brooklyn Nets,24,12,0.667,5,10-9,14-3,6-1,18-7,110.8,107.8,3.0,W1,6-4
5,MEM,Memphis Grizzlies,26,14,0.65,5,14-8,12-6,4-2,19-9,111.4,107.6,3.8,W7,7-3
6,MIL,Milwaukee Bucks,25,15,0.625,6,13-7,12-8,6-2,17-11,112.1,108.0,4.1,L2,6-4
7,MIA,Miami Heat,24,15,0.615,6.5,12-4,12-11,6-1,16-7,108.1,104.3,3.8,W1,7-3
8,PHI,Philadelphia 76ers,21,16,0.568,8.5,7-8,14-8,3-6,14-10,107.1,106.1,1.0,W5,6-4
9,CLE,Cleveland Cavaliers,21,17,0.553,9,11-9,10-8,4-1,14-9,107.7,102.6,5.1,L1,6-4


In [261]:
# Exploration for the Players scraper: read every table on one team's stats
# page. The cells below use stats[0] through stats[3].
stats = pd.read_html("https://www.espn.com/nba/team/stats/_/name/utah/season/2021/seasontype/2")

In [319]:
# Join tables 0 and 1 side by side on the row index (names + per-game stats).
players = stats[0].join(stats[1])

In [320]:
# Drop the last row by index label (presumably the team "Total" row — confirm).
# NOTE: `players` is reassigned from itself, so re-running this cell removes
# one more row each time.
players = players.drop([len(players)-1])

In [321]:
# Display the per-game stats; Name still carries the position suffix here.
players

Unnamed: 0,Name,GP,GS,MIN,PTS,OR,DR,REB,AST,STL,BLK,TO,PF,AST/TO,PER
0,Donovan Mitchell SG,53,53.0,33.4,26.4,0.9,3.5,4.4,5.2,1.0,0.3,2.8,2.2,1.9,21.39
1,Jordan Clarkson PG,68,1.0,26.7,18.4,0.7,3.3,4.0,2.5,0.9,0.1,1.7,1.6,1.5,17.2
2,Bojan Bogdanovic SF,72,72.0,30.8,17.0,0.6,3.3,3.9,1.9,0.6,0.1,1.8,1.6,1.1,14.03
3,Mike Conley PG,51,51.0,29.4,16.2,0.7,2.8,3.5,6.0,1.4,0.2,1.9,1.9,3.1,19.26
4,Rudy Gobert C,71,71.0,30.8,14.3,3.4,10.1,13.5,1.3,0.6,2.7,1.7,2.3,0.8,23.54
5,Joe Ingles SG,67,30.0,27.9,12.1,0.4,3.2,3.6,4.7,0.7,0.2,1.7,1.8,2.7,15.92
6,Royce O'Neale PF,71,71.0,31.5,7.0,1.2,5.6,6.8,2.5,0.8,0.5,1.2,2.6,2.2,9.88
7,Georges Niang SF,72,10.0,16.0,6.9,0.4,2.1,2.4,0.8,0.3,0.1,0.7,1.6,1.2,11.4
8,Derrick Favors C,68,0.0,15.3,5.4,2.0,3.5,5.5,0.6,0.5,1.0,0.5,2.1,1.2,19.69
9,Ersan Ilyasova PF,17,1.0,8.7,3.8,0.4,1.3,1.7,0.2,0.6,0.2,0.5,1.7,0.5,12.1


In [322]:
# Join tables 2 and 3 side by side on the row index (names + shooting stats).
shooting = stats[2].join(stats[3])

In [323]:
# Drop the last row by index label (presumably the team "Total" row — confirm).
# NOTE: `shooting` is reassigned from itself, so re-running this cell removes
# one more row each time.
shooting = shooting.drop([len(shooting)-1])

In [324]:
# Display the shooting stats; Name still carries the position suffix here.
shooting

Unnamed: 0,Name,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,2PM,2PA,2P%,SC-EFF,SH-EFF
0,Donovan Mitchell SG,9.0,20.6,43.8,3.4,8.7,38.6,5.0,6.0,84.5,5.7,11.9,47.6,1.284,0.52
1,Jordan Clarkson PG,6.7,15.8,42.5,3.1,8.8,34.7,1.9,2.1,89.6,3.7,7.0,52.3,1.164,0.52
2,Bojan Bogdanovic SF,5.6,12.8,43.9,2.5,6.4,39.0,3.2,3.7,87.9,3.1,6.4,48.7,1.324,0.54
3,Mike Conley PG,5.6,12.5,44.4,2.7,6.6,41.2,2.4,2.8,85.2,2.9,6.0,47.9,1.292,0.55
4,Rudy Gobert C,5.5,8.2,67.5,0.0,0.1,0.0,3.3,5.3,62.3,5.5,8.1,68.0,1.753,0.68
5,Joe Ingles SG,4.1,8.4,48.9,2.7,6.1,45.1,1.1,1.3,84.4,1.4,2.3,59.0,1.44,0.65
6,Royce O'Neale PF,2.5,5.5,44.4,1.5,3.9,38.5,0.5,0.6,84.8,1.0,1.6,58.6,1.259,0.58
7,Georges Niang SF,2.5,5.6,43.7,1.7,4.1,42.5,0.3,0.3,95.7,0.7,1.6,46.9,1.235,0.59
8,Derrick Favors C,2.3,3.6,63.8,0.0,0.0,0.0,0.9,1.2,73.8,2.3,3.5,64.3,1.519,0.64
9,Ersan Ilyasova PF,1.2,3.2,38.9,1.1,2.4,43.9,0.3,0.3,100.0,0.2,0.8,23.1,1.204,0.56


In [325]:
# Split "First Last POS" into two columns: column 0 is the greedy `.+ ` part
# (the name, including the space before the position), column 1 is the run of
# capitals plus any trailing asterisks (the position code).
processed = players["Name"].str.extract(r'(.+ | Total)([A-Z]+\**)'
                                         ,expand = True)

In [326]:
# Inspect the name / position split.
processed

Unnamed: 0,0,1
0,Donovan Mitchell,SG
1,Jordan Clarkson,PG
2,Bojan Bogdanovic,SF
3,Mike Conley,PG
4,Rudy Gobert,C
5,Joe Ingles,SG
6,Royce O'Neale,PF
7,Georges Niang,SF
8,Derrick Favors,C
9,Ersan Ilyasova,PF


In [327]:
# POS is deliberately not inserted here: it is added to `shooting` below, and
# adding it to both frames would give them a second shared column at the join.
# players.insert(1,"POS",processed[1])
# NOTE: overwrites Name in place, so re-running the extract cell above after
# this one no longer finds a position suffix.
players["Name"] = processed[0]

In [328]:
# Display the per-game stats with the position removed from Name.
players

Unnamed: 0,Name,GP,GS,MIN,PTS,OR,DR,REB,AST,STL,BLK,TO,PF,AST/TO,PER
0,Donovan Mitchell,53,53.0,33.4,26.4,0.9,3.5,4.4,5.2,1.0,0.3,2.8,2.2,1.9,21.39
1,Jordan Clarkson,68,1.0,26.7,18.4,0.7,3.3,4.0,2.5,0.9,0.1,1.7,1.6,1.5,17.2
2,Bojan Bogdanovic,72,72.0,30.8,17.0,0.6,3.3,3.9,1.9,0.6,0.1,1.8,1.6,1.1,14.03
3,Mike Conley,51,51.0,29.4,16.2,0.7,2.8,3.5,6.0,1.4,0.2,1.9,1.9,3.1,19.26
4,Rudy Gobert,71,71.0,30.8,14.3,3.4,10.1,13.5,1.3,0.6,2.7,1.7,2.3,0.8,23.54
5,Joe Ingles,67,30.0,27.9,12.1,0.4,3.2,3.6,4.7,0.7,0.2,1.7,1.8,2.7,15.92
6,Royce O'Neale,71,71.0,31.5,7.0,1.2,5.6,6.8,2.5,0.8,0.5,1.2,2.6,2.2,9.88
7,Georges Niang,72,10.0,16.0,6.9,0.4,2.1,2.4,0.8,0.3,0.1,0.7,1.6,1.2,11.4
8,Derrick Favors,68,0.0,15.3,5.4,2.0,3.5,5.5,0.6,0.5,1.0,0.5,2.1,1.2,19.69
9,Ersan Ilyasova,17,1.0,8.7,3.8,0.4,1.3,1.7,0.2,0.6,0.2,0.5,1.7,0.5,12.1


In [329]:
# Same name / position split for the shooting table; here the position is kept
# as its own POS column right after Name.
# NOTE: mutates `shooting` in place; running this cell twice fails because the
# POS column already exists.
processed = shooting["Name"].str.extract(r'(.+ | Total)([A-Z]+\**)'
                                         ,expand = True)
shooting["Name"] = processed[0]
shooting.insert(1,"POS", processed[1])

In [330]:
# Display the shooting stats with separate Name and POS columns.
shooting

Unnamed: 0,Name,POS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,2PM,2PA,2P%,SC-EFF,SH-EFF
0,Donovan Mitchell,SG,9.0,20.6,43.8,3.4,8.7,38.6,5.0,6.0,84.5,5.7,11.9,47.6,1.284,0.52
1,Jordan Clarkson,PG,6.7,15.8,42.5,3.1,8.8,34.7,1.9,2.1,89.6,3.7,7.0,52.3,1.164,0.52
2,Bojan Bogdanovic,SF,5.6,12.8,43.9,2.5,6.4,39.0,3.2,3.7,87.9,3.1,6.4,48.7,1.324,0.54
3,Mike Conley,PG,5.6,12.5,44.4,2.7,6.6,41.2,2.4,2.8,85.2,2.9,6.0,47.9,1.292,0.55
4,Rudy Gobert,C,5.5,8.2,67.5,0.0,0.1,0.0,3.3,5.3,62.3,5.5,8.1,68.0,1.753,0.68
5,Joe Ingles,SG,4.1,8.4,48.9,2.7,6.1,45.1,1.1,1.3,84.4,1.4,2.3,59.0,1.44,0.65
6,Royce O'Neale,PF,2.5,5.5,44.4,1.5,3.9,38.5,0.5,0.6,84.8,1.0,1.6,58.6,1.259,0.58
7,Georges Niang,SF,2.5,5.6,43.7,1.7,4.1,42.5,0.3,0.3,95.7,0.7,1.6,46.9,1.235,0.59
8,Derrick Favors,C,2.3,3.6,63.8,0.0,0.0,0.0,0.9,1.2,73.8,2.3,3.5,64.3,1.519,0.64
9,Ersan Ilyasova,PF,1.2,3.2,38.9,1.1,2.4,43.9,0.3,0.3,100.0,0.2,0.8,23.1,1.204,0.56


In [331]:
# Left-join the per-game stats onto the shooting rows, matching on player name.
s = shooting.join(players.set_index("Name"),on = "Name")

In [332]:
# Display the combined per-player table.
s

Unnamed: 0,Name,POS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,...,OR,DR,REB,AST,STL,BLK,TO,PF,AST/TO,PER
0,Donovan Mitchell,SG,9.0,20.6,43.8,3.4,8.7,38.6,5.0,6.0,...,0.9,3.5,4.4,5.2,1.0,0.3,2.8,2.2,1.9,21.39
1,Jordan Clarkson,PG,6.7,15.8,42.5,3.1,8.8,34.7,1.9,2.1,...,0.7,3.3,4.0,2.5,0.9,0.1,1.7,1.6,1.5,17.2
2,Bojan Bogdanovic,SF,5.6,12.8,43.9,2.5,6.4,39.0,3.2,3.7,...,0.6,3.3,3.9,1.9,0.6,0.1,1.8,1.6,1.1,14.03
3,Mike Conley,PG,5.6,12.5,44.4,2.7,6.6,41.2,2.4,2.8,...,0.7,2.8,3.5,6.0,1.4,0.2,1.9,1.9,3.1,19.26
4,Rudy Gobert,C,5.5,8.2,67.5,0.0,0.1,0.0,3.3,5.3,...,3.4,10.1,13.5,1.3,0.6,2.7,1.7,2.3,0.8,23.54
5,Joe Ingles,SG,4.1,8.4,48.9,2.7,6.1,45.1,1.1,1.3,...,0.4,3.2,3.6,4.7,0.7,0.2,1.7,1.8,2.7,15.92
6,Royce O'Neale,PF,2.5,5.5,44.4,1.5,3.9,38.5,0.5,0.6,...,1.2,5.6,6.8,2.5,0.8,0.5,1.2,2.6,2.2,9.88
7,Georges Niang,SF,2.5,5.6,43.7,1.7,4.1,42.5,0.3,0.3,...,0.4,2.1,2.4,0.8,0.3,0.1,0.7,1.6,1.2,11.4
8,Derrick Favors,C,2.3,3.6,63.8,0.0,0.0,0.0,0.9,1.2,...,2.0,3.5,5.5,0.6,0.5,1.0,0.5,2.1,1.2,19.69
9,Ersan Ilyasova,PF,1.2,3.2,38.9,1.1,2.4,43.9,0.3,0.3,...,0.4,1.3,1.7,0.2,0.6,0.2,0.5,1.7,0.5,12.1
