In [21]:
## This script will grab the current Atlanta Braves team stats from the Baseball Reference page, and pull into a dataframe

from bs4 import BeautifulSoup as Soup
import requests
import pandas as pd

braves = requests.get('https://www.baseball-reference.com/teams/ATL/2022.shtml')

In [22]:
## Locating the main table and getting to row data 

braves_soup = Soup(braves.text)

tables = braves_soup.find_all('table')
rows = braves_soup.find_all('tr')

In [6]:
## There's lots of rows, but only some with roster names. This establishes which rows we need

first_row = rows[23]
last_row = rows[63]

In [33]:
## Define a function to parse through each row and convert to string data

def parse_row (row):

    return [str(x.string) for x in row.find_all('td')]

list_of_parsed_rows = [parse_row(row) for row in rows[23:63]]

In [34]:
## Adding function output to a dataframe

df = pd.DataFrame(list_of_parsed_rows)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,C,Travis d'Arnaud,33,65,266,249,38,65,15,0,...,0.308,0.454,0.762,108,113,7,6,0,0,0
1,1B,,28,96,423,372,45,95,35,0,...,0.34,0.495,0.835,128,184,8,2,0,2,4
2,2B,,25,62,263,242,34,59,15,0,...,0.289,0.405,0.694,90,98,0,2,0,4,0
3,SS,Dansby Swanson,28,96,402,367,63,108,20,1,...,0.353,0.477,0.83,127,175,5,3,0,1,0
4,3B,Austin Riley,25,94,408,370,58,108,23,1,...,0.353,0.586,0.939,154,217,6,7,0,2,1


In [36]:
## Column headers are wrapped in header tags, so a new function is needed to grab those

def parse_row2 (row):

    return [str(x.string) for x in row.find_all('th')]

headers = parse_row2(rows[22])
headers_new = headers[1:]
headers_new

['Pos',
 'Name',
 'Age',
 'G',
 'PA',
 'AB',
 'R',
 'H',
 '2B',
 '3B',
 'HR',
 'RBI',
 'SB',
 'CS',
 'BB',
 'SO',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP',
 'SH',
 'SF',
 'IBB']

In [37]:
## Adding headers to dataframe

df.columns = headers_new
df.head()

Unnamed: 0,Pos,Name,Age,G,PA,AB,R,H,2B,3B,...,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB
0,C,Travis d'Arnaud,33,65,266,249,38,65,15,0,...,0.308,0.454,0.762,108,113,7,6,0,0,0
1,1B,,28,96,423,372,45,95,35,0,...,0.34,0.495,0.835,128,184,8,2,0,2,4
2,2B,,25,62,263,242,34,59,15,0,...,0.289,0.405,0.694,90,98,0,2,0,4,0
3,SS,Dansby Swanson,28,96,402,367,63,108,20,1,...,0.353,0.477,0.83,127,175,5,3,0,1,0
4,3B,Austin Riley,25,94,408,370,58,108,23,1,...,0.353,0.586,0.939,154,217,6,7,0,2,1


In [121]:
## Some names arent showing up since they have special text characters after. This function, which grabs just links, will help standardize the names

def parse_row_links (row):

    return [x.string for x in row.find_all('a')]

new_list = [parse_row_links(row) for row in rows[23:63]]
new_names = pd.DataFrame(new_list)


In [118]:
df.head()

Unnamed: 0,Pos,Name,Age,G,PA,AB,R,H,2B,3B,...,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB
0,C,Travis d'Arnaud,33,65,266,249,38,65,15,0,...,0.308,0.454,0.762,108,113,7,6,0,0,0
1,1B,Matt Olson,28,96,423,372,45,95,35,0,...,0.34,0.495,0.835,128,184,8,2,0,2,4
2,2B,Ozzie Albies,25,62,263,242,34,59,15,0,...,0.289,0.405,0.694,90,98,0,2,0,4,0
3,SS,Dansby Swanson,28,96,402,367,63,108,20,1,...,0.353,0.477,0.83,127,175,5,3,0,1,0
4,3B,Austin Riley,25,94,408,370,58,108,23,1,...,0.353,0.586,0.939,154,217,6,7,0,2,1


In [143]:
## Using our code, this defines a new function that lets you plug in a new team of choice

def get_team_data(team):

    get_new_team = requests.get(f'https://www.baseball-reference.com/teams/{team}/2022.shtml')
    
    team_soup = Soup(get_new_team.text)
    
    tables = team_soup.find_all('table')

    rows = team_soup.find_all('tr')

    list_of_parsed_rows = [parse_row(row) for row in rows[23:63]]

    new_df = pd.DataFrame(list_of_parsed_rows)

    ## Filtering out Pitchers, since these are only hitting stats

    new_df[0].astype(str)

    new_df.drop(new_df[new_df[0] == 'P'].index, inplace=True)

    new_df['team'] = team

    ## Using the name function here to standardize all names, assigning them to index 1

    def parse_row_links (row):

        return [x.string for x in row.find_all('a')]

    new_list = [parse_row_links(row) for row in rows[23:63]]

    new_df[1] = pd.DataFrame(new_list)  
    
    return new_df

In [145]:
Brewers = get_team_data('MIL')
Brewers.sample(5)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,team
15,C,Pedro Severino,28.0,5.0,16.0,14.0,0.0,3.0,2.0,0.0,...,0.357,0.67,89.0,5.0,0.0,0.0,0.0,0.0,0.0,MIL
12,3B,Mike Brosseau,28.0,41.0,100.0,89.0,9.0,26.0,3.0,0.0,...,0.461,0.831,133.0,41.0,0.0,1.0,0.0,0.0,0.0,MIL
14,2B,Mark Mathias,27.0,6.0,17.0,16.0,2.0,2.0,0.0,0.0,...,0.313,0.43,16.0,5.0,0.0,0.0,0.0,1.0,0.0,MIL
10,C,Victor Caratini,28.0,46.0,158.0,133.0,16.0,32.0,5.0,0.0,...,0.436,0.791,122.0,58.0,7.0,5.0,0.0,1.0,0.0,MIL
18,,,,,,,,,,,...,,,,,,,,,,MIL


In [130]:
## Function for writing dataframe to csv file with current date

from datetime import date

def bref_tocsv (team):
    DATA_DIR = '/Users/cstone'
    today = date.today()
    get_team_data(team).to_csv(f'{team}stats{today}.csv')



In [131]:
## Saving a copy of the Boston Red Sox 2022 stats as of today

bref_tocsv('BOS')
