In [2]:
# Set-up
import pandas as pd
import requests
from bs4 import BeautifulSoup


# Read in the standings from one day and get our soup
url = 'https://www.baseball-reference.com/boxes/?month=7&day=1&year=2019'

response = requests.get(url)



soup = BeautifulSoup(response.text, 'html.parser')




In [3]:
# We want the first six stats tables, which give us the standings in each division on that day. First, find the stats tables.
tables = soup.select('table.stats_table')
# Then, re-save tables as the first six
tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]


In [4]:
# Let's try to figure out how to call each individual value we want.
# Then, build the df from there

# Call the first team's winning percentage.
tables[0]('tr')[1]('td')[2].text

'.659'

In [5]:
# Start to build a for loop with this approach
stats = []
for item in tables[0]('tr')[1]:
    stats.append(item.text)
print(stats[3])

.659


In [6]:
# The names come in ths. Let's make a list of the text from all the ths for the first division (which we find in tables[0]).
# Remove the 'Tm' label.

names = []
for item in tables[0]('tr'):
    names.append(item('th')[0].text)
names.remove('Tm')
print(names)
    

['NYY', 'TBR', 'BOS', 'TOR', 'BAL']


In [7]:
# Do the same with 'wins' values, which are found in those same tds.

wins = []
for item in tables[0]('tr'):
    try:
        wins.append(item('td')[0].text)
    except:
        pass
print(wins)

['54', '49', '44', '32', '24']


In [8]:
# Do the same with winning percentages, which are found in tds.

wps = []
for item in tables[0]('tr'):
    try:
        wps.append(item('td')[2].text)
    except:
        pass
print(wps)

['.659', '.576', '.524', '.376', '.286']


In [9]:
# Do the same with 'games back' values, which are found in those same tds.

gb_values = []
for item in tables[0]('tr'):
    try:
        gb_values.append(item('td')[3].text)
    except:
        pass
print(gb_values)

['--', ' 6.5', '11.0', '23.5', '31.0']


In [10]:
# Build a df with each of these lists as a column,
# giving us the first division's teams and their winning percentages and games back values.

df_al_east = {}
df_al_east['team'] = names
df_al_east['wins'] = wins
df_al_east['wp'] = wps
df_al_east['gb'] = gb_values
df_al_east = pd.DataFrame(df_al_east)
df_al_east

Unnamed: 0,team,wins,wp,gb
0,NYY,54,0.659,--
1,TBR,49,0.576,6.5
2,BOS,44,0.524,11.0
3,TOR,32,0.376,23.5
4,BAL,24,0.286,31.0


In [11]:
# Now let's do this for each division, building a list of six dfs, one for each division.

division_dfs = []
for table in tables:
    names = []
    for item in table('tr'):
        names.append(item('th')[0].text)
    names.remove('Tm')

    wins = []
    for item in table('tr'):
        try:
            wins.append(item('td')[0].text)
        except:
            pass    
    
    wps = []
    for item in table('tr'):
        try:
            wps.append(item('td')[2].text)
        except:
            pass
    
    gb_values = []
    for item in table('tr'):
        try:
            gb_values.append(item('td')[3].text)
        except:
            pass

    df_division = {}
    df_division['team'] = names
    df_division['wins'] = wins
    df_division['wp'] = wps
    df_division['gb'] = gb_values
    df_division = pd.DataFrame(df_division)
    division_dfs.append(df_division)
division_dfs


[  team wins    wp    gb
 0  NYY   54  .659    --
 1  TBR   49  .576   6.5
 2  BOS   44  .524  11.0
 3  TOR   32  .376  23.5
 4  BAL   24  .286  31.0,
   team wins    wp    gb
 0  MIN   53  .639    --
 1  CLE   45  .542   8.0
 2  CHW   39  .481  13.0
 3  DET   27  .338  24.5
 4  KCR   29  .341  25.0,
   team wins    wp    gb
 0  HOU   53  .624    --
 1  TEX   46  .548   6.5
 2  OAK   47  .547   6.5
 3  LAA   42  .494  11.0
 4  SEA   37  .420  17.5,
   team wins    wp    gb
 0  ATL   50  .588    --
 1  PHI   44  .524   5.5
 2  WSN   42  .506   7.0
 3  NYM   38  .447  12.0
 4  MIA   32  .390  16.5,
   team wins    wp    gb
 0  MIL   46  .541    --
 1  CHC   45  .529   1.0
 2  STL   41  .500   3.5
 3  PIT   40  .482   5.0
 4  CIN   38  .463   6.5,
   team wins    wp    gb
 0  LAD   57  .663    --
 1  COL   44  .524  12.0
 2  SDP   42  .500  14.0
 3  ARI   43  .500  14.0
 4  SFG   37  .440  19.0]

In [12]:
# Ok, sweet! We're getting somewhere. The next step is to make one of these lists for every day of the season.
# Start by building a list of dates in the year. There must be an easier way to do this...

months = [3, 4, 5, 6, 7, 8, 9, 10]
days = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
years = [1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
dates = []

# For loop to combine 'months' and 'days' into 'dates'
for year in years:
    for month in months:
        for day in days:
            date_list = []
            date_list.append(year)
            date_list.append(month)
            date_list.append(day)        
            dates.append(date_list)
# dates.remove([3, 1])
# dates.remove([3, 2])
# dates.remove([3, 3])
# dates.remove([3, 4])
# dates.remove([3, 5])
# dates.remove([3, 6])
# dates.remove([3, 7])
# dates.remove([3, 8])
# dates.remove([3, 9])
# dates.remove([3, 10])
# dates.remove([3, 11])
# dates.remove([3, 12])
# dates.remove([3, 13])
# dates.remove([3, 14])
# dates.remove([4, 31])
# dates.remove([6, 31])
# dates.remove([9, 31]) 
# dates.remove([10, 10]) 
# dates.remove([10, 11]) 
# dates.remove([10, 12])
# dates.remove([10, 13])
# dates.remove([10, 14])
# dates.remove([10, 15])
# dates.remove([10, 16])
# dates.remove([10, 17])
# dates.remove([10, 18])
# dates.remove([10, 19])
# dates.remove([10, 20])
# dates.remove([10, 21])
# dates.remove([10, 22])
# dates.remove([10, 23])
# dates.remove([10, 24]) 
# dates.remove([10, 25]) 
# dates.remove([10, 26])
# dates.remove([10, 27])
# dates.remove([10, 28])
# dates.remove([10, 29])
# dates.remove([10, 30])
dates

[[1995, 3, 1],
 [1995, 3, 2],
 [1995, 3, 3],
 [1995, 3, 4],
 [1995, 3, 5],
 [1995, 3, 6],
 [1995, 3, 7],
 [1995, 3, 8],
 [1995, 3, 9],
 [1995, 3, 10],
 [1995, 3, 11],
 [1995, 3, 12],
 [1995, 3, 13],
 [1995, 3, 14],
 [1995, 3, 15],
 [1995, 3, 16],
 [1995, 3, 17],
 [1995, 3, 18],
 [1995, 3, 19],
 [1995, 3, 20],
 [1995, 3, 21],
 [1995, 3, 22],
 [1995, 3, 23],
 [1995, 3, 24],
 [1995, 3, 25],
 [1995, 3, 26],
 [1995, 3, 27],
 [1995, 3, 28],
 [1995, 3, 29],
 [1995, 3, 30],
 [1995, 3, 31],
 [1995, 4, 1],
 [1995, 4, 2],
 [1995, 4, 3],
 [1995, 4, 4],
 [1995, 4, 5],
 [1995, 4, 6],
 [1995, 4, 7],
 [1995, 4, 8],
 [1995, 4, 9],
 [1995, 4, 10],
 [1995, 4, 11],
 [1995, 4, 12],
 [1995, 4, 13],
 [1995, 4, 14],
 [1995, 4, 15],
 [1995, 4, 16],
 [1995, 4, 17],
 [1995, 4, 18],
 [1995, 4, 19],
 [1995, 4, 20],
 [1995, 4, 21],
 [1995, 4, 22],
 [1995, 4, 23],
 [1995, 4, 24],
 [1995, 4, 25],
 [1995, 4, 26],
 [1995, 4, 27],
 [1995, 4, 28],
 [1995, 4, 29],
 [1995, 4, 30],
 [1995, 4, 31],
 [1995, 5, 1],
 [1995, 5, 

In [13]:
for date in dates:
    if date[1] == 3 and date[2] == 1:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 2:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 3:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 4:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 5:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 6:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 7:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 8:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 9:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 10:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 11:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 12:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 13:
        dates.remove(date)
for date in dates:
    if date[1] == 3 and date[2] == 14:
        dates.remove(date)
for date in dates:
    if date[1] == 4 and date[2] == 31:
        dates.remove(date)
for date in dates:
    if date[1] == 6 and date[2] == 31:
        dates.remove(date)
for date in dates:
    if date[1] == 9 and date[2] == 31:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 11:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 12:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 13:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 14:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 15:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 16:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 17:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 18:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 19:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 20:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 21:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 22:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 23:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 24:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 25:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 26:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 27:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 28:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 29:
        dates.remove(date)
for date in dates:
    if date[1] == 10 and date[2] == 30:
        dates.remove(date)
dates

[[1995, 3, 15],
 [1995, 3, 16],
 [1995, 3, 17],
 [1995, 3, 18],
 [1995, 3, 19],
 [1995, 3, 20],
 [1995, 3, 21],
 [1995, 3, 22],
 [1995, 3, 23],
 [1995, 3, 24],
 [1995, 3, 25],
 [1995, 3, 26],
 [1995, 3, 27],
 [1995, 3, 28],
 [1995, 3, 29],
 [1995, 3, 30],
 [1995, 3, 31],
 [1995, 4, 1],
 [1995, 4, 2],
 [1995, 4, 3],
 [1995, 4, 4],
 [1995, 4, 5],
 [1995, 4, 6],
 [1995, 4, 7],
 [1995, 4, 8],
 [1995, 4, 9],
 [1995, 4, 10],
 [1995, 4, 11],
 [1995, 4, 12],
 [1995, 4, 13],
 [1995, 4, 14],
 [1995, 4, 15],
 [1995, 4, 16],
 [1995, 4, 17],
 [1995, 4, 18],
 [1995, 4, 19],
 [1995, 4, 20],
 [1995, 4, 21],
 [1995, 4, 22],
 [1995, 4, 23],
 [1995, 4, 24],
 [1995, 4, 25],
 [1995, 4, 26],
 [1995, 4, 27],
 [1995, 4, 28],
 [1995, 4, 29],
 [1995, 4, 30],
 [1995, 5, 1],
 [1995, 5, 2],
 [1995, 5, 3],
 [1995, 5, 4],
 [1995, 5, 5],
 [1995, 5, 6],
 [1995, 5, 7],
 [1995, 5, 8],
 [1995, 5, 9],
 [1995, 5, 10],
 [1995, 5, 11],
 [1995, 5, 12],
 [1995, 5, 13],
 [1995, 5, 14],
 [1995, 5, 15],
 [1995, 5, 16],
 [1995, 5,

In [14]:
dates_1995 = []
dates_1996 = []
dates_1997 = []
dates_1998 = []
dates_1999 = []
dates_2000 = []
dates_2001 = []
dates_2002 = []
dates_2003 = []
dates_2004 = []
dates_2005 = []
dates_2006 = []
dates_2007 = []
dates_2008 = []
dates_2009 = []
dates_2010 = []
dates_2011 = []
dates_2012 = []
dates_2013 = []
dates_2014 = []
dates_2015 = []
dates_2016 = []
dates_2017 = []
dates_2018 = []
dates_2019 = []
dates_2020 = []
dates_2021 = []


for date in dates:
    if date[0] == 1995:
        dates_1995.append(date)
    if date[0] == 1996:
        dates_1996.append(date)
    if date[0] == 1997:
        dates_1997.append(date)
    if date[0] == 1998:
        dates_1998.append(date)
    if date[0] == 1999:
        dates_1999.append(date)
    if date[0] == 2000:
        dates_2000.append(date)
    if date[0] == 2001:
        dates_2001.append(date)
    if date[0] == 2002:
        dates_2002.append(date)
    if date[0] == 2003:
        dates_2003.append(date)
    if date[0] == 2004:
        dates_2004.append(date)
    if date[0] == 2005:
        dates_2005.append(date)
    if date[0] == 2006:
        dates_2006.append(date)
    if date[0] == 2007:
        dates_2007.append(date)
    if date[0] == 2008:
        dates_2008.append(date)
    if date[0] == 2009:
        dates_2009.append(date)
    if date[0] == 2010:
        dates_2010.append(date)
    if date[0] == 2011:
        dates_2011.append(date)
    if date[0] == 2012:
        dates_2012.append(date)
    if date[0] == 2013:
        dates_2013.append(date)
    if date[0] == 2014:
        dates_2014.append(date)
    if date[0] == 2015:
        dates_2015.append(date)
    if date[0] == 2016:
        dates_2016.append(date)
    if date[0] == 2017:
        dates_2017.append(date)
    if date[0] == 2018:
        dates_2018.append(date)
    if date[0] == 2019:
        dates_2019.append(date)
    if date[0] == 2020:
        dates_2020.append(date)
    if date[0] == 2021:
        dates_2021.append(date)
    
dates

[[1995, 3, 15],
 [1995, 3, 16],
 [1995, 3, 17],
 [1995, 3, 18],
 [1995, 3, 19],
 [1995, 3, 20],
 [1995, 3, 21],
 [1995, 3, 22],
 [1995, 3, 23],
 [1995, 3, 24],
 [1995, 3, 25],
 [1995, 3, 26],
 [1995, 3, 27],
 [1995, 3, 28],
 [1995, 3, 29],
 [1995, 3, 30],
 [1995, 3, 31],
 [1995, 4, 1],
 [1995, 4, 2],
 [1995, 4, 3],
 [1995, 4, 4],
 [1995, 4, 5],
 [1995, 4, 6],
 [1995, 4, 7],
 [1995, 4, 8],
 [1995, 4, 9],
 [1995, 4, 10],
 [1995, 4, 11],
 [1995, 4, 12],
 [1995, 4, 13],
 [1995, 4, 14],
 [1995, 4, 15],
 [1995, 4, 16],
 [1995, 4, 17],
 [1995, 4, 18],
 [1995, 4, 19],
 [1995, 4, 20],
 [1995, 4, 21],
 [1995, 4, 22],
 [1995, 4, 23],
 [1995, 4, 24],
 [1995, 4, 25],
 [1995, 4, 26],
 [1995, 4, 27],
 [1995, 4, 28],
 [1995, 4, 29],
 [1995, 4, 30],
 [1995, 5, 1],
 [1995, 5, 2],
 [1995, 5, 3],
 [1995, 5, 4],
 [1995, 5, 5],
 [1995, 5, 6],
 [1995, 5, 7],
 [1995, 5, 8],
 [1995, 5, 9],
 [1995, 5, 10],
 [1995, 5, 11],
 [1995, 5, 12],
 [1995, 5, 13],
 [1995, 5, 14],
 [1995, 5, 15],
 [1995, 5, 16],
 [1995, 5,

In [15]:
# Name the base url, which we will combine with the date values to create urls for each date.
url_anchor = 'https://www.baseball-reference.com/boxes/?'

# 'https://www.baseball-reference.com/boxes/?month=7&day=1&year=2019'

In [16]:
# For each date in the list of dates, we want to build the url using the year value, month value, and day value.
date_urls = []
for date in dates:
    date_urls.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls

['https://www.baseball-reference.com/boxes/?month=3&day=15&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=16&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=17&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=18&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=19&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=20&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=21&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=22&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=23&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=24&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=25&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=26&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=27&year=1995',
 'https://www.baseball-reference.com/boxes/?month=3&day=28&year=1995',
 'http

In [18]:
# Now we want to throw these date urls into another for loop to get us a list of dfs for each date.
daily_division_dfs = []
for date_url in date_urls:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
#     print(date_url)
    daily_division_dfs.append(division_dfs)
daily_division_dfs

KeyboardInterrupt: 

In [19]:
# Let's do this year by year, first creating lists of date-urls, and then running our scraper through each year's list to create a dataframe and outputting our standard deviation values.

date_urls_1995 = []
for date in dates_1995:
    date_urls_1995.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_1996 = []
for date in dates_1996:
    date_urls_1996.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_1997 = []
for date in dates_1997:
    date_urls_1997.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_1998 = []
for date in dates_1998:
    date_urls_1998.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_1999 = []
for date in dates_1999:
    date_urls_1999.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2000 = []
for date in dates_2000:
    date_urls_2000.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2001 = []
for date in dates_2001:
    date_urls_2001.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2002 = []
for date in dates_2002:
    date_urls_2002.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2003 = []
for date in dates_2003:
    date_urls_2003.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2004 = []
for date in dates_2004:
    date_urls_2004.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2005 = []
for date in dates_2005:
    date_urls_2005.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2006 = []
for date in dates_2006:
    date_urls_2006.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2007 = []
for date in dates_2007:
    date_urls_2007.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2007 = []
for date in dates_2007:
    date_urls_2007.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2008 = []
for date in dates_2008:
    date_urls_2008.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2009 = []
for date in dates_2009:
    date_urls_2009.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2010 = []
for date in dates_2010:
    date_urls_2010.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2011 = []
for date in dates_2011:
    date_urls_2011.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2011 = []
for date in dates_2011:
    date_urls_2011.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2012 = []
for date in dates_2012:
    date_urls_2012.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2013 = []
for date in dates_2013:
    date_urls_2013.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2014 = []
for date in dates_2014:
    date_urls_2014.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2015 = []
for date in dates_2015:
    date_urls_2015.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2016 = []
for date in dates_2016:
    date_urls_2016.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2017 = []
for date in dates_2017:
    date_urls_2017.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2018 = []
for date in dates_2018:
    date_urls_2018.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2019 = []
for date in dates_2019:
    date_urls_2019.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2020 = []
for date in dates_2020:
    date_urls_2020.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))
date_urls_2021 = []
for date in dates_2021:
    date_urls_2021.append(url_anchor + 'month=' + str(date[1]) + '&day=' + str(date[2]) + '&year=' + str(date[0]))


## 1995

In [20]:
# First, let's create our df.
daily_division_dfs_1995 = []
for date_url in date_urls_1995:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_1995.append(division_dfs)
daily_division_dfs_1995

[[  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  NYY    0  .000  --
  4  BAL    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  MIL    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  CAL    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  MON    0  .000  --
  1  FLA    0  .000  --
  2  PHI    0  .000  --
  3  NYM    0  .000  --
  4  ATL    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  COL    0  .000  --
  2  SFG    0  .000  --
  3  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  NYY    0  .000  --
  4  BAL    0  .000  --,
    team wins    wp  gb
  0  CHW

In [21]:
# Then, let's find a standard deviation value for wins across the league for each date of the year.
daily_wins_stds_1995 = []
for division_dfs in daily_division_dfs_1995:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_1995.append(daily_std)
daily_wins_stds_1995

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.08333333333333333,
 0.5079680626286325,
 0.7847167827772514,
 0.9879497278624371,
 1.1035940721023627,
 1.0067510806196387,
 1.2204951545033096,
 1.391486338818142,
 1.7059795357478915,
 1.686950838924557,
 1.6940780823243677,
 1.6674167384371998,
 1.7742465993015495,
 1.7522671447347202,
 1.7706803316597204,
 1.9658194610022963,
 1.9499311378560507,
 2.1889534490384945,
 2.1708127952243212,
 2.3323626801322437,
 2.108420243945148,
 2.1841659620139904,
 2.0643901715482147,
 2.063880548098436,
 2.1385136156113993,
 2.3060887051310903,
 2.4953834280086653,
 2.548693843358126,
 2.714782102700467,
 2.820950042916827,
 2.6797840321304953,
 2.792176845165075,
 2.796051547816068,
 2.98887966750325,
 3.103835387755865,
 3.2929802155336425,
 3.386554

In [22]:
# Then, let's create a df with these values that we can add each year to.
df_wins_stds = pd.DataFrame(daily_wins_stds_1995)
df_wins_stds

Unnamed: 0,0
0,0.000000
1,0.000000
2,0.000000
3,0.000000
4,0.000000
...,...
206,10.160298
207,10.160298
208,10.160298
209,10.160298


## 1996

In [23]:
daily_division_dfs_1996 = []
for date_url in date_urls_1996:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_1996.append(division_dfs)
daily_division_dfs_1996

[[  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  BAL    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  MIL    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  CAL    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  ATL    0  .000  --
  1  MON    0  .000  --
  2  FLA    0  .000  --
  3  PHI    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  COL    0  .000  --
  2  SFG    0  .000  --
  3  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  BAL    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW

In [24]:
daily_wins_stds_1996 = []
for division_dfs in daily_division_dfs_1996:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_1996.append(daily_std)
daily_wins_stds_1996

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.08333333333333333,
 0.5112037625333125,
 0.6307571447738003,
 0.7405572773303405,
 0.703005957214223,
 0.828095985460379,
 0.8614994492365281,
 1.0544654576448922,
 1.1596648429527467,
 1.1450377580572313,
 1.192454783700357,
 1.2319747557935083,
 1.173439560924767,
 1.3340638791846056,
 1.3778359054948996,
 1.4456482483581918,
 1.6976045694023625,
 1.8488418257139425,
 1.804377490295946,
 1.8948462150398429,
 2.0315594505264944,
 2.136153672791608,
 2.058939574495151,
 1.9873351016912073,
 2.051426984646788,
 2.057309690219978,
 2.2498996709310592,
 2.157622037916604,
 2.2630366920281952,
 2.3776038159104274,
 2.3699555386337408,
 2.520333304985225,
 2.5760126683018076,
 2.6130707313777197,
 2.54738060962888,
 2.764618166251527,
 2.744860682008412,
 2.710489741474442,
 2.9409647319905705,
 2.921459561846581,
 2.8665436498460064,
 2.9854175882904763,
 3.009210575477122,
 3.272138208439109

In [25]:
df_wins_stds['1996'] = daily_wins_stds_1996
df_wins_stds

Unnamed: 0,0,1996
0,0.000000,0.000000
1,0.000000,0.000000
2,0.000000,0.000000
3,0.000000,0.000000
4,0.000000,0.000000
...,...,...
206,10.160298,10.361502
207,10.160298,10.361502
208,10.160298,10.361502
209,10.160298,10.361502


## 1997

In [26]:
daily_division_dfs_1997 = []
for date_url in date_urls_1997:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_1997.append(division_dfs)
daily_division_dfs_1997

[[  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  BAL    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  MIL    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  TEX    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  MON    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  FLA    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  COL    0  .000  --
  2  SFG    0  .000  --
  3  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TOR    0  .000  --
  1  DET    0  .000  --
  2  BOS    0  .000  --
  3  BAL    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW

In [27]:
daily_wins_stds_1997 = []
for division_dfs in daily_division_dfs_1997:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_1997.append(daily_std)
daily_wins_stds_1997

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.4534196569508541,
 0.8031084998299995,
 0.7674411727963492,
 0.933896794788995,
 1.067685518288296,
 1.0356692011000006,
 1.0781546053867992,
 1.0588981775551274,
 1.188153888915721,
 1.2431654239615861,
 1.5151342370952623,
 1.7207889434199057,
 1.6304940584611238,
 1.7291888030049503,
 1.8698288837409605,
 1.9908574319780297,
 2.1690429952388004,
 2.2483911669621945,
 2.366573963263968,
 2.277802918184998,
 2.1630091902560813,
 2.4076392927898254,
 2.3592442595890093,
 2.3840310872876587,
 2.4902495697616502,
 2.549120150653,
 2.6682475836189745,
 2.769662173278592,
 2.7744405274581023,
 2.848322671294214,
 3.0033363109975775,
 3.121303152514896,
 2.9324343103465473,
 3.136579872687776,
 3.2404252187540528,
 3.342070979053812,
 3.426593587070428,
 3.485987745028132,
 3.4818636491021913,
 3.5010016728925315,
 3.4949941561765674,
 3.6526046692368457,
 3.6794804042612994,
 3.88360483

In [28]:
df_wins_stds['1997'] = daily_wins_stds_1997
df_wins_stds

Unnamed: 0,0,1996,1997
0,0.000000,0.000000,0.00000
1,0.000000,0.000000,0.00000
2,0.000000,0.000000,0.00000
3,0.000000,0.000000,0.00000
4,0.000000,0.000000,0.00000
...,...,...,...
206,10.160298,10.361502,9.09709
207,10.160298,10.361502,9.09709
208,10.160298,10.361502,9.09709
209,10.160298,10.361502,9.09709


## 1998

In [29]:
daily_division_dfs_1998 = []
for date_url in date_urls_1998:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_1998.append(division_dfs)
daily_division_dfs_1998

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY 

In [30]:
daily_wins_stds_1998 = []
for division_dfs in daily_division_dfs_1998:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_1998.append(daily_std)
daily_wins_stds_1998

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.3251511668290256,
 0.6197401189476657,
 0.7726074722935351,
 0.9703897581504982,
 1.2384806284767562,
 1.36188542690176,
 1.4901975150230335,
 1.6174492218349112,
 1.5340948707300592,
 1.669682368315368,
 1.7364341367021094,
 1.8273113741570655,
 2.007072157139991,
 2.139741652781682,
 2.3985141897264843,
 2.4002815444422723,
 2.298039651402681,
 2.35670695737223,
 2.491099484787186,
 2.5452237974754772,
 2.51673338428388,
 2.7255699133883353,
 2.833992702269132,
 2.9240256899638304,
 2.9256595746184995,
 3.038362542666732,
 2.977528020987256,
 3.1302652001615416,
 3.213582896649649,
 3.3493347851085713,
 3.356501260533672,
 3.419469630406931,
 3.6042892406753477,
 3.579456959521043,
 3.5610368306445204,
 3.7225716297086193,
 3.695595718324379,
 3.6779890096987544,
 3.8275531410999215,
 3.802582642933242,
 4.015416625663492,
 4.173318699373236,
 4.3029023503517445,
 4.306021870937354,
 4.

In [31]:
df_wins_stds['1998'] = daily_wins_stds_1998
df_wins_stds

Unnamed: 0,0,1996,1997,1998
0,0.000000,0.000000,0.00000,0.000000
1,0.000000,0.000000,0.00000,0.000000
2,0.000000,0.000000,0.00000,0.000000
3,0.000000,0.000000,0.00000,0.000000
4,0.000000,0.000000,0.00000,0.000000
...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973
207,10.160298,10.361502,9.09709,13.262973
208,10.160298,10.361502,9.09709,13.262973
209,10.160298,10.361502,9.09709,13.262973


## 1999

In [32]:
daily_division_dfs_1999 = []
for date_url in date_urls_1999:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_1999.append(division_dfs)
daily_division_dfs_1999

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY 

In [33]:
daily_wins_stds_1999 = []
for division_dfs in daily_division_dfs_1999:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_1999.append(daily_std)
daily_wins_stds_1999

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5165230867474213,
 0.5447941946797473,
 0.669106777950212,
 0.8623677248081254,
 0.9150524429324158,
 1.0430654155464814,
 1.2314220027783034,
 1.3758103578294902,
 1.337153831258538,
 1.4854955313551679,
 1.4508159537449805,
 1.3461757091187214,
 1.4645423012588672,
 1.5159726751231462,
 1.6072121022212797,
 1.8238240146513849,
 1.9246018373822602,
 2.0334786887144474,
 2.2035534507710652,
 2.2396046540300785,
 2.4093394141082762,
 2.4599483508103313,
 2.5813024343456945,
 2.631779975466617,
 2.7326495009231073,
 2.6708113619209413,
 2.957165586368212,
 3.04298976008385,
 3.0105079311857392,
 2.89305265490593,
 2.960305868578631,
 3.046104179451355,
 3.0768526825912805,
 3.1830697332653894,
 3.153792454182126,
 3.282129132631232,
 3.2987355787087993,
 3.4463600556153065,
 3.4703791906204984,
 3.573744106126513,
 3.8192766233254574,
 3.733687713

In [34]:
df_wins_stds['1999'] = daily_wins_stds_1999
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999
0,0.000000,0.000000,0.00000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000
...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826
207,10.160298,10.361502,9.09709,13.262973,12.955826
208,10.160298,10.361502,9.09709,13.262973,12.955826
209,10.160298,10.361502,9.09709,13.262973,12.955826


## 2000

In [35]:
daily_division_dfs_2000 = []
for date_url in date_urls_2000:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2000.append(division_dfs)
daily_division_dfs_2000

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY 

In [36]:
daily_wins_stds_2000 = []
for division_dfs in daily_division_dfs_2000:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2000.append(daily_std)
daily_wins_stds_2000

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.06804138174397717,
 0.14257698099397018,
 0.14257698099397018,
 0.14257698099397018,
 0.14257698099397018,
 0.5659527527564481,
 0.7336623798350876,
 0.6566435144140278,
 0.6521611176865523,
 0.7674961578588868,
 0.893674409875643,
 1.0318755071070542,
 1.1109721906123005,
 1.2520563717817879,
 1.290195942739924,
 1.2256724391291385,
 1.2805240411875627,
 1.3819800275713863,
 1.5656375132641493,
 1.5808242892024695,
 1.71707394794707,
 1.8289219220318766,
 2.010818145435023,
 2.0336759415767696,
 2.0585848439027683,
 2.2112652409586486,
 2.30191564435743,
 2.358505780428095,
 2.5478001755981188,
 2.460486903945465,
 2.5633343951602514,
 2.873348894286037,
 2.99817491581064,
 3.02602702139391,
 3.0101604411981246,
 2.883425484851109,
 2.826026774867106,
 2.947690056291874,
 2.9434829366189543,
 2.9290966337040474,
 3.0308641906431197,
 3.294567743740673,
 3.2566463810789776,
 3.2277570095118455,
 3.21

In [37]:
df_wins_stds['2000'] = daily_wins_stds_2000
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611


## 2001

In [38]:
daily_division_dfs_2001 = []
for date_url in date_urls_2001:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2001.append(division_dfs)
daily_division_dfs_2001

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [39]:
daily_wins_stds_2001 = []
for division_dfs in daily_division_dfs_2001:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2001.append(daily_std)
daily_wins_stds_2001

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.43173021133590944,
 0.527609051530992,
 0.817328569965575,
 0.983874246095939,
 1.0773600496027433,
 1.0107281778457433,
 1.0000758167243722,
 1.095486748695472,
 1.3960851574698683,
 1.6141694467062215,
 1.594051909374816,
 1.6714099668392288,
 1.9194684292816084,
 1.9524952594279588,
 1.78076362570501,
 1.8912627929855155,
 2.141020083621329,
 2.17643920710353,
 2.4615047123502,
 2.5365714352707807,
 2.659986884960676,
 2.6954426529223063,
 2.723463929447225,
 2.875298808722808,
 2.933049828281524,
 3.1231417753527606,
 3.0697564805907636,
 3.113251770633999,
 3.093527348516467,
 3.1284782769737394,
 3.2025091238035177,
 3.284372686671387,
 3.283313795561688,
 3.3822548547397617,
 3.527988416492311,
 3.632604651979268,
 3.582128181926992,
 3.689808614886258,
 3.712277516850228,
 3.8397512083472845,
 4.042600775510754,
 4.128882711466907,
 4.206293421247072,
 4.

In [40]:
df_wins_stds['2001'] = daily_wins_stds_2001
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892


## 2002

In [41]:
daily_division_dfs_2002 = []
for date_url in date_urls_2002:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2002.append(division_dfs)
daily_division_dfs_2002

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  OAK    0  .000  --
  1  SEA    0  .000  --
  2  TEX    0  .000  --
  3  ANA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [42]:
daily_wins_stds_2002 = []
for division_dfs in daily_division_dfs_2002:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2002.append(daily_std)
daily_wins_stds_2002

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5177965079182965,
 0.650302333658051,
 0.6391268760760714,
 0.8888705121608244,
 0.8593780496135794,
 1.2140714586024801,
 1.3734191938830405,
 1.6105666227960824,
 1.7008179095016043,
 1.7815896690435427,
 1.734174503657434,
 2.072150983431378,
 2.3460968072109734,
 2.255886262428766,
 2.4303915407392567,
 2.512573949742461,
 2.54311882149675,
 2.6450009081200756,
 2.508081527437102,
 2.728466051114889,
 3.0737880336615557,
 3.1134340402085274,
 3.2215229486883,
 3.1888698482025446,
 3.382921158730857,
 3.433668505193389,
 3.400412670893566,
 3.4425882593206616,
 3.440720130433206,
 3.4013351961329796,
 3.4845952610596544,
 3.478998028156067,
 3.6068834588083125,
 3.731136312548491,
 3.763391895516836,
 3.8537081070173502,
 3.8265868775909744,
 4.067960921375728,
 4.067710901330616,
 3.958649010637179,
 4.109787477367357,
 3.879902323736912,
 3.9812441889028203,
 4.17

In [43]:
df_wins_stds['2002'] = daily_wins_stds_2002
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683


## 2003

In [44]:
daily_division_dfs_2003 = []
for date_url in date_urls_2003:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2003.append(division_dfs)
daily_division_dfs_2003

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  ATL    0  .000  --
  2  NYM    0  .000  --
  3  MON    0  .000  --
  4  FLA    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [45]:
daily_wins_stds_2003 = []
for division_dfs in daily_division_dfs_2003:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2003.append(daily_std)
daily_wins_stds_2003

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.08333333333333333,
 0.5397687979209718,
 0.609268283704213,
 1.0641058490243636,
 1.1394048104544683,
 1.2408113949099662,
 1.3221449814005624,
 1.52050508783383,
 1.5167375792273485,
 1.5802029833287952,
 1.7357174800441617,
 1.8387369527144413,
 1.9603090473234441,
 2.0112351106914366,
 2.0805636780131507,
 2.2620575144187165,
 2.3788832401891886,
 2.3801772343286047,
 2.3113705822877426,
 2.618622797977953,
 2.927021267211078,
 2.844150391946124,
 2.8368878646327835,
 3.1582195301646983,
 3.039650134675316,
 3.047342121924519,
 3.1810751068095477,
 3.2466156388826035,
 3.227417716799169,
 3.2318815788045434,
 3.343575583036557,
 3.4692434312105145,
 3.673073054366403,
 3.7127989436785183,
 3.7304347924842887,
 3.9092273670263964,
 3.812961167274306,
 3.8290791472466754,
 3.970304130005081,
 4.238869782883613,
 4.242062408515677,
 4.256030253462928,
 4.284415184786464,
 4.375022482167733,
 4.

In [46]:
df_wins_stds['2003'] = daily_wins_stds_2003
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984


## 2004

In [47]:
daily_division_dfs_2004 = []
for date_url in date_urls_2004:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2004.append(division_dfs)
daily_division_dfs_2004

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS    0  .000  --,
    team wins    wp  gb
  0  CHW    0  .000  --
  1  MIN    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  ANA    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  TEX    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  MON    0  .000  --,
    team wins    wp  gb
  0  PIT    0  .000  --
  1  CHC    0  .000  --
  2  STL    0  .000  --
  3  MIL    0  .000  --
  4  CIN    0  .000  --
  5  HOU    0  .000  --,
    team wins    wp  gb
  0  COL    0  .000  --
  1  SFG    0  .000  --
  2  LAD    0  .000  --
  3  SDP    0  .000  --
  4  ARI    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS 

In [48]:
daily_wins_stds_2004 = []
for division_dfs in daily_division_dfs_2004:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2004.append(daily_std)
daily_wins_stds_2004

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.09128709291752769,
 0.09128709291752769,
 0.09128709291752769,
 0.09128709291752769,
 0.09128709291752769,
 0.44848170500344414,
 0.789309968093721,
 0.6170420562875331,
 0.9037695281784295,
 0.8675775611870055,
 1.0995212850339653,
 1.120975291661856,
 1.1247536767820645,
 1.2400905250182392,
 1.4977788337146938,
 1.4047498744206612,
 1.4382896728718526,
 1.5761023705750319,
 1.7392911526983585,
 1.6970200109678863,
 1.6207380640114313,
 1.861549049642701,
 1.9363011314973149,
 2.1800712895683914,
 2.2825849427916247,
 2.2999570169875803,
 2.3077841375347607,
 2.286282757012981,
 2.4935776345414316,
 2.820573608489299,
 2.7742926216104187,
 2.9275024015649724,
 2.839524492045444,
 2.903053920723307,
 2.7889223204076234,
 3.068441450298027,
 3.16875803500569,
 3.209525260497793,
 3.3834248625385293,
 3.524621764456729,
 3.450697032698008,
 3.5643801101620465,
 3.712367026341

In [49]:
df_wins_stds['2004'] = daily_wins_stds_2004
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198


## 2005

In [50]:
daily_division_dfs_2005 = []
for date_url in date_urls_2005:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2005.append(division_dfs)
daily_division_dfs_2005

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  DET    0  .000  --
  2  CLE    0  .000  --
  3  KCR    0  .000  --
  4  CHW    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [51]:
daily_wins_stds_2005 = []
for division_dfs in daily_division_dfs_2005:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2005.append(daily_std)
daily_wins_stds_2005

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5062658105859025,
 0.6228192868665745,
 0.5052637379580397,
 0.681662254537974,
 0.6148204987975932,
 0.8970102419918465,
 0.7878805026329179,
 0.9441829640997934,
 1.0755463731767663,
 1.1564830037425657,
 1.1768071089360657,
 1.2707624019303168,
 1.3154335446477192,
 1.3568537830181266,
 1.542966523765671,
 1.4887728446233883,
 1.7954089216590736,
 1.7183343407775105,
 1.7005490909619034,
 1.8690777246175123,
 2.1091967108289116,
 2.1567242032089187,
 2.2504752234175807,
 2.372249204998227,
 2.4027070706705413,
 2.555701483792689,
 2.634987696625949,
 2.8077653234436304,
 2.889906754348623,
 3.0166048054575967,
 3.2482364102769115,
 3.338165243049909,
 3.5911320100639927,
 3.6570364317761075,
 3.700901005913595,
 3.6035632439301426,
 3.7432255267464303,
 3.8742133456729384,
 3.9320013142113974,
 3.8064195091942,
 3.973748969915141,
 4.07129975230839

In [52]:
df_wins_stds['2005'] = daily_wins_stds_2005
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508


## 2006

In [53]:
daily_division_dfs_2006 = []
for date_url in date_urls_2006:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2006.append(division_dfs)
daily_division_dfs_2006

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [54]:
daily_wins_stds_2006 = []
for division_dfs in daily_division_dfs_2006:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2006.append(daily_std)
daily_wins_stds_2006

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5345480015858312,
 0.473858986787423,
 0.6677643252690793,
 0.8664588114749826,
 1.0410011180874725,
 1.3381486562353633,
 1.3768115886052488,
 1.4112782766641307,
 1.4577069209177784,
 1.159432748492453,
 1.26775777081313,
 1.3141917220490755,
 1.4328357708967348,
 1.41281363920245,
 1.6751170284069818,
 1.7165330560560281,
 1.7332509493856136,
 1.8637844915309059,
 1.9379311740673801,
 2.078820904041894,
 2.219782868831772,
 2.2292025138266784,
 2.4873169716251007,
 2.437582666148285,
 2.483585529391289,
 2.599642611547471,
 2.774796901449198,
 2.776835153978457,
 2.957249885632232,
 3.010963841834687,
 3.2218509139344955,
 3.39931321561858,
 3.4702145891224507,
 3.56866238675193,
 3.6714025155026477,
 3.640278017393749,
 3.5048545478575974,
 3.4134435587926935,
 3.5044601276079086,
 3.505831665262727,
 3.6000895919559484,
 3.6369036283486262,
 3.70918985

In [55]:
df_wins_stds['2006'] = daily_wins_stds_2006
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797


## 2007

In [56]:
daily_division_dfs_2007 = []
for date_url in date_urls_2007:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2007.append(division_dfs)
daily_division_dfs_2007

[[  team wins    wp  gb
  0  TBD    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBD    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS 

In [57]:
daily_wins_stds_2007 = []
for division_dfs in daily_division_dfs_2007:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2007.append(daily_std)
daily_wins_stds_2007

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5359090157850414,
 0.844999626695639,
 1.0207955129038893,
 0.9870771485971579,
 0.9920390042126085,
 1.0362512560765922,
 1.2354687145875785,
 1.269427353841522,
 1.2402937325713583,
 1.4212745981298702,
 1.432069712298776,
 1.4515618281889429,
 1.4028043430729813,
 1.521163714503284,
 1.488920142691386,
 1.6718746860446603,
 1.755672302075969,
 1.8287415878981461,
 1.9590960450617512,
 2.0660718984775883,
 2.03623628289439,
 2.0263973232038635,
 2.1945433629408293,
 2.01974268294154,
 2.0548750237134166,
 2.125106160992623,
 2.152012521197737,
 2.340341399327801,
 2.5133766550063665,
 2.58825145270934,
 2.742386335619978,
 2.7095917654675326,
 2.8176610105602493,
 2.8109798887389625,
 2.948969161134491,
 3.0003889392468515,
 3.2634858309914176,
 3.4266272331671934,
 3.4806759130812774,
 3.621235381438661,
 3.797348835120569,
 3.696906022291618,
 3.7043189149008

In [58]:
df_wins_stds['2007'] = daily_wins_stds_2007
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751


## 2008

In [59]:
daily_division_dfs_2008 = []
for date_url in date_urls_2008:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2008.append(division_dfs)
daily_division_dfs_2008

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  WSN    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  PHI    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [60]:
daily_wins_stds_2008 = []
for division_dfs in daily_division_dfs_2008:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2008.append(daily_std)
daily_wins_stds_2008

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.15786893258332635,
 0.15786893258332635,
 0.15786893258332635,
 0.15786893258332635,
 0.23240453183331936,
 0.6052238186998937,
 0.5956834027669525,
 0.798688495804587,
 0.8828803142249243,
 0.7504230240676532,
 0.9301727680920068,
 1.0682228637680085,
 1.2306531072962994,
 1.3184292194742757,
 1.3783920859505094,
 1.319317100746335,
 1.3424167532761133,
 1.5055920265864307,
 1.3855585155101144,
 1.4689415552653582,
 1.688580435691476,
 1.7671252758545333,
 1.6576203019579356,
 1.8706488204043452,
 2.0981188866797935,
 2.186425025873561,
 2.3380101184830253,
 2.2796453282211857,
 2.2999483627913633,
 2.3546810358310015,
 2.359300561365108,
 2.389735705575195,
 2.405868169652129,
 2.5138303249693505,
 2.5194087343325045,
 2.5048040582475246,
 2.477489516993619,
 2.459276146416433,
 2.3853895722565976,
 2.526588965360343,
 2.705891560839968,
 2.919939462820721,
 2.8680544668535064,
 2.8196709486962526,
 2.

In [61]:
df_wins_stds['2008'] = daily_wins_stds_2008
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703


## 2009

In [62]:
daily_division_dfs_2009 = []
for date_url in date_urls_2009:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2009.append(division_dfs)
daily_division_dfs_2009

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  TOR    0  .000  --
  4  NYY 

In [63]:
daily_wins_stds_2009 = []
for division_dfs in daily_division_dfs_2009:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2009.append(daily_std)
daily_wins_stds_2009

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.44326090866830353,
 0.48346339392226073,
 0.7602979520372722,
 0.7708213686833566,
 0.9565558599477231,
 1.2011754469049467,
 1.3865195096569476,
 1.607673725383826,
 1.7352246559856415,
 1.766450012034898,
 1.8334912094667528,
 1.9701701484824685,
 2.120512102837107,
 2.1993613488095534,
 2.131377353200958,
 1.9859548411502974,
 2.0094979780250677,
 2.1019122778873176,
 2.313376322790313,
 2.51729271469519,
 2.2957297914401393,
 2.281499814712584,
 2.3828720217702015,
 2.3668541094495175,
 2.475606266276771,
 2.6197852399007,
 2.56138325779775,
 2.7396128277035245,
 2.7411944014429266,
 2.903513631438272,
 3.0296730163106087,
 2.936812288107387,
 2.9014465137368135,
 2.9538546367666654,
 3.1109577311399725,
 3.083708276784216,
 3.105477682135914,
 2.9803400230435493,
 3.106400419809498,
 3.3118210273070994,
 3.420724547686353,
 3.62688271

In [64]:
df_wins_stds['2009'] = daily_wins_stds_2009
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836


## 2010

In [65]:
daily_division_dfs_2010 = []
for date_url in date_urls_2010:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2010.append(division_dfs)
daily_division_dfs_2010

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [66]:
daily_wins_stds_2010 = []
for division_dfs in daily_division_dfs_2010:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2010.append(daily_std)
daily_wins_stds_2010

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5177965079182966,
 0.4829995018678686,
 0.8093590314462632,
 0.7738381522453416,
 1.0724111254577606,
 1.1405984084956258,
 1.3938402856094028,
 1.6301778679212093,
 1.696978742291888,
 1.7834013690582173,
 1.746026443001595,
 1.8485106426417388,
 1.9209475538594025,
 1.8832530614099803,
 1.9036926316590226,
 1.8310382490851944,
 2.012086098812816,
 2.0301525867171923,
 1.9564556915121232,
 2.1095231566886308,
 2.021631300597937,
 2.100780224673976,
 2.1782367599479326,
 2.3373118989441815,
 2.501127330794951,
 2.4800404670705642,
 2.363214965663361,
 2.513123893400897,
 2.629057316870251,
 2.8531618967776815,
 2.9515942856465007,
 2.981213492779723,
 3.1776679603859566,
 3.2945694124806564,
 3.393326787925822,
 3.369853281066366,
 3.2672721809583174,
 3.3575007043214598,
 3.364554710577123,
 3.218616497585696,
 3.2602232691381645,
 3.4032159878

In [67]:
df_wins_stds['2010'] = daily_wins_stds_2010
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622


## 2011

In [68]:
daily_division_dfs_2011 = []
for date_url in date_urls_2011:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2011.append(division_dfs)
daily_division_dfs_2011

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  LAA    0  .000  --
  3  SEA    0  .000  --,
    team wins    wp  gb
  0  FLA    0  .000  --
  1  PHI    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  HOU    0  .000  --
  2  PIT    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  LAD    0  .000  --
  3  COL    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [69]:
daily_wins_stds_2011 = []
for division_dfs in daily_division_dfs_2011:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2011.append(daily_std)
daily_wins_stds_2011

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.3917330064948243,
 0.5827042464239828,
 0.8266250119516422,
 1.0156045828733553,
 1.2283878669302968,
 1.3441186585590117,
 1.4067758038754343,
 1.4571753638810643,
 1.2993992290367966,
 1.6520467832215473,
 1.7015008081138785,
 1.8177392859912518,
 1.5835349245270394,
 1.676446695610137,
 1.873807291706692,
 2.081689217228123,
 2.1252965549761136,
 2.0199909994272187,
 2.003722187578815,
 1.9336901450517923,
 2.034172044941685,
 1.9253116089535063,
 1.915082137811682,
 1.9545080206929946,
 2.1228411985299274,
 2.1090567280977255,
 2.01333591158212,
 2.1301265756745518,
 2.1729155296740563,
 2.2437643133232212,
 2.4269822222112154,
 2.303032834492535,
 2.311495938612976,
 2.2271580685410073,
 2.3374785709940693,
 2.482693839711732,
 2.4373374233587413,
 2.4310127713760616,
 2.6089221655694996,
 2.705550401454633,
 2.8183165530367273,
 2.7589593994873955,
 2.8510157947627253,
 2.9869419016

In [70]:
df_wins_stds['2011'] = daily_wins_stds_2011
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689


## 2012

In [71]:
daily_division_dfs_2012 = []
for date_url in date_urls_2012:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2012.append(division_dfs)
daily_division_dfs_2012

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  HOU    0  .000  --
  3  MIL    0  .000  --
  4  CHC    0  .000  --
  5  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  LAD    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [72]:
daily_wins_stds_2012 = []
for division_dfs in daily_division_dfs_2012:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2012.append(daily_std)
daily_wins_stds_2012

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.08333333333333333,
 0.09622504486493762,
 0.09622504486493762,
 0.09622504486493762,
 0.09622504486493762,
 0.09622504486493762,
 0.09622504486493762,
 0.1642664266089148,
 0.4971852321148314,
 0.6193297939297403,
 0.950843530192115,
 1.1414610809956465,
 1.054834344284479,
 1.291043564087403,
 1.279439852591121,
 1.4035372603331437,
 1.3797116592096061,
 1.5217188733255769,
 1.6279615177817994,
 1.6533981577121792,
 1.8424455754280997,
 2.0122285150921218,
 2.115404372158213,
 2.229003036955987,
 2.4244617617720317,
 2.5491225142448095,
 2.5776235658347164,
 2.6023864462391297,
 2.644953265794541,
 2.5387981173748937,
 2.534639662698542,
 2.5745553226452333,
 2.722034684744538,
 2.6545306724251336,
 2.6907699960305105,
 2.656787679455493,
 2.6665197746192892,
 2.7404806785508353,
 2.8201474848004344,
 2.811698410624954,
 2.8561911086402056,
 2.8949127749262984,
 2.903741200144522,
 3.107594119726637,
 3.1

In [73]:
df_wins_stds['2012'] = daily_wins_stds_2012
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105


## 2013

In [74]:
daily_division_dfs_2013 = []
for date_url in date_urls_2013:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2013.append(division_dfs)
daily_division_dfs_2013

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS 

In [75]:
daily_wins_stds_2013 = []
for division_dfs in daily_division_dfs_2013:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2013.append(daily_std)
daily_wins_stds_2013

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5309710638376316,
 0.5575351011178618,
 0.7018191527639276,
 0.6464647021041614,
 0.8140499161683606,
 0.9912617587617358,
 1.2643082701246018,
 1.413401695643799,
 1.4286081604372718,
 1.574906718608997,
 1.7615105421558346,
 1.7778940322194188,
 1.9297882630191197,
 2.05459243069334,
 2.1138319725162034,
 2.238893442240074,
 2.345075788801505,
 2.492275633441489,
 2.377569375629908,
 2.50334585379033,
 2.5020294125220177,
 2.636271110758585,
 2.6429183028910574,
 2.652362680965672,
 2.7411788656882687,
 2.814254487463414,
 2.8365696833756,
 2.859045105869528,
 2.830454862819677,
 2.906365023204888,
 2.9360527473028157,
 3.0122202935926317,
 3.1190935268674274,
 3.3933016120293202,
 3.547262407063052,
 3.444605215949536,
 3.2834428239241036,
 3.381750416500306,
 3.426320280491574,
 3.6964906038952035,
 3.7069633737872363,
 3.724159216038517,
 3.8705450259704715,
 3.89

In [76]:
df_wins_stds['2013'] = daily_wins_stds_2013
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721


## 2014

In [77]:
daily_division_dfs_2014 = []
for date_url in date_urls_2014:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2014.append(division_dfs)
daily_division_dfs_2014

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  LAD    0  .000  --
  4  SFG    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [78]:
daily_wins_stds_2014 = []
for division_dfs in daily_division_dfs_2014:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2014.append(daily_std)
daily_wins_stds_2014

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.14907119849998596,
 0.5791273086757831,
 0.7137299325410758,
 0.9746836290871687,
 0.9672285774057009,
 0.9846040171361032,
 1.1589174548680452,
 0.9846040171361032,
 0.9306319680764966,
 1.1440350471814693,
 1.0701498787305626,
 1.152085249994082,
 1.2089890092700335,
 1.3019977986980564,
 1.5256333135003406,
 1.6177758163964848,
 1.6801866358155009,
 1.7740118857371512,
 1.7892674783074494,
 1.8331249642131426,
 2.0211717673016856,
 2.091113011694588,
 2.0463208766691015,
 2.03356243925143,
 2.0463208766691015,
 2.0296510634693377,
 2.162855614994754,
 2.2697357142052983,
 2.318603846714717,
 2.5038140922913406,
 2.6098446935598587,
 2.6379207468226804,
 2.537035356258158,
 2.489417234342041,
 2.4015550082135855,
 2.361674150889547,
 2.5546117365293823,
 2.61279893519502,
 2.8017502

In [79]:
df_wins_stds['2014'] = daily_wins_stds_2014
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,10.39508,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415


## 2015

In [80]:
daily_division_dfs_2015 = []
for date_url in date_urls_2015:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2015.append(division_dfs)
daily_division_dfs_2015

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  MIN    0  .000  --
  1  CHW    0  .000  --
  2  DET    0  .000  --
  3  CLE    0  .000  --
  4  KCR    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [81]:
daily_wins_stds_2015 = []
for division_dfs in daily_division_dfs_2015:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2015.append(daily_std)
daily_wins_stds_2015

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.5477225575051662,
 0.5456243213407136,
 0.7939502201806014,
 0.996156750051895,
 1.1886180523980323,
 1.2237433069070853,
 1.1438883696359883,
 1.242171675523842,
 1.493328467250336,
 1.5157631565816108,
 1.5282017393029639,
 1.7407585701423505,
 1.6166526966544301,
 1.8169500515418042,
 2.070591311876608,
 2.1657408085534375,
 2.042381794215979,
 2.02816487873584,
 2.0959303107440097,
 2.222948603050845,
 2.2056880240512924,
 2.5079300566201765,
 2.5166601653686747,
 2.526378461860381,
 2.613282774424053,
 2.8823300830895278,
 2.9591795598818695,
 3.060121835653625,
 2.9607775125102553,
 3.1113621501590765,
 2.946445689874739,
 3.225254518024571,
 3.4126081679751272,
 3.476996078053974,
 3.4557351881891165,
 3.440034859128399,
 3.527682021203088,
 3.4270000404243284,
 3.544633387029221,
 3.5884145187111662,
 3.6709166240611153,
 3.8401814

In [82]:
df_wins_stds['2015'] = daily_wins_stds_2015
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.959797,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403


## 2016

In [83]:
daily_division_dfs_2016 = []
for date_url in date_urls_2016:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2016.append(division_dfs)
daily_division_dfs_2016

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  STL    0  .000  --
  4  CHC    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [84]:
daily_wins_stds_2016 = []
for division_dfs in daily_division_dfs_2016:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2016.append(daily_std)
daily_wins_stds_2016

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.22360679774997902,
 0.5720036757525552,
 0.7459786582108384,
 0.8777112344039969,
 0.9748683119555572,
 0.991207578349158,
 1.0362408997388668,
 1.1845347108696778,
 1.3285563223471486,
 1.550130115022343,
 1.62710308948445,
 1.9018038250505072,
 1.8778632089816896,
 1.8422127199527925,
 1.6505846151319605,
 1.633902058855415,
 1.738480878508529,
 1.8828234290250616,
 2.043346533944049,
 2.1380199127617323,
 2.2335347823570686,
 2.401306344219937,
 2.3313385133159765,
 2.518146329769879,
 2.6584244434437667,
 2.7075072191240683,
 2.909503480208389,
 3.089209017315803,
 3.0915041841476274,
 3.143392300294902,
 3.1775102888021665,
 3.158277315508536,
 3.2724986916123115,
 3.2656639126394293,
 3.4484031992417,
 3.622936201490123,
 3.708045126880591,
 3.909121124044122,
 3.918403275229668,
 3.9954865075661825,
 4.148944919466284,
 4.173439064233336,
 4.094398246624018,
 3.98

In [85]:
df_wins_stds['2016'] = daily_wins_stds_2016
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,9.60751,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759


## 2017

In [86]:
daily_division_dfs_2017 = []
for date_url in date_urls_2017:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2017.append(division_dfs)
daily_division_dfs_2017

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  OAK    0  .000  --
  2  SEA    0  .000  --
  3  LAA    0  .000  --
  4  HOU    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [87]:
daily_wins_stds_2017 = []
for division_dfs in daily_division_dfs_2017:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2017.append(daily_std)
daily_wins_stds_2017

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.22360679774997902,
 0.5309710638376315,
 0.6105320598463999,
 0.8673585145454412,
 0.9182277446922921,
 1.0468335025248685,
 1.125429756491942,
 1.310431425440679,
 1.335036058376762,
 1.6302766596199334,
 1.5725885038849434,
 1.6697255471994428,
 1.6103682075919608,
 1.5570468481449165,
 1.703437845211182,
 1.7179339010629804,
 1.7210516691980826,
 1.6659606369362472,
 1.7499334373243292,
 1.8227766314870406,
 2.0747804796099643,
 2.082706702125931,
 2.1652294707077178,
 2.373093411840516,
 2.3387291115879205,
 2.40572762535306,
 2.4231414802774385,
 2.3529996083071794,
 2.4915091172121877,
 2.403280307477743,
 2.5313576614623834,
 2.6164188239674666,
 2.774617679770424,
 2.803371908775315,
 2.926145644467018,
 3.0462596229485146,
 2.975355424486047,
 3.1839314017537164,
 3.265017652984539,
 3.300390070341972,
 3.1756012484879754,
 3.073724983887036,
 3.072923303612153,
 3.06

In [88]:
df_wins_stds['2017'] = daily_wins_stds_2017
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.277703,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222


## 2018

In [89]:
daily_division_dfs_2018 = []
for date_url in date_urls_2018:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2018.append(division_dfs)
daily_division_dfs_2018

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BAL    0  .000  --
  2  NYY    0  .000  --
  3  TOR    0  .000  --
  4  BOS 

In [90]:
daily_wins_stds_2018 = []
for division_dfs in daily_division_dfs_2018:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2018.append(daily_std)
daily_wins_stds_2018

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.5309710638376316,
 0.5144042530384856,
 0.77005470110403,
 0.9570057466268448,
 1.111584792043231,
 1.2029464578591453,
 1.4525483511225377,
 1.5420277910866946,
 1.6410225857703375,
 1.5424840172101788,
 1.831490118692465,
 1.9391380285102546,
 2.076629412251617,
 2.250811312077731,
 2.494347613911675,
 2.712827303301159,
 2.872842640691118,
 2.9444744453359744,
 2.777154399260395,
 2.898267222386672,
 3.0246992423638055,
 3.2123571103695334,
 3.2498328172401805,
 3.4794819033218842,
 3.529010498726235,
 3.417006477717211,
 3.4605836621468424,
 3.598562220730695,
 3.7087562611387335,
 3.691768527128295,
 3.5832647784215816,
 3.546954693917241,
 3.7166722035793733,
 3.7056084463557517,
 3.9129650414230004,
 3.936707940174161,
 3.9630798804974012,
 4.038005155870434,
 4.1204336618651025,
 4.142070922541138,
 4.360102569566711,
 4.33365116832582,
 4.399733087201292,
 4.210884474007933,
 4.3295327646319

In [91]:
df_wins_stds['2018'] = daily_wins_stds_2018
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.677836,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339


## 2019

In [92]:
daily_division_dfs_2019 = []
for date_url in date_urls_2019:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2019.append(division_dfs)
daily_division_dfs_2019

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  ATL    0  .000  --
  3  NYM    0  .000  --
  4  WSN    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [93]:
daily_wins_stds_2019 = []
for division_dfs in daily_division_dfs_2019:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2019.append(daily_std)
daily_wins_stds_2019

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.074535599249993,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.149071198499986,
 0.6438081161520354,
 0.653662907925776,
 0.8022498597373261,
 1.0153842088677412,
 1.0731586765241319,
 1.4955720896202156,
 1.5886720151344038,
 1.5644077854632785,
 1.6774772838583527,
 1.7408022542977324,
 1.9723793677823434,
 2.121312984775383,
 2.1733901743286426,
 2.3142020721566525,
 2.385947498167845,
 2.4718147910880663,
 2.354564315337439,
 2.326634358878391,
 2.2660988792441183,
 2.340633421713798,
 2.465251320843171,
 2.38536792547846,
 2.2916583667349517,
 2.393084536740957,
 2.3343578929101603,
 2.4326935482501026,
 2.40612214846379,
 2.3886889649777303,
 2.3321123920613296,
 2.4540648669264633,
 2.660549346710502,
 2.8836915634131532,
 2.9323886291389596,
 3.070290520977936,
 2.7996774384684326,
 2.865880732946875,
 2.9939621302251016,
 3.016003890045577,
 3.093062964361616,
 3.23206

In [94]:
df_wins_stds['2019'] = daily_wins_stds_2019
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.570622,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948


##  2020

In [95]:
daily_division_dfs_2020 = []
for date_url in date_urls_2020:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2020.append(division_dfs)
daily_division_dfs_2020

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [96]:
daily_wins_stds_2020 = []
for division_dfs in daily_division_dfs_2020:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2020.append(daily_std)
daily_wins_stds_2020

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.149071198499986,
 0.6055066630876245,
 0.3751455981511951,
 0.5309710638376314,
 0.7235424761537712,
 1.0081860030461265,
 0.8508903512639135,
 0.9771668510463272,
 1.1528546515948415,
 1.3478286433063467,
 1.65968394

In [97]:
df_wins_stds['2020'] = daily_wins_stds_2020
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.88689,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199


## 2021

In [98]:
daily_division_dfs_2021 = []
for date_url in date_urls_2021:
    response = requests.get(date_url)

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.select('table.stats_table')
    tables = [tables[0], tables[1], tables[2], tables[3], tables[4], tables[5]]
    division_dfs = []
    for table in tables:
        names = []
        for item in table('tr'):
            names.append(item('th')[0].text)
        names.remove('Tm')

        wins = []
        for item in table('tr'):
            try:
                wins.append(item('td')[0].text)
            except:
                pass    
    
        wps = []
        for item in table('tr'):
            try:
                wps.append(item('td')[2].text)
            except:
                pass

        gb_values = []
        for item in table('tr'):
            try:
                gb_values.append(item('td')[3].text)
            except:
                pass

        df_division = {}
        df_division['team'] = names
        df_division['wins'] = wins
        df_division['wp'] = wps
        df_division['gb'] = gb_values
        df_division = pd.DataFrame(df_division)
        division_dfs.append(df_division)
    daily_division_dfs_2021.append(division_dfs)
daily_division_dfs_2021

[[  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR    0  .000  --,
    team wins    wp  gb
  0  KCR    0  .000  --
  1  MIN    0  .000  --
  2  CHW    0  .000  --
  3  DET    0  .000  --
  4  CLE    0  .000  --,
    team wins    wp  gb
  0  TEX    0  .000  --
  1  HOU    0  .000  --
  2  OAK    0  .000  --
  3  SEA    0  .000  --
  4  LAA    0  .000  --,
    team wins    wp  gb
  0  PHI    0  .000  --
  1  MIA    0  .000  --
  2  WSN    0  .000  --
  3  ATL    0  .000  --
  4  NYM    0  .000  --,
    team wins    wp  gb
  0  CIN    0  .000  --
  1  PIT    0  .000  --
  2  MIL    0  .000  --
  3  CHC    0  .000  --
  4  STL    0  .000  --,
    team wins    wp  gb
  0  SDP    0  .000  --
  1  ARI    0  .000  --
  2  COL    0  .000  --
  3  SFG    0  .000  --
  4  LAD    0  .000  --],
 [  team wins    wp  gb
  0  TBR    0  .000  --
  1  BOS    0  .000  --
  2  BAL    0  .000  --
  3  NYY    0  .000  --
  4  TOR 

In [99]:
daily_wins_stds_2021 = []
for division_dfs in daily_division_dfs_2021:
    division_stds = []
    for division in division_dfs:
        division_stds.append(division.wins.astype(float).std().astype(float))
    division_stds
    daily_std = sum(division_stds)/len(division_stds)
    daily_wins_stds_2021.append(daily_std)
daily_wins_stds_2021

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.5309710638376315,
 0.6538475907941649,
 0.8415939092778465,
 1.036007494374087,
 1.249677620612898,
 1.2819044673153037,
 1.2360973743034611,
 1.3971429118796321,
 1.415162621868588,
 1.3292672747442327,
 1.3975033788238909,
 1.3905976972027012,
 1.282277859204373,
 1.3810407175802843,
 1.432114885333302,
 1.5184823751359187,
 1.5975717677300707,
 1.722354559973277,
 1.8741434736567069,
 1.893564079582207,
 1.8091576648486594,
 1.7173515926772371,
 1.7573205910292569,
 1.9477852567590954,
 2.0215379320016615,
 2.0954609398483472,
 2.0126865146809934,
 2.151198883961602,
 2.207274445872215,
 2.251749839843202,
 2.128639500028885,
 2.105794557020896,
 2.2355462308498844,
 2.2985705913423233,
 2.180911262495434,
 2.3024276753686883,
 2.4332639430571374,
 2.66383122211989,
 2.786231528381752,
 2.6810646845616986,
 2.8978762117197583,
 3.040043688310504,
 3.130465755367952,
 3.2229036573

In [100]:
df_wins_stds['2021'] = daily_wins_stds_2021
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983


In [101]:
# Let's create a 'date' column.
df_dates = pd.DataFrame(dates_1995)
df_dates['date'] = df_dates[1].astype(str) + '/' + df_dates[2].astype(str)
df_dates

Unnamed: 0,0,1,2,date
0,1995,3,15,3/15
1,1995,3,16,3/16
2,1995,3,17,3/17
3,1995,3,18,3/18
4,1995,3,19,3/19
...,...,...,...,...
206,1995,10,7,10/7
207,1995,10,8,10/8
208,1995,10,9,10/9
209,1995,10,10,10/10


In [102]:
# And add it to our df.
df_wins_stds['date'] = df_dates.date
df_wins_stds

Unnamed: 0,0,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,date
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/15
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/16
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/17
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/18
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/7
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/8
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/9
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/10


In [103]:
# Let's rename our first column that we created this df with.
df_wins_stds = df_wins_stds.rename(columns={0: "1995"})
df_wins_stds


Unnamed: 0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,date
0,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/15
1,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/16
2,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/17
3,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/18
4,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/7
207,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/8
208,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/9
209,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/10


In [121]:
# This df is ready to go! Let's send it to csv.
df_wins_stds.to_csv("df_wins_stds.csv")


## A Second DataFrame for final deviations compared to season length

In [148]:
# We want to create another df comparing the end of season standard deviations in wins to the length of the scheduled season.
# Let's take the last column in our last df (end of season stdevs) and add a 'scheduled_games' column.

df_eos_wins_stds = pd.DataFrame(df_wins_stds.transpose()[210])
df_eos_wins_stds['scheduled_games'] = [144, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 60, 89.8, 0]
df_eos_wins_stds = df_eos_wins_stds.drop('date')
df_eos_wins_stds

Unnamed: 0,210,scheduled_games
0,10.160298,144.0
1996,10.361502,162.0
1997,9.09709,162.0
1998,13.262973,162.0
1999,12.955826,162.0
2000,10.148611,162.0
2001,12.876892,162.0
2002,14.531683,162.0
2003,13.790984,162.0
2004,14.43198,162.0


In [150]:
df_eos_wins_stds = df_eos_wins_stds.rename(columns={210: "wins_std"})
df_eos_wins_stds

Unnamed: 0,wins_std,scheduled_games
0,10.160298,144.0
1996,10.361502,162.0
1997,9.09709,162.0
1998,13.262973,162.0
1999,12.955826,162.0
2000,10.148611,162.0
2001,12.876892,162.0
2002,14.531683,162.0
2003,13.790984,162.0
2004,14.43198,162.0


In [153]:
# And create a per-game ratio
df_eos_wins_stds['std_per_g'] = df_eos_wins_stds.wins_std/df_eos_wins_stds.scheduled_games
df_eos_wins_stds


Unnamed: 0,wins_std,scheduled_games,std/g,std_per_g
0,10.160298,144.0,0.070558,0.070558
1996,10.361502,162.0,0.06396,0.06396
1997,9.09709,162.0,0.056155,0.056155
1998,13.262973,162.0,0.08187,0.08187
1999,12.955826,162.0,0.079974,0.079974
2000,10.148611,162.0,0.062646,0.062646
2001,12.876892,162.0,0.079487,0.079487
2002,14.531683,162.0,0.089702,0.089702
2003,13.790984,162.0,0.08513,0.08513
2004,14.43198,162.0,0.089086,0.089086


In [155]:
df_eos_wins_stds.drop('std/g', axis=1)

Unnamed: 0,wins_std,scheduled_games,std_per_g
0,10.160298,144.0,0.070558
1996,10.361502,162.0,0.06396
1997,9.09709,162.0,0.056155
1998,13.262973,162.0,0.08187
1999,12.955826,162.0,0.079974
2000,10.148611,162.0,0.062646
2001,12.876892,162.0,0.079487
2002,14.531683,162.0,0.089702
2003,13.790984,162.0,0.08513
2004,14.43198,162.0,0.089086


In [159]:
df_eos_wins_stds = df_eos_wins_stds.rename(index={0: "1995"})
df_eos_wins_stds

Unnamed: 0,wins_std,scheduled_games,std/g,std_per_g
1995,10.160298,144.0,0.070558,0.070558
1996,10.361502,162.0,0.06396,0.06396
1997,9.09709,162.0,0.056155,0.056155
1998,13.262973,162.0,0.08187,0.08187
1999,12.955826,162.0,0.079974,0.079974
2000,10.148611,162.0,0.062646,0.062646
2001,12.876892,162.0,0.079487,0.079487
2002,14.531683,162.0,0.089702,0.089702
2003,13.790984,162.0,0.08513,0.08513
2004,14.43198,162.0,0.089086,0.089086


In [161]:
# Maybe make it a per 162 games ratio to make it more understandable to readers.
df_eos_wins_stds['std_per_162'] = (df_eos_wins_stds.wins_std/df_eos_wins_stds.scheduled_games)*162
df_eos_wins_stds


Unnamed: 0,wins_std,scheduled_games,std/g,std_per_g,std_per_162
1995,10.160298,144.0,0.070558,0.070558,11.430335
1996,10.361502,162.0,0.06396,0.06396,10.361502
1997,9.09709,162.0,0.056155,0.056155,9.09709
1998,13.262973,162.0,0.08187,0.08187,13.262973
1999,12.955826,162.0,0.079974,0.079974,12.955826
2000,10.148611,162.0,0.062646,0.062646,10.148611
2001,12.876892,162.0,0.079487,0.079487,12.876892
2002,14.531683,162.0,0.089702,0.089702,14.531683
2003,13.790984,162.0,0.08513,0.08513,13.790984
2004,14.43198,162.0,0.089086,0.089086,14.43198


In [162]:
# This is ready to go! Send it to csv.
df_eos_wins_stds.to_csv('df_eos_wins_stds.csv')

In [104]:
# Just a little clean-up of our original df

df_wins_stds.index = df_wins_stds.date

df_wins_stds

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3/15,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/15
3/16,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/16
3/17,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/17
3/18,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/18
3/19,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3/19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10/7,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/7
10/8,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/8
10/9,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/9
10/10,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983,10/10


In [105]:
df_wins_stds = df_wins_stds.drop('date', axis=1)
df_wins_stds

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3/15,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/16,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/17,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/18,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/19,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10/7,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/8,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/9,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/10,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983


In [108]:
df_wins_stds = df_wins_stds.drop('3/15', axis=0)
df_wins_stds

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3/16,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/17,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/18,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/19,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3/20,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.074536,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10/7,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/8,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/9,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983
10/10,10.160298,10.361502,9.09709,13.262973,12.955826,10.148611,12.876892,14.531683,13.790984,14.43198,...,11.799105,12.467721,10.183415,10.185403,11.406759,12.024222,14.195339,16.594948,5.831199,14.635983


### Melting

In [109]:
#Ok, we gotta melt. We need a df where each value is it's own row, with date and year as id variables.

melted_df = pd.melt(df_wins_stds, id_vars = [df_wins_stds.columns[0]], ignore_index=False)
melted_df

Unnamed: 0_level_0,1995,variable,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3/16,0.000000,1996,0.000000
3/17,0.000000,1996,0.000000
3/18,0.000000,1996,0.000000
3/19,0.000000,1996,0.000000
3/20,0.000000,1996,0.000000
...,...,...,...
10/7,10.160298,2021,14.635983
10/8,10.160298,2021,14.635983
10/9,10.160298,2021,14.635983
10/10,10.160298,2021,14.635983


In [110]:
melted_df['variable'] = melted_df['variable'].astype(int)
melted_df.dtypes

1995        float64
variable      int64
value       float64
dtype: object

In [111]:
pd.DataFrame(melted_df['1995'].head(210))

Unnamed: 0_level_0,1995
date,Unnamed: 1_level_1
3/16,0.000000
3/17,0.000000
3/18,0.000000
3/19,0.000000
3/20,0.000000
...,...
10/7,10.160298
10/8,10.160298
10/9,10.160298
10/10,10.160298


In [112]:
melted_df_1995 = pd.DataFrame(melted_df['1995'].head(210))
melted_df_1995['variable'] = 1995
melted_df_1995 = melted_df_1995.rename(columns={'1995': 'value'})
melted_df_1995

Unnamed: 0_level_0,value,variable
date,Unnamed: 1_level_1,Unnamed: 2_level_1
3/16,0.000000,1995
3/17,0.000000,1995
3/18,0.000000,1995
3/19,0.000000,1995
3/20,0.000000,1995
...,...,...
10/7,10.160298,1995
10/8,10.160298,1995
10/9,10.160298,1995
10/10,10.160298,1995


In [113]:
melted_df_1995.dtypes

value       float64
variable      int64
dtype: object

In [114]:
melted_df_all = melted_df_1995.merge(melted_df, 'outer').drop('1995', axis=1)
melted_df_all

Unnamed: 0,value,variable
0,0.000000,1995
1,0.000000,1995
2,0.000000,1995
3,0.000000,1995
4,0.000000,1995
...,...,...
5665,14.635983,2021
5666,14.635983,2021
5667,14.635983,2021
5668,14.635983,2021


In [115]:
list_of_dates_5460 = []
for index in melted_df.index:
    list_of_dates_5460.append(index)
list_of_dates_5460

['3/16',
 '3/17',
 '3/18',
 '3/19',
 '3/20',
 '3/21',
 '3/22',
 '3/23',
 '3/24',
 '3/25',
 '3/26',
 '3/27',
 '3/28',
 '3/29',
 '3/30',
 '3/31',
 '4/1',
 '4/2',
 '4/3',
 '4/4',
 '4/5',
 '4/6',
 '4/7',
 '4/8',
 '4/9',
 '4/10',
 '4/11',
 '4/12',
 '4/13',
 '4/14',
 '4/15',
 '4/16',
 '4/17',
 '4/18',
 '4/19',
 '4/20',
 '4/21',
 '4/22',
 '4/23',
 '4/24',
 '4/25',
 '4/26',
 '4/27',
 '4/28',
 '4/29',
 '4/30',
 '5/1',
 '5/2',
 '5/3',
 '5/4',
 '5/5',
 '5/6',
 '5/7',
 '5/8',
 '5/9',
 '5/10',
 '5/11',
 '5/12',
 '5/13',
 '5/14',
 '5/15',
 '5/16',
 '5/17',
 '5/18',
 '5/19',
 '5/20',
 '5/21',
 '5/22',
 '5/23',
 '5/24',
 '5/25',
 '5/26',
 '5/27',
 '5/28',
 '5/29',
 '5/30',
 '5/31',
 '6/1',
 '6/2',
 '6/3',
 '6/4',
 '6/5',
 '6/6',
 '6/7',
 '6/8',
 '6/9',
 '6/10',
 '6/11',
 '6/12',
 '6/13',
 '6/14',
 '6/15',
 '6/16',
 '6/17',
 '6/18',
 '6/19',
 '6/20',
 '6/21',
 '6/22',
 '6/23',
 '6/24',
 '6/25',
 '6/26',
 '6/27',
 '6/28',
 '6/29',
 '6/30',
 '7/1',
 '7/2',
 '7/3',
 '7/4',
 '7/5',
 '7/6',
 '7/7',
 '7/8',


In [116]:
list_of_dates_210 = []
for index in melted_df_1995.index:
    list_of_dates_210.append(index)
list_of_dates_210

['3/16',
 '3/17',
 '3/18',
 '3/19',
 '3/20',
 '3/21',
 '3/22',
 '3/23',
 '3/24',
 '3/25',
 '3/26',
 '3/27',
 '3/28',
 '3/29',
 '3/30',
 '3/31',
 '4/1',
 '4/2',
 '4/3',
 '4/4',
 '4/5',
 '4/6',
 '4/7',
 '4/8',
 '4/9',
 '4/10',
 '4/11',
 '4/12',
 '4/13',
 '4/14',
 '4/15',
 '4/16',
 '4/17',
 '4/18',
 '4/19',
 '4/20',
 '4/21',
 '4/22',
 '4/23',
 '4/24',
 '4/25',
 '4/26',
 '4/27',
 '4/28',
 '4/29',
 '4/30',
 '5/1',
 '5/2',
 '5/3',
 '5/4',
 '5/5',
 '5/6',
 '5/7',
 '5/8',
 '5/9',
 '5/10',
 '5/11',
 '5/12',
 '5/13',
 '5/14',
 '5/15',
 '5/16',
 '5/17',
 '5/18',
 '5/19',
 '5/20',
 '5/21',
 '5/22',
 '5/23',
 '5/24',
 '5/25',
 '5/26',
 '5/27',
 '5/28',
 '5/29',
 '5/30',
 '5/31',
 '6/1',
 '6/2',
 '6/3',
 '6/4',
 '6/5',
 '6/6',
 '6/7',
 '6/8',
 '6/9',
 '6/10',
 '6/11',
 '6/12',
 '6/13',
 '6/14',
 '6/15',
 '6/16',
 '6/17',
 '6/18',
 '6/19',
 '6/20',
 '6/21',
 '6/22',
 '6/23',
 '6/24',
 '6/25',
 '6/26',
 '6/27',
 '6/28',
 '6/29',
 '6/30',
 '7/1',
 '7/2',
 '7/3',
 '7/4',
 '7/5',
 '7/6',
 '7/7',
 '7/8',


In [117]:
list_of_all_dates = (list_of_dates_5460 + list_of_dates_210)

In [118]:
melted_df_all['date'] = list_of_all_dates
melted_df_all

Unnamed: 0,value,variable,date
0,0.000000,1995,3/16
1,0.000000,1995,3/17
2,0.000000,1995,3/18
3,0.000000,1995,3/19
4,0.000000,1995,3/20
...,...,...,...
5665,14.635983,2021,10/7
5666,14.635983,2021,10/8
5667,14.635983,2021,10/9
5668,14.635983,2021,10/10


In [119]:
melted_df_all.to_csv('melted_df_all.csv')