### 2021 Games

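Run this notebook to download each team's 2021 schedule from DakStats, keep the conference games that have already been played, and save the results to `2021games.pkl`.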

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pickle

In [2]:
# Short team name -> numeric id sent as the `team=` query parameter when the
# schedule pages are requested. Insertion order is the order used everywhere else.
TEAM_IDS = {
    "Bethel": 1629,
    "Goshen": 1678,
    "Grace": 1679,
    "HU": 1688,
    "IWU": 1694,
    "Marian": 1717,
    "MVNU": 1736,
    "SAU": 1780,
    "SFU": 1805,
    "Taylor": 1784,
}
# Parallel lists derived from the mapping: teams[i] belongs with t_nums[i].
teams = list(TEAM_IDS)
t_nums = list(TEAM_IDS.values())

In [3]:
# Build one schedule URL per team; only the trailing `team=` id differs.
urls = ['http://www.dakstats.com/WebSync/Pages/Team/TeamSchedule.aspx?association=10&sg=MBA&sea=NAIMBA_2021&team=' +
        str(num) for num in t_nums]

# Download every schedule page. The timeout stops a stalled connection from
# hanging the kernel, and raise_for_status() fails right here with the HTTP
# status instead of later as an opaque IndexError while indexing the tables.
pages = [requests.get(url, timeout=30) for url in urls]
for page in pages:
    page.raise_for_status()

# Parse each page with BeautifulSoup4.
# NOTE(review): no parser is named, so bs4 picks whichever one is installed; the
# hard-coded table index below may depend on that choice - confirm before pinning one.
soups = [BeautifulSoup(page.content) for page in pages]

# team_tables[team][table][row] is the list of stripped <td> texts of that row.
team_tables = [
    [
        [
            [td.get_text(strip=True) for td in tr.find_all('td')]
            for tr in table.find_all('tr')
        ]  # for each row in each table
        for table in soup.find_all('table')
    ]  # for each table on each webpage
    for soup in soups
]  # for each team's webpage

headers = [['Date', 'Opponent', 'Location', 'Score', 'Outcome'] for tables in team_tables]
# The schedule rows are read from table 35 of each page: every second row
# starting at index 1, truncated to the first five cells to match `headers`.
team_rows = [[r[:5] for r in tables[35][1::2]] for tables in team_tables]
dfc = [pd.DataFrame(columns = headers[i], data = team_rows[i]) for i in range(len(headers))]

# Keep rows whose opponent name contains "*" (the conference-game marker this
# notebook relies on), then only those whose Score contains "-".
conf_df = [df[df.Opponent.str.contains("*", regex = False)] for df in dfc]
conf_df_played = [df[df.Score.str.contains("-", regex = False)] for df in conf_df]

# Tidy each team's frame: numeric scores, cleaned opponent name, real dates,
# and a Team column in second position.
tidy_conf2021 = []
for team_name, df in zip(teams, conf_df_played):
    # regex=True must be explicit: since pandas 2.0 Series.str.replace treats
    # the pattern literally by default, which would leave any parenthetical
    # suffix in place and make pd.to_numeric fail.
    split_scores = (
        df['Score']
        .str.replace(r"\(.*\)", "", regex=True)
        .str.split('-', expand=True)
    )
    tidy = df.assign(
        Score=pd.to_numeric(split_scores[0]),
        Opp_score=pd.to_numeric(split_scores[1]),
        # Raw string: '\*' in a plain literal is an invalid escape sequence.
        Opponent=df.Opponent.str.replace(r' \*', '', regex=True),
        Date=pd.to_datetime(df.Date),
    )
    # df.assign returned a new frame, so inserting the column in place is safe.
    tidy.insert(1, "Team", team_name)
    tidy_conf2021.append(tidy)

In [4]:
# For every team keep only the rows whose Outcome contains "W".
conf_w2021 = []
for team_games in tidy_conf2021:
    won = team_games.Outcome.str.contains("W", regex = False)
    conf_w2021.append(team_games[won])

# Preview: the last three wins of the third team in the list.
conf_w2021[2].tail(3)

Unnamed: 0,Date,Team,Opponent,Location,Score,Outcome,Opp_score
9,2021-03-06,Grace,Bethel (Ind.),H,14,W,5
12,2021-03-12,Grace,Huntington (Ind.),N,4,W,2
18,2021-03-19,Grace,St. Francis (Ind.),H,8,W,7


In [5]:
# Stack every team's wins into one table with one row per game, named from the
# winner's point of view. Outcome is dropped because every remaining row is a win.
winner_columns = {
    'Team': 'Win_Tm',
    'Opponent': 'Lose_Tm',
    'Score': 'W_Score',
    'Opp_score': 'L_Score',
}
new_games = (
    pd.concat(conf_w2021)
    .sort_values(['Date', 'Team'])
    .rename(columns=winner_columns)
    .drop(columns="Outcome")
)

In [6]:
# Translate opponent names as they appear on the schedule pages into the short
# team names used in the Win_Tm column.
short_names = {
    'Bethel (Ind.)' : 'Bethel',
    'Taylor (Ind.)' : 'Taylor',
    'Spring Arbor (Mich.)' : 'SAU',
    'Huntington (Ind.)' : 'HU',
    'St. Francis (Ind.)' : 'SFU',
    'Indiana Wesleyan' : 'IWU',
    'Mount Vernon Nazarene (Ohio)' : 'MVNU',
    'Marian (Ind.)' : 'Marian',
    'Goshen (Ind.)' : 'Goshen',
    'Grace (Ind.)' : 'Grace',
}
# Assign the result back to the column. Calling .replace(..., inplace=True) on
# `new_games.Lose_Tm` is a chained in-place update of a column Series, which
# does not modify `new_games` under pandas Copy-on-Write.
new_games["Lose_Tm"] = new_games["Lose_Tm"].replace(short_names)

In [7]:
# Preview the five most recent rows of the results table.
new_games.tail(5)

Unnamed: 0,Date,Win_Tm,Lose_Tm,Location,W_Score,L_Score
26,2021-03-20,Taylor,MVNU,H,4,1
24,2021-03-22,Marian,SAU,H,9,4
19,2021-03-22,SAU,Marian,A,12,11
27,2021-03-22,Taylor,MVNU,H,1,0
28,2021-03-22,Taylor,MVNU,H,9,2


In [8]:
# Serialize the results table to 2021games.pkl in the working directory.
pickle_path = '2021games.pkl'
with open(pickle_path, 'wb') as out_file:
    pickle.dump(new_games, out_file)