# Baseball H2H Dashboard
---

**Author:** Jackson Muehlbauer

**Date:** 9/2023

---
**Description:** 

This notebook creates an interactive dashboard. The user selects two MLB teams and has them face off on split violin plots. The notebook uses the sportsreference library to access data. 

In [2]:
# Imports
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs


In [3]:
# Column selections for the batting and pitching tables.
# NOTE(review): the scraped batting frame printed by df.info() further down
# uses lowercase 'pos' and 'player' rather than 'Pos'/'Name' -- confirm these
# labels match the frame wherever the selections are applied.
batting_sel = [
    "Pos", "Name",
    "PA", "SB", "BA", "OBP", "SLG",
]
pitching_sel = [
    "Pos", "Name",
    "ERA", "IP", "WHIP", "SO9",
]

In [7]:
# Fetch the 2023 LAD team page.
url = "https://www.baseball-reference.com/teams/LAD/2023.shtml"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error status instead of trying to parse an error page.
page.raise_for_status()

soup = bs(page.content, "html.parser")


def _table_records(page_soup, container_id):
    """
    Collect one record per data row of a stats table on the page.

    page_soup : BeautifulSoup, parsed team page
    container_id : str, id of the element wrapping the table
    return list of dicts mapping each cell's data-stat name to its stripped text

    Raises ValueError when the container or its table body cannot be found.
    """
    container = page_soup.find(id=container_id)
    if container is None:
        raise ValueError(f"No element with id={container_id!r} found on the page")

    # The table body is taken from the raw markup (the text between the first
    # two occurrences of 'tbody') and re-parsed on its own.
    pieces = container.decode_contents().split('tbody')
    if len(pieces) < 2:
        raise ValueError(f"No table body found inside element {container_id!r}")
    body_soup = bs(pieces[1].strip(), "html.parser")

    records = []
    for row in body_soup.find_all('tr'):
        record = {
            cell['data-stat']: cell.text.strip()
            for cell in row.find_all('td')
            if cell.has_attr('data-stat')
        }
        # Rows with no <td> cells yield an empty record and are skipped.
        if record:
            records.append(record)
    return records


# Same extraction for both tables; only the container id differs.
batting_records = _table_records(soup, "all_team_batting")
pitching_records = _table_records(soup, "all_team_pitching")
        


In [89]:
# Function to grab players on team
def get_players_on_team(team, year, player_type, min_playing_time=10):
    """
    Scrape one stats table from a team's season page into a DataFrame.

    team : str, 3 Character Name of Team
    year : int, ex. 2021
    player_type : str, "batting" or "pitching"
    min_playing_time : number, rows are kept only when PA (batting) or
        IP (pitching) is strictly greater than this value (default 10)
    return dataframe with one row per player; columns are named after each
        cell's data-stat attribute, fully numeric columns are converted to
        numeric dtypes, and the pre-filter row position is kept in an
        'index' column

    Raises requests.HTTPError on an HTTP error status, and ValueError when the
    requested table (or any player row in it) cannot be found on the page.
    """
    url = f'https://www.baseball-reference.com/teams/{team}/{year}.shtml'
    # A timeout keeps a stalled connection from hanging the kernel indefinitely.
    page = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error status instead of parsing an error page.
    page.raise_for_status()
    soup = bs(page.content, "html.parser")

    table = soup.find(id=f"all_team_{player_type}")
    if table is None:
        raise ValueError(
            f"No 'all_team_{player_type}' table found at {url}"
        )

    # The table body is taken from the raw markup (the text between the first
    # two occurrences of 'tbody') and re-parsed on its own.
    pieces = table.decode_contents().split('tbody')
    if len(pieces) < 2:
        raise ValueError(f"No table body found in 'all_team_{player_type}' at {url}")
    tab_soup = bs(pieces[1].strip(), "html.parser")

    # extracting records from table
    records = []
    for row in tab_soup.find_all('tr'):
        record = {
            col['data-stat']: col.text.strip()
            for col in row.find_all('td')
            if col.has_attr('data-stat')
        }
        # Rows with no <td> cells yield an empty record and are skipped.
        if record:
            records.append(record)

    if not records:
        raise ValueError(f"No player rows found in 'all_team_{player_type}' at {url}")

    # Convert to DataFrame
    df = pd.DataFrame.from_records(records)

    # Filter on the playing-time column that belongs to the table type.
    volume_col = {"batting": "PA", "pitching": "IP"}.get(player_type)
    if volume_col is not None:
        df[volume_col] = pd.to_numeric(df[volume_col], errors='coerce')
        # Unparseable values become NaN and fail the comparison, so they drop out.
        df = df[df[volume_col] > min_playing_time].copy()
    if player_type == "pitching":
        # errors='ignore' so a page without this column does not abort the scrape.
        df = df.drop(columns='win_loss_perc', errors='ignore')

    # Clean: convert only the columns in which every remaining value parses
    # as a number; anything else is left as text.
    numeric = df.apply(pd.to_numeric, errors='coerce')
    fully_numeric = numeric.notna().all()
    coercible_columns = fully_numeric[fully_numeric].index
    df[coercible_columns] = numeric[coercible_columns]

    # reset_index without drop=True keeps the old row labels as an 'index' column.
    df = df.reset_index()

    return df
    
    

In [91]:
# Run the scraper for the 2023 LAD batting table and inspect the resulting schema.
df = get_players_on_team(team='LAD', year=2023, player_type='batting')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   index                      20 non-null     int64  
 1   pos                        20 non-null     object 
 2   player                     20 non-null     object 
 3   age                        20 non-null     int64  
 4   G                          20 non-null     int64  
 5   PA                         20 non-null     int64  
 6   AB                         20 non-null     int64  
 7   R                          20 non-null     int64  
 8   H                          20 non-null     int64  
 9   2B                         20 non-null     int64  
 10  3B                         20 non-null     int64  
 11  HR                         20 non-null     int64  
 12  RBI                        20 non-null     int64  
 13  SB                         20 non-null     int64  
 