# MLB All-Star Roster Analysis

Source: https://www.seanlahman.com/baseball-archive/statistics

In [1]:
import numpy as np
# import packages

import polars as pl
import os
import helpers.dataframe_utils as dfUtils
import helpers.toad_utils as ToadUtils
from models.baseball_databank import BaseballDatabank
from helpers.download_helper import DownloadHelper

In [2]:
# initialize model

Model = BaseballDatabank()

print(Model.source_url)
print(Model.data_dir)

https://raw.githubusercontent.com/chadwickbureau/baseballdatabank/master
/Users/ajp/Documents/Projects/AllStarRosters/data/baseball-databank


In [None]:
# download baseball databank data

Model.download()

In [46]:
# get base people dataframe

dfPeople = pl.read_csv(os.path.join(Model.data_dir, 'raw', 'People.csv')).rename(Model.config_headers['People'])

dfPeople.schema

{'player_id': Utf8,
 'birth_year': Int64,
 'birth_month': Int64,
 'birth_day': Int64,
 'birth_country': Utf8,
 'birth_state': Utf8,
 'birth_city': Utf8,
 'death_year': Int64,
 'death_month': Int64,
 'death_day': Int64,
 'death_country': Utf8,
 'death_state': Utf8,
 'death_city': Utf8,
 'first_name': Utf8,
 'last_name': Utf8,
 'given_name': Utf8,
 'weight': Int64,
 'height': Int64,
 'batting_hand': Utf8,
 'throwing_hand': Utf8,
 'debut_date': Utf8,
 'final_game': Utf8,
 'retro_id': Utf8,
 'bbref_id': Utf8}

In [47]:
# fix column names

dfPeople = dfPeople.rename(Model.config_headers['People'])
dfPeople.schema

NotFoundError: playerID

In [49]:
# combine birth date columns

birth_cols = ['birth_year', 'birth_month', 'birth_day']

dfPeople = dfPeople.with_column((pl.date(
        pl.col(birth_cols[0]),
        pl.col(birth_cols[1]),
        pl.col(birth_cols[2])
)).alias('birth_date'))

dfPeople = dfPeople.drop(birth_cols)

dfPeople.head(25)

NotFoundError: birth_year

In [51]:
# combine death date columns

birth_cols = ['death_year', 'death_month', 'death_day']

dfPeople = dfPeople.with_column((pl.date(
        pl.col(birth_cols[0]),
        pl.col(birth_cols[1]),
        pl.col(birth_cols[2])
)).alias('death_date'))

dfPeople = dfPeople.drop(birth_cols)

dfPeople.head(25)

player_id,birth_country,birth_state,birth_city,death_country,death_state,death_city,first_name,last_name,given_name,weight,height,batting_hand,throwing_hand,debut_date,final_game,retro_id,bbref_id,birth_date,death_date
str,str,str,str,str,str,str,str,str,str,i64,i64,str,str,str,str,str,str,date,date
"""aardsda01""","""USA""","""CO""","""Denver""",,,,"""David""","""Aardsma""","""David Allan""",215.0,75.0,"""R""","""R""","""2004-04-06""","""2015-08-23""","""aardd001""","""aardsda01""",1981-12-27,
"""aaronha01""","""USA""","""AL""","""Mobile""","""USA""","""GA""","""Atlanta""","""Hank""","""Aaron""","""Henry Louis""",180.0,72.0,"""R""","""R""","""1954-04-13""","""1976-10-03""","""aaroh101""","""aaronha01""",1934-02-05,2021-01-22
"""aaronto01""","""USA""","""AL""","""Mobile""","""USA""","""GA""","""Atlanta""","""Tommie""","""Aaron""","""Tommie Lee""",190.0,75.0,"""R""","""R""","""1962-04-10""","""1971-09-26""","""aarot101""","""aaronto01""",1939-08-05,1984-08-16
"""aasedo01""","""USA""","""CA""","""Orange""",,,,"""Don""","""Aase""","""Donald William...",190.0,75.0,"""R""","""R""","""1977-07-26""","""1990-10-03""","""aased001""","""aasedo01""",1954-09-08,
"""abadan01""","""USA""","""FL""","""Palm Beach""",,,,"""Andy""","""Abad""","""Fausto Andres""",184.0,73.0,"""L""","""L""","""2001-09-10""","""2006-04-13""","""abada001""","""abadan01""",1972-08-25,
"""abadfe01""","""D.R.""","""La Romana""","""La Romana""",,,,"""Fernando""","""Abad""","""Fernando Anton...",235.0,74.0,"""L""","""L""","""2010-07-28""","""2021-10-01""","""abadf001""","""abadfe01""",1985-12-17,
"""abadijo01""","""USA""","""PA""","""Philadelphia""","""USA""","""NJ""","""Pemberton""","""John""","""Abadie""","""John W.""",192.0,72.0,"""R""","""R""","""1875-04-26""","""1875-06-10""","""abadj101""","""abadijo01""",1850-11-04,1905-05-17
"""abbated01""","""USA""","""PA""","""Latrobe""","""USA""","""FL""","""Fort Lauderdal...","""Ed""","""Abbaticchio""","""Edward James""",170.0,71.0,"""R""","""R""","""1897-09-04""","""1910-09-15""","""abbae101""","""abbated01""",1877-04-15,1957-01-06
"""abbeybe01""","""USA""","""VT""","""Essex""","""USA""","""VT""","""Colchester""","""Bert""","""Abbey""","""Bert Wood""",175.0,71.0,"""R""","""R""","""1892-06-14""","""1896-09-23""","""abbeb101""","""abbeybe01""",1869-11-11,1962-06-11
"""abbeych01""","""USA""","""NE""","""Falls City""","""USA""","""CA""","""San Francisco""","""Charlie""","""Abbey""","""Charles S.""",169.0,68.0,"""L""","""L""","""1893-08-16""","""1897-08-19""","""abbec101""","""abbeych01""",1866-10-14,1926-04-27


In [52]:
# combine birth location columns

In [None]:
# combine death location columns