# MLB All-Star Game Television Viewership Analysis

Source: https://www.baseball-almanac.com/asgbox/asgtv.shtml

In [1]:
# import packages
import os

import pandas as pd

import helpers.dataframe_utils as DataUtils
import helpers.toad_utils as ToadUtils
import models.baseball_almanac as Model
from helpers.download_helper import DownloadHelper
from models.baseball_almanac import MovingRangeCalc

Invoking __init__.py for models


In [2]:
# set variables

# Commit of BronzeToad/AllStarRosters that the viewership CSV is pinned to.
blob = '87d0391b48e4a05b3cd1e3bcf7f000e62623ede8'

# Point at the raw-content host: a github.com/.../blob/... address serves the
# HTML file viewer rather than the CSV file itself.
url = f'https://raw.githubusercontent.com/BronzeToad/AllStarRosters/{blob}/data/baseball-almanac/all_star_game_tv_stats.csv'

# Local folder the CSV is downloaded into and later read back from.
data_dir = os.path.join(Model.ROOT_DIR, 'data', 'baseball-almanac')

print(f'URL: {url}\n'
      f'DATA_DIR: {data_dir}')

URL: https://raw.githubusercontent.com/BronzeToad/AllStarRosters/1.2.1/data/baseball-almanac/all_star_game_tv_stats.csv
DATA_DIR: /Users/ajp/Documents/Projects/AllStarRosters/data/baseball-almanac


In [3]:
# download viewership data

# Fetch the CSV at `url` and store it under `data_dir`.
downloader = DownloadHelper(url=url, save_dir=data_dir)
downloader.download()

Filename all_star_game_tv_stats.csv saved successfully to /Users/ajp/Documents/Projects/AllStarRosters/data/baseball-almanac.
Content from https://raw.githubusercontent.com/BronzeToad/AllStarRosters/1.2.1/data/baseball-almanac/all_star_game_tv_stats.csv downloaded successfully.


In [4]:
# create tv viewership dataframe

# The file on disk carries the same name as the last segment of the download URL.
csv_filename = ToadUtils.get_filename_from_url(url)
df = ToadUtils.get_csv(folder=data_dir, filename=csv_filename)

Dataframe rows: 55, Dataframe columns: 6


In [6]:
# update column names

# Map of source column headers to the names used in the rest of the notebook.
new_col_names = {
    'Year | ASG': 'Year',
    'Households': 'HouseholdViewers'
}

# Rebind instead of mutating in place so the rename reads as an explicit step.
df = df.rename(columns=new_col_names)
print(df.columns)

Index(['Year', 'Network', 'Rating', 'Share', 'HouseholdViewers', 'Viewers'], dtype='object')


In [7]:
# remove unneeded columns

# Columns that are not used by any of the calculations below.
unneeded_columns = ['Network', 'Viewers']
df = DataUtils.drop(dataframe=df, columns=unneeded_columns)

print(df.dtypes)

Year                  int64
Rating              float64
Share                 int64
HouseholdViewers     object
dtype: object


In [8]:
# update data types

# HouseholdViewers is read as text because the values contain thousands
# separators. Strip them only while the column is still non-numeric, so the
# cell can be re-run after the conversion (a numeric column has no `.str`).
household_viewers = df['HouseholdViewers']
if not pd.api.types.is_numeric_dtype(household_viewers):
    household_viewers = household_viewers.str.replace(',', '', regex=False)

df['HouseholdViewers'] = pd.to_numeric(household_viewers)
print(df.dtypes)

Year                  int64
Rating              float64
Share                 int64
HouseholdViewers      int64
dtype: object


In [10]:
# get date range for moving averages

# First and last year of every moving window over the 'Year' column.
min_years = Model.get_moving_range(
    dataframe=df, column='Year', calculation=MovingRangeCalc.MIN
)
max_years = Model.get_moving_range(
    dataframe=df, column='Year', calculation=MovingRangeCalc.MAX
)

# Label each window as '<first year>-<last year>'.
min_max_years = [
    f'{first_year}-{last_year}'
    for first_year, last_year in zip(min_years, max_years)
]

print(min_max_years)

['1967-1976', '1968-1977', '1969-1978', '1970-1979', '1971-1980', '1972-1981', '1973-1982', '1974-1983', '1975-1984', '1976-1985', '1977-1986', '1978-1987', '1979-1988', '1980-1989', '1981-1990', '1982-1991', '1983-1992', '1984-1993', '1985-1994', '1986-1995', '1987-1996', '1988-1997', '1989-1998', '1990-1999', '1991-2000', '1992-2001', '1993-2002', '1994-2003', '1995-2004', '1996-2005', '1997-2006', '1998-2007', '1999-2008', '2000-2009', '2001-2010', '2002-2011', '2003-2012', '2004-2013', '2005-2014', '2006-2015', '2007-2016', '2008-2017', '2009-2018', '2010-2019', '2011-2021', '2012-2022']


In [14]:
# get moving average for df metrics

# Same moving-mean calculation applied to each metric column in turn;
# the results are unpacked in the order the columns are listed.
moving_avg_rating, moving_avg_share, moving_avg_viewers = (
    Model.get_moving_range(dataframe=df,
                           column=metric,
                           calculation=MovingRangeCalc.MEAN)
    for metric in ('Rating', 'Share', 'HouseholdViewers')
)

print(f'{moving_avg_rating}\n\n{moving_avg_share}\n\n{moving_avg_viewers}')

[24, 23, 23, 24, 24, 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 19, 18, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 5, 5]

[47, 46, 46, 46, 45, 44, 44, 44, 43, 42, 40, 40, 38, 37, 36, 35, 34, 32, 32, 31, 29, 28, 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 11, 11, 11, 11]

[14855000, 15194000, 15652000, 16609000, 16987000, 16928000, 17544000, 17793000, 17928000, 18195000, 18071000, 17918000, 17822000, 17649000, 17098000, 17154000, 16488000, 16152000, 15947000, 15533000, 15058000, 14609000, 14104600, 13648600, 13171300, 12671100, 12303700, 11864300, 11342600, 10905000, 10666100, 10458400, 10199900, 10086323, 9938873, 9590273, 9260045, 8995545, 9050545, 9074845, 8642845, 8439745, 8039645, 7557222, 7323772, 7172572]


In [15]:
# create dictionary with first and last records

# One entry per output column: the first/last window label, the first/last
# moving average of each metric, and the change between those two averages.
data = dict(
    Years=Model.get_first_last(min_max_years),
    AvgRating=Model.get_first_last(moving_avg_rating),
    RatingDelta=Model.get_percent_change(moving_avg_rating),
    AvgShare=Model.get_first_last(moving_avg_share),
    ShareDelta=Model.get_percent_change(moving_avg_share),
    AvgHouseholdViewers=Model.get_first_last(moving_avg_viewers),
    ViewersDelta=Model.get_percent_change(moving_avg_viewers),
)

for key in data:
    val = data[key]
    print(f'{key}: {val}')

Years: ['1967-1976', '2012-2022']
AvgRating: [24, 5]
RatingDelta: [nan, 0.79]
AvgShare: [47, 11]
ShareDelta: [nan, 0.77]
AvgHouseholdViewers: [14855000, 7172572]
ViewersDelta: [nan, 0.52]


In [16]:
# create dataframe with first and last records

# Each key of `data` becomes a column; the two list entries become the rows.
compare_first_last = pd.DataFrame(data=data)

display(compare_first_last)

Unnamed: 0,Years,AvgRating,RatingDelta,AvgShare,ShareDelta,AvgHouseholdViewers,ViewersDelta
0,1967-1976,24,,47,,14855000,
1,2012-2022,5,0.79,11,0.77,7172572,0.52


After reviewing television viewing data from MLB All-Star games, we observe a massive decrease in viewership. From the first time period in this dataset (1967-1976) to the most recent available time period (2012-2022), the average rating for All-Star games dropped by 79%, the average share of the viewership market decreased by 77%, and the average number of household viewers fell by 52%.