# MLB All-Star Game Television Viewership Analysis

Source: https://www.baseball-almanac.com/asgbox/asgtv.shtml

In [1]:
# import packages
import os

import pandas as pd

import helpers.dataframe_helper as DataHelper
import helpers.utilities as ToadUtils
import models.baseball_almanac as Model
from helpers.downloader import Downloader
from models.baseball_almanac import MovingRangeCalc

Invoking __init__.py for helpers


In [2]:
# set variables

# Raw CSV of All-Star Game TV stats; the URL is pinned to ref 1.2.1 of the repo.
URL = 'https://raw.githubusercontent.com/BronzeToad/AllStarRosters/1.2.1/data/baseball-almanac/all_star_game_tv_stats.csv'

# Local folder the CSV is downloaded into and later read back from.
DATA_DIR = os.path.join(Model.ROOT_DIR, 'data', 'baseball-almanac')

# Echo the configuration so the recorded run shows where the data came from.
print(f'URL: {URL}\nDATA_DIR: {DATA_DIR}')

URL: https://raw.githubusercontent.com/BronzeToad/AllStarRosters/1.2.1/data/baseball-almanac/all_star_game_tv_stats.csv
DATA_DIR: /Users/ajp/Documents/Projects/AllStarRosters/data/baseball-almanac


In [3]:
# download viewership data
# Fetches URL into DATA_DIR via the project's Downloader helper. In the recorded
# run the helper printed the filename, URL, a status flag and a saved flag, and
# the call's value (True) was echoed as the cell result.
# NOTE(review): this runs on every execution of the cell; whether the helper
# skips or overwrites an existing file is not visible from here - confirm.

Downloader.download(url=URL, save_dir=DATA_DIR)


--------------------
Filename : all_star_game_tv_stats.csv
URL : https://raw.githubusercontent.com/BronzeToad/AllStarRosters/1.2.1/data/baseball-almanac/all_star_game_tv_stats.csv
Status good? : True
Content saved? : True


True

In [4]:
# create tv viewership dataframe

# Derive the CSV name from the download URL rather than hard-coding it, then
# load that file from the data folder.
csv_filename = ToadUtils.get_filename_from_url(URL)
df = ToadUtils.get_csv(folder=DATA_DIR, filename=csv_filename)

Dataframe rows: 55, Dataframe columns: 6


In [5]:
# update column names

# Map the source CSV headers to the shorter names used in the rest of the notebook.
new_col_names = {
    'Year | ASG': 'Year',
    'Households': 'HouseholdViewers'
}

# Rebind `df` instead of renaming in place: this matches how the following cells
# reassign `df` and avoids silently mutating a frame other names may still reference.
df = df.rename(columns=new_col_names)
ToadUtils.prant(list(df.columns))


Printing list...
Year
Network
Rating
Share
HouseholdViewers
Viewers


In [6]:
# remove unneeded columns

# Columns that are not used by any later cell in this analysis.
unneeded_columns = ['Network', 'Viewers']
df = DataHelper.drop(dataframe=df, columns=unneeded_columns)

# Show the remaining dtypes to spot columns that still need conversion.
print(df.dtypes)

Year                  int64
Rating              float64
Share                 int64
HouseholdViewers     object
dtype: object


In [7]:
# update data types

# HouseholdViewers is loaded as object dtype because its values contain commas.
# Cast to str before using the `.str` accessor so the cell stays re-runnable:
# once the column has been converted to int64, a bare `.str` call would raise
# AttributeError on a second execution. `regex=False` makes the literal comma
# replacement explicit regardless of the pandas version's default.
household_viewers_text = (
    df['HouseholdViewers']
    .astype(str)
    .str.replace(',', '', regex=False)
)
df['HouseholdViewers'] = pd.to_numeric(household_viewers_text)
print(df.dtypes)

Year                  int64
Rating              float64
Share                 int64
HouseholdViewers      int64
dtype: object


In [10]:
# get date range for moving averages

# Both lookups run over the same frame and column; only the calculation differs.
year_window_args = dict(dataframe=df, column='Year')
min_years = Model.get_moving_range(calculation=MovingRangeCalc.MIN, **year_window_args)
max_years = Model.get_moving_range(calculation=MovingRangeCalc.MAX, **year_window_args)

# Pair each window's first and last year into a 'first-last' label.
min_max_years = [
    f'{first_year}-{last_year}'
    for first_year, last_year in zip(min_years, max_years)
]

In [11]:
# get moving average for df metrics

# Apply the same MEAN moving-range calculation to each metric column; the list
# is evaluated in the order Rating, Share, HouseholdViewers and unpacked into
# the three names the summary cell reads.
moving_avg_rating, moving_avg_share, moving_avg_viewers = [
    Model.get_moving_range(dataframe=df,
                           column=metric_column,
                           calculation=MovingRangeCalc.MEAN)
    for metric_column in ('Rating', 'Share', 'HouseholdViewers')
]

In [18]:
# create dictionary with first and last records

# (average key, delta key, moving-average series) for every metric, listed in
# the column order wanted in the summary table.
metric_columns = (
    ('AvgRating', 'RatingDelta', moving_avg_rating),
    ('AvgShare', 'ShareDelta', moving_avg_share),
    ('AvgHouseholdViewers', 'ViewersDelta', moving_avg_viewers),
)

# Start with the period labels, then append an average/delta pair per metric so
# the dict's insertion order matches the desired column order.
data = {'Years': Model.get_first_last(min_max_years)}
for avg_key, delta_key, moving_avg in metric_columns:
    data[avg_key] = Model.get_first_last(moving_avg)
    data[delta_key] = Model.get_percent_change(moving_avg)

ToadUtils.prant(data)


Printing dictionary...
Years: ['1967-1976', '2012-2022']
AvgRating: [24, 5]
RatingDelta: [nan, 0.79]
AvgShare: [47, 11]
ShareDelta: [nan, 0.77]
AvgHouseholdViewers: [14855000, 7172572]
ViewersDelta: [nan, 0.52]


In [19]:
# create dataframe with first and last records

# Each dict key becomes a column; the two list entries become the rows for the
# first and the most recent period.
compare_first_last = pd.DataFrame.from_dict(data)
display(compare_first_last)

Unnamed: 0,Years,AvgRating,RatingDelta,AvgShare,ShareDelta,AvgHouseholdViewers,ViewersDelta
0,1967-1976,24,,47,,14855000,
1,2012-2022,5,0.79,11,0.77,7172572,0.52


After reviewing television viewing data from MLB All-Star games, we observe a massive decrease in viewership. From the first time period in this dataset (1967-1976) to the most recent available time period (2012-2022), the average rating for All-Star games dropped by 79%, the average share of the viewership market decreased by 77%, and the average number of household viewers fell by 52%.