In [1]:
# Run cell to import libraries and load data sets
import geopandas as gpd 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import contextily 
import mapclassify 
import folium
import aiohttp
import fsspec
import warnings
warnings.filterwarnings('ignore')
plt.style.use("ggplot")

# Load the COVID-19 case and death tables from the project's GitHub repository.
CASES_URL = "https://raw.githubusercontent.com/PBLarcher/COVID-19-data-visualization/refs/heads/main/cases_data.csv"
DEATHS_URL = "https://raw.githubusercontent.com/PBLarcher/COVID-19-data-visualization/refs/heads/main/deaths_data.csv"
df_cases = pd.read_csv(CASES_URL)
df_deaths = pd.read_csv(DEATHS_URL)

# Load the GeoDataFrame containing United States geometry shapes (at a county-level).
# The URL is kept under its own name so that `county_shapes` only ever refers to the
# GeoDataFrame; if the download fails part-way, the name is not left bound to a string.
COUNTY_SHAPES_URL = "https://github.com/PBLarcher/COVID-19-data-visualization/blob/main/counties_geometry.zip?raw=true"
with fsspec.open(COUNTY_SHAPES_URL) as counties_file:
    county_shapes = gpd.read_file(counties_file)

In [3]:
## project here

#User Interaction Section
#------------------------------------
# Ask for the user's first name and the state to report on.
# The state is matched case-insensitively (and ignoring surrounding whitespace)
# against the names actually present in df_cases['STATE'].  This way a name
# whose stored capitalisation differs from what str.title() produces still
# resolves, and an unknown state is re-prompted here instead of failing further
# down with a bare IndexError.

fName = input('Please enter your first name:').strip().title()

# Lookup table: normalised state name -> state name exactly as stored in the data.
known_states = {
    str(name).strip().casefold(): name
    for name in df_cases['STATE'].dropna().unique()
}

state_prompt = f'{fName}, enter any U.S. State (e.g Alabama) of your choice\nto view its respective COVID-19 data:\n '
while True:
    state_raw = input(state_prompt).strip()
    state_key = state_raw.casefold()
    if state_key in known_states:
        state_input = known_states[state_key]
        break
    print(f'"{state_raw}" was not found in the data set. Please check the spelling and try again.')
#------------------------------------

### Statistics shown:

print(f'\nCovid-19 Statistics in {state_input}:')

# Ground zero calculation.
# Every column from position 9 onwards is treated as a date column.
# NOTE(review): assumes the first nine columns of df_cases are non-date metadata - confirm against the CSV.
date_columns = df_cases.columns[9:]

# Keep only the rows of the chosen state.
state_df = df_cases[df_cases['STATE'] == state_input]

# Sum the counts of all rows per date and index the result by real timestamps.
state_totals = state_df[date_columns].sum()
state_totals.index = pd.to_datetime(state_totals.index)

# Ground zero is the first date whose state-wide total is greater than 0.
days_with_cases = state_totals[state_totals > 0]
if days_with_cases.empty:
    # Everything below depends on ground_zero, so stop with a clear message.
    raise ValueError(f'No reported COVID-19 cases were found for {state_input}.')
ground_zero = days_with_cases.index[0]
ground_zero_format = ground_zero.strftime('%B %d, %Y')
print(f'Ground Zero of COVID-19 in {state_input} happened on {ground_zero_format}\n')

# Date transformation: reshape wide -> long (melt), then aggregate per date (groupby).

# Reshape both tables so that every date column becomes a row value.
# NOTE(review): date_columns is derived from df_cases; this assumes df_deaths
# carries the same date column labels - confirm against the CSV.
cases_long = df_cases.melt(
    id_vars=['STATE', 'COUNTY'],
    value_vars=date_columns,
    var_name='date',
    value_name='cases',
)
deaths_long = df_deaths.melt(
    id_vars=['state', 'county'],
    value_vars=date_columns,
    var_name='date',
    value_name='deaths',
)

# Parse the date labels into real timestamps.
for long_frame in (cases_long, deaths_long):
    long_frame['date'] = pd.to_datetime(long_frame['date'])

# Keep only the rows belonging to the state the user picked
# (the cases table uses upper-case column names, the deaths table lower-case).
scases_long = cases_long.loc[cases_long['STATE'].eq(state_input)]
sdeaths_long = deaths_long.loc[deaths_long['state'].eq(state_input)]

# State-wide totals per date.
cases_group = scases_long['cases'].groupby(scases_long['date']).sum()
deaths_group = sdeaths_long['deaths'].groupby(sdeaths_long['date']).sum()

# Day-over-day change of the per-date totals.
# NaNs and negative values are intentionally kept, as per the raw data.
new_cases = cases_group.diff()
new_deaths = deaths_group.diff()

# Stat calculation

# Year boundaries.
end_20 = pd.to_datetime('2020-12-31')
start_21 = pd.to_datetime('2021-01-01')
end_21 = pd.to_datetime('2021-12-31')

# Each series is filtered with a mask built from ITS OWN index.  Re-using the
# cases mask for the deaths series only works while both indexes happen to be
# identical; separate masks stay correct even if the two tables ever differ.

# 2020 slice: ground zero to Dec 31 2020 (inclusive).
mask_20 = (new_cases.index >= ground_zero) & (new_cases.index <= end_20)
deaths_mask_20 = (new_deaths.index >= ground_zero) & (new_deaths.index <= end_20)
new_cases_20 = new_cases[mask_20]
new_deaths_20 = new_deaths[deaths_mask_20]

# 2021 slice: Jan 1 2021 to Dec 31 2021 (inclusive).
mask_21 = (new_cases.index >= start_21) & (new_cases.index <= end_21)
deaths_mask_21 = (new_deaths.index >= start_21) & (new_deaths.index <= end_21)
new_cases_21 = new_cases[mask_21]
new_deaths_21 = new_deaths[deaths_mask_21]

# Totals and daily averages per year.
# 2020 stats
total_cases_20 = new_cases_20.sum()
avg_case_20 = new_cases_20.mean()
total_deaths_20 = new_deaths_20.sum()
avg_deaths_20 = new_deaths_20.mean()

# 2021 stats
total_cases_21 = new_cases_21.sum()
avg_cases_21 = new_cases_21.mean()
total_deaths_21 = new_deaths_21.sum()
avg_deaths_21 = new_deaths_21.mean()

# Cumulative totals as of Dec 31 2021 (explicit label lookup on the date index;
# raises KeyError if that date is missing from the data).
cum_cases = cases_group.loc[end_21]
cum_deaths = deaths_group.loc[end_21]

# Month and day of ground zero, used in the 2020 heading.
ground_zero_print = ground_zero.strftime('%B %d')


def _print_year_summary(heading, cases_total, cases_mean, deaths_total, deaths_mean):
    """Print one yearly block: heading, totals as integers, averages with two decimals."""
    print(heading)
    print(f'   - Total reported cases: {int(cases_total):,}')
    print(f'   - Average daily new cases: {cases_mean:,.2f}')
    print(f'   - Total reported deaths: {int(deaths_total):,}')
    print(f'   - Average daily new deaths: {deaths_mean:,.2f}\n')


# Statistics report.
print(f'{state_input} data for both 2020 & 2021 with total:\n')

_print_year_summary(
    f'2020 (from {ground_zero_print}):',
    total_cases_20, avg_case_20, total_deaths_20, avg_deaths_20,
)
_print_year_summary(
    '2021:',
    total_cases_21, avg_cases_21, total_deaths_21, avg_deaths_21,
)


print(f'Cumulative Totals in {state_input} (as of December 31, 2021):')
print(f'   - Total cases: {int(cum_cases):,}')
print(f'   - Total deaths: {int(cum_deaths):,}\n')

#Visualization options
print(f'{fName}, please choose a data visualization option for {state_input}')
print(f' 1. View four subplots showing COVID-19 trends in {state_input} (2020-2021):')
print(f'   * Bar Chart of Daily New Cases\n   * Line Chart of Cumulative Cases Trend')
print(f'   * Bar Chart of Daily New Deaths\n   * Line Chart of Cumulative Deaths Trend')
print('  _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-\n')
print(f' 2. View a choropleth map showing total reported cases and deaths by county in {state_input} as of December 31, 2021\n')

# Read the menu choice.  A non-numeric answer (e.g. "one" or an empty line) is
# mapped to None rather than raising ValueError, so it is treated like any other
# unrecognised choice by the if/elif/else that follows.
choice_text = input('Enter your Choice (1 or 2):')
try:
    user_choice = int(choice_text)
except ValueError:
    user_choice = None

if user_choice == 1:
    # Option 1: 2x2 grid of daily / cumulative charts for cases and deaths.

    # Restrict every series to the plotted window (ground zero to the end of 2021).
    # Each mask is built from the index of the series it filters, so the deaths
    # series do not depend on the cases index having the same dates.
    plt_range = (new_cases.index >= ground_zero) & (new_cases.index <= end_21)
    deaths_plt_range = (new_deaths.index >= ground_zero) & (new_deaths.index <= end_21)

    plot_daily_cases = new_cases[plt_range]
    plot_deaths_daily = new_deaths[deaths_plt_range]
    plot_cases_cum = cases_group[plt_range]
    plot_deaths_cum = deaths_group[deaths_plt_range]

    # Subplot setup.
    fig, axes = plt.subplots(2, 2, figsize=(12, 9))
    fig.suptitle(f'{state_input} COVID 19 report for {fName}')

    # One entry per panel: (axes, data, title, y-label, draw as bars?).
    panels = [
        (axes[0, 0], plot_daily_cases, '1. Bar Chart of Daily New Cases', 'Count', True),
        (axes[0, 1], plot_cases_cum, '2. Line Chart of Cumulative Cases Trend', 'Total Count', False),
        (axes[1, 0], plot_deaths_daily, '3. Bar Chart of Daily New Deaths', 'Count', True),
        (axes[1, 1], plot_deaths_cum, '4. Line Chart of Cumulative Deaths Trend', 'Total Count', False),
    ]
    for ax, series, title, ylabel, as_bars in panels:
        if as_bars:
            ax.bar(series.index, series)
        else:
            ax.plot(series.index, series)
            # Show plain thousands-separated numbers instead of scientific
            # notation on both cumulative charts.
            ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Date')
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

elif user_choice == 2:
    # Option 2: interactive county-level choropleth map.

    # Column holding the totals as of the last day of 2021.
    map_date = '12/31/2021'

    # The cases table uses upper-case column names ('FIPS', 'STATE'), the deaths
    # table lower-case ones ('fips', 'state'); both end up keyed on 'FIPS'.
    map_cases = (
        df_cases.loc[df_cases['STATE'] == state_input, ['FIPS', 'COUNTY', 'STATE', map_date]]
        .rename(columns={map_date: 'Total Cases'})
    )
    map_deaths = (
        df_deaths.loc[df_deaths['state'] == state_input, ['fips', map_date]]
        .rename(columns={map_date: 'Total Deaths', 'fips': 'FIPS'})
    )

    # Merge cases and deaths per county.
    covid_map_data = pd.merge(map_cases, map_deaths, on='FIPS')

    # Remove rows with no FIPS code, then normalise the code to a 5-character,
    # zero-padded string so it can be joined with the shapes.
    covid_map_data = covid_map_data.dropna(subset=['FIPS'])
    covid_map_data['FIPS'] = covid_map_data['FIPS'].astype(int).astype(str).str.zfill(5)

    # Work on a copy so the loaded county_shapes frame is not modified.
    # NOTE(review): astype(int) assumes FIPS_BEA has no missing values - confirm.
    state_shape = county_shapes.copy()
    state_shape['FIPS_BEA'] = state_shape['FIPS_BEA'].astype(int).astype(str).str.zfill(5)
    # Rename FIPS_BEA to FIPS so both frames share the join key.
    state_shape = state_shape.rename(columns={'FIPS_BEA': 'FIPS'})

    # Merge shapes and COVID data; the inner join keeps only counties present in both.
    final_map_data = state_shape.merge(covid_map_data, on='FIPS', how='inner')

    # Defensive fill: treat any missing total as 0 so the classification and
    # the tooltip always receive numbers.
    final_map_data['Total Cases'] = final_map_data['Total Cases'].fillna(0)
    final_map_data['Total Deaths'] = final_map_data['Total Deaths'].fillna(0)

    # Map making.
    print(f'Interactive choropleth map: Total Reported Cases and Deaths in {state_input}')

    # First layer: counties coloured by total deaths (no legend).  The tooltip
    # lists both totals so the actual numbers show up on hover.
    m = final_map_data.explore(
        name='Total Deaths',
        cmap='YlOrRd',
        column='Total Deaths',
        scheme='equalinterval',
        tooltip=['STATE', 'COUNTY', 'Total Cases', 'Total Deaths'],
        k=5,
        legend=False,
        style_kwds={'color': 'black', 'weight': 0.5}
    )

    # Second layer, drawn onto the same map: counties coloured by total cases,
    # with the legend enabled.
    map_combined = final_map_data.explore(
        m=m,
        name='Total Cases',
        cmap='YlOrRd',
        column='Total Cases',
        scheme='equalinterval',
        tooltip=['STATE', 'COUNTY', 'Total Cases', 'Total Deaths'],
        k=5,
        legend=True,
        style_kwds={'color': 'black', 'weight': 0.5}
    )
    display(map_combined)
else:
    # Any other answer: say so and let the cell end normally, without an error.
    print('No valid option (1 or 2) was selected, so no visualization was generated.')
## The project does not ask for an else branch, but it does not forbid one either.
## Note: a bare `quit` (without parentheses) is only a name lookup and does nothing;
## it does not stop execution.  Because no code follows this if/elif/else, the cell
## simply finishes here, so an explicit message is printed instead of calling quit().


Please enter your first name: paxton
Paxton, enter any U.S. State (e.g Alabama) of your choice
to view its respective COVID-19 data:
  texas



Covid-19 Statistics in Texas:
Ground Zero of COVID-19 in Texas happened on March 05, 2020

Texas data for both 2020 & 2021 with total:

2020 (from March 05):
   - Total reported cases: 1,805,998
   - Average daily new cases: 5,980.13
   - Total reported deaths: 28,090
   - Average daily new deaths: 93.01

2021:
   - Total reported cases: 2,820,753
   - Average daily new cases: 7,728.09
   - Total reported deaths: 47,550
   - Average daily new deaths: 130.27

Cumulative Totals in Texas (as of December 31, 2021):
   - Total cases: 4,626,751
   - Total deaths: 75,640

Paxton, please choose a data visualization option for Texas
 1. View four subplots showing COVID-19 trends in Texas (2020-2021):
   * Bar Chart of Daily New Cases
   * Line Chart of Cumulative Cases Trend
   * Bar Chart of Daily New Deaths
   * Line Chart of Cumulative Deaths Trend
  _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-

 2. View a choropleth map showing total repo

Enter your Choice (1 or 2): 2


Interactive chloropleth map: Total Reported Cases and Deaths in Texas
