In [1]:
import zipfile
import os

# Archive to unpack and the directory that receives its contents.
# NOTE(review): "/aqi_data" is an absolute path at the filesystem root -
# confirm the kernel is allowed to create and write to it.
zip_path = "AQI_BY_COUNTY_25_YEARS.zip"
extractedDir = "/aqi_data"

# Unpack every member of the archive into extractedDir; the `with` block
# closes the archive handle once extraction finishes.
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extractedDir)

# Keep the top-level listing of extractedDir and echo it as the cell output.
extracted_files = os.listdir(extractedDir)
extracted_files

['AQI_BY_COUNTY_25_YEARS']

In [2]:
# Folder inside the extraction directory that holds the data files, and the
# names of the entries directly inside it (order is whatever the OS returns).
subdir_path = os.path.join(extractedDir, 'AQI_BY_COUNTY_25_YEARS')
subdir_contents = os.listdir(path=subdir_path)

In [3]:
import pandas as pd

# Load every CSV in the extracted folder and stack them into one DataFrame.
# os.listdir() returns names in arbitrary order, so the paths are sorted here
# to make the row order of aqi_df reproducible from run to run.
all_files = sorted(
    os.path.join(subdir_path, name)
    for name in subdir_contents
    if name.endswith('.csv')
)
df_list = [pd.read_csv(csv_path) for csv_path in all_files]
# ignore_index=True discards each file's own row labels and renumbers 0..n-1.
aqi_df = pd.concat(df_list, ignore_index=True)

# Show the first few rows and column names for inspection
#aqi_df.head(), aqi_df.columns.tolist()

In [4]:
#!jupyter nbextension enable --py widgetsnbextension --sys-prefix
#!jupyter nbextension install --py widgetsnbextension --sys-prefix

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Read the 25 annual files (2000-2024) from AQI_BY_COUNTY_25_YEARS/ and combine
# them into `aqiDf`.
# All files are read first and concatenated in a single call: calling
# pd.concat inside the loop copies every previously loaded row again on each
# iteration.
# 2024 is listed first so the row order of `aqiDf` stays the same as before.
aqiYears = [2024, *range(2000, 2024)]
aqiDf = pd.concat(
    [
        pd.read_csv(f"AQI_BY_COUNTY_25_YEARS/annual_aqi_by_county_{year}.csv")
        for year in aqiYears
    ],
    ignore_index=True,
)

# --- Interactive controls ---

# Area that the report is rendered into.
output = widgets.Output()

# State selector: every distinct value of the 'State' column, sorted.
stateWidget = widgets.Dropdown(
    description='State:',
    options=sorted(aqiDf['State'].unique()),
    layout=widgets.Layout(width='50%'),
)

# County selector: created empty here; its options are assigned separately.
countyWidget = widgets.Dropdown(
    description='County:',
    options=[],
    layout=widgets.Layout(width='50%'),
)

# Two-handle slider choosing the first and last year of the report.
# continuous_update=False: the value is only updated when the handle is released.
yearRange = widgets.IntRangeSlider(
    value=[2000, 2024],
    min=2000,
    max=2024,
    step=1,
    description='Year Range:',
    continuous_update=False,
    layout=widgets.Layout(width='90%'),
)

def updateCountyOptions(*args):
    """Set the county dropdown's options to the counties of the selected state.

    Reads the current value of ``stateWidget``, collects the distinct
    'County' values of the matching rows in ``aqiDf`` and assigns them,
    sorted, to ``countyWidget.options``.

    Any positional arguments are accepted and ignored, so the function can be
    used as a widget observer callback or called directly with no arguments.
    """
    inSelectedState = aqiDf['State'] == stateWidget.value
    countyNames = aqiDf.loc[inSelectedState, 'County'].unique()
    countyWidget.options = sorted(countyNames)

# Re-run the county refresh whenever the state dropdown's value changes, and
# call it once now so the county dropdown is filled for the initial state.
stateWidget.observe(updateCountyOptions, names='value')
updateCountyOptions()

def showSummary(change=None):
    """Render the report for the current widget selection into ``output``.

    The previous content of ``output`` is cleared, then ``aqiDf`` is filtered
    to the selected state, county and inclusive year range. If no rows match,
    a short message is printed; otherwise two tables are displayed:

    1. ``describe(include='all')`` of the matching rows.
    2. The day-count and AQI columns summed per 'Year'.

    Parameters
    ----------
    change : optional
        Ignored. Present so the function can be used as a button click
        handler (which passes an argument) or called with no arguments.

    Returns
    -------
    None
    """
    reportColumns = [
        'Good Days', 'Moderate Days', 'Unhealthy for Sensitive Groups Days',
        'Unhealthy Days', 'Very Unhealthy Days', 'Hazardous Days',
        'Max AQI', 'Median AQI', 'Days with AQI',
    ]

    with output:
        clear_output()

        firstYear, lastYear = yearRange.value
        inPlace = (aqiDf['State'] == stateWidget.value) & (aqiDf['County'] == countyWidget.value)
        # between() is inclusive on both ends by default.
        inYears = aqiDf['Year'].between(firstYear, lastYear)
        selection = aqiDf[inPlace & inYears]

        if selection.empty:
            print("No data available for the selected options.")
            return

        display(selection.describe(include='all'))
        # NOTE(review): sum() reproduces each row's values only if there is a
        # single row per state/county/year - confirm; otherwise the summed
        # 'Max AQI' / 'Median AQI' figures are not meaningful.
        display(selection.groupby('Year')[reportColumns].sum())

# Button that renders the report: each click calls showSummary.
submitBtn = widgets.Button(button_style='primary', description="Show Report")
submitBtn.on_click(showSummary)

# Stack the controls above the output area and render the whole panel.
ui = widgets.VBox(children=[stateWidget, countyWidget, yearRange, submitBtn, output])
display(ui)

VBox(children=(Dropdown(description='State:', layout=Layout(width='50%'), options=('Alabama', 'Alaska', 'Arizo…