# UNMC Air Quality Q & A:

### 1. What are the 5 locations in Nebraska with the highest mean and median concentrations of VOC, PM 2.5 and PM 10.0?

In [53]:
import pandas as pd
air_data = pd.read_csv('AirQuality_Daily_StudentVersion.csv') 
air_data = pd.DataFrame(air_data)

PM2_5 = air_data.groupby(['sensor.name']).agg(mean = ('pm2.5_atm', 'mean'),
                                                     median = ('pm2.5_atm', 'median')).sort_values(['mean', 'median'], ascending=False).head(5)

VOC = air_data.groupby(['sensor.name']).agg(mean = ('voc', 'mean'),
                                                     median = ('voc', 'median')).sort_values(['mean', 'median'], ascending=False).head(5)

PM10 = air_data.groupby(['sensor.name']).agg(mean = ('pm10.0_atm', 'mean'),
                                                     median = ('pm10.0_atm', 'median')).sort_values(['mean', 'median'], ascending=False).head(5)
print("Top 5 Locations for PM2.5:")
print(PM2_5, "\n")

print("Top 5 Locations for PM10:")
print(PM10, "\n")

print("Top 5 Locations for VOC:")
print(VOC)


Top 5 Locations for PM2.5:
                                                          mean     median
sensor.name                                                              
Broken Bow                                          928.710593  36.050240
#16 - Richardson County Courthouse                  700.127342  11.977344
#18 - Southeast District Health Department- Tec...  613.175352  10.322875
NCDHD O'Neill #11                                   164.495078   7.251208
Swnphd-mccook                                       123.011622   4.582281 

Top 5 Locations for PM10:
                                                          mean     median
sensor.name                                                              
Broken Bow                                          929.678512  43.179094
#16 - Richardson County Courthouse                  701.632446  13.305615
#18 - Southeast District Health Department- Tec...  614.227248  11.433729
NCDHD O'Neill #11                                   166.1

### 2. On what days did the maximum values occur and where did this maximums occur?

In [54]:
PM2_5max = air_data.loc[air_data['pm2.5_atm'].idxmax()] 
PM10_max = air_data.loc[air_data['pm10.0_atm'].idxmax()]
VOC_max  = air_data.loc[air_data['voc'].idxmax()]

summary = pd.DataFrame({
    'Pollutant': ['PM2.5', 'PM10', 'VOC'],
    'Max Value': [PM2_5max['pm2.5_atm'], PM10_max['pm10.0_atm'], VOC_max['voc']],
    'Date': [PM2_5max['date'], PM10_max['date'], VOC_max['date']],
    'Sensor Name & Location': [PM2_5max['sensor.name'], PM10_max['sensor.name'], VOC_max['sensor.name']]
})

print(summary)




  Pollutant    Max Value      Date              Sensor Name & Location
0     PM2.5  3782.823313  02/18/25  #16 - Richardson County Courthouse
1      PM10  3784.682542  02/18/25  #16 - Richardson County Courthouse
2       VOC  1209.931571  06/24/24                     Swnphd-ogallala


### 3. Does humidity and temperature have a noticeable effect on air quality?

In [55]:
def categorize_humidity(humidity):
    if humidity < 50:
        return 'Low'
    elif humidity <= 80:
        return 'High'
    else:
        return 'Very High'

def categorize_temperature(temperature):
    if temperature < 32:
        return 'Below Freezing'
    elif temperature <= 50:
        return 'Cool'
    elif temperature <= 70:
        return 'Warm'
    else:
        return 'Hot'

air_data['Humidity Category'] = air_data['humidity'].apply(categorize_humidity)
air_data['Temperature Category'] = air_data['temperature'].apply(categorize_temperature)

correlation = air_data[['pm2.5_atm', 'pm10.0_atm', 'voc', 'humidity', 'temperature']].corr()

print("\n Average Correlation Between Pollutants, Humidity, and Temperature:")
print(correlation)

for sensor, group in air_data.groupby('sensor.name'):
    print(f"\nCorrelation for sensor: {sensor}")
    corr = group[['pm2.5_atm', 'pm10.0_atm', 'voc', 'humidity', 'temperature']].corr()
    print(corr)






 Average Correlation Between Pollutants, Humidity, and Temperature:
             pm2.5_atm  pm10.0_atm       voc  humidity  temperature
pm2.5_atm     1.000000    0.999990 -0.012408 -0.008327    -0.197996
pm10.0_atm    0.999990    1.000000 -0.011899 -0.008054    -0.198141
voc          -0.012408   -0.011899  1.000000 -0.067082     0.035466
humidity     -0.008327   -0.008054 -0.067082  1.000000    -0.012772
temperature  -0.197996   -0.198141  0.035466 -0.012772     1.000000

Correlation for sensor: #16 - Richardson County Courthouse
             pm2.5_atm  pm10.0_atm       voc  humidity  temperature
pm2.5_atm     1.000000    0.999999  0.018535  0.016993     0.148728
pm10.0_atm    0.999999    1.000000  0.020175  0.016432     0.148801
voc           0.018535    0.020175  1.000000 -0.495272     0.058156
humidity      0.016993    0.016432 -0.495272  1.000000    -0.109179
temperature   0.148728    0.148801  0.058156 -0.109179     1.000000

Correlation for sensor: #17 - Otoe County
            

### 4. Have there been any Air Quality Index (AQI) health risks (unhealthy for sensitive populations) at any of the locations in the dataset for PM 2.5 and PM 10 based on the EPAâ€™s AQI ratings?

In [57]:

def calc_aqi_pm25(c):
    bp = [
        (0.0, 12.0, 0, 50),
        (12.1, 35.4, 51, 100),
        (35.5, 55.4, 101, 150),
        (55.5, 150.4, 151, 200),
        (150.5, 250.4, 201, 300),
        (250.5, 350.4, 301, 400),
        (350.5, 500.4, 401, 500)
    ]
    for low, high, aqi_low, aqi_high in bp:
        if low <= c <= high:
            return ((aqi_high - aqi_low) / (high - low)) * (c - low) + aqi_low
    return None

def calc_aqi_pm10(c):
    bp = [
        (0, 54, 0, 50),
        (55, 154, 51, 100),
        (155, 254, 101, 150),
        (255, 354, 151, 200),
        (355, 424, 201, 300),
        (425, 504, 301, 400),
        (505, 604, 401, 500)
    ]
    for low, high, aqi_low, aqi_high in bp:
        if low <= c <= high:
            return ((aqi_high - aqi_low) / (high - low)) * (c - low) + aqi_low
    return None

air_data['AQI_PM25'] = air_data['pm2.5_atm'].apply(calc_aqi_pm25)
air_data['AQI_PM10'] = air_data['pm10.0_atm'].apply(calc_aqi_pm10)

risk_rows = air_data[
    ((air_data['AQI_PM25'] >= 101) & (air_data['AQI_PM25'] <= 150)) |
    ((air_data['AQI_PM10'] >= 101) & (air_data['AQI_PM10'] <= 150))
]
if not risk_rows.empty:
    risk_rows_sorted = (
        risk_rows
        .assign(Max_AQI=risk_rows[['AQI_PM25', 'AQI_PM10']].max(axis=1))
        .sort_values(by='Max_AQI', ascending=False)
    )

    print("Rows where AQI reached 'Unhealthy for Sensitive Groups:")
    print(risk_rows_sorted[['date', 'sensor.name', 'AQI_PM25', 'AQI_PM10']])
else:
    print("No rows reached 'Unhealthy for Sensitive Groups' for PM2.5 or PM10.")


Rows where AQI reached 'Unhealthy for Sensitive Groups:
          date                                        sensor.name    AQI_PM25  \
2144  06/14/24                Loup Basin Public Health Department  280.261875   
6076  12/08/24                 #16 - Richardson County Courthouse  249.049561   
2443  06/27/24                                   Swnphd-Benklemen  238.317521   
1350  05/14/24                               PHS Fairbury JCHL 13  143.533867   
7493  02/15/25                                 ELVPHD Wisner HD 5  141.590744   
...        ...                                                ...         ...   
6720  01/09/25                                  #17 - Otoe County  101.606447   
7836  03/02/25                                      Swnphd-mccook  101.587261   
3799  08/21/24  South Heartland District Health Dept. Hastings...  101.431469   
3208  07/27/24                Loup Basin Public Health Department  101.072689   
1357  05/14/24                                       