In [1]:
### Import Packages

# File manipulation

import os # For working with Operating System
import requests # Accessing the Web
import datetime as dt # Working with dates/times
import io # Input/Output Bytes objects

# Analysis

import numpy as np
import pandas as pd

## Data

In [2]:
# Load the example readings (columns shown below: timestamp, pm25, is_spike).
# NOTE(review): the displayed frame also has an 'Unnamed: 0' column, which
# suggests the CSV was written with its index -- consider index_col=0; confirm.
df = pd.read_csv('example_df.csv')

In [3]:
# Show the loaded frame; timestamps are still plain strings at this point
# and the rows are not in chronological order.
df

Unnamed: 0,timestamp,pm25,is_spike
0,2022-06-15 22:10:00,1.374,False
1,2022-06-15 02:50:00,2.326,False
2,2022-06-15 06:20:00,1.310,False
3,2022-06-15 21:00:00,1.362,False
4,2022-06-15 23:40:00,1.853,False
...,...,...,...
139,2022-06-15 20:20:00,2.952,False
140,2022-06-15 19:00:00,3.773,False
141,2022-06-15 17:00:00,0.886,False
142,2022-06-15 05:00:00,2.331,False


In [4]:
# Daily time with PM2.5 above 12 ug/m^3 (the upper limit of the AQI "Good" band)

# AQI 24 hr average index values from https://www.airnow.gov/sites/default/files/2020-05/aqi-technical-assistance-document-sept2018.pdf
# 0 - 12 Good
# 12.1 - 35.4 Moderate
# 35.5 - 55.4 Unhealthy for Sensitive Groups
# 55.5 - 150.4 Unhealthy
# 150.5 - 250.4 Very Unhealthy
# 250.5 - 500.4 Hazardous
# Above 500.4 Beyond the AQI, use Hazardous information

# Convert timestamp to pandas datetime so readings can be ordered and subtracted
df['timestamp'] = pd.to_datetime(df['timestamp'])

# The rows are not stored in chronological order, so sort first; differencing
# unsorted timestamps yields meaningless (even negative) gaps.
df_by_time = df.sort_values('timestamp')

# Time each reading covers = gap until the next reading. The final reading has
# no successor, so its duration is NaT and is ignored by sum() below.
reading_duration = df_by_time['timestamp'].diff().shift(-1)

# Filter rows where pm25 values are above 12
is_above_12 = df_by_time['pm25'] > 12
df_above_12 = df_by_time.loc[is_above_12]

# Durations of only the above-threshold readings. (Differencing the filtered
# timestamps instead would telescope to last-minus-first and so also count
# the time spent at or below 12 between exceedances.)
time_diff = reading_duration.loc[is_above_12]

# Sum up the durations to get the total time above 12
# (an empty selection sums to a zero Timedelta)
total_time_above_12 = time_diff.sum()

print(total_time_above_12)




0 days 00:00:00


In [7]:
# Function to find the mean, min, max and std of pm25 during the morning rush hours (06:00-08:59)

# Convert 'timestamp' to datetime (same conversion as the earlier cell)
# before the hour-based filtering below.
df['timestamp'] = pd.to_datetime(df.timestamp)

def pm_25_MorningRushStats(df, start_hour=6, end_hour=9):
    """Summarise the pm25 readings taken during the morning rush window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'timestamp' column (datetime64, or values that
        ``pd.to_datetime`` can parse) and a 'pm25' column. The frame is
        not modified.
    start_hour : int, default 6
        First hour of the window, inclusive.
    end_hour : int, default 9
        End of the window, exclusive (the default keeps 06:00-08:59).

    Returns
    -------
    list
        ``[mean, min, max, std]`` of 'pm25' over the selected rows. ``std``
        is pandas' default sample standard deviation, so it is NaN when
        fewer than two rows are selected; for a numeric column all four
        values are NaN when no row falls inside the window.
    """
    # Parse locally rather than relying on an earlier cell having already
    # converted the column; the caller's frame is left untouched.
    reading_hour = pd.to_datetime(df['timestamp']).dt.hour
    in_window = (reading_hour >= start_hour) & (reading_hour < end_hour)
    rush_pm25 = df.loc[in_window, 'pm25']

    # Return the results as a list
    return [rush_pm25.mean(), rush_pm25.min(), rush_pm25.max(), rush_pm25.std()]

# Compute the morning-rush statistics for the loaded readings.
# The returned list is ordered [mean, min, max, std].
stats = pm_25_MorningRushStats(df)

print(stats)

[0.9539444444444444, 0.604, 1.323, 0.21824823217994962]
