In [1]:
### Import Packages

# File manipulation

import os # For working with Operating System
import requests # Accessing the Web
import datetime as dt # Working with dates/times
import io # Input/Output Bytes objects

# Analysis

import numpy as np
import pandas as pd

## Data

In [21]:
# Load the example PM2.5 readings. The path is relative, so the file is
# looked up in the current working directory.
df = pd.read_csv('example_df.csv')

In [22]:
df

Unnamed: 0,timestamp,pm25,is_spike
0,2022-06-15 22:10:00,1.374,False
1,2022-06-15 02:50:00,2.326,False
2,2022-06-15 06:20:00,1.310,False
3,2022-06-15 21:00:00,1.362,False
4,2022-06-15 23:40:00,1.853,False
...,...,...,...
139,2022-06-15 20:20:00,2.952,False
140,2022-06-15 19:00:00,3.773,False
141,2022-06-15 17:00:00,0.886,False
142,2022-06-15 05:00:00,2.331,False


In [7]:
# Per-day summary of PM2.5 readings taken during the evening rush-hour window.
# One output row is produced per calendar date found in df, in order of first
# appearance.

# set time range for filtering (both bounds are inclusive)
df['timestamp'] = pd.to_datetime(df['timestamp'])
start_time = pd.to_datetime('15:00:00', format='%H:%M:%S').time()
end_time = pd.to_datetime('18:30:00', format='%H:%M:%S').time()

# The date and time-of-day of each reading do not change between iterations,
# so derive them (and the window mask) once instead of once per date.
reading_dates = df['timestamp'].dt.date
reading_times = df['timestamp'].dt.time
in_window = (reading_times >= start_time) & (reading_times <= end_time)

# Collect one plain dict per date and build the frame once at the end;
# concatenating inside the loop re-copies every earlier row on each pass.
rush_hour_rows = []
for date in reading_dates.unique():
    # readings for this date that fall inside the rush-hour window
    filtered_df = df[(reading_dates == date) & in_window]
    valid_pm25 = filtered_df['pm25'].dropna()

    if valid_pm25.empty:
        # idxmin()/idxmax() raise on an empty selection, so a date with no
        # usable readings in the window gets missing timestamps instead.
        min_pm25_timestamp = max_pm25_timestamp = pd.NaT
    else:
        min_pm25_timestamp = filtered_df.loc[valid_pm25.idxmin(), 'timestamp']
        max_pm25_timestamp = filtered_df.loc[valid_pm25.idxmax(), 'timestamp']

    # min/max/mean/std already evaluate to NaN when there is nothing to aggregate
    rush_hour_rows.append({
        'Min_Timestamp': min_pm25_timestamp, 'Min_PM25': filtered_df['pm25'].min(),
        'Max_Timestamp': max_pm25_timestamp, 'Max_PM25': filtered_df['pm25'].max(),
        'Mean_PM25': filtered_df['pm25'].mean(), 'SD_PM25': filtered_df['pm25'].std(),
    })

# Explicit column list keeps the schema stable even when df has no rows.
PM_rushhour_stats = pd.DataFrame(
    rush_hour_rows,
    columns=['Min_Timestamp', 'Min_PM25', 'Max_Timestamp', 'Max_PM25', 'Mean_PM25', 'SD_PM25'],
)


PM_rushhour_stats

Unnamed: 0,Min_Timestamp,Min_PM25,Max_Timestamp,Max_PM25,Mean_PM25,SD_PM25
0,2022-06-15 15:40:00,0.553,2022-06-15 18:20:00,3.259,1.282,0.887156


In [18]:
# Per-day evening rush-hour PM2.5 summary. Unlike a full-timestamp summary,
# this version reports only the time of day at which the minimum and maximum
# occurred. One output row is produced per calendar date found in df, in
# order of first appearance.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# create a new column that holds the date
df['date'] = df['timestamp'].dt.date

# create a new column that holds the time component of the timestamp
df['time'] = df['timestamp'].dt.time

# set time range for filtering (both bounds are inclusive)
start_time = pd.to_datetime('15:00:00', format='%H:%M:%S').time()
end_time = pd.to_datetime('18:30:00', format='%H:%M:%S').time()

# The time-of-day test does not depend on the date, so evaluate it once.
in_window = (df['time'] >= start_time) & (df['time'] <= end_time)

# Collect one plain dict per date and build the frame once at the end;
# concatenating inside the loop re-copies every earlier row on each pass.
rush_hour_rows = []
for date in df['date'].unique():
    # filter dataframe by date and time range
    filtered_df = df[(df['date'] == date) & in_window]
    valid_pm25 = filtered_df['pm25'].dropna()

    if valid_pm25.empty:
        # idxmin()/idxmax() raise on an empty selection, so a date with no
        # usable readings in the window gets missing times instead.
        min_pm25_timestamp = max_pm25_timestamp = None
    else:
        min_pm25_timestamp = filtered_df.loc[valid_pm25.idxmin(), 'time']
        max_pm25_timestamp = filtered_df.loc[valid_pm25.idxmax(), 'time']

    # min/max/mean/std already evaluate to NaN when there is nothing to aggregate
    rush_hour_rows.append({
        'pm25Time_eveningRush_min': min_pm25_timestamp,
        'pm25_eveningRush_min': filtered_df['pm25'].min(),
        'pm25Time_eveningRush_max': max_pm25_timestamp,
        'pm25_eveningRush_max': filtered_df['pm25'].max(),
        'pm25_eveningRush_mean': filtered_df['pm25'].mean(),
        'pm25_eveningRush_sd': filtered_df['pm25'].std(),
    })

# Explicit column list keeps the schema stable even when df has no rows.
PM_rushhour_stats = pd.DataFrame(
    rush_hour_rows,
    columns=['pm25Time_eveningRush_min', 'pm25_eveningRush_min',
             'pm25Time_eveningRush_max', 'pm25_eveningRush_max',
             'pm25_eveningRush_mean', 'pm25_eveningRush_sd'],
)
PM_rushhour_stats

Unnamed: 0,Date,Min_Timestamp,Min_PM25,Max_Timestamp,Max_PM25,Mean_PM25,SD_PM25
0,2022-06-15,15:40:00,0.553,18:20:00,3.259,1.282,0.887156


In [24]:
def get_pm_stats(df, start_time='15:00:00', end_time='18:30:00'):
    """Compute per-day PM2.5 statistics inside a daily time-of-day window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'timestamp' column (datetime values or strings that
        ``pd.to_datetime`` can parse) and a numeric 'pm25' column. The frame
        is not modified.
    start_time, end_time : str, optional
        Window bounds in 'HH:MM:SS' form; both bounds are inclusive.
        Defaults to the 15:00:00-18:30:00 evening rush-hour window.

    Returns
    -------
    dict
        Maps ``str(date)`` to a dict with keys 'min', 'max', 'mean' and 'sd'
        for each calendar date present in ``df``, in order of first
        appearance. Dates with no readings in the window get NaN values.
    """
    # Parse into a local Series rather than assigning back to df, so the
    # caller's column is left exactly as it was passed in.
    timestamps = pd.to_datetime(df['timestamp'])
    window_start = pd.to_datetime(start_time, format='%H:%M:%S').time()
    window_end = pd.to_datetime(end_time, format='%H:%M:%S').time()

    # These do not vary between dates, so compute them once up front.
    dates = timestamps.dt.date
    times = timestamps.dt.time
    in_window = (times >= window_start) & (times <= window_end)

    pm_stats = {}
    for date in dates.unique():
        # PM2.5 readings for this date that fall inside the window
        readings = df.loc[(dates == date) & in_window, 'pm25']
        pm_stats[str(date)] = {
            'min': readings.min(),
            'max': readings.max(),
            'mean': readings.mean(),
            'sd': readings.std(),
        }

    return pm_stats


SyntaxError: invalid syntax (3191866285.py, line 29)

In [26]:
def getEveningRushHourStats(df, start_time='15:00:00', end_time='18:30:00'):
    """Summarise PM2.5 readings inside a daily time-of-day window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'timestamp' column (datetime values or strings that
        ``pd.to_datetime`` can parse) and a numeric 'pm25' column. The frame
        is not modified.
    start_time, end_time : str, optional
        Window bounds in 'HH:MM:SS' form; both bounds are inclusive.
        Defaults to the 15:00:00-18:30:00 evening rush-hour window.

    Returns
    -------
    list of list
        One entry per calendar date in ``df`` (order of first appearance):
        ``[min, time_of_min, max, time_of_max, mean, std]``. The two time
        entries are ``datetime.time`` objects, or ``None`` when the date has
        no non-missing readings in the window (the numeric entries are then
        NaN).
    """
    # Parse into a local Series: string timestamps are accepted and the
    # caller's column is left untouched.
    timestamps = pd.to_datetime(df['timestamp'])
    window_start = pd.to_datetime(start_time, format='%H:%M:%S').time()
    window_end = pd.to_datetime(end_time, format='%H:%M:%S').time()

    # These do not vary between dates, so compute them once up front.
    dates = timestamps.dt.date
    times = timestamps.dt.time
    in_window = (times >= window_start) & (times <= window_end)

    pm25_stats = []
    for date in dates.unique():
        # PM2.5 readings for this date that fall inside the window
        readings = df.loc[(dates == date) & in_window, 'pm25']
        valid = readings.dropna()

        if valid.empty:
            # idxmin()/idxmax() raise on an empty selection; report the
            # extreme times as missing instead of aborting the whole run.
            min_pm25_timestamp = max_pm25_timestamp = None
        else:
            min_pm25_timestamp = timestamps.loc[valid.idxmin()].time()
            max_pm25_timestamp = timestamps.loc[valid.idxmax()].time()

        pm25_stats.append([
            readings.min(), min_pm25_timestamp,
            readings.max(), max_pm25_timestamp,
            readings.mean(), readings.std(),
        ])

    return pm25_stats

In [None]:
def getEveningRushHourStats(df, start_time='15:00:00', end_time='18:30:00'):
    """Summarise PM2.5 readings inside a daily time-of-day window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'timestamp' column (datetime values or strings that
        ``pd.to_datetime`` can parse) and a numeric 'pm25' column. The frame
        is not modified.
    start_time, end_time : str, optional
        Window bounds in 'HH:MM:SS' form; both bounds are inclusive.
        Defaults to the 15:00:00-18:30:00 evening rush-hour window.

    Returns
    -------
    list of list
        One entry per calendar date in ``df`` (order of first appearance):
        ``[min, time_of_min, max, time_of_max, mean, std]``. The two time
        entries are ``datetime.time`` objects, or ``None`` when the date has
        no non-missing readings in the window (the numeric entries are then
        NaN).
    """
    # Parse into a local Series: string timestamps are accepted and the
    # caller's column is left untouched.
    timestamps = pd.to_datetime(df['timestamp'])
    window_start = pd.to_datetime(start_time, format='%H:%M:%S').time()
    window_end = pd.to_datetime(end_time, format='%H:%M:%S').time()

    # These do not vary between dates, so compute them once up front.
    dates = timestamps.dt.date
    times = timestamps.dt.time
    in_window = (times >= window_start) & (times <= window_end)

    pm25_stats = []
    for date in dates.unique():
        # PM2.5 readings for this date that fall inside the window
        readings = df.loc[(dates == date) & in_window, 'pm25']
        valid = readings.dropna()

        if valid.empty:
            # idxmin()/idxmax() raise on an empty selection; report the
            # extreme times as missing instead of aborting the whole run.
            min_pm25_timestamp = max_pm25_timestamp = None
        else:
            min_pm25_timestamp = timestamps.loc[valid.idxmin()].time()
            max_pm25_timestamp = timestamps.loc[valid.idxmax()].time()

        pm25_stats.append([
            readings.min(), min_pm25_timestamp,
            readings.max(), max_pm25_timestamp,
            readings.mean(), readings.std(),
        ])

    return pm25_stats