# Import

In [1]:
import os
import pandas as pd

In [5]:
# Define the folder paths: raw Breathe London exports are read from
# input_folder and the processed daily files are written to output_folder.
input_folder = r"D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon"
output_folder = r"D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2"

# Ensure the output folder exists. exist_ok=True replaces the separate
# exists() check, which could race with another process creating the folder.
os.makedirs(output_folder, exist_ok=True)

In [3]:
# Names of the study-area folders; each one is joined onto the input and
# output folder paths when the files are processed.
study_areas = [
    "Camden",
    "City of London",
    "Islington",
    "Kensington and Chelsea",
    "Lambeth",
    "Southwark",
    "Westminster",
]

# Unify the time scale of each CSV file (aggregate readings to daily statistics)

In [14]:

# Process each study area: every NO2 / PM2.5 CSV found in the area's input
# folder is aggregated to daily mean / max / min values and written to the
# matching area folder under output_folder.

# Maps the pollutant tag looked for in a file name to the value column read
# from that file. Insertion order matters: "NO2" is tested before "PM2.5".
POLLUTANT_COLUMNS = {
    "NO2": "Nitrogen dioxide",
    "PM2.5": "PM<sub>2.5</sub> particulates",
}


def _detect_pollutant(filename):
    """Return (pollutant_type, pollutant_column) for *filename*, or None.

    A file matches a pollutant when the pollutant tag appears anywhere in
    its name; the first matching entry of POLLUTANT_COLUMNS wins.
    """
    for pollutant_type, pollutant_column in POLLUTANT_COLUMNS.items():
        if pollutant_type in filename:
            return pollutant_type, pollutant_column
    return None


def _read_csv_with_fallback(file_path):
    """Read *file_path* as UTF-8, retrying as latin1 on a decoding error.

    Returns the DataFrame, or None after printing the error when the file
    cannot be read. Any read failure skips only this file instead of
    aborting the whole batch.
    """
    try:
        try:
            return pd.read_csv(file_path, encoding='utf-8')
        except UnicodeDecodeError:
            return pd.read_csv(file_path, encoding='latin1')
    except Exception as e:  # per-file boundary: report and move on
        print(f"Failed to read {file_path} with error: {e}")
        return None


def _compute_daily_stats(df, pollutant_type, pollutant_column):
    """Aggregate the readings in *df* to one row per calendar date.

    The 'Category' column is parsed as a day-first timestamp and reduced to
    its date; the result has the columns 'Date', '<type>_mean',
    '<type>_max' and '<type>_min'.
    """
    # Only drop rows that lack the timestamp or the reading itself; a NaN in
    # an unrelated column must not discard an otherwise valid measurement.
    readings = df.dropna(subset=['Category', pollutant_column]).copy()
    readings['Date'] = pd.to_datetime(readings['Category'], dayfirst=True).dt.date

    daily_stats = readings.groupby('Date')[pollutant_column].agg(['mean', 'max', 'min']).reset_index()
    daily_stats.columns = ['Date', f'{pollutant_type}_mean', f'{pollutant_type}_max', f'{pollutant_type}_min']
    return daily_stats


for area in study_areas:
    area_input_folder = os.path.join(input_folder, area)
    area_output_folder = os.path.join(output_folder, area)

    # A missing area folder should not abort the remaining areas.
    if not os.path.isdir(area_input_folder):
        print(f"Input folder not found, skipping: {area_input_folder}")
        continue
    os.makedirs(area_output_folder, exist_ok=True)

    # Sorted so the processing order (and the log) is reproducible.
    for filename in sorted(os.listdir(area_input_folder)):
        pollutant = _detect_pollutant(filename)
        # Skip anything that is not a pollutant CSV (other files, sub-folders).
        if pollutant is None or not filename.lower().endswith('.csv'):
            continue
        pollutant_type, pollutant_column = pollutant

        file_path = os.path.join(area_input_folder, filename)
        df = _read_csv_with_fallback(file_path)
        if df is None:
            continue

        # Both the timestamp column and the pollutant column are required.
        missing_column = next(
            (col for col in ('Category', pollutant_column) if col not in df.columns),
            None,
        )
        if missing_column is not None:
            print(f"'{missing_column}' column not found in {file_path}. Available columns: {df.columns}")
            continue

        daily_stats = _compute_daily_stats(df, pollutant_type, pollutant_column)

        # Generate the new file name, e.g.
        # "Breathe London - <site> - NO2.csv" -> "<site>_NO2.csv"
        new_filename = filename.replace("Breathe London - ", "").replace(f" - {pollutant_type}.csv", f"_{pollutant_type}.csv")
        new_file_path = os.path.join(area_output_folder, new_filename)

        # Save the new CSV file
        daily_stats.to_csv(new_file_path, index=False, encoding='utf-8')

        print(f"Processed and saved: {new_file_path}")


Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2\Camden\Ampthill Square Estate_NO2.csv
Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2\Camden\Ampthill Square Estate_PM2.5.csv
Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2\Camden\Brunswick Centre_NO2.csv
Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2\Camden\Brunswick Centre_PM2.5.csv
Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\BreatheLondon2\Camden\Christopher Hatton Primary School_NO2.csv
Processed and saved: D:\File_auto\0_UCL_CASA\OneDrive - University College London\Xiaoyi_disserta