In [None]:
import os
import pandas as pd
import numpy as np
import ee

In [2]:
# Authenticate against Google Earth Engine and initialize the client.
# The authentication call is only required the first time this is run.
ee.Authenticate()
ee.Initialize(project='ee-zongrong-flood')

In [None]:
# Build the image collection: one band, restricted to the date window of interest
collection = (
    ee.ImageCollection('COPERNICUS/S5P/NRTI/L3_CO')
    .select('CO_column_number_density')
    .filterDate('2023-12-01', '2023-12-11')
)

# Per-pixel mean over every image in the window
mean_image = collection.mean()

# Reduce the mean image over a whole-globe bounding box to obtain the
# minimum and maximum of the CO_column_number_density band.
world_extent = ee.Geometry.BBox(-180, -90, 180, 90)
min_max_stats = mean_image.reduceRegion(
    reducer=ee.Reducer.minMax(),
    geometry=world_extent,
    scale=1113.2,  # roughly 1 km
    bestEffort=True,
)
# Pull the server-side dictionary to the client as a plain dict
min_max = min_max_stats.getInfo()

min_value, max_value = (
    min_max['CO_column_number_density_min'],
    min_max['CO_column_number_density_max'],
)

print(f"CO_column_number_density - Min value: {min_value}, Max value: {max_value}")

In [None]:
# Function to get CO value at given coordinates and normalize it
def get_co_value(coords):
    """Sample the mean CO image at a point and rescale it to a 0-10 score.

    Reads the module-level ``mean_image``, ``min_value`` and ``max_value``.
    Every call issues one blocking ``getInfo()`` request, plus a second one
    when the pixel under the point has no data.

    Args:
        coords: ``[longitude, latitude]`` pair handed to ``ee.Geometry.Point``.

    Returns:
        ``0`` if neither the pixel nor its neighborhood yields a value,
        ``10`` if the raw value is greater than 50, otherwise
        ``(value - min_value) / (max_value - min_value) * 10``.
    """
    band = 'CO_column_number_density'
    point = ee.Geometry.Point(coords)
    co_value = mean_image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=point,
        scale=1113.2
    ).get(band)

    value = co_value.getInfo()
    if value is None:
        # No data under the point: fall back to the mean of the surrounding
        # neighborhood to fill the gap.
        neighborhood_mean = mean_image.reduceNeighborhood(
            reducer=ee.Reducer.mean(),
            kernel=ee.Kernel.square(5),  # Increase the kernel size
            optimization='boxcar'
        )
        # reduceNeighborhood with a single-input reducer names each output
        # band "<input band>_<reducer output>", so the smoothed band is
        # "<band>_mean". Looking up the original band name here always
        # returned null, which made this fallback a no-op.
        co_value = neighborhood_mean.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=1113.2,
            bestEffort=True
        ).get(band + '_mean')

        value = co_value.getInfo()
        if value is None:
            # Still nothing available around the point: report 0.
            return 0

    # Apply conditional normalization: values above 50 are pinned to the top
    # of the scale, everything else is min-max scaled to the 0-10 range using
    # the global extremes.
    if value > 50:
        normalized_value = 10
    else:
        normalized_value = (value - min_value) / (max_value - min_value) * 10

    print(f"Coordinates: {coords}, Original value: {value}, Normalized value: {normalized_value}")  # Debugging line
    return normalized_value

# Folder containing the input CSVs and folder that receives the results
input_dir = 'C:/Users/lzr/SSCI/StreetviewLLM/groudtruth/Sampling'
output_dir = 'C:/Users/lzr/SSCI/StreetviewLLM/groudtruth/air_quality'

# Create the results folder if needed; an existing folder is not an error
os.makedirs(output_dir, exist_ok=True)


def _co_for_row(row):
    """Return the normalized CO value for one row's longitude/latitude."""
    return get_co_value([row['longitude'], row['latitude']])


# Visit every entry of the input folder, handling only CSV files
for filename in os.listdir(input_dir):
    if not filename.endswith('.csv'):
        continue

    # Resolve the source path and the matching "<stem>_air_quality.csv" target
    input_csv = os.path.join(input_dir, filename)
    output_csv = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}_air_quality.csv")

    df = pd.read_csv(input_csv)

    # Look up one CO value per row from its longitude/latitude columns.
    # NOTE(review): the column is called CO_2019 — confirm this matches the
    # date window used to build mean_image.
    df['CO_2019'] = df.apply(_co_for_row, axis=1)

    # Write the enriched table without the index column
    df.to_csv(output_csv, index=False)

    print(f"Processed {input_csv} and saved with CO density values as {output_csv}")

print("All CSV files processed.")