# Application

## Camden

### Imports

In [1]:
import os
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from scipy.linalg import solve
from scipy.optimize import curve_fit
from geopy.distance import geodesic


In [2]:

# Input files and output folder (absolute locations on the analysis machine)
camden_no_path = r'D:\File_auto\\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\LondonAir\Camden\Camden-Nitric Oxide (ug m-3).csv'
coords_path = r'D:\File_auto\\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\AirQuality\LondonAir\coords_londonair.csv'
meteostat_folder = r'D:\File_auto\\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data\meteostat'

weighted_no_output_folder = r'D:\File_auto\\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data_output\NO_weighted'

# Site coordinates: the 'Latitude & Longitude' column holds both numbers in one
# text field, so split it on ', ' and store the two halves as float columns.
coords_df = pd.read_csv(coords_path)
coords_df[['Latitude', 'Longitude']] = (
    coords_df['Latitude & Longitude']
    .str.split(', ', expand=True)
    .astype(float)
)

# Camden NO readings: parse the timestamp text and derive the calendar date
# that the per-day processing groups on.
camden_no_df = pd.read_csv(camden_no_path)
camden_no_df['ReadingDateTime'] = pd.to_datetime(
    camden_no_df['ReadingDateTime'], format='%d/%m/%Y %H:%M'
)
camden_no_df['Date'] = camden_no_df['ReadingDateTime'].dt.date

# Attach each site's coordinates; the left join keeps every reading even when
# its site has no row in the coordinates table.
camden_no_df = camden_no_df.merge(
    coords_df[['Site', 'Latitude', 'Longitude']], on='Site', how='left'
)

camden_no_df.head()



Unnamed: 0,Site,Species,ReadingDateTime,Value,Units,Provisional or Ratified,Date,Latitude,Longitude
0,BL0,NO,2018-01-01,16.5,ug m-3,R,2018-01-01,51.522287,-0.125848
1,BL0,NO,2018-01-02,5.7,ug m-3,R,2018-01-02,51.522287,-0.125848
2,BL0,NO,2018-01-03,12.8,ug m-3,R,2018-01-03,51.522287,-0.125848
3,BL0,NO,2018-01-04,9.7,ug m-3,R,2018-01-04,51.522287,-0.125848
4,BL0,NO,2018-01-05,11.0,ug m-3,R,2018-01-05,51.522287,-0.125848


### Function definitions

In [3]:

# calculate_semivariogram: empirical semivariogram of one set of readings
def calculate_semivariogram(data):
    """Build an empirical semivariogram from every pair of rows in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide 'Latitude', 'Longitude' and 'Value' columns.

    Returns
    -------
    numpy.ndarray
        One ``(distance, semivariance)`` row per distinct pair distance, sorted
        by distance. Distance is the Euclidean norm of the latitude/longitude
        differences; semivariance is half the mean squared 'Value' difference
        of all pairs at exactly that distance. With fewer than two rows the
        result is an empty array.
    """
    lat = data['Latitude']
    lon = data['Longitude']
    val = data['Value']
    count = len(data)

    # (separation, squared value difference) for each unordered pair a < b
    pairs = [
        (
            np.linalg.norm([lat.iloc[a] - lat.iloc[b], lon.iloc[a] - lon.iloc[b]]),
            (val.iloc[a] - val.iloc[b]) ** 2,
        )
        for a in range(count)
        for b in range(a + 1, count)
    ]

    # Pairs are grouped by exact distance equality (no lag binning)
    lags = sorted({separation for separation, _ in pairs})
    table = [
        (lag, np.mean([sq for separation, sq in pairs if separation == lag]) / 2.0)
        for lag in lags
    ]

    return np.array(table)

# calculate_kriging_weights: solve the ordinary-kriging system for the weights
def calculate_kriging_weights(semivariogram, distances, n, nugget=1e-10):
    """Solve the bordered kriging system ``A @ [w, mu] = b`` and return ``w``.

    Parameters
    ----------
    semivariogram : array-like of (distance, semivariance) rows
        Table whose second column supplies the semivariance values.
    distances : 2-D array-like, at least ``n x n``
        Pairwise distances between the ``n`` observation points.
    n : int
        Number of observation points.
    nugget : float, optional
        Small value added to the diagonal of the system matrix.

    Returns
    -------
    numpy.ndarray
        The ``n`` kriging weights (the Lagrange multiplier is dropped).

    Raises
    ------
    ValueError
        If ``semivariogram`` has no rows.

    Notes
    -----
    Fixes relative to the previous version:

    * the right-hand side now ends in 1, so the constraint row enforces
      ``sum(w) == 1`` (it was left at 0, forcing the weights to sum to zero);
    * ``A[i, j]`` is taken from ``distances[i, j]`` (it used ``distances[0, j]``,
      which made every row of the matrix identical).

    NOTE(review): the semivariance lookup still uses ``int(distance)`` as a ROW
    INDEX into ``semivariogram`` rather than matching on the distance column;
    for distances below 1 this always selects row 0. Kept as-is because the
    intended lookup (binning / fitted model) is not visible here - confirm.
    NOTE(review): the right-hand side uses ``distances[0, i]``, i.e. distances
    measured from the first point - confirm the intended prediction location.
    """
    if len(semivariogram) == 0:
        raise ValueError("semivariogram must contain at least one row")

    last_row = len(semivariogram) - 1

    def _semivariance(distance):
        # Truncated distance is used as a row index, capped at the last row.
        row = int(distance)
        return semivariogram[row][1] if row <= last_row else semivariogram[-1][1]

    A = np.zeros((n + 1, n + 1))
    for i in range(n):
        for j in range(n):
            if i == j:
                A[i, j] = semivariogram[0][1] + nugget
            else:
                A[i, j] = _semivariance(distances[i, j])

    # Border of ones: Lagrange-multiplier row/column for the sum-to-one constraint
    A[-1, :-1] = 1
    A[:-1, -1] = 1

    b = np.zeros(n + 1)
    for i in range(n):
        b[i] = _semivariance(distances[0, i])
    # Unbiasedness constraint: the weights must sum to 1
    b[-1] = 1

    weights = solve(A, b)
    return weights[:-1]


def adjust_weights(weights, wind_speed, wind_dir, sensor_directions, max_wind_speed):
    """Scale weights by how closely each sensor direction lines up with the wind.

    Each weight is multiplied by
    ``1 + wind_speed * cos(wind_dir - sensor_direction) / max_wind_speed``
    (angles in degrees), then negative results are replaced by 0.
    """
    angle_offset = np.radians(wind_dir - sensor_directions)
    relative_push = (wind_speed * np.cos(angle_offset)) / max_wind_speed
    scaled = weights * (1 + relative_push)
    # Weights must not go negative
    return np.clip(scaled, 0, None)

def normalize_weights(weights):
    """Rescale ``weights`` to sum to 1; return all zeros when their sum is 0."""
    total = np.sum(weights)
    return weights / total if total != 0 else np.zeros_like(weights)



'def calculate_distances(coords):\n    distances = np.zeros((len(coords), len(coords)))\n    for i, coord1 in enumerate(coords):\n        for j, coord2 in enumerate(coords):\n            distances[i, j] = geodesic(coord1, coord2).meters\n    return distances'

In [5]:

# interpolate: weighted estimate from the observed values
def interpolate(data, weights):
    """Return the sum of ``weights`` times the 'Value' column of ``data``."""
    observed = data['Value'].values
    contributions = weights * observed
    return np.sum(contributions)


# get_wind_data: look up one day's wind speed and direction in the yearly meteostat file
def get_wind_data(date, year):
    """Return ``(wspd, wdir)`` for ``date`` from ``meteostat{year}.csv``.

    Parameters
    ----------
    date : date-like
        Day to look up; parsed with ``pd.to_datetime(..., format='%Y-%m-%d')``.
    year : int
        Selects the file ``meteostat{year}.csv`` inside ``meteostat_folder``.

    Returns
    -------
    tuple
        The 'wspd' and 'wdir' values of the first row matching ``date``, or
        ``(np.nan, np.nan)`` when the file has no row for that day.
    """
    # Build the path with os.path.join: the previous f-string contained the
    # invalid escape sequence '\m', which Python reports as a SyntaxWarning.
    wind_data_path = os.path.join(meteostat_folder, f'meteostat{year}.csv')
    wind_data_df = pd.read_csv(wind_data_path, encoding='utf-8-sig')  # tolerate a BOM
    wind_data_df.columns = wind_data_df.columns.str.strip()

    # Compare plain calendar dates on both sides
    wind_data_df['date'] = pd.to_datetime(wind_data_df['date'], format='%Y-%m-%d').dt.date
    date = pd.to_datetime(date, format='%Y-%m-%d').date()

    wind_info = wind_data_df[wind_data_df['date'] == date]
    if wind_info.empty:
        return np.nan, np.nan
    return wind_info['wspd'].values[0], wind_info['wdir'].values[0]


def interpolate_and_clip(df, column, method='linear', order=2):
    """Fill missing values of ``df[column]`` by interpolation, kept within range.

    Interpolated values are clipped to the column's original minimum and
    maximum and rounded to 2 decimals. Only missing entries are replaced;
    existing values are returned unchanged. ``order`` is passed on only when
    ``method == 'polynomial'``. ``df`` itself is not modified.
    """
    series = df[column]

    # Bounds come from the data as observed, before any filling
    lower_bound = series.min()
    upper_bound = series.max()

    options = {'method': method}
    if method == 'polynomial':
        options['order'] = order
    filled = series.interpolate(**options)

    bounded = filled.clip(lower=lower_bound, upper=upper_bound).round(2)

    # Original values win; interpolated ones only plug the gaps
    return series.combine_first(bounded)


  wind_data_path = f'{meteostat_folder}\meteostat{year}.csv'


### Apply to the Camden data

In [6]:

# Fill gaps in the "Value" column by linear interpolation, clipped to the observed range
# NOTE(review): interpolation runs over the whole column in row order, not per
# site, so a gap at the start/end of one site's rows may be filled using a
# neighbouring site's readings - confirm this is intended.
camden_no_df['Value'] = interpolate_and_clip(camden_no_df, 'Value', method='linear')

# Disabled check: the block below is an inert string literal, not executed code.
"""
# Check new CSV
camden_no_interpolated_path = camden_no_path.replace('.csv', '_interpolated.csv')
camden_no_df.to_csv(camden_no_interpolated_path, index=False)
print(f"Interpolated data saved to {camden_no_interpolated_path}")
camden_no_df.head()
"""

# Prepare the containers for the weighted air-quality computation
weighted_values = []
dates = camden_no_df['Date'].unique()


In [7]:
# Daily wind-adjusted kriging estimate of NO for Camden, saved as one CSV.
#
# For every date: build the semivariogram and kriging weights from that day's
# site readings, scale the weights by the day's wind, normalise them and take
# the weighted sum of the readings. Dates with a single reading use it as-is.

weighted_values = []
dates = camden_no_df['Date'].unique()

# Yearly maximum wind speed, read once per year instead of once per date
max_wind_speed_by_year = {}

for date in dates:
    daily_data = camden_no_df[camden_no_df['Date'] == date]
    if len(daily_data) > 1:
        semivariogram = calculate_semivariogram(daily_data)
        # Pairwise Euclidean distance between sites, in latitude/longitude units
        distances = cdist(daily_data[['Latitude', 'Longitude']], daily_data[['Latitude', 'Longitude']], metric='euclidean')
        kriging_weights = calculate_kriging_weights(semivariogram, distances, len(daily_data))

        wind_speed, wind_dir = get_wind_data(date, date.year)
        # Angle (degrees) of each site relative to the centroid of the day's sites
        sensor_directions = np.arctan2(daily_data['Longitude'] - daily_data['Longitude'].mean(), daily_data['Latitude'] - daily_data['Latitude'].mean()) * 180 / np.pi

        if date.year not in max_wind_speed_by_year:
            # os.path.join avoids the invalid '\m' escape the old f-string contained
            meteostat_path = os.path.join(meteostat_folder, f'meteostat{date.year}.csv')
            # Same BOM/whitespace handling as get_wind_data, so 'wspd' resolves identically
            meteostat_df = pd.read_csv(meteostat_path, encoding='utf-8-sig')
            meteostat_df.columns = meteostat_df.columns.str.strip()
            max_wind_speed_by_year[date.year] = meteostat_df['wspd'].max()
        max_wind_speed = max_wind_speed_by_year[date.year]

        # Without usable wind data the adjustment would turn every weight into
        # NaN, and max(0, nan) below would silently record 0 for the day.
        # Fall back to the unadjusted (non-negative) kriging weights instead.
        wind_usable = pd.notna(wind_speed) and pd.notna(wind_dir) and max_wind_speed > 0
        if wind_usable:
            adjusted_weights = adjust_weights(kriging_weights, wind_speed, wind_dir, sensor_directions, max_wind_speed)
        else:
            adjusted_weights = np.clip(kriging_weights, 0, None)
        normalized_weights = normalize_weights(adjusted_weights)

        weighted_value = interpolate(daily_data, normalized_weights)
        weighted_value = max(0, weighted_value)  # Ensure no negative values
        weighted_values.append({'Date': date, 'NO_weighted_value(ug m-3)': weighted_value})
    else:
        # Only one reading that day: nothing to weight
        weighted_values.append({'Date': date, 'NO_weighted_value(ug m-3)': daily_data['Value'].values[0]})

# Save the weighted values
weighted_df = pd.DataFrame(weighted_values)
output_path = os.path.join(weighted_no_output_folder, 'Camden-NO_weighted.csv')
weighted_df.to_csv(output_path, index=False)
print(f"Weighted data saved to {output_path}")


  meteostat_path = f'{meteostat_folder}\meteostat{date.year}.csv'


Weighted data saved to D:\File_auto\\0_UCL_CASA\OneDrive - University College London\Xiaoyi_dissertation\Analysis\Data_output\NO_weighted\Camden-NO_weighted.csv
