# Data prepare

In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
from scipy.linalg import solve

In [2]:
# Load data
air_data = pd.read_excel('test_air_sensor.xlsx')
wind_data = pd.read_csv('wind.csv')


# Split the "Latitude & Longitude" column into separate columns
air_data[['Latitude', 'Longitude']] = air_data['Latitude & Longitude'].str.split(', ', expand=True)
air_data['Latitude'] = air_data['Latitude'].astype(float)
air_data['Longitude'] = air_data['Longitude'].astype(float)

In [3]:
air_data.head()

Unnamed: 0,Name,Site,Monitor Date,PM2.5(ug m-3),NO2(ug m-3),Latitude & Longitude,Latitude,Longitude
0,Westminster - Cavendish Square,WCS,2019-04-01,,44.1,"51.516801645206, -0.145657269364",51.516802,-0.145657
1,Westminster - Marylebone Road,WMC,2019-04-01,,44.1,"51.522540000000, -0.154590000000",51.52254,-0.15459
2,Kensington and Chelsea - North Ken FIDAS,KF1,2019-04-01,10.9,,"51.521046747604, -0.213492139585",51.521047,-0.213492
3,Southwark - A2 Old Kent Road,SK5,2019-04-01,,50.6,"51.480499493647, -0.059552893257",51.480499,-0.059553
4,Islington - Arsenal,IS6,2019-04-01,,20.2,"51.557895000000, -0.106989000000",51.557895,-0.106989


In [4]:
wind_data.head()

Unnamed: 0,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
0,2019-04-01,7.8,3.4,12.9,,,66,13.2,27.8,1020.9,
1,2019-04-02,5.9,4.0,8.0,,,269,9.2,27.8,1005.4,
2,2019-04-03,4.5,1.4,8.2,,,275,6.9,20.4,999.1,
3,2019-04-04,5.0,1.8,7.7,,,143,15.6,38.9,994.5,
4,2019-04-05,8.2,5.3,11.3,,,112,16.9,33.3,1001.3,


# definition

In [19]:
# Calculate semivariogram
# def calculate_semivariogram(data, max_distance):
#     distances = cdist(data[['Latitude', 'Longitude']], data[['Latitude', 'Longitude']], metric='euclidean')
#     semivariances = []

#     for h in range(1, max_distance):
#         pairs = np.where((distances >= h) & (distances < h + 1))
#         if len(pairs[0]) > 0:
#             semivariance = np.mean((data['NO2(ug m-3)'].values[pairs[0]] - data['NO2(ug m-3)'].values[pairs[1]]) ** 2) / 2
#             semivariances.append((h, semivariance))

#     return np.array(semivariances)
def calculate_semivariogram(data, max_distance):
    distances = cdist(data[['Latitude', 'Longitude']], data[['Latitude', 'Longitude']], metric='euclidean')
    semivariances = []

    for h in range(1, max_distance):
        pairs = np.where((distances >= h) & (distances < h + 1))
        if len(pairs[0]) > 0:
            semivariance = np.mean((data['NO2(ug m-3)'].values[pairs[0]] - data['NO2(ug m-3)'].values[pairs[1]]) ** 2) / 2
            semivariances.append((h, semivariance))
        else:
            semivariances.append((h, 0))  # No pairs found for this distance

    return np.array(semivariances)

# # Calculate Kriging weights
# def calculate_kriging_weights(semivariogram, distances, n):
#     A = np.zeros((n + 1, n + 1))
#     A[:n, :n] = semivariogram[distances.astype(int)]
#     A[-1, :-1] = 1
#     A[:-1, -1] = 1

#     b = np.zeros(n + 1)
#     b[:-1] = semivariogram[distances.astype(int)]

#     weights = solve(A, b)
#     return weights[:-1]


# def calculate_kriging_weights(semivariogram, distances, n):
#     A = np.zeros((n + 1, n + 1))
#     for i in range(n):
#         for j in range(n):
#             A[i, j] = semivariogram[int(distances[i, j])]
#     A[-1, :-1] = 1
#     A[:-1, -1] = 1

#     b = np.zeros(n + 1)
#     for i in range(n):
#         b[i] = semivariogram[int(distances[i, -1])]

#     weights = solve(A, b)
#     return weights[:-1]

def calculate_kriging_weights(semivariogram, distances, n):
    A = np.zeros((n + 1, n + 1))
    
    for i in range(n):
        for j in range(n):
            if i == j:
                A[i, j] = semivariogram[0][1]  # Semivariance at distance 0
            else:
                dist = int(distances[0, j])
                A[i, j] = semivariogram[dist][1] if dist < len(semivariogram) else semivariogram[-1][1]  # Last semivariogram value for large distances

    A[-1, :-1] = 1
    A[:-1, -1] = 1

    b = np.zeros(n + 1)
    for i in range(n):
        dist = int(distances[0, i])
        b[i] = semivariogram[dist][1] if dist < len(semivariogram) else semivariogram[-1][1]

    weights = solve(A, b)
    return weights[:-1]

# Adjust weights for wind effects
def adjust_weights(weights, wind_speed, wind_dir, sensor_directions, max_wind_speed):
    adjustments = 1 + (wind_speed * np.cos(np.radians(wind_dir - sensor_directions))) / max_wind_speed
    adjusted_weights = weights * adjustments
    return adjusted_weights

# Normalize adjusted weights
def normalize_weights(weights):
    return weights / np.sum(weights)

# Perform interpolation
def interpolate(data, weights):
    return np.sum(weights * data['NO2(ug m-3)'].values)

# Example

In [22]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
from scipy.linalg import solve

# Function to calculate the semivariogram
def calculate_semivariogram(data, max_distance):
    distances = cdist(data[['Latitude', 'Longitude']], data[['Latitude', 'Longitude']], metric='euclidean')
    semivariances = []

    for h in range(1, max_distance):
        pairs = np.where((distances >= h) & (distances < h + 1))
        if len(pairs[0]) > 0:
            semivariance = np.mean((data['NO2(ug m-3)'].values[pairs[0]] - data['NO2(ug m-3)'].values[pairs[1]]) ** 2) / 2
            semivariances.append((h, semivariance))
        else:
            semivariances.append((h, 0))  # No pairs found for this distance

    return np.array(semivariances)

# Function to calculate Kriging weights with regularization
def calculate_kriging_weights(semivariogram, distances, n, nugget=1e-10):
    A = np.zeros((n + 1, n + 1))

    for i in range(n):
        for j in range(n):
            if i == j:
                A[i, j] = semivariogram[0][1] + nugget  # Semivariance at distance 0 with nugget effect
            else:
                dist = int(distances[0, j])
                A[i, j] = semivariogram[dist][1] if dist < len(semivariogram) else semivariogram[-1][1]

    A[-1, :-1] = 1
    A[:-1, -1] = 1

    b = np.zeros(n + 1)
    for i in range(n):
        dist = int(distances[0, i])
        b[i] = semivariogram[dist][1] if dist < len(semivariogram) else semivariogram[-1][1]

    weights = solve(A, b)
    return weights[:-1]

# Function to adjust weights for wind effects
def adjust_weights(weights, wind_speed, wind_dir, sensor_directions, max_wind_speed):
    adjusted_weights = weights * (1 + (wind_speed * np.cos(np.radians(wind_dir - sensor_directions))) / max_wind_speed)
    return adjusted_weights

# Function to normalize weights
def normalize_weights(weights):
    return weights / np.sum(weights)

# Function to interpolate
def interpolate(data, weights):
    return np.sum(weights * data['NO2(ug m-3)'].values)

# Load data
air_data = pd.read_excel('test_air_sensor.xlsx')
wind_data = pd.read_csv('wind.csv')

# Split the "Latitude & Longitude" column into separate columns
air_data[['Latitude', 'Longitude']] = air_data['Latitude & Longitude'].str.split(', ', expand=True)
air_data['Latitude'] = air_data['Latitude'].astype(float)
air_data['Longitude'] = air_data['Longitude'].astype(float)

# Filter data for the specific date
date_filter = '2019-04-01'
filtered_air_data = air_data[air_data['Monitor Date'] == date_filter]
filtered_wind_data = wind_data[wind_data['date'] == date_filter]

# Check if filtered data is not empty
if filtered_air_data.empty or filtered_wind_data.empty:
    raise ValueError("Filtered data for the specified date is empty. Please check the data files.")

# Extract wind direction and speed for the specific date
wind_speed = filtered_wind_data['wspd'].values[0]
wind_dir = filtered_wind_data['wdir'].values[0]

# Assume max_distance is 10 (you can change as needed)
max_distance = 10
semivariogram = calculate_semivariogram(filtered_air_data, max_distance)

# Distances from interpolation point to sensors
interpolation_point = np.array([[51.516801645206, -0.145657269364]]) # Example point
distances = cdist(interpolation_point, filtered_air_data[['Latitude', 'Longitude']], metric='euclidean')

# Calculate Kriging weights
kriging_weights = calculate_kriging_weights(semivariogram, distances, len(filtered_air_data))

# Adjust weights for wind effects
sensor_directions = np.arctan2(filtered_air_data['Longitude'] - interpolation_point[0, 1], filtered_air_data['Latitude'] - interpolation_point[0, 0]) * 180 / np.pi
max_wind_speed = wind_data['wspd'].max()

adjusted_weights = adjust_weights(kriging_weights, wind_speed, wind_dir, sensor_directions, max_wind_speed)

# Normalize the adjusted weights
normalized_weights = normalize_weights(adjusted_weights)

# Interpolate to get the estimated air quality
estimated_value = interpolate(filtered_air_data, normalized_weights)
print(f'Estimated NO2 at the interpolation point: {estimated_value:.2f} ug/m3')


Estimated NO2 at the interpolation point: 0.00 ug/m3


In [16]:
# Example usage


# Filter data for the specific date
date_filter = '2019-04-01'
filtered_air_data = air_data[air_data['Monitor Date'] == date_filter]
filtered_wind_data = wind_data[wind_data['date'] == date_filter]

# Extract wind direction and speed for the specific date
wind_speed = filtered_wind_data['wspd'].values[0]
wind_dir = filtered_wind_data['wdir'].values[0]

# Assume max_distance is 10 (you can change as needed)
max_distance = 10
semivariogram = calculate_semivariogram(filtered_air_data, max_distance)

# Distances from interpolation point to sensors
interpolation_point = np.array([[51.516801645206, -0.145657269364]]) # Example point
distances = cdist(interpolation_point, filtered_air_data[['Latitude', 'Longitude']], metric='euclidean')

# Calculate Kriging weights
kriging_weights = calculate_kriging_weights(semivariogram, distances, len(filtered_air_data))

# Adjust weights for wind effects
sensor_directions = np.arctan2(filtered_air_data['Longitude'] - interpolation_point[0, 1], filtered_air_data['Latitude'] - interpolation_point[0, 0]) * 180 / np.pi
max_wind_speed = wind_data['wspd'].max()

adjusted_weights = adjust_weights(kriging_weights, wind_speed, wind_dir, sensor_directions, max_wind_speed)

# Normalize the adjusted weights
normalized_weights = normalize_weights(adjusted_weights)

# Interpolate to get the estimated air quality
estimated_value = interpolate(filtered_air_data, normalized_weights)
print(f'Estimated NO2 at the interpolation point: {estimated_value:.2f} ug/m3')

IndexError: index 0 is out of bounds for axis 0 with size 0