In [124]:
import os

import numpy as np
import pandas as pd

In [125]:
import rasterio
import matplotlib.pyplot as plt

# Directory holding the Sentinel GeoTIFF stacks read below.
SENTINEL_DIR = "./data/satellite/sentinel"


def _read_stack(filename):
    """Read every band of one GeoTIFF located in SENTINEL_DIR.

    Parameters
    ----------
    filename : str
        File name relative to SENTINEL_DIR.

    Returns
    -------
    numpy.ndarray
        Whatever ``src.read()`` returns when called without a band selection.
        NOTE(review): presumably (bands, rows, cols) with one band per day -
        confirm against the exported files.
    """
    # The context manager closes the dataset once the values are in memory.
    with rasterio.open(f"{SENTINEL_DIR}/{filename}") as src:
        return src.read()


# 2022 and 2023 stacks.
NO2_2022 = _read_stack("Delhi_NO2_2022_MM.tif")
NO2_2023 = _read_stack("Delhi_NO2_2023_MM.tif")
O3_2022 = _read_stack("Delhi_o3_2022_MM.tif")
O3_2023 = _read_stack("Delhi_o3_2023_MM.tif")

# 2024 stacks (kept separate from the 2022-2023 arrays).
o3_2024 = _read_stack("Delhi_o3_2024_Tropospheric_Stack_MM.tif")
no2_2024 = _read_stack("Delhi_NO2_2024_Tropospheric_Stack_MM.tif")


In [126]:
# Join the 2022 and 2023 stacks along the first axis.
# np.concatenate is used rather than the np.concat alias because the alias
# only exists in NumPy >= 2.0 and raises AttributeError on older versions.
no2 = np.concatenate([NO2_2022, NO2_2023], axis=0)
o3 = np.concatenate([O3_2022, O3_2023], axis=0)

In [127]:
def imputation(arr):
    """Fill NaN cells of a 3-D stack in place and return the same array.

    The first slice along axis 0 has its NaN cells replaced by the mean of its
    own non-NaN cells. Every later slice then takes, for each NaN cell, the
    value of the same cell in the (already filled) previous slice, i.e. a
    forward fill along axis 0.

    Edge cases handled explicitly:
    * a stack with zero slices is returned untouched;
    * if the first slice holds no valid value at all, the mean of every valid
      value in the whole stack is used instead, so NaNs are not carried
      forward through the forward fill;
    * if the whole stack is NaN there is nothing to fill with and it is
      returned unchanged.

    Parameters
    ----------
    arr : numpy.ndarray
        3-D array indexed as ``arr[day, :, :]``. It is modified in place.

    Returns
    -------
    numpy.ndarray
        The same object as ``arr``.
    """
    if arr.shape[0] == 0:
        return arr

    # Basic slicing returns a view, so writes below land in ``arr`` itself.
    first_day = arr[0, :, :]
    missing = np.isnan(first_day)
    if missing.all():
        # No valid cell on day 0: fall back to the mean over the whole stack,
        # but only if the stack contains at least one valid value.
        if not np.isnan(arr).all():
            first_day[missing] = np.nanmean(arr)
    elif missing.any():
        first_day[missing] = np.nanmean(first_day)

    for day in range(1, arr.shape[0]):
        current = arr[day, :, :]
        gaps = np.isnan(current)
        # The previous day is already gap-free (when any fill value existed),
        # so copying from it propagates the last known observation.
        current[gaps] = arr[day - 1, :, :][gaps]

    return arr

In [128]:
# Gap-fill the concatenated NO2 and O3 stacks; each name is rebound to the
# array returned by imputation().
no2, o3 = (imputation(stack) for stack in (no2, o3))

In [129]:
# Gap-fill the 2024 NO2 and O3 stacks in the same way.
no2_2024, o3_2024 = (imputation(stack) for stack in (no2_2024, o3_2024))

In [130]:
def flatten_features(array_3d: np.ndarray) -> np.ndarray:
    """Collapse the last two axes of a 3-D array into one feature axis.

    Parameters
    ----------
    array_3d : np.ndarray
        Array of shape (N, A, B).

    Returns
    -------
    np.ndarray
        Array of shape (N, A * B). If the input is not 3-dimensional an error
        message is printed and the input is returned unchanged.
    """
    if array_3d.ndim != 3:
        print(f"Error: Input array must be 3-dimensional. Found {array_3d.ndim} dimensions.")
        return array_3d

    n_samples, n_rows, n_cols = array_3d.shape
    # Spell out the feature count instead of using -1: NumPy cannot infer the
    # unknown dimension when n_samples is 0 and would raise ValueError.
    return array_3d.reshape(n_samples, n_rows * n_cols)

def reshape_to_3d(array_2d: np.ndarray, original_shape: tuple) -> np.ndarray:
    """Expand the feature axis of a 2-D array back into two spatial axes.

    Only ``original_shape[1:]`` is used for the target layout; the leading
    entry of ``original_shape`` is ignored and the row count of ``array_2d``
    is kept as the first dimension.

    If the number of columns in ``array_2d`` differs from
    ``original_shape[1] * original_shape[2]``, an error message is printed and
    ``array_2d`` is returned unchanged.
    """
    expected_features = original_shape[1] * original_shape[2]

    if array_2d.shape[1] == expected_features:
        target_shape = (array_2d.shape[0], *original_shape[1:])
        return array_2d.reshape(target_shape)

    print("Error: Feature count mismatch. Cannot reshape back to original dimensions.")
    return array_2d


In [131]:
from sklearn.preprocessing import StandardScaler

# One scaler per pollutant. Each scaler is fitted on the `no2` / `o3` array
# only and then applied, unchanged, to the matching 2024 array.
no2_scaler, o3_scaler = StandardScaler(), StandardScaler()

# Flatten every stack to 2-D (samples x features) before scaling.
no2, no2_2024 = flatten_features(no2), flatten_features(no2_2024)
o3, o3_2024 = flatten_features(o3), flatten_features(o3_2024)

no2_train_scaled = no2_scaler.fit_transform(no2)
no2_test_scaled = no2_scaler.transform(no2_2024)

o3_train_scaled = o3_scaler.fit_transform(o3)
o3_test_scaled = o3_scaler.transform(o3_2024)


In [132]:
# Restore the 3-D layout of the scaled arrays.
# The grid size is read from the raw stacks (still 3-D at this point) rather
# than hard-coded: with a fixed [730, 9, 10] / [366, 9, 10], any other raster
# size makes reshape_to_3d print an error and silently hand back 2-D data.
no2_grid = tuple(NO2_2022.shape[1:])
o3_grid = tuple(O3_2022.shape[1:])

no2_train_scaled = reshape_to_3d(no2_train_scaled, (no2_train_scaled.shape[0], *no2_grid))
no2_test_scaled = reshape_to_3d(no2_test_scaled, (no2_test_scaled.shape[0], *no2_grid))
o3_train_scaled = reshape_to_3d(o3_train_scaled, (o3_train_scaled.shape[0], *o3_grid))
o3_test_scaled = reshape_to_3d(o3_test_scaled, (o3_test_scaled.shape[0], *o3_grid))

In [133]:
# np.save does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs('./processed_data', exist_ok=True)

# Persist the scaled train / test arrays for both pollutants.
np.save('./processed_data/no2_train.npy', no2_train_scaled)
np.save('./processed_data/no2_test.npy', no2_test_scaled)
np.save('./processed_data/o3_test.npy', o3_test_scaled)
np.save('./processed_data/o3_train.npy', o3_train_scaled)