In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as L
from sklearn.model_selection import KFold
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import os
import gc
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

In [None]:
# Colab-only step: upload the dataset archive from the local machine into the
# runtime's working directory. The value returned by upload() is not needed,
# so it is bound to the throwaway name `_`.
from google.colab import files
_ = files.upload()

Saving NO2+Altitude.zip to NO2+Altitude.zip


In [None]:
!unzip NO2+Altitude.zip -d "NO2+Altitude"
from IPython.display import clear_output
clear_output(wait=False)
!rm NO2+Altitude.zip

In [None]:
# Index table of the dataset, read from the extracted archive.
# NOTE(review): this rebinds `files`, shadowing the `google.colab.files` module
# imported in the upload cell; that cell re-imports it, so running top to bottom
# still works.
files = pd.read_csv("/content/NO2+Altitude/NO2+Altitude/files.csv")

In [None]:
# Show the index table; the "SatFile", "GroundFile" and "Fold" columns are the
# ones used by the cells below.
files

Unnamed: 0,Date,SatFile,GroundFile,AltGrid,Fold
0,2020-05-20,sat_2020-05-20.npy,g_2020-05-20.npy,alt_2020-05-20.npy,1
1,2021-04-27,sat_2021-04-27.npy,g_2021-04-27.npy,alt_2021-04-27.npy,2
2,2020-03-03,sat_2020-03-03.npy,g_2020-03-03.npy,alt_2020-03-03.npy,0
3,2020-10-10,sat_2020-10-10.npy,g_2020-10-10.npy,alt_2020-10-10.npy,2
4,2020-07-17,sat_2020-07-17.npy,g_2020-07-17.npy,alt_2020-07-17.npy,1
...,...,...,...,...,...
481,2020-07-25,sat_2020-07-25.npy,g_2020-07-25.npy,alt_2020-07-25.npy,3
482,2020-08-22,sat_2020-08-22.npy,g_2020-08-22.npy,alt_2020-08-22.npy,2
483,2020-03-23,sat_2020-03-23.npy,g_2020-03-23.npy,alt_2020-03-23.npy,3
484,2020-05-17,sat_2020-05-17.npy,g_2020-05-17.npy,alt_2020-05-17.npy,4


In [None]:
def pearson(y_true,y_pred):
  """Pearson correlation coefficient between two equally sized sets of values.

  Both inputs are flattened first, so arrays of any rank (or plain Python
  sequences) are treated as one long vector of paired observations.

  Args:
    y_true: array-like of reference values.
    y_pred: array-like of predicted values with the same number of elements.

  Returns:
    The population Pearson coefficient (covariance divided by the product of
    the population standard deviations). The result is NaN when either input
    has zero variance, because the denominator is then zero.
  """
  # ravel() replaces the former per-row Python loops and also accepts inputs
  # that are lists or whose ranks differ.
  true = np.asarray(y_true).ravel()
  pred = np.asarray(y_pred).ravel()
  covariance = np.mean((true-np.mean(true))*(pred-np.mean(pred)))
  return covariance/(np.std(true)*np.std(pred))

def pearsonCorrAvgDays(true, pred):
    """Row-wise Pearson correlation and its mean.

    Args:
        true: array whose first axis enumerates samples (one row per sample).
        pred: array of the same shape as ``true``.

    Returns:
        ``(mean_score, scores)`` where ``scores`` is a list holding
        ``np.corrcoef(true[i], pred[i])[0, 1]`` for every row ``i`` and
        ``mean_score`` is ``np.mean(scores)`` (NaN entries are not filtered).
    """
    assert true.shape == pred.shape, "true and pred must have the same shape, found {} and {}".format(true.shape, pred.shape)
    # Iterating the arrays walks their first axis, i.e. one pair of rows at a time.
    per_row = [np.corrcoef(row_true, row_pred)[0, 1] for row_true, row_pred in zip(true, pred)]
    return np.mean(per_row), per_row

def pearsonCorrAvgPixels(true,pred):
  """Column-wise Pearson correlation and its mean.

  Args:
    true: 2-D array; each column is correlated across the rows.
    pred: 2-D array indexed the same way as ``true``.

  Returns:
    ``(mean_score, scores)`` where ``scores`` is a list with one ``pearson``
    value per column of ``true`` and ``mean_score`` is ``np.mean(scores)``.
  """
  per_column = [pearson(true[:,col],pred[:,col]) for col in range(true.shape[1])]
  return np.mean(per_column),per_column

In [None]:
def loadData(df,satdir = "/content/NO2+Altitude/NO2+Altitude/satellite/",gdir = "/content/NO2+Altitude/NO2+Altitude/ground/"):
  """Load the satellite/ground array pairs listed in ``df``.

  Each row of ``df`` names one ``.npy`` file in ``satdir`` (column "SatFile")
  and one in ``gdir`` (column "GroundFile"). A pair is skipped when the sum of
  either array is NaN or when the ground array is constant (zero standard
  deviation).

  Args:
    df: DataFrame with "SatFile" and "GroundFile" columns.
    satdir: directory holding the satellite ``.npy`` files.
    gdir: directory holding the ground ``.npy`` files.

  Returns:
    ``(X, Y)``: ``X`` stacks the scaled satellite arrays, each with a trailing
    axis of length 1 inserted at position 2; ``Y`` stacks the flattened ground
    arrays. Both have one entry per retained row.

  Raises:
    ValueError: if no row of ``df`` yields a usable pair.
  """
  # Constant scale applied to every satellite array; hoisted out of the loop.
  # NOTE(review): presumably a unit conversion for NO2 (46 ~ molar mass in
  # g/mol) - confirm against the data documentation.
  factor = 46*(6.02214/6.023)*1e2
  X = []
  Y = []
  for sat_name,ground_name in zip(df["SatFile"],df["GroundFile"]):
    sat = np.expand_dims(factor*np.load(os.path.join(satdir,sat_name)),axis=2)
    ground = np.load(os.path.join(gdir,ground_name)).flatten()
    # A NaN anywhere makes the sum NaN, so this rejects incomplete arrays.
    if np.isnan(np.sum(sat)) or np.isnan(np.sum(ground)):
      continue
    # A constant ground array has no variance to correlate against.
    if np.std(ground)==0:
      continue
    X.append(sat)
    Y.append(ground)
  if not X:
    # np.stack would fail here anyway; raise the same exception type with a
    # message that points at the actual cause.
    raise ValueError("loadData found no usable samples: every one of the {} listed rows was skipped (NaN values or constant ground array)".format(df.shape[0]))
  return np.stack(X,axis=0),np.stack(Y,axis=0)

In [None]:
from xgboost import XGBRegressor
from tqdm import tqdm  # Import tqdm for the progress bar

# 5-fold cross-validation of a single-feature XGBoost baseline: each satellite
# value is one training row and the ground value at the same flattened
# position is its target.
scores_list = []  # per fold: list of per-sample Pearson coefficients
rmses = []        # per fold: RMSE
maes = []         # per fold: MAE
pearsons = []     # per fold: mean of the non-NaN per-sample Pearson coefficients

for fold in tqdm(range(5), desc="Folds Progress"):
  print("\nFold {}\n".format(fold))
  # The "Fold" column of the index table decides which rows are held out.
  train_files = files[files["Fold"]!=fold]
  val_files = files[files["Fold"]==fold]

  X_train,Y_train = loadData(train_files)
  X_val,Y_val = loadData(val_files)

  model = XGBRegressor(objective="reg:squarederror")
  model.fit(X_train.reshape(-1,1),Y_train.reshape(-1,1))
  pred = model.predict(X_val.reshape(-1,1))
  Y_val = Y_val.reshape(Y_val.shape[0], -1)  # one row per validation sample
  prediction = pred.reshape(Y_val.shape)

  # RMSE per output column, then averaged over columns. This is what
  # mean_squared_error(..., squared=False) returned for 2-D targets; the
  # `squared` argument is deprecated since scikit-learn 1.4 and removed in 1.6,
  # so the value is computed from the per-column MSE instead.
  rmse = np.mean(np.sqrt(mean_squared_error(Y_val,prediction,multioutput="raw_values")))
  mae = mean_absolute_error(Y_val,prediction)
  rmses.append(rmse)
  maes.append(mae)

  print("Fold {} RMSE Score: {}".format(fold, rmse))
  print("Fold {} MAE Score: {}".format(fold, mae))
  s, ls = pearsonCorrAvgDays(Y_val, prediction)
  # `s` is NaN as soon as one sample has an undefined correlation, so the
  # reported figure averages only the defined coefficients.
  fold_pearson = np.mean([i for i in ls if not pd.isnull(i)])
  pearsons.append(fold_pearson)
  print("Fold {} Pearson coeff avg over days: {}".format(fold, fold_pearson))
  scores_list.append(ls)


print("\nCV RMSE Score: {}".format(np.mean(rmses)))
print("\nCV MAE Score: {}".format(np.mean(maes)))
print("\nCV Pearson coeff avg over days: {}".format(np.mean(pearsons)))



Folds Progress:   0%|          | 0/5 [00:00<?, ?it/s]


Fold 0



Folds Progress:  20%|██        | 1/5 [00:36<02:27, 36.95s/it]

Fold 0 RMSE Score: 7.506702395489731
Fold 0 MAE Score: 5.933033322273989
Fold 0 Pearson coeff avg over days: 0.03666742035143255

Fold 1



Folds Progress:  40%|████      | 2/5 [01:13<01:49, 36.66s/it]

Fold 1 RMSE Score: 8.988037398582332
Fold 1 MAE Score: 6.811871018621722
Fold 1 Pearson coeff avg over days: 0.05826328383372929

Fold 2



Folds Progress:  60%|██████    | 3/5 [01:49<01:13, 36.60s/it]

Fold 2 RMSE Score: 8.111382021006769
Fold 2 MAE Score: 6.421752668414877
Fold 2 Pearson coeff avg over days: 0.03924704853305459

Fold 3



Folds Progress:  80%|████████  | 4/5 [02:25<00:36, 36.23s/it]

Fold 3 RMSE Score: 7.599863564102805
Fold 3 MAE Score: 5.8614191853561595
Fold 3 Pearson coeff avg over days: 0.04600388073275213

Fold 4



Folds Progress: 100%|██████████| 5/5 [03:02<00:00, 36.43s/it]

Fold 4 RMSE Score: 7.606031226616744
Fold 4 MAE Score: 6.108700770544965
Fold 4 Pearson coeff avg over days: 0.04256940228944561

CV RMSE Score: 7.962403321159677

CV MAE Score: 6.227355393042343





In [None]:
# Pick sample 10 from the arrays left over from the last cross-validation fold
# and reshape the flattened target/prediction rows to a 49 x 67 grid for mapping.
print(Y_val.shape)
inp = X_val[10, :, :, 0]                # satellite grid without its trailing axis
true = Y_val[10].reshape(49, 67)        # ground values
pred = prediction[10].reshape(49, 67)   # model output for the same sample

(65, 3283)


In [None]:
import numpy as np
import folium
import branca.colormap as cm

# Draw the ground-truth grid `true` as coloured cells on an interactive map.

ireland_bound = [-9.4003, -6.032, 51.828500000000005, 54.323]
# (longitude, latitude) of the lower-left and upper-right corners of the box.
llcrn, urcrn = (-9.4003, 51.8285), (-6.032, 54.323)

# Create latitude and longitude grids
# Number of whole 0.05-wide steps that fit inside the box along each axis.
num_cols = int((urcrn[0] - llcrn[0]) // 0.05)
num_rows = int((urcrn[1] - llcrn[1]) // 0.05)
lat_coords = np.linspace(llcrn[1], urcrn[1], num_rows+1)
lon_coords = np.linspace(llcrn[0], urcrn[0], num_cols+1)
# meshgrid returns the grid of its first argument (longitudes) first.
lon, lat = np.meshgrid(lon_coords, lat_coords)

# np.float64 scalars are Python floats, which keeps the GeoJSON properties
# JSON-serialisable even if the source array is float32.
true = np.array(true).astype(np.float64)
assert true.shape == (num_rows, num_cols), \
    "expected a {} x {} grid, found {}".format(num_rows, num_cols, true.shape)

# Initialize a map
# 'OpenStreetMap' is a built-in folium tileset; 'Stamen Terrain' is no longer
# available as a built-in tileset in current folium releases.
m = folium.Map(location=[53.0, -7.5], zoom_start=5, tiles='OpenStreetMap')

# Convert the numpy array to a GeoJSON-like structure
# NOTE(review): row 0 is drawn at the southern edge because lat_coords ascends;
# confirm that the arrays are stored south-to-north.
features = []
for i in range(num_rows):
    for j in range(num_cols):
        # Closed ring of the cell's four corners; GeoJSON positions are
        # [longitude, latitude].
        polygon = [
            [lon[i, j], lat[i, j]],
            [lon[i+1, j], lat[i+1, j]],
            [lon[i+1, j+1], lat[i+1, j+1]],
            [lon[i, j+1], lat[i, j+1]],
            [lon[i, j], lat[i, j]]
        ]
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Polygon',
                'coordinates': [polygon]
            },
            'properties': {
                'value': true[i, j]  # Use the corresponding value from the 'true' array
            }
        }
        features.append(feature)

# Define a color scale for the 'true' values
vmin = np.min(true)
vmax = np.max(true)
colormap = cm.LinearColormap(colors=['blue', 'green', 'yellow', 'orange', 'red'], vmin=vmin, vmax=vmax)

# Add the GeoJSON layer to the map and set fill color based on 'true' values
folium.GeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    style_function=lambda feature: {
        'fillColor': colormap(feature['properties']['value']),
        'color': 'none',
        'fillOpacity': 0.6
    }
).add_to(m)


# Create a color bar legend with values
legend_html = '''
    <div style="
        position: fixed;
        bottom: 160px; left: 1120px; width: 70px; height: 134px;
        background-color: white;
        z-index: 9999; font-size: 12px;
        border: 2px solid grey; padding: 2px;
    ">

'''

num_values = 5  # Number of values to display in the color bar legend
value_range = np.linspace(vmin, vmax, num_values)

for value in value_range:
    color = colormap(value)
    label = f'{int(value)}'  # Format the value as desired
    legend_html += f'<p><span style="background-color:{color};">&nbsp;&nbsp;&nbsp;&nbsp;</span> {label}</p>'

legend_html += '</div>'

# Add the color bar legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Crop the map to the bounds of the selected region (fit_bounds takes [lat, lon] pairs)
m.fit_bounds([[llcrn[1], llcrn[0]], [urcrn[1], urcrn[0]]])

# Save and display the map
m.save('map.html')

m

In [None]:
import numpy as np
import folium
import branca.colormap as cm

# Draw the predicted grid `pred` as coloured cells on an interactive map.

ireland_bound = [-9.4003, -6.032, 51.828500000000005, 54.323]
# (longitude, latitude) of the lower-left and upper-right corners of the box.
llcrn, urcrn = (-9.4003, 51.8285), (-6.032, 54.323)

# Create latitude and longitude grids
# Number of whole 0.05-wide steps that fit inside the box along each axis.
num_cols = int((urcrn[0] - llcrn[0]) // 0.05)
num_rows = int((urcrn[1] - llcrn[1]) // 0.05)
lat_coords = np.linspace(llcrn[1], urcrn[1], num_rows+1)
lon_coords = np.linspace(llcrn[0], urcrn[0], num_cols+1)
# meshgrid returns the grid of its first argument (longitudes) first.
lon, lat = np.meshgrid(lon_coords, lat_coords)

# np.float64 scalars are Python floats, which keeps the GeoJSON properties
# JSON-serialisable even if the source array is float32.
pred = np.array(pred).astype(np.float64)
assert pred.shape == (num_rows, num_cols), \
    "expected a {} x {} grid, found {}".format(num_rows, num_cols, pred.shape)

# Initialize a map
# 'OpenStreetMap' is a built-in folium tileset; 'Stamen Terrain' is no longer
# available as a built-in tileset in current folium releases.
m = folium.Map(location=[53.0, -7.5], zoom_start=5, tiles='OpenStreetMap')

# Convert the numpy array to a GeoJSON-like structure
# NOTE(review): row 0 is drawn at the southern edge because lat_coords ascends;
# confirm that the arrays are stored south-to-north.
features = []
for i in range(num_rows):
    for j in range(num_cols):
        # Closed ring of the cell's four corners; GeoJSON positions are
        # [longitude, latitude].
        polygon = [
            [lon[i, j], lat[i, j]],
            [lon[i+1, j], lat[i+1, j]],
            [lon[i+1, j+1], lat[i+1, j+1]],
            [lon[i, j+1], lat[i, j+1]],
            [lon[i, j], lat[i, j]]
        ]
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Polygon',
                'coordinates': [polygon]
            },
            'properties': {
                'value': pred[i, j]  # Use the corresponding value from the 'pred' array
            }
        }
        features.append(feature)

# Define a color scale for the 'pred' values
vmin = np.min(pred)
vmax = np.max(pred)
colormap = cm.LinearColormap(colors=['blue', 'green', 'yellow', 'orange', 'red'], vmin=vmin, vmax=vmax)

# Add the GeoJSON layer to the map and set fill color based on 'pred' values
folium.GeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    style_function=lambda feature: {
        'fillColor': colormap(feature['properties']['value']),
        'color': 'none',
        'fillOpacity': 0.6
    }
).add_to(m)


# Create a color bar legend with values
legend_html = '''
    <div style="
        position: fixed;
        bottom: 160px; left: 1120px; width: 70px; height: 134px;
        background-color: white;
        z-index: 9999; font-size: 12px;
        border: 2px solid grey; padding: 2px;
    ">

'''

num_values = 5  # Number of values to display in the color bar legend
value_range = np.linspace(vmin, vmax, num_values)

for value in value_range:
    color = colormap(value)
    label = f'{int(value)}'  # Format the value as desired
    legend_html += f'<p><span style="background-color:{color};">&nbsp;&nbsp;&nbsp;&nbsp;</span> {label}</p>'

legend_html += '</div>'

# Add the color bar legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Crop the map to the bounds of the selected region (fit_bounds takes [lat, lon] pairs)
m.fit_bounds([[llcrn[1], llcrn[0]], [urcrn[1], urcrn[0]]])

# Save and display the map
# NOTE(review): 'map.html' is also the path written by the ground-truth map
# cell, so that file is overwritten here; pick a distinct name if both maps
# need to be kept on disk.
m.save('map.html')

m