<a href="https://colab.research.google.com/github/DavidAde6/Sentinel-5P-NO2-Prediction/blob/main/LSTM_NO2Prediction_Sentinel5P.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**SETUP**

In [1]:
# IMPORTS
import ee
from datetime import datetime
import geemap.core as geemap
from IPython.display import display
import pandas as pd
from tensorflow import keras
from tensorflow.python.keras import layers
import numpy as np

In [2]:
# Earth Engine session setup: authenticate, initialise against a Cloud
# project, then open the Sentinel-5P NRTI L3 NO2 image collection.
GEE_PROJECT_ID = 'ee-dadeniyi54'
S5P_NO2_COLLECTION_ID = "COPERNICUS/S5P/NRTI/L3_NO2"

ee.Authenticate()
ee.Initialize(project=GEE_PROJECT_ID)
collection = ee.ImageCollection(S5P_NO2_COLLECTION_ID)

*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_0JLhFqfSY1uiEaW?source=Init


In [9]:
# Study-area bounding box; BBox takes (west, south, east, north) in degrees.
region = ee.Geometry.BBox(-76.5, 44.9, -75, 45.7)

# Restrict the collection to the date window and to images touching the region.
# NOTE(review): filterDate treats its end date as exclusive, so images from
# 2023-12-31 are not included -- confirm this is intended.
filtered_collection = (
    collection
    .filterDate('2020-01-01', '2023-12-31')
    .filterBounds(region)
)

In [7]:
def extract_data(image):
    """Summarise one image as a geometry-less feature of regional means.

    Each band of ``image`` is averaged over a fixed bounding box. The
    resulting dictionary becomes the feature's properties, and a ``date``
    property holding the formatted image date is added.

    Args:
        image: The image to reduce (presumably an ``ee.Image`` handed in by
            ``ImageCollection.map`` -- verify against the caller).

    Returns:
        An ``ee.Feature`` with no geometry, carrying the mean values and
        the ``date`` property.
    """
    study_area = ee.Geometry.BBox(-76.5, 44.9, -75, 45.7)
    band_means = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=study_area,
        scale=7000,  # Sentinel-5P resolution
    )
    timestamp = image.date().format()
    summary = ee.Feature(None, band_means)
    return summary.set('date', timestamp)

In [51]:
# Run the per-image reduction on the server and download the result.
features = filtered_collection.map(extract_data).getInfo()

# One row per image, built from each feature's property dictionary.
property_rows = [item['properties'] for item in features['features']]
data = pd.DataFrame(property_rows)

# Missing values become 0.0; the NO2 column is the series used for modelling.
data = data.fillna(0.0)
no2_col = data['tropospheric_NO2_column_number_density']

In [52]:
# Sanity check: length of the NO2 series (one entry per downloaded feature).
no2_col.shape

(4122,)

In [13]:
def df_to_X_y(df, window_size=5):
    """Turn an ordered series into sliding-window samples and next-step labels.

    Sample ``i`` holds the ``window_size`` consecutive values starting at
    position ``i``; its label is the value immediately after that window.

    Args:
        df: A pandas Series (or any object exposing ``to_numpy()``) holding
            the observations in order.
        window_size: Number of consecutive values in each input window.
            Must be at least 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. For a 1-D input, ``X`` has shape
        ``(n, window_size, 1)`` and ``y`` has shape ``(n,)``, where
        ``n = max(len(df) - window_size, 0)``. When the input is too short
        to form a single window, correctly shaped empty arrays are returned
        so downstream code still sees the expected rank.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    values = df.to_numpy()
    n_samples = len(values) - window_size
    # Any trailing dimensions of the input (none for a Series) are kept.
    trailing_shape = values.shape[1:]

    if n_samples <= 0:
        # Not enough observations for even one (window, label) pair.
        empty_X = np.empty((0, window_size, 1) + trailing_shape, dtype=values.dtype)
        empty_y = np.empty((0,) + trailing_shape, dtype=values.dtype)
        return empty_X, empty_y

    windows = np.stack([values[i:i + window_size] for i in range(n_samples)])
    # Add the singleton feature axis that follows the window axis.
    X = np.expand_dims(windows, axis=2)
    # The label for window i is the observation at position i + window_size.
    y = values[window_size:].copy()
    return X, y

In [53]:
# Window the NO2 series, then split by position: the first 3000 samples are
# for training, the next 500 for validation, and the remainder for testing.
X1, y1 = df_to_X_y(no2_col, 5)

train_slice = slice(None, 3000)
val_slice = slice(3000, 3500)
test_slice = slice(3500, None)

X_train1, y_train1 = X1[train_slice], y1[train_slice]
X_val1, y_val1 = X1[val_slice], y1[val_slice]
X_test1, y_test1 = X1[test_slice], y1[test_slice]

# Display the shape of every split as a sanity check.
tuple(
    part.shape
    for part in (X_train1, y_train1, X_val1, y_val1, X_test1, y_test1)
)

((3000, 5, 1), (3000,), (500, 5, 1), (500,), (617, 5, 1), (617,))

In [57]:
from tensorflow.keras.models import Sequential
# The wildcard import is kept because other cells may rely on names it
# provides; the layers used in this cell are also imported explicitly.
from tensorflow.keras.layers import *
from tensorflow.keras.layers import Dense, Input, LSTM, Masking

# Sequence model over windows of 5 time steps with 1 feature each.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first layer, which Keras 3 warns against.
model = Sequential([
    Input(shape=(5, 1)),
    # Time steps whose value equals 0.0 are masked for the following layers.
    Masking(mask_value=0.0),
    LSTM(64),
    Dense(8, activation='relu'),
    Dense(1, activation='linear'),
])

model.summary()

  super().__init__(**kwargs)


In [83]:
# These names are not provided by any other import in the notebook, so they
# are imported here to keep the cell runnable on a fresh kernel.
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

# Checkpoint callback: with save_best_only=True, 'model.keras' is only
# overwritten when the monitored quantity (val_loss by default) improves.
cp1 = ModelCheckpoint('model.keras', save_best_only=True)

# Train on mean squared error and report RMSE as an additional metric.
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])

In [92]:
# Train for 20 epochs, evaluating on the validation split after each epoch;
# the checkpoint callback is invoked during training.
model.fit(
    x=X_train1,
    y=y_train1,
    validation_data=(X_val1, y_val1),
    epochs=20,
    callbacks=[cp1],
)

Epoch 1/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 7.1068e-10 - root_mean_squared_error: 2.6594e-05 - val_loss: 2.7522e-10 - val_root_mean_squared_error: 1.6590e-05
Epoch 2/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 7.5518e-10 - root_mean_squared_error: 2.7373e-05 - val_loss: 4.0060e-10 - val_root_mean_squared_error: 2.0015e-05
Epoch 3/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 7.6156e-10 - root_mean_squared_error: 2.7572e-05 - val_loss: 1.8862e-10 - val_root_mean_squared_error: 1.3734e-05
Epoch 4/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 8.6553e-10 - root_mean_squared_error: 2.9394e-05 - val_loss: 1.5503e-10 - val_root_mean_squared_error: 1.2451e-05
Epoch 5/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 6.5931e-10 - root_mean_squared_error: 2.5659e-05 - val_loss: 3.4673e-10 - val_root_m

<keras.src.callbacks.history.History at 0x7f978ca47730>

In [95]:
# Replace the in-memory model with the one stored in 'model.keras', so the
# prediction cells below use the saved checkpoint rather than the state
# left after the final training epoch.
from tensorflow.keras.models import load_model
model = load_model('model.keras')

In [97]:
# TRAINING
# Predict on the training windows and pair each prediction with its label.
train_outputs = model.predict(X_train1)
train_predictions = train_outputs.flatten()
train_results = pd.DataFrame(
    data={
        'Train Predictions': train_predictions,
        'Actuals': y_train1,
    }
)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [96]:
# VALIDATION
# Predict on the validation windows and pair each prediction with its label.
val_outputs = model.predict(X_val1)
val_predictions = val_outputs.flatten()
val_results = pd.DataFrame(
    data={
        'Val Predictions': val_predictions,
        'Actuals': y_val1,
    }
)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
