In [1]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as L
from sklearn.model_selection import KFold
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import os
import gc
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

In [2]:
# Import the Colab upload helper under an alias: a later cell binds the name
# `files` to the DataFrame read from files.csv, so keeping the module under
# that same name would make the notebook depend on cell execution order.
from google.colab import files as colab_files

# Opens the upload widget; the return value is not needed, the uploaded
# archive is written to the working directory ("Saving O3.zip to O3.zip").
_ = colab_files.upload()

Saving O3.zip to O3.zip


In [3]:
!unzip O3.zip -d "O3"
from IPython.display import clear_output
clear_output(wait=False)
!rm O3.zip

In [4]:
# Load the dataset index. Per the preview below, each row holds a Date, the
# satellite / ground / altitude .npy file names for that date, and a Fold label.
# NOTE(review): the preview also shows an "Unnamed: 0" column, which suggests
# the CSV was written with its index; consider index_col=0 if it is not needed.
files = pd.read_csv("/content/O3/O3/files.csv")

In [5]:
# Display the index table (the rendered output elides the middle rows).
files

Unnamed: 0,Date,SatFile,GroundFile,AltGrid,Fold
0,2021-01-25,sat_2021-01-25.npy,g_2021-01-25.npy,alt_2021-01-25.npy,1
1,2021-02-21,sat_2021-02-21.npy,g_2021-02-21.npy,alt_2021-02-21.npy,2
2,2020-01-06,sat_2020-01-06.npy,g_2020-01-06.npy,alt_2020-01-06.npy,0
3,2021-01-07,sat_2021-01-07.npy,g_2021-01-07.npy,alt_2021-01-07.npy,3
4,2021-02-07,sat_2021-02-07.npy,g_2021-02-07.npy,alt_2021-02-07.npy,1
...,...,...,...,...,...
480,2020-04-08,sat_2020-04-08.npy,g_2020-04-08.npy,alt_2020-04-08.npy,0
481,2021-04-23,sat_2021-04-23.npy,g_2021-04-23.npy,alt_2021-04-23.npy,3
482,2020-11-20,sat_2020-11-20.npy,g_2020-11-20.npy,alt_2020-11-20.npy,2
483,2021-01-19,sat_2021-01-19.npy,g_2021-01-19.npy,alt_2021-01-19.npy,3


In [6]:
def pearson(y_true,y_pred):
  """Pearson correlation coefficient between two equally sized inputs.

  Both inputs may be arrays of any dimensionality or plain (nested) sequences;
  they are flattened and correlated element by element.

  Args:
    y_true: reference values.
    y_pred: predicted values, with the same number of elements as `y_true`.

  Returns:
    The correlation as a float, or `np.nan` when it is undefined (empty input,
    or either input has zero variance).

  Raises:
    ValueError: if the two inputs do not contain the same number of elements.
  """
  true = np.asarray(y_true, dtype=float).ravel()
  pred = np.asarray(y_pred, dtype=float).ravel()
  if true.size != pred.size:
    raise ValueError("y_true and y_pred must have the same number of elements, found {} and {}".format(true.size, pred.size))
  if true.size == 0:
    return np.nan

  # Population (ddof=0) standard deviations, consistent with the mean-based
  # covariance in the numerator.
  denom = np.std(true)*np.std(pred)
  if denom == 0:
    # Constant input: the correlation is undefined; return NaN explicitly
    # instead of relying on a 0/0 division and its RuntimeWarning.
    return np.nan
  return np.mean((true-np.mean(true))*(pred-np.mean(pred)))/denom

def pearsonCorrAvgDays(true,pred):
  """Correlate `true` and `pred` slice by slice along the first axis.

  Each pair `true[i]`, `pred[i]` is passed to `pearson`.

  Returns:
    A tuple `(mean_score, scores)`: the plain mean of the per-slice scores
    (NaN if any score is NaN) and the list of per-slice scores.
  """
  assert true.shape==pred.shape, "true and pred must have same shape, found {} and {}".format(true.shape,pred.shape)
  scores = [pearson(true_slice,pred_slice) for true_slice,pred_slice in zip(true,pred)]
  return np.mean(scores),scores

def pearsonCorrAvgPixels(true,pred):
  """Correlate `true` and `pred` column by column along the second axis.

  Each pair `true[:, k]`, `pred[:, k]` is passed to `pearson`.

  Returns:
    A tuple `(mean_score, scores)`: the plain mean of the per-column scores
    (NaN if any score is NaN) and the list of per-column scores.
  """
  n_columns = true.shape[1]
  scores = [pearson(true[:,col],pred[:,col]) for col in range(n_columns)]
  return np.mean(scores),scores

In [7]:
def loadData(df,satdir = "/content/O3/O3/satellite/",gdir = "/content/O3/O3/ground/"):
  """Load the satellite / ground array pairs listed in `df`.

  Args:
    df: table with "SatFile" and "GroundFile" columns holding .npy file names.
    satdir: directory containing the satellite files.
    gdir: directory containing the ground files.

  Returns:
    `(X, Y)`: the scaled satellite arrays, each with a trailing axis added at
    position 2, stacked along a new first axis, and the flattened ground
    arrays stacked along a new first axis. A pair is dropped when either
    array sums to NaN or when the ground array is constant.
  """
  # Scaling applied to every satellite array.
  # NOTE(review): presumably a unit conversion - confirm and name the constants.
  factor = 46*(6.02214/6.023)*1e2

  sat_samples = []
  ground_samples = []
  for sat_name,ground_name in zip(df["SatFile"],df["GroundFile"]):
    sat = np.expand_dims(factor*np.load(os.path.join(satdir,sat_name)),axis=2)
    ground = np.load(os.path.join(gdir,ground_name)).flatten()

    # A NaN anywhere makes the sum NaN, so this rejects pairs with missing data.
    if np.isnan(np.sum(sat)) or np.isnan(np.sum(ground)):
      continue
    # Skip days whose ground truth carries no variation.
    if np.std(ground)==0:
      continue

    sat_samples.append(sat)
    ground_samples.append(ground)
  return np.stack(sat_samples,axis=0),np.stack(ground_samples,axis=0)

In [8]:
from sklearn.linear_model import LinearRegression

# Cross-validated baseline: a single-feature linear regression from the scaled
# satellite value of a pixel to the ground value of the same pixel.
N_FOLDS = 5  # the "Fold" column of `files` labels the folds 0..4

scores_list = []  # per-fold list of per-day Pearson scores
rmses = []
maes = []
pearsons = []

for fold in range(N_FOLDS):
  print("\nFold {}\n".format(fold))
  train_files = files[files["Fold"]!=fold]
  val_files = files[files["Fold"]==fold]

  X_train,Y_train = loadData(train_files)
  X_val,Y_val = loadData(val_files)

  # Flatten so that every pixel of every day is one (x, y) training sample.
  model = LinearRegression()
  model.fit(X_train.reshape(-1,1),Y_train.reshape(-1,1))

  # Predict per pixel, then restore the (days, pixels) layout of Y_val.
  pred = model.predict(X_val.reshape(-1, 1)).reshape(Y_val.shape)

  # RMSE computed with NumPy instead of mean_squared_error(..., squared=False):
  # the `squared` argument was deprecated in scikit-learn 1.4 and later removed,
  # so the old call raises TypeError on current releases. For 2-D targets
  # scikit-learn took the RMSE of each output column and averaged them
  # uniformly; this reproduces that value.
  rmse = float(np.mean(np.sqrt(np.mean((Y_val - pred) ** 2, axis=0))))
  mae = mean_absolute_error(Y_val,pred)
  rmses.append(rmse)
  maes.append(mae)

  print("Fold {} RMSE Score: {}".format(fold, rmse))
  print("Fold {} MAE Score: {}".format(fold, mae))

  # `s` is the plain mean (NaN if any day is NaN); report the NaN-ignoring
  # mean of the per-day scores instead.
  s, ls = pearsonCorrAvgDays(Y_val, pred)
  print("Fold {} Pearson coeff avg over days: {}".format(fold, np.nanmean(ls)))
  scores_list.append(ls)

print("\nCV RMSE Score: {}".format(np.mean(rmses)))
print("\nCV MAE Score: {}".format(np.mean(maes)))



Fold 0

Fold 0 RMSE Score: 18.542961975219843
Fold 0 MAE Score: 14.839817033755297
Fold 0 Pearson coeff avg over days: 0.09478196746649746

Fold 1

Fold 1 RMSE Score: 18.045781017505586
Fold 1 MAE Score: 14.017950771771728
Fold 1 Pearson coeff avg over days: 0.12309279244022091

Fold 2

Fold 2 RMSE Score: 17.163849678746477
Fold 2 MAE Score: 13.412882092471744
Fold 2 Pearson coeff avg over days: 0.05587593399468383

Fold 3

Fold 3 RMSE Score: 19.01402250254849
Fold 3 MAE Score: 14.709614351695842
Fold 3 Pearson coeff avg over days: 0.019692092640415444

Fold 4

Fold 4 RMSE Score: 16.374487876248573
Fold 4 MAE Score: 13.217895526787053
Fold 4 Pearson coeff avg over days: 0.05839918765721856

CV RMSE Score: 17.82822061005379

CV MAE Score: 14.039631955296333


In [9]:
# Pick one validation day (X_val / Y_val / model still hold the last CV fold)
# and lay its input, ground truth and prediction out on the 2-D grid.
SAMPLE_IDX = 10
GRID_SHAPE = (71, 86)  # (rows, cols) of one day's grid

inp = X_val[SAMPLE_IDX][:,:,0]
true = Y_val[SAMPLE_IDX].reshape(GRID_SHAPE)
# Re-predict from the fitted model rather than slicing `pred` in place:
# `pred = pred[10].reshape(...)` consumed its own input, so running the cell a
# second time failed. The per-pixel prediction is the same either way.
pred = model.predict(X_val[SAMPLE_IDX].reshape(-1, 1)).reshape(GRID_SHAPE)
print(pred.shape)

(71, 86)


In [10]:
import numpy as np
import folium
import branca.colormap as cm

# Draw the ground-truth grid `true` as coloured cells on an interactive map.

ireland_bound = [-10.3401, -6.022 ,51.828500000000005 , 55.422] # Ireland extent: lon_min, lon_max, lat_min, lat_max
llcrn,urcrn = (-10.3401, 51.828500000000005),(-6.022, 55.422) # (lon, lat) of the lower-left / upper-right corners

# Cell edges of a regular grid of roughly 0.05 degrees covering the extent.
CELL_SIZE_DEG = 0.05
num_cols = int((urcrn[0] - llcrn[0]) // CELL_SIZE_DEG)
num_rows = int((urcrn[1] - llcrn[1]) // CELL_SIZE_DEG)
lat_coords = np.linspace(llcrn[1], urcrn[1], num_rows+1)
lon_coords = np.linspace(llcrn[0], urcrn[0], num_cols+1)

true = np.array(true)
if true.shape != (num_rows, num_cols):
    # Fail loudly instead of silently cropping or raising a bare IndexError.
    raise ValueError("expected a grid of shape {}, found {}".format((num_rows, num_cols), true.shape))

# Initialize a map. 'Stamen Terrain' is no longer a built-in tile set in
# current folium releases, so the default OpenStreetMap tiles are used.
m = folium.Map(location=[53.0, -7.5], zoom_start=5, tiles='OpenStreetMap')

# One GeoJSON polygon per grid cell; GeoJSON positions are [longitude, latitude].
# NOTE(review): row 0 is drawn at the southern edge - confirm the arrays are
# stored south-to-north.
features = []
for i in range(num_rows):
    south, north = float(lat_coords[i]), float(lat_coords[i+1])
    for j in range(num_cols):
        west, east = float(lon_coords[j]), float(lon_coords[j+1])
        polygon = [
            [west, south],
            [west, north],
            [east, north],
            [east, south],
            [west, south]
        ]
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Polygon',
                'coordinates': [polygon]
            },
            'properties': {
                # Plain Python float so the value is JSON-serializable whatever the array dtype.
                'value': float(true[i, j])
            }
        }
        features.append(feature)

# Colour scale spanning the range of the 'true' values
vmin = np.min(true)
vmax = np.max(true)
colormap = cm.LinearColormap(colors=['blue', 'green', 'yellow', 'orange', 'red'], vmin=vmin, vmax=vmax)

# Add the GeoJSON layer to the map and set the fill colour from each cell's value
folium.GeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    style_function=lambda feature: {
        'fillColor': colormap(feature['properties']['value']),
        'color': 'none',
        'fillOpacity': 0.6
    }
).add_to(m)


# Hand-built HTML legend (fixed pixel position) listing a few colour/value pairs
legend_html = '''
    <div style="
        position: fixed;
        bottom: 160px; left: 1120px; width: 70px; height: 134px;
        background-color: white;
        z-index: 9999; font-size: 12px;
        border: 2px solid grey; padding: 2px;
    ">

'''

num_values = 5  # Number of values to display in the color bar legend
value_range = np.linspace(vmin, vmax, num_values)

for value in value_range:
    color = colormap(value)
    label = f'{int(value)}'  # Legend labels are truncated to integers
    legend_html += f'<p><span style="background-color:{color};">&nbsp;&nbsp;&nbsp;&nbsp;</span> {label}</p>'

legend_html += '</div>'

# Add the color bar legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Zoom the view to the bounds of the selected region ([lat, lon] corner pairs)
m.fit_bounds([[llcrn[1], llcrn[0]], [urcrn[1], urcrn[0]]])

# Save and display the map
m.save('map.html')

m

Output hidden; open in https://colab.research.google.com to view.

In [11]:
import numpy as np
import folium
import branca.colormap as cm

# Draw the predicted grid `pred` as coloured cells on an interactive map.

ireland_bound = [-10.3401, -6.022 ,51.828500000000005 , 55.422] # Ireland extent: lon_min, lon_max, lat_min, lat_max
llcrn,urcrn = (-10.3401, 51.828500000000005),(-6.022, 55.422) # (lon, lat) of the lower-left / upper-right corners

# Cell edges of a regular grid of roughly 0.05 degrees covering the extent.
CELL_SIZE_DEG = 0.05
num_cols = int((urcrn[0] - llcrn[0]) // CELL_SIZE_DEG)
num_rows = int((urcrn[1] - llcrn[1]) // CELL_SIZE_DEG)
lat_coords = np.linspace(llcrn[1], urcrn[1], num_rows+1)
lon_coords = np.linspace(llcrn[0], urcrn[0], num_cols+1)

pred = np.array(pred).astype(np.float64)
if pred.shape != (num_rows, num_cols):
    # Fail loudly instead of silently cropping or raising a bare IndexError.
    raise ValueError("expected a grid of shape {}, found {}".format((num_rows, num_cols), pred.shape))

# Initialize a map. 'Stamen Terrain' is no longer a built-in tile set in
# current folium releases, so the default OpenStreetMap tiles are used.
m = folium.Map(location=[53.0, -7.5], zoom_start=5, tiles='OpenStreetMap')

# One GeoJSON polygon per grid cell; GeoJSON positions are [longitude, latitude].
# NOTE(review): row 0 is drawn at the southern edge - confirm the arrays are
# stored south-to-north.
features = []
for i in range(num_rows):
    south, north = float(lat_coords[i]), float(lat_coords[i+1])
    for j in range(num_cols):
        west, east = float(lon_coords[j]), float(lon_coords[j+1])
        polygon = [
            [west, south],
            [west, north],
            [east, north],
            [east, south],
            [west, south]
        ]
        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Polygon',
                'coordinates': [polygon]
            },
            'properties': {
                # Use the corresponding value from the 'pred' array, as a plain
                # Python float so it is JSON-serializable.
                'value': float(pred[i, j])
            }
        }
        features.append(feature)

# Colour scale spanning the range of the 'pred' values
vmin = np.min(pred)
vmax = np.max(pred)
colormap = cm.LinearColormap(colors=['blue', 'green', 'yellow', 'orange', 'red'], vmin=vmin, vmax=vmax)

# Add the GeoJSON layer to the map and set the fill colour from each cell's value
folium.GeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    style_function=lambda feature: {
        'fillColor': colormap(feature['properties']['value']),
        'color': 'none',
        'fillOpacity': 0.6
    }
).add_to(m)


# Hand-built HTML legend (fixed pixel position) listing a few colour/value pairs
legend_html = '''
    <div style="
        position: fixed;
        bottom: 160px; left: 1120px; width: 70px; height: 134px;
        background-color: white;
        z-index: 9999; font-size: 12px;
        border: 2px solid grey; padding: 2px;
    ">

'''

num_values = 5  # Number of values to display in the color bar legend
value_range = np.linspace(vmin, vmax, num_values)

for value in value_range:
    color = colormap(value)
    label = f'{int(value)}'  # Legend labels are truncated to integers
    legend_html += f'<p><span style="background-color:{color};">&nbsp;&nbsp;&nbsp;&nbsp;</span> {label}</p>'

legend_html += '</div>'

# Add the color bar legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Zoom the view to the bounds of the selected region ([lat, lon] corner pairs)
m.fit_bounds([[llcrn[1], llcrn[0]], [urcrn[1], urcrn[0]]])

# Save and display the map.
# NOTE(review): this replaces any existing 'map.html' in the working directory.
m.save('map.html')

m

Output hidden; open in https://colab.research.google.com to view.