In [11]:
from google.colab import drive


import pandas as pd
import numpy as np
from os import listdir
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model 
from tensorflow.keras.optimizers import Adam

In [14]:
# Load the processed table, index it by (area, date) and keep only the
# `ndvi_mean` column, so `df` becomes a Series keyed by (area, date).
df = (
    pd.read_csv('/content/world_bank_processed.csv', parse_dates=['date'])
    .set_index(['area', 'date'])['ndvi_mean']
)
# Standardise with the mean/std of the whole series (all areas and dates
# together), then sort the index.
df = ((df - df.mean()) / df.std()).sort_index()

In [15]:
# Mount Google Drive at /content/drive; the cells below read the encoded
# patches from /content/drive/MyDrive/encodings/.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# Build the training set (X, y) from the 2017 and 2018 encodings.
#
# Every file in a year directory is loaded with np.load and flattened to
# samples of shape (16, 16, 64). The file name minus its 4-character
# extension is used as the `area` key into `df`.
# NOTE(review): axis 0 of each file is treated as the patch axis and the
# flattening assumes 12 monthly slices per patch -- confirm against the encoder.
X = []
y = []
n_months = 12  # number of labels expected per district and year

for year in [2017, 2018]:
    enc_dir = f'/content/drive/MyDrive/encodings/{year}_new_encoded/'
    # Year bounds do not depend on the district, so compute them once.
    year_start = pd.to_datetime(f'{year}-01-01')
    year_end = pd.to_datetime(f'{year+1}-01-01')

    for district in listdir(enc_dir):
        # Same guard as the evaluation cell: skip entries whose name contains
        # 'ipynb' (presumably Colab's .ipynb_checkpoints), which np.load
        # cannot read.
        if 'ipynb' in district:
            continue

        name = district[:-4]
        data = np.load(enc_dir + district)
        n_patches = data.shape[0]

        df_temp = df.loc[name]
        mask = (df_temp.index >= year_start) & (df_temp.index < year_end)
        monthly = df_temp[mask].to_numpy().reshape(-1)
        if monthly.size != n_months:
            raise ValueError(
                f'expected {n_months} labels for {name!r} in {year}, '
                f'found {monthly.size}'
            )

        # Flattening (n_patches, 12, ...) yields patch-major / month-minor
        # samples, so the matching labels are the 12 monthly values repeated
        # once per patch.
        X.append(data.reshape(-1, 16, 16, 64))
        y.append(np.tile(monthly, n_patches))

X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)

In [17]:
# Build the validation set (X_val, y_val) from the 2019 encodings.
#
# Every file in the year directory is loaded with np.load and flattened to
# samples of shape (16, 16, 64). The file name minus its 4-character
# extension is used as the `area` key into `df`.
# NOTE(review): axis 0 of each file is treated as the patch axis and the
# flattening assumes 12 monthly slices per patch -- confirm against the encoder.
X_val = []
y_val = []
year = 2019
n_months = 12  # number of labels expected per district and year

enc_dir = f'/content/drive/MyDrive/encodings/{year}_new_encoded/'
year_start = pd.to_datetime(f'{year}-01-01')
year_end = pd.to_datetime(f'{year+1}-01-01')

for district in listdir(enc_dir):
    # Same guard as the evaluation cell: skip entries whose name contains
    # 'ipynb' (presumably Colab's .ipynb_checkpoints), which np.load cannot
    # read.
    if 'ipynb' in district:
        continue

    name = district[:-4]
    data = np.load(enc_dir + district)
    n_patches = data.shape[0]

    df_temp = df.loc[name]
    mask = (df_temp.index >= year_start) & (df_temp.index < year_end)
    monthly = df_temp[mask].to_numpy().reshape(-1)
    if monthly.size != n_months:
        raise ValueError(
            f'expected {n_months} labels for {name!r} in {year}, '
            f'found {monthly.size}'
        )

    # Flattening (n_patches, 12, ...) yields patch-major / month-minor
    # samples, so the matching labels are the 12 monthly values repeated once
    # per patch.
    X_val.append(data.reshape(-1, 16, 16, 64))
    y_val.append(np.tile(monthly, n_patches))

X_val = np.concatenate(X_val, axis=0)
y_val = np.concatenate(y_val, axis=0)

In [18]:
def encoder_block(x, n_filters_in, n_filters_out, n_convs=1, activation="relu", batchNorm=False):
    """Residual convolution stage followed by a residual stride-2 stage.

    Stage 1 applies `n_convs` 3x3 "same" convolutions with `n_filters_in`
    filters (each optionally followed by batch normalisation) and adds the
    block input back in. Stage 2 sums a 3x3 stride-2 convolution (optionally
    batch-normalised) with a 1x1 stride-2 convolution of the stage-1 output,
    both with `n_filters_out` filters.

    Args:
        x: Input Keras tensor; its last axis is compared as the channel axis.
        n_filters_in: Filter count of the stage-1 convolutions and of the
            shortcut projection.
        n_filters_out: Filter count of both stride-2 convolutions.
        n_convs: Number of stage-1 convolutions.
        activation: Activation of the 3x3 convolutions; the 1x1 convolutions
            have none.
        batchNorm: If True, add BatchNormalization after each 3x3 convolution.

    Returns:
        The output tensor of the final Add layer.
    """
    shortcut = x
    features = x
    for _ in range(n_convs):
        features = layers.Conv2D(
            filters=n_filters_in,
            kernel_size=(3, 3),
            padding="same",
            activation=activation,
        )(features)
        if batchNorm:
            features = layers.BatchNormalization()(features)

    # The shortcut is only projected (1x1 convolution) when its channel count
    # does not already match the convolved features.
    channels_differ = shortcut.shape[-1] != features.shape[-1]
    if channels_differ:
        shortcut = layers.Conv2D(
            filters=n_filters_in, kernel_size=(1, 1), padding="same"
        )(shortcut)
    merged = layers.Add()([features, shortcut])

    # Stride-2 stage: main 3x3 path plus a linear 1x1 stride-2 shortcut.
    strided = layers.Conv2D(
        filters=n_filters_out,
        kernel_size=(3, 3),
        strides=2,
        padding="same",
        activation=activation,
    )(merged)
    if batchNorm:
        strided = layers.BatchNormalization()(strided)
    strided_shortcut = layers.Conv2D(
        filters=n_filters_out, kernel_size=(1, 1), strides=2, padding="same"
    )(merged)
    return layers.Add()([strided, strided_shortcut])

In [19]:
# Regression model: one encoder block over the (16, 16, 64) encoded patch,
# then a small dense head ending in a single linear output unit.
# The input tensor is named `inputs` rather than `input` so the builtin
# input() is not shadowed for the rest of the notebook session.
inputs = layers.Input(shape=(16, 16, 64))
x = encoder_block(inputs, 256, 256, n_convs=2, batchNorm=True)
x = layers.Flatten()(x)
x = layers.Dense(25, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(25, activation="relu")(x)
x = layers.Dense(1)(x)  # no activation: unbounded scalar regression target
model = Model(inputs, x)
# Mean squared error against the standardised NDVI labels.
model.compile(optimizer=Adam(learning_rate=5e-5), loss="mse")

In [20]:
# Print the layer-by-layer architecture and parameter counts of `model`.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 16, 16, 64)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_5 (Conv2D)              (None, 16, 16, 256)  147712      ['input_2[0][0]']                
                                                                                                  
 batch_normalization_4 (BatchNo  (None, 16, 16, 256)  1024       ['conv2d_5[0][0]']               
 rmalization)                                                                                     
                                                                                            

In [21]:
# Fit on the training arrays and evaluate on the validation arrays after
# every epoch; the returned History object is kept in `history`.
history = model.fit(
    X,
    y,
    batch_size=64,
    epochs=20,
    shuffle=True,
    validation_data=(X_val, y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
# Evaluate the trained model per district and month for each year.
#
# y_pred[year] receives one value per (district, month): the model output
# averaged over all patches of that district for that month.
# y_true[year] receives the matching standardised labels from `df`.
# Both end up as flat numpy arrays in the same district/month order.
years = list(range(2017, 2020))
y_pred = {year: [] for year in years}
y_true = {year: [] for year in years}

for year in years:
    enc_dir = f'/content/drive/MyDrive/encodings/{year}_new_encoded/'
    # Year bounds do not depend on the district, so compute them once.
    year_start = pd.to_datetime(f'{year}-01-01')
    year_end = pd.to_datetime(f'{year+1}-01-01')

    for district in listdir(enc_dir):
        # Skip entries whose name contains 'ipynb' (presumably Colab's
        # .ipynb_checkpoints), which np.load cannot read.
        if 'ipynb' in district:
            continue
        name = district[:-4]
        data = np.load(enc_dir + district)
        # NOTE(review): assumes data is (n_patches, n_months, 16, 16, 64),
        # as implied by the per-month slicing this replaces -- confirm.
        n_patches, n_months = data.shape[:2]

        df_temp = df.loc[name]
        mask = (df_temp.index >= year_start) & (df_temp.index < year_end)
        monthly_true = df_temp[mask].to_numpy().reshape(-1)
        # Without this check a district with missing or extra label rows
        # would silently shift every later prediction/label pair.
        if monthly_true.size != n_months:
            raise ValueError(
                f'{name!r} in {year}: {monthly_true.size} labels but '
                f'{n_months} monthly predictions'
            )

        # One predict call per district instead of one per month; verbose=0
        # keeps the cell output free of progress bars.
        preds = model.predict(data.reshape(-1, 16, 16, 64), verbose=0)
        # Rows are patch-major / month-minor, so averaging over axis 0 of the
        # (n_patches, n_months) view gives the per-month mean over patches.
        y_pred[year].extend(preds.reshape(n_patches, n_months).mean(axis=0))
        y_true[year].extend(monthly_true)

    y_true[year] = np.array(y_true[year]).flatten()
    y_pred[year] = np.array(y_pred[year]).flatten()

In [24]:
# For each year print: the year, the RMSE divided by the standard deviation
# of the true values, and the R^2 score.
for year in (2017, 2018, 2019):
    print(year)
    truth = y_true[year]
    predicted = y_pred[year]
    squared_error = (truth - predicted) ** 2
    print((squared_error.mean() ** 0.5) / truth.std())
    print(r2_score(truth, predicted))

2017
0.1037986526570091
0.9892258397065896
2018
0.08700947566967512
0.9924293511436882
2019
0.2905634689057278
0.9155728705374702
