In [6]:
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
#from tqdm.notebook import tqdm, trange

import os
import sys
from pathlib import Path

os.chdir(Path(sys.path[0]).parent)
import modules.utils as utils

import torch
import torch.nn as nn
import torch.nn.functional as F

## Prepare the data

In [9]:
L = 32
Directory = f"data/L_{L}"

variables=['u', 'v', 'w', 'theta', 's', 'tke', 'wtheta']
nz=376

len_samples = nz*len(variables)
len_in = nz*(len(variables)-1)
len_out = nz

array([14.9804707 , 14.99043066, 15.00086523, 15.00930762,  0.50063248,
        0.50320731,  0.50840387,  0.51110868, -0.12203034, -0.12493407,
       -0.12792623, -0.13077889,  0.97752755,  0.97752575,  0.97752387,
        0.97752264,  0.13710223,  0.135585  ,  0.13606116,  0.13534779,
       -0.11928583, -0.1221244 , -0.12504954, -0.12783829])

### Split train test ds

In [5]:
times = [i for i in range(1,64)]
perm = np.random.permutation(times)
train_times = perm[:int(0.8*63)]
test_times = perm[int(0.8*63):]

In [None]:
#init train ds
path_data = Directory+f'/input_ds_for_simple_nn_T{train_times[0]}_L_{L}.nc'
ds_init = xr.open_dataset(path_data)
df_init = ds_init.to_dataframe()
train_ds = df_init.to_numpy()
n_samples = len(train_ds)//len_samples
train_ds = train_ds.reshape(n_samples, len_samples)

for t in train_times[1:]:
    path_data = Directory+f'/input_ds_for_simple_nn_T{t}_L_{L}.nc'
    ds_init = xr.open_dataset(path_data)
    df_init = ds_init.to_dataframe()

    time_ds = df_init.to_numpy()
    n_samples = len(time_ds)//len_samples
    time_ds = time_ds.reshape(n_samples, len_samples)

    train_ds = np.concatenate((train_ds, time_ds), axis=0)

In [None]:
#init test ds
path_data = Directory+f'/input_ds_for_simple_nn_T{test_times[0]}_L_{L}.nc'
ds_init = xr.open_dataset(path_data)
df_init = ds_init.to_dataframe()
test_ds = df_init.to_numpy()
n_samples = len(test_ds)//len_samples
test_ds = test_ds.reshape(n_samples, len_samples)

for t in test_times[1:]:
    path_data = Directory+f'/input_ds_for_simple_nn_T{t}_L_{L}.nc'
    ds_init = xr.open_dataset(path_data)
    df_init = ds_init.to_dataframe()

    time_ds = df_init.to_numpy()
    n_samples = len(time_ds)//len_samples
    time_ds = time_ds.reshape(n_samples, len_samples)

    test_ds = np.concatenate((test_ds, time_ds), axis=0)

In [7]:
#useless here
# split input_ds and output_ds into training and validation sets
batch_size = 32
train, test = utils.split_train_val(tot_ds,batch_size)
input_train, output_train, input_val, output_val = train[:,:len_in], train[:,len_in:], test[:,:len_in], test[:,len_in:]
input_train.shape

(192, 20)

## Define the model

In [5]:
class DNN(nn.Module):
    def __init__(self, batch_size, input_size, output_size, drop_prob1=0.5, drop_prob2=0.5, drop_prob3=0.5, hidden_size1=128, hidden_size2=256, hidden_size3=128):
        super(DNN, self).__init__()
        self.regression = nn.Sequential(nn.Linear(input_size, hidden_size1),
                                        nn.ReLU(),
                                        nn.Dropout(drop_prob1),
                                        nn.Linear(hidden_size1, hidden_size2),
                                        nn.ReLU(),
                                        nn.Dropout(drop_prob2),
                                        nn.Linear(hidden_size2, hidden_size3),
                                        nn.ReLU(),
                                        nn.Dropout(drop_prob3),
                                        nn.Linear(hidden_size3, output_size)
                                        )
        self.drop_prob1 = drop_prob1
        self.drop_prob2 = drop_prob2
        self.drop_prob3 = drop_prob3
        self.batch_size = batch_size
        self.input_shape = input_size
        self.output_shape = output_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.hidden_size3 = hidden_size3

    
    def forward(self, x):
        return self.regression(x)

In [22]:
learning_rate = 0.001
batch_size = 32
nb_epochs = 100
losses=[]

In [23]:
model = DNN(batch_size=batch_size,input_size=len_in,output_size=len_out)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
# train the model
model.train()

for epoch in range(nb_epochs):
    tot_losses=0
    for i in np.random.permutation(input_train.shape[0]//batch_size):
        # convert numpy array to torch tensor
        input = torch.from_numpy(input_train).float()
        output = torch.from_numpy(output_train).float()
        loss=0

        input_batch = input[i*batch_size:(i+1)*batch_size,:]
        output_batch = output[i*batch_size:(i+1)*batch_size,:]
        optimizer.zero_grad()
        # forward pass
        output_pred = model(input_batch)
        # compute loss
        loss += F.mse_loss(output_pred, output_batch, reduction='sum')
        tot_losses += loss.item()
        # backward pass
        loss.backward()
        optimizer.step()
    losses.append(tot_losses/(input_train.shape[0]//batch_size))
    if (epoch+1)%10==0:
        print('Epoch [{}/{}], Loss: {:.6f}'.format(epoch+1, nb_epochs, tot_losses/(input_train.shape[0]//batch_size)))

Epoch [10/1000], Loss: 3.898409
Epoch [20/1000], Loss: 3.299969
Epoch [30/1000], Loss: 3.356835
Epoch [40/1000], Loss: 3.262183
Epoch [50/1000], Loss: 3.168295
Epoch [60/1000], Loss: 2.997622
Epoch [70/1000], Loss: 2.935409
Epoch [80/1000], Loss: 2.178135
Epoch [90/1000], Loss: 1.876122
Epoch [100/1000], Loss: 1.394014
Epoch [110/1000], Loss: 1.059250
Epoch [120/1000], Loss: 1.078115
Epoch [130/1000], Loss: 0.920705
Epoch [140/1000], Loss: 0.740047
Epoch [150/1000], Loss: 0.677146
Epoch [160/1000], Loss: 0.638483
Epoch [170/1000], Loss: 0.653979
Epoch [180/1000], Loss: 0.663465
Epoch [190/1000], Loss: 0.691787
Epoch [200/1000], Loss: 0.442267
Epoch [210/1000], Loss: 0.512786
Epoch [220/1000], Loss: 0.535509
Epoch [230/1000], Loss: 0.425400
Epoch [240/1000], Loss: 0.407320
Epoch [250/1000], Loss: 0.374976
Epoch [260/1000], Loss: 0.374877
Epoch [270/1000], Loss: 0.388647
Epoch [280/1000], Loss: 0.322622
Epoch [290/1000], Loss: 0.360882
Epoch [300/1000], Loss: 0.258994
Epoch [310/1000], L

In [25]:
# test the model

# convert numpy array to torch tensor
input_test = torch.from_numpy(input_val).float()
output_test = torch.from_numpy(output_val).float()
predictions= torch.zeros(output_test.shape)
tot_losses=0

model.eval()
# prediction
output_pred = model(input_test)
# compute loss
loss = F.mse_loss(output_pred, output_test, reduction='sum')

print('Test loss: {:.6f}'.format(loss*batch_size/input_test.shape[0]))

Test loss: 0.311524


In [26]:
print(output_pred[-5:])
print(output_test[-5:])

tensor([[0.1531, 0.1564, 0.1598, 0.1614],
        [0.2477, 0.2538, 0.2582, 0.2626],
        [0.2980, 0.3049, 0.3104, 0.3164],
        [0.2896, 0.2966, 0.3018, 0.3076],
        [0.1728, 0.1770, 0.1803, 0.1827]], grad_fn=<SliceBackward>)
tensor([[0.1177, 0.1207, 0.1238, 0.1267],
        [0.2500, 0.2542, 0.2579, 0.2616],
        [0.3858, 0.3905, 0.3950, 0.3996],
        [0.2752, 0.2856, 0.2963, 0.3070],
        [0.1327, 0.1382, 0.1438, 0.1497]])
