## Train ResNets

### created by Yuying Liu, 04/30/2020

This script is a template for training neural network time-steppers for different systems and different time scales. To reproduce the results in the paper, one needs to obtain all 11 neural network models for each nonlinear system under study. For setup details, please refer to Table 2 in the paper.

In [1]:
import os
import sys
import torch
import numpy as np

import matplotlib.pyplot as plt
# Put the project's ../src directory (resolved against the current working
# directory) on the import path so the local ResNet module can be imported.
module_path = os.path.abspath(os.path.join('../src/'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import ResNet as net

In [2]:
# adjustables

# --- system under study ---
system = 'toy1_longer'        # sub-directory name used for both data and models
noise = 0.0                   # noise percentage: 0.00, 0.01 or 0.02

# --- time grid ---
# 500 samples on [0, 8*pi]; the time unit dt is the spacing of this grid
t = np.linspace(0, 8*np.pi, 500)
dt = t[1] - t[0]

# --- model indices (the step size of model k is 2**k) ---
k = 0                         # index in effect until the training loop reassigns it
min_k = 0                     # first index the training loop visits
max_k = 5                     # the training loop stops before this index

# --- optimisation ---
lr = 1e-3                     # learning rate
batch_size = 320              # training batch size
max_epoch = 100000            # maximum training epoch (the training cell passes its own literal value)

In [3]:
# paths
data_dir = os.path.join('../data/', system)    # directory the encoded array is read from
model_dir = os.path.join('../model/', system)  # directory used for the model files

# the input file is selected by these two indices
level = 0
filter_group_num = 0
encoded = np.load(
    os.path.join(data_dir, 'data_L{}_{}.npy'.format(level, filter_group_num))
)

print(encoded.shape)

# The array must be 4-D: axis 0 is the number of steps and axis 2 gives
# n_per_dim; the other two sizes are discarded. Each step is later flattened
# to n_per_dim**2 values, which sets the first and last layer widths.
n_steps, _, n_per_dim, _ = encoded.shape
total_dim = n_per_dim * n_per_dim
arch = [total_dim] + [128] * 3 + [total_dim]

# global const
n_forward = 5
step_size = 2**k

(5000, 1, 3, 3)


In [4]:
# show the loaded array's shape again (repeats the print in the loading cell)
print(encoded.shape)

(5000, 1, 3, 3)


In [6]:
# divide data
# The steps are split in order (no shuffling): the first block is used for
# training, the next for validation and the remainder for testing.
percent_train = 0.6
percent_val = 0.2
percent_test = 1 - percent_train - percent_val
# Binary floating point can leave a tiny negative remainder for perfectly
# valid fractions (1 - 0.9 - 0.1 is about -2.8e-17), so compare with a
# tolerance and clamp instead of testing against exactly zero.
assert percent_test >= -1e-9, "percent_train + percent_val must not exceed 1"
percent_test = max(percent_test, 0.0)


train_break = int(n_steps * percent_train)
val_break = int(n_steps * (percent_train + percent_val))


def _as_single_trajectory(snapshots):
    """Flatten every step of `snapshots` and add a leading axis of length 1.

    snapshots: array whose first axis indexes steps and whose remaining
        entries total n_per_dim**2 values per step.
    Returns an array of shape (1, len(snapshots), n_per_dim**2).
    """
    return snapshots.reshape((1, len(snapshots), n_per_dim**2))


# index 0 on the second axis is the only slice of that axis that is used
train_data = _as_single_trajectory(encoded[:train_break, 0, :, :])
val_data = _as_single_trajectory(encoded[train_break:val_break, 0, :, :])
test_data = _as_single_trajectory(encoded[val_break:, 0, :, :])


print(train_data.shape)
print(val_data.shape)
print(test_data.shape)
# size of the leading axis of each split (1 after the reshape above)
n_train = train_data.shape[0]
n_val = val_data.shape[0]
n_test = test_data.shape[0]
print(n_train)
print(n_val)
print(n_test)


print(n_forward)
print("step_size*n_forward+1 = ", step_size*n_forward+1)
# create dataset object
dataset = net.DataSet(train_data, val_data, test_data, dt, step_size, n_forward)

(1, 3000, 9)
(1, 1000, 9)
(1, 1000, 9)
1
1
1
[[0.7210292  0.73232096 0.7309484  ... 0.7134194  0.7242388  0.7210751 ]
 [0.7200456  0.7312804  0.72991467 ... 0.7106514  0.7213417  0.72009134]
 [0.71710485 0.72816896 0.726824   ... 0.7060499  0.71650434 0.7171498 ]
 ...
 [0.71538645 0.72635096 0.72501814 ... 0.7073065  0.7177841  0.71543103]
 [0.719103   0.730283   0.7289241  ... 0.71217996 0.72291726 0.71914846]
 [0.72087187 0.7321544  0.7307829  ... 0.71374214 0.7245721  0.72091764]]
5
step_size*n_forward+1 =  6


  return torch._C._cuda_getDeviceCount() > 0


In [11]:
# Train one model per index k in [min_k, max_k), each with step size 2**k.
# NOTE: the loop reassigns the notebook-level names k, step_size, dataset and model.

# Make sure the directory used for model_path exists before training starts.
os.makedirs(model_dir, exist_ok=True)

# The device choice does not depend on k, so decide it once.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for k in range(min_k, max_k):
    print("k = ", k)
    step_size = 2**k
    dataset = net.DataSet(train_data, val_data, test_data, dt, step_size, n_forward)
    print(dataset.train_x.shape)
    model_name = 'model_D{}_noise{}_0.pt'.format(step_size, noise)
    model_path = os.path.join(model_dir, model_name)

    # create/load model object
    model = None
    if os.path.isfile(model_path):
        try:
            # torch.load unpickles the file, so only load checkpoints you trust.
            # NOTE(review): recent PyTorch releases default to weights_only=True,
            # which rejects whole pickled model objects; if loading keeps failing,
            # weights_only=False may be needed -- confirm for the installed version.
            model = torch.load(model_path, map_location=device)
            model.device = device
        except Exception as err:
            # Keep the best-effort fallback, but say why the checkpoint was
            # not used instead of silently discarding it.
            print('could not load {} ({}: {})'.format(model_name, type(err).__name__, err))
            model = None

    if model is None:
        print('create model {} ...'.format(model_name))
        model = net.ResNet(arch=arch, dt=dt, step_size=step_size)

    # training
    # NOTE(review): 30000 is passed literally; the max_epoch value from the
    # adjustables cell is not used here -- confirm which one is intended.
    model.train_net(dataset, max_epoch=30000, batch_size=batch_size, lr=lr,
                    model_path=model_path)

k =  8
torch.Size([1, 9])
create model model_D256_noise0.0_0.pt ...
self.n_dim=  9
dataset.n_dim =  9
epoch 1000, training loss 4.0479997551301494e-05, validation loss 4.8220685130218044e-05
(--> new model saved @ epoch 1000)
epoch 2000, training loss 2.5171135348500684e-05, validation loss 7.928694685688242e-05
epoch 3000, training loss 2.1627847672789358e-05, validation loss 8.024937415029854e-05
epoch 4000, training loss 2.6440631700097583e-05, validation loss 6.376430974341929e-05
epoch 5000, training loss 0.00015018825070001185, validation loss 0.0002995535032823682
epoch 6000, training loss 2.2553202143171802e-05, validation loss 4.475070454645902e-05
(--> new model saved @ epoch 6000)
epoch 7000, training loss 3.471951276878826e-05, validation loss 7.577236101496965e-05
epoch 8000, training loss 0.000160580690135248, validation loss 4.3616404582280666e-05
(--> new model saved @ epoch 8000)
epoch 9000, training loss 4.702523256128188e-06, validation loss 4.647011883207597e-05
epo

AssertionError: 

In [None]:
 

.