## Train ResNets

### created by Yuying Liu, 04/30/2020

This script is a template for training neural network time-steppers for different systems and different time scales. To reproduce the results in the paper, one needs to obtain all 11 neural network models for each nonlinear system under study. For setup details, please refer to Table 2 in the paper.

In [1]:
import os
import sys
import torch
import numpy as np

import matplotlib.pyplot as plt
# Resolve the sibling ../src/ folder to an absolute path so that it can be
# searched when importing modules.
module_path = os.path.abspath('../src/')
# Register the folder only once; re-running this cell must not add duplicates.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
import ResNet as net

In [2]:
# adjustables

# --- system under study ---
# Data-set name; it is joined onto '../data/' and '../model/' to locate files.
system = 'toy1_longer'
# Noise percentage (0.00, 0.01 or 0.02); it is embedded in the model file name.
noise = 0.0

# --- time stepping ---
# Uniform time grid: 500 samples spanning [0, 8*pi].
t = np.linspace(0.0, 8.0 * np.pi, num=500)
# Time unit = spacing of the grid above (the original template used fixed
# values instead: 0.0005 for Lorenz and 0.01 for the other systems).
dt = t[1] - t[0]
# Model index; the step size is 2**k.  One model per index in
# {0, 1, ..., 10} gives the 11 models needed per system.
k = 0

# --- optimisation ---
lr = 1e-3            # learning rate
batch_size = 320     # training batch size
max_epoch = 100000   # the maximum training epoch

# total_dim and arch (the network architecture) are not set by hand here;
# they are derived from the shape of the loaded data.

In [3]:
# paths
# Per-system folders: one holding the input arrays, one for saved models.
data_dir, model_dir = (
    os.path.join(root, system) for root in ('../data/', '../model/')
)

# Selects which encoded array to train on (file data_L<level>_<group>.npy).
level, filter_group_num = 0, 0
encoded = np.load(
    os.path.join(data_dir, 'data_L{}_{}.npy'.format(level, filter_group_num))
)

print(encoded.shape)
# The array must be 4-D for this unpacking; only the lengths of the first and
# third axes are kept.
n_steps, _, n_per_dim, _ = encoded.shape
# Width of the network's first and last entries: n_per_dim squared.
total_dim = n_per_dim * n_per_dim
# Architecture handed to the network: total_dim at both ends with three
# 128-wide entries in between (presumably layer widths -- confirm in ResNet).
arch = [total_dim] + [128] * 3 + [total_dim]

# global const
n_forward = 5
step_size = 2 ** k

(5000, 1, 3, 3)


In [5]:
# load data
# Reload the raw array for inspection.  The file name is built exactly like
# the one used for `encoded`; the previous literal ("data_L0_0.npy ") carried
# a trailing space, which names a different file on filesystems that do not
# strip trailing blanks and makes the load fail there.
data = np.load(os.path.join(data_dir, 'data_L{}_{}.npy'.format(level, filter_group_num)))

print(data.shape)

(100, 500, 4)
# Split `encoded` along its first axis into train / validation / test parts
# (first half, next quarter, last quarter) and flatten the two trailing axes
# of every sample into one feature vector.  Each part is reshaped to
# (1, n_samples_in_part, n_features), i.e. a single trajectory per part.
#
# The boundaries and the feature width are derived from the array itself
# instead of being hard-coded (2500 / 3750 / 9), so arrays of other sizes work
# too; for the (5000, 1, 3, 3) array the result is identical to before.
n_samples = encoded.shape[0]
n_features = encoded.shape[2] * encoded.shape[3]
train_end = n_samples // 2
val_end = (3 * n_samples) // 4

# Index 0 on the second axis keeps only its first entry.
train_data = encoded[:train_end, 0, :, :].reshape((1, -1, n_features))
val_data = encoded[train_end:val_end, 0, :, :].reshape((1, -1, n_features))
test_data = encoded[val_end:, 0, :, :].reshape((1, -1, n_features))

print(train_data.shape)
print(val_data.shape)
print(test_data.shape)

# Size of the leading axis of each part (1 for each with the reshape above).
n_train = train_data.shape[0]
n_val = val_data.shape[0]
n_test = test_data.shape[0]
print(n_train)
print(n_val)
print(n_test)

print(train_data[0, :, :])

print("step_size*n_forward+1 = ", step_size*n_forward+1)
# create dataset object
dataset = net.DataSet(train_data, val_data, test_data, dt, step_size, n_forward)

(5000, 1, 3, 3)
(1, 2500, 9)
(1, 1250, 9)
(1, 1250, 9)
1
1
1
[[0.7210292  0.73232096 0.7309484  ... 0.7134194  0.7242388  0.7210751 ]
 [0.7200456  0.7312804  0.72991467 ... 0.7106514  0.7213417  0.72009134]
 [0.71710485 0.72816896 0.726824   ... 0.7060499  0.71650434 0.7171498 ]
 ...
 [0.714909   0.72584575 0.72451633 ... 0.706664   0.7171077  0.7149534 ]
 [0.71881855 0.72998196 0.72862506 ... 0.7118338  0.72255224 0.71886396]
 [0.72078323 0.73206073 0.7306898  ... 0.71374124 0.7245696  0.72082907]]
step_size*n_forward+1 =  6


  return torch._C._cuda_getDeviceCount() > 0


In [None]:
# for i in range(100):
#     plt.plot(train_data[i,:,0], train_data[i,:,1], 'b')

In [7]:
# Train one time-stepper per model index k (step size 2**k).  For each index
# an existing checkpoint is resumed when it can be loaded; otherwise a fresh
# network is created.  Note that the loop rebinds the notebook-level names
# `k`, `step_size` and `dataset`.

# The device does not change between iterations, so query it once.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Make sure the checkpoint folder exists before anything is saved into it.
os.makedirs(model_dir, exist_ok=True)

for k in range(3):
    print("k = ", k)
    step_size = 2**k
    dataset = net.DataSet(train_data, val_data, test_data, dt, step_size, n_forward)
    print(dataset.train_x.shape)
    model_name = 'model_D{}_noise{}_0.pt'.format(step_size, noise)
    model_path = os.path.join(model_dir, model_name)

    # create/load model object
    try:
        # NOTE(review): this expects the file to hold a whole pickled model
        # object; newer PyTorch releases may refuse that by default
        # (weights_only) -- confirm against the installed version.
        model = torch.load(model_path, map_location=device)
        model.device = device
    except Exception as err:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still abort.
        # A missing checkpoint is the expected first-run case; anything else
        # is reported so that a restart from scratch is never silent.
        if not isinstance(err, FileNotFoundError):
            print('could not load {} ({}: {})'.format(model_name, type(err).__name__, err))
        print('create model {} ...'.format(model_name))
        model = net.ResNet(arch=arch, dt=dt, step_size=step_size)

    # training
    # NOTE(review): the epoch budget is the literal 30000, not the `max_epoch`
    # setting from the adjustables cell -- confirm which one is intended.
    model.train_net(dataset, max_epoch=30000, batch_size=batch_size, lr=lr,
                    model_path=model_path)

k =  0
torch.Size([1, 9])
create model model_D1_noise0.0_0.pt ...
self.n_dim=  9
dataset.n_dim =  9
epoch 1000, training loss 6.411835329345195e-06, validation loss 1.3198136912251357e-05
(--> new model saved @ epoch 1000)
epoch 2000, training loss 5.035839876654791e-06, validation loss 1.101105135603575e-05
(--> new model saved @ epoch 2000)
epoch 3000, training loss 4.23353321821196e-06, validation loss 9.77457057160791e-06
(--> new model saved @ epoch 3000)
epoch 4000, training loss 8.16800366010284e-06, validation loss 1.4408931747311726e-05
epoch 5000, training loss 4.42626378571731e-06, validation loss 1.0213742825726513e-05
epoch 6000, training loss 4.017501851194538e-06, validation loss 9.335253707831725e-06
(--> new model saved @ epoch 6000)
epoch 7000, training loss 2.960645133498474e-06, validation loss 7.593992449983489e-06
(--> new model saved @ epoch 7000)
epoch 8000, training loss 2.4726446099521127e-06, validation loss 6.771351308998419e-06
(--> new model saved @ epoch 

epoch 22000, training loss 6.587547886738321e-06, validation loss 2.6488316507311538e-05
epoch 23000, training loss 2.4113944618875394e-06, validation loss 4.0017577703110874e-05
epoch 24000, training loss 5.304891601554118e-07, validation loss 3.9581685996381566e-05
epoch 25000, training loss 2.0693228179879952e-06, validation loss 4.080068538314663e-05
epoch 26000, training loss 1.2589952120833914e-06, validation loss 3.1042123737279326e-05
epoch 27000, training loss 3.0307603537949035e-06, validation loss 3.150305929011665e-05
epoch 28000, training loss 1.8119228570867563e-06, validation loss 3.860657670884393e-05
epoch 29000, training loss 1.2578316272993106e-06, validation loss 4.703145896201022e-05
epoch 30000, training loss 1.3375926073422306e-06, validation loss 4.2929499613819644e-05


In [None]:
 

.