### Train a CNN generative network on the ETH data

In [None]:
%reload_ext autoreload
%autoreload 2
import numpy as np
import torch
# Choose the compute device once; `device` and `use_gpu` are read by later cells.
use_gpu = bool(torch.cuda.device_count())
device = 'cuda' if use_gpu else 'cpu'
print('Good to go' if use_gpu else 'Using cpu')

In [None]:
from ReadData import ReadETHFolder, ReadETHFile
# Where the field samples and the matching current recording live.
foldername = "./ETH_Data/v/"
currentname = "./ETH_Data/" + "currents_3787.h5"

# Arguments forwarded to the folder reader.
file_num = 300
data_shape = (16, 16, 16, 3)

# Read the field samples and move axis 4 to position 1
# (presumably the 3 field components, per data_shape -- confirm).
Bfield = torch.tensor(
    ReadETHFolder(foldername, file_num, data_shape)
).permute(0, 4, 1, 2, 3)

# Read the currents and keep only as many rows as there are field samples.
current = torch.tensor(ReadETHFile(currentname))[0:Bfield.shape[0], :]

for loaded in (Bfield, current):
    print(loaded.shape)

In [None]:
from Neural_network import Generative_net,Generative_net_test ,ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset
# ---------------------------------------------------------------
# Network configuration
# ---------------------------------------------------------------
# Building-block classes handed to the network constructor.
SB_block, BB_block = ResidualEMNSBlock_3d, BigBlock

# Per-block settings.
SB_args = (64, 64, 1, 4)  # (Cin, Cout, num_repeat, num_block)
BB_args = (2, 3)          # (scale_factor, num_block)

# Input width and output shape passed to the constructor as keywords.
num_input, output_shape = 8, (3, 16, 16, 16)

# Whether to use the divergence-free model; not consumed in this cell.
DF = False

Generative_network = Generative_net_test(
    SB_args,
    BB_args,
    SB_block,
    BB_block,
    num_input=num_input,
    output_shape=output_shape,
)
print(Generative_network)

from torchviz import make_dot
import torch.nn.functional as F
from Training_loop import grad_loss_Jacobain

# Smoke test: one forward/backward pass on a tiny batch, then render the
# autograd graph of the combined loss.
probe_batch = 2
# The width follows `num_input` (previously a hard-coded 8) so the probe keeps
# matching the network when the configuration changes.
x = torch.randn(probe_batch, num_input)
y = Bfield[:probe_batch]
preds = Generative_network(x)
print(preds.shape)

# L1 term plus the grad_loss_Jacobain term imported from Training_loop.
loss = F.l1_loss(preds, y) + grad_loss_Jacobain(preds, y)
loss.backward()

# Kept as the final expression so the notebook displays the returned graph.
make_dot(loss, params=dict(Generative_network.named_parameters()))


### Tune hyperparameters

In [None]:
from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset
from Training_loop_v2 import train_GM
from functools import partial
from ray.train import RunConfig, ScalingConfig, CheckpointConfig
from ray.train.torch import TorchTrainer
from ray.tune.tuner import Tuner
from ray import tune
from ray.tune.schedulers import ASHAScheduler
import ray

# Wrap the currents (inputs) and field samples (targets) in the project dataset.
dataset = eMNS_Dataset(train_x=current, train_y=Bfield)

# Random 90 % / 10 % partition into training and validation subsets.
split_fractions = [0.9, 0.1]
train_set, valid_set = torch.utils.data.random_split(dataset, split_fractions)

# Normalization, driven by the training indices only. Entries 2 and 3 of the
# returned `extremes` are used further down as maxB / minB.
extremes = dataset.train_norm(train_indices=train_set.indices)

# ASHA early-stopping schedule used by the hyperparameter search.
# NOTE(review): max_t=120 caps every trial while 'epochs' below is 350 --
# confirm that the cap is intended.
tune_schedule = ASHAScheduler(
    metric="rmse_val",  # metric to optimize. This metric should be reported with tune.report()
    mode="min",
    max_t=120,
    grace_period=10,  # minimum stop epoch
    reduction_factor=2,
)

# Request GPUs only when one was detected: ScalingConfig raises if "GPU" appears
# in resources_per_worker while use_gpu is False, which broke CPU-only runs.
tune_worker_resources = {"CPU": 10, "GPU": 2} if use_gpu else {"CPU": 10}

# Search space handed to the Tuner. Entries wrapped in tune.* are sampled per
# trial; plain values are passed through unchanged.
param_space = {
    "scaling_config": ScalingConfig(
        num_workers=1,
        use_gpu=use_gpu,
        resources_per_worker=tune_worker_resources,
    ),
    # You can even grid search various datasets in Tune.
    # "datasets": {
    #     "train": tune.grid_search(
    #         [ds1, ds2]
    #     ),
    # },
    "train_loop_config": {
        'epochs': tune.choice([350]),
        'lr_max': tune.loguniform(1e-4, 1e-2),
        # Bounds are (lower, upper); they were previously passed reversed.
        'lr_min': tune.loguniform(1e-7, 1e-5),
        'batch_size': tune.choice([4, 8, 16]),
        'L2_norm': tune.choice([0]),
        'verbose': False,
        'DF': tune.choice([True, False]),
        'schedule': [],
        'grid_space': 16**3,
        'learning_rate_decay': 0.5,
        'skip_spacing': tune.choice([1, 2, 4]),
        'num_repeat': tune.choice([1, 2, 4]),
        'num_block': tune.choice([1, 2, 3]),
        'maxB': extremes[2],
        'minB': extremes[3],
        'train_set': train_set,
        'valid_set': valid_set,
    },
}

# Sweep values; np.arange(1.0, 1.01, 0.1) yields the single entry 1.0.
train_percents = np.arange(1.0, 1.01, 0.1)

# One independent zero-filled buffer per end-of-training quantity, each with
# one slot per sweep value.
(
    RMSE_history_end,
    RMSE_val_history_end,
    loss_history_end,
    iter_history_end,
    mse_history_end,
    mse_val_history_end,
    train_stop_epoch,
) = (np.zeros(len(train_percents)) for _ in range(7))

################################################
# Train the neural network
################################################

# Baseline training-loop settings given to the TorchTrainer.
train_loop_config = {
    'epochs': 10,
    'lr_max': 1e-4,
    'lr_min': 2.5e-6,
    'batch_size': 8,
    'L2_norm': 0,
    'verbose': False,
    'DF': False,
    'schedule': [],
    'grid_space': 16**3,
    'learning_rate_decay': 0.5,
    'skip_spacing': 1,
    'num_repeat': 4,
    'num_block': 2,
    'maxB': extremes[2],
    'minB': extremes[3],
    'device': device,
    'train_set': train_set,
    'valid_set': valid_set,
    # You can even grid search various datasets in Tune.
    # "datasets": tune.grid_search(
    #         [ds1, ds2]
    #     ),
}

# Request a GPU per worker only when one was detected: ScalingConfig raises if
# "GPU" appears in resources_per_worker while use_gpu is False.
worker_resources = {"CPU": 4, "GPU": 1} if use_gpu else {"CPU": 4}

scaling_config = ScalingConfig(
    num_workers=2,
    use_gpu=use_gpu,
    resources_per_worker=worker_resources,
)

# Keep only one checkpoint on disk.
run_config = RunConfig(checkpoint_config=CheckpointConfig(num_to_keep=1))

# Earlier wrapper, kept for reference:
# def train_loop_per_worker(params):
#     train_GM(train_set=train_set, valid_set=valid_set,  device=device, config=params)

# Trainer that runs train_GM with the baseline configuration.
trainer = TorchTrainer(
    train_loop_per_worker=train_GM,
    train_loop_config=train_loop_config,
    scaling_config=scaling_config,
    run_config=run_config,
)
# result = trainer.fit()

# Search settings: the ASHA schedule and the number of sampled configurations.
search_config = tune.TuneConfig(
    scheduler=tune_schedule,
    num_samples=30,  # number of samples of hyperparameter space
)

tuner = tune.Tuner(
    trainer,
    param_space=param_space,
    tune_config=search_config,
    # run_config = RunConfig(storage_path="./results", name="test_experiment")
)

# Run the search and keep the result grid for the cells below.
results = tuner.fit()

In [None]:
# Select the trial with the smallest reported 'rmse_val'.
best_result = results.get_best_result(
    metric='rmse_val',
    mode='min',
)

In [None]:
from utils import plot_ray_results
# Pass the selected trial to the project plotting helper with both RMSE metrics.
plot_ray_results(
    best_result,
    metrics_names=['rmse_train', 'rmse_val'],
)

In [None]:
# Pass the whole result grid to the plotting helper, with the y-range set to [20, 50].
plot_ray_results(
    results,
    metrics_names=['rmse_train', 'rmse_val'],
    ylim=[20, 50],
)

In [None]:
# Launch TensorBoard on ~/ray_results (presumably where Ray wrote the tuning logs -- confirm).
!tensorboard --logdir=~/ray_results

In [None]:
from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset
from Training_loop_v2 import train_GM
from tqdm import tqdm

# Rebuild the dataset from the loaded currents (inputs) and fields (targets).
dataset = eMNS_Dataset(train_x=current, train_y=Bfield)

# Fixed settings for the direct (non-Ray) training run. The data-dependent
# entries (maxB, minB, train_set, valid_set) are added inside the loop below.
config = {
    'epochs': 10,
    'lr_max': 1e-4,
    'lr_min': 2.5e-6,
    'batch_size': 8,
    'L2_norm': 0,
    'verbose': False,
    'DF': True,
    'schedule': [],
    'grid_space': 16**3,
    'learning_rate_decay': 0.5,
    'skip_spacing': 1,
    'num_repeat': 4,
    'num_block': 2,
    'device': device,
}
# Sweep values; the arange below contains only 1.0.
train_percents = np.arange(1.0, 1.01, 0.1)

# Separate zero-filled result buffers, each shaped like the sweep axis.
(
    RMSE_history_end,
    RMSE_val_history_end,
    loss_history_end,
    iter_history_end,
    mse_history_end,
    mse_val_history_end,
    train_stop_epoch,
) = [np.zeros_like(train_percents) for _ in range(7)]

################################################
# Train the neural network
################################################
# For each sweep value: split, normalize, train directly via train_GM, and
# record the histories at the stopping epoch.
for index, train_percent in enumerate(train_percents):
    print('train_percent', train_percent)

    # Split the dataset into training and validation subsets.
    # NOTE(review): train_percent is only printed; the split is always 0.9 / 0.1.
    train_set, valid_set = torch.utils.data.random_split(dataset, [0.9, 0.1])

    # Normalization, driven by the training indices only.
    # NOTE(review): train_norm runs again on the same dataset object every
    # iteration -- confirm that is safe once the sweep has several values.
    extremes = dataset.train_norm(train_indices=train_set.indices)

    # Data-dependent entries expected by train_GM.
    config['maxB'] = extremes[2]
    config['minB'] = extremes[3]
    config['train_set'] = train_set
    config['valid_set'] = valid_set

    print("----------------------------")

    print("----------------------------")

    (
        RMSE_history,
        RMSE_val_history,
        loss_history,
        iter_history,
        mse_history,
        mse_val_history,
        epoch_stop,
        Rsquare,
    ) = train_GM(config=config)

    # Save RMSE and loss after early stopping.
    RMSE_history_end[index] = RMSE_history[epoch_stop]
    RMSE_val_history_end[index] = RMSE_val_history[epoch_stop]
    loss_history_end[index] = loss_history[epoch_stop]
    iter_history_end[index] = iter_history[epoch_stop]
    mse_history_end[index] = mse_history[epoch_stop]
    mse_val_history_end[index] = mse_val_history[epoch_stop]
    # This buffer was allocated but never written before.
    train_stop_epoch[index] = epoch_stop

    print('training stop at epoch:', epoch_stop)
    # Previously printed under the copy-pasted 'training stop at epoch:' label.
    print('Rsquare:', Rsquare)


In [None]:
# Original note: the parameters of the best model so far are stored here.
# NOTE(review): this pickles the whole `Generative_network` object, and the
# train_GM call above returns only histories -- confirm this is the trained model.
torch.save(
    Generative_network,
    'EMS_CNN_ETH.pt',
)

In [None]:

import matplotlib.pyplot as plt
import numpy as np
# Smooth the loss curve with a length-`ave_site` averaging kernel; 'same' mode
# keeps the smoothed curve as long as the original.
ave_site = 5
ave_kernel = np.full(ave_site, 1 / ave_site)
loss_history_conv = np.convolve(loss_history.numpy(), ave_kernel, mode='same')

# --- Figure 1: raw and smoothed loss -----------------------------------
plt.title('loss')
for curve, marker in ((loss_history, '-o'), (loss_history_conv, '-*')):
    plt.plot(iter_history, curve, marker)
plt.legend(['loss', 'loss_conv'])
plt.xlabel('iterations')
plt.ylabel('loss')
plt.ylim([0, 10])
plt.show()

# The remaining figures only show entries before the stopping epoch.
shown = slice(0, epoch_stop)

# --- Figure 2: train / validation RMSE ---------------------------------
plt.title('Train and Val RMSE(sample_num=1000)')
for curve, marker in ((RMSE_history, '-o'), (RMSE_val_history, '-*')):
    plt.plot(iter_history[shown], curve[shown], marker)
# Alternative x-axis / scaling, kept for reference:
# plt.plot(2e-5*np.arange(epoch_stop),RMSE_history[0:epoch_stop]*1000,'-o')
# plt.plot(2e-5*np.arange(epoch_stop),RMSE_val_history[0:epoch_stop]*1000,'-*')
# plt.ylim([15,20])
plt.legend(['train CNN', 'val CNN'])
plt.xlabel('iterations')
plt.ylabel('RMSE(mT)')
plt.ylim([0, 100])
plt.grid()
plt.show()

# --- Figure 3: train / validation MSE, scaled by 1e6 -------------------
plt.title('Train and Val loss(sample_num=1000)')
for curve, marker in ((mse_history, '-o'), (mse_val_history, '-*')):
    plt.plot(iter_history[shown], curve[shown] * 1e6, marker)
plt.legend(['train CNN', 'val CNN'])
plt.xlabel('iterations')
plt.ylabel('mse(mT^2)')
plt.grid()
plt.show()
print(epoch_stop)

