### Train a CNN generative network on ETH data

In [None]:
!pip install -U "ray[data,train,tune,serve]"

In [None]:
%reload_ext autoreload
%autoreload 2
import numpy as np
import torch
import os
# Pin the distributed backend to "gloo" via the environment
# (presumably read by PyTorch Lightning / the training stack -- confirm).
os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo"

# Use CUDA when torch reports at least one CUDA device, otherwise the CPU.
use_gpu = bool(torch.cuda.device_count())
device = 'cuda' if use_gpu else 'cpu'
print('Good to go' if use_gpu else 'Using cpu')

In [None]:
from ReadData import ReadCurrentAndField_CNN,add_gaussian_noise
import glob
import os 

# TODO zhoujing edit this Data loading
# print(os.getcwd())
# Where the field files live and which file names to pick up.
foldername = "./Data/"
filepattern = "MagneticField[0-9]*.txt"
train_file_num = 1200
noise = 0.05  # passed as `noise=` to add_gaussian_noise below

# data = ReadFolder(foldername,filepattern)
current, data = ReadCurrentAndField_CNN(foldername, filepattern, train_file_num)

# List of files matching the pattern (only used by the commented-out debug print).
fileList = glob.glob(f"{foldername}{filepattern}")

# Keep indices 2..17 (16 samples) along each of the last three axes.
# Channels 0-2 are stored as `position`, the remaining channels as `Bfield`.
crop = slice(2, 18)
position = data[:, :3, crop, crop, crop]
Bfield = data[:, 3:, crop, crop, crop]

# print(fileList)
print(data.shape)
print('current shape', current.shape)
print('Bfield shape', Bfield.shape)

# Add noise to the inputs first, then to the targets (same order as before).
current, Bfield = (
    add_gaussian_noise(clean, noise=noise) for clean in (current, Bfield)
)

In [None]:
from Neural_network import Generative_net,Generative_net_test ,ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset
###############################################
# Config the neural network
###############################################
num_input = 12  # width of the input vector the network is built for
# 16 samples per spatial axis matches the 2:18 crop applied to Bfield;
# 3 output channels -- assumes Bfield has 3 components, TODO confirm.
output_shape = (3, 16, 16, 16)
SB_args = (64, 64, 1, 4)  # (Cin, Cout, num_repeat, num_block)
BB_args = (2, 3)  # (scale_factor, num_block)
SB_block = ResidualEMNSBlock_3d
BB_block = BigBlock
DF = False  # whether using divergence free model

Generative_network = Generative_net_test(
    SB_args, BB_args, SB_block, BB_block,
    num_input=num_input, output_shape=output_shape,
)
print(Generative_network)

from torchviz import make_dot
import torch.nn.functional as F
from Training_loop import grad_loss_Jacobain

# Smoke test: push a random batch of two samples through the network,
# compute the training loss against the first two field samples and
# back-propagate so the autograd graph can be rendered.
# The dummy input width is taken from `num_input` so it always matches the
# network built above (it was previously hard-coded to 8).
x = torch.randn(2, num_input)
y = Bfield[0:2]  # NOTE(review): assumes Bfield is a torch tensor here -- confirm
preds = Generative_network(x)
print(preds.shape)
loss = F.l1_loss(preds, y) + grad_loss_Jacobain(preds, y)
loss.backward()

# Last expression of the cell: the notebook displays the rendered graph.
make_dot(loss, params=dict(Generative_network.named_parameters()))


### Tune hyperparameters

In [None]:
from Neural_network import eMNS_Dataset
from Training_loop_v2 import train_GM
from ray.train import RunConfig, ScalingConfig, CheckpointConfig
from ray.train.torch import TorchTrainer,TorchConfig
from ray.tune.tuner import Tuner
from ray import tune
from ray.tune.schedulers import ASHAScheduler
import ray
import os
# Pin the distributed backend to "gloo" via the environment as well as via
# TorchConfig below.
os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo"

# construct dataset
dataset = eMNS_Dataset(
    x=current,
    y=Bfield
)
# split the dataset into train, validation, test (85% / 10% / 5%)
train_set, valid_set, test_set = torch.utils.data.random_split(dataset, [0.85, 0.1, 0.05])

# normalization, computed from the training indices only
extremes = dataset.train_norm(train_indices=train_set.indices)

# NOTE(review): grace_period equals max_t, so this scheduler effectively
# never stops a trial early -- lower grace_period if pruning is wanted.
tune_schedule = ASHAScheduler(
    metric="rmse_val",  # metric to optimize. This metric should be reported with tune.report()
    mode="min",
    max_t=350,
    grace_period=350,  # minimum stop epoch
    reduction_factor=2,
)

################################################
# Train the neural network
################################################

# Configuration handed to train_GM for the single (non-tuned) training run.
train_loop_config = {
    'epochs': 350,
    'lr_max': 5e-4,
    'lr_min': 2.5e-6,
    'batch_size': 8,
    'L2_norm': 0,
    'verbose': False,
    'DF': False,
    'schedule': [],
    'grid_space': 16**3,
    'learning_rate_decay': 0.5,
    'skip_spacing': 2,
    'num_repeat': 1,
    'num_block': 3,
    'maxB': extremes[2],
    'minB': extremes[3],
    'device': device,
    'train_set': train_set,
    'valid_set': valid_set,
    'num_input': 12,
    # You can even grid search various datasets in Tune.
    # "datasets": tune.grid_search(
    #         [ds1, ds2]
    #     ),
}

# Search space for the (currently commented-out) Tuner run. It reuses every
# key of train_loop_config -- including 'device', which the tuning config
# previously lacked -- and only replaces the learning-rate bounds with grids.
param_space = {
    "scaling_config": ScalingConfig(
        num_workers=1,
        use_gpu=use_gpu,
        # Ray rejects use_gpu=True combined with "GPU": 0, so the GPU count
        # has to follow use_gpu.
        resources_per_worker={"CPU": 4, "GPU": 1 if use_gpu else 0},
    ),
    # You can even grid search various datasets in Tune.
    # "datasets": {
    #     "train": tune.grid_search(
    #         [ds1, ds2]
    #     ),
    # },
    "train_loop_config": {
        **train_loop_config,
        'lr_max': tune.grid_search([1e-3, 1e-4, 5e-4]),
        'lr_min': tune.grid_search([1e-5, 2.5e-6, 2.5e-7]),
        'schedule': [],  # fresh list so the two configs do not share one object
    },
}

# Per-train-percent summary buffers; nothing in this cell writes to them.
train_percents = np.arange(1.0, 1.01, 0.1)
RMSE_history_end = np.zeros(len(train_percents))
RMSE_val_history_end = np.zeros(len(train_percents))
loss_history_end = np.zeros(len(train_percents))
iter_history_end = np.zeros(len(train_percents))
mse_history_end = np.zeros(len(train_percents))
mse_val_history_end = np.zeros(len(train_percents))
train_stop_epoch = np.zeros(len(train_percents))

scaling_config = ScalingConfig(
    num_workers=1,
    use_gpu=use_gpu,
    # resources_per_worker = {"CPU":4, "GPU":1}
)

# Keep only the most recent checkpoint.
run_config = RunConfig(checkpoint_config=CheckpointConfig(num_to_keep=1))#,storage_path='D:\Qubot\Trained_model',
                       #name='EMS_CNN_'+'s_'+str(train_loop_config['skip_spacing'])+'r_'+str(train_loop_config['num_repeat'])+'b_'+str(train_loop_config['num_block']) )
#
# def train_loop_per_worker(params):
#     train_GM(train_set=train_set, valid_set=valid_set,  device=device, config=params)
torch_config = TorchConfig(backend="gloo")
trainer = TorchTrainer(
    train_loop_per_worker=train_GM,
    train_loop_config=train_loop_config,
    torch_config=torch_config,
    scaling_config=scaling_config,
    run_config=run_config,
)
# train the model
result = trainer.fit()
#----------------------------------------------
# tuner = tune.Tuner(
#     trainer,
#     param_space = param_space,
#     tune_config =tune.TuneConfig(
#         scheduler=tune_schedule,
#         num_samples=1, # number of samples of hyperparameter space
#     ),
#     # run_config = RunConfig(checkpoint_config=CheckpointConfig(num_to_keep=2),storage_path="/home/qubot/ray_results", name="test_experiment"),
                            # checkpoint_score_attribute='rmse_val', checkpoint_score_order='min
# )
# # tune the model
# results = tuner.fit()

In [None]:
from utils import plot_ray_results
# Show the summary of the training run, then plot train/validation RMSE
# with the y-axis limited to [0, 5].
rmse_metrics = ['rmse_train', 'rmse_val']
rmse_ylim = [0, 5]
print(result)
plot_ray_results(result, metrics_names=rmse_metrics, ylim=rmse_ylim)

In [None]:
# `results` only exists when the commented-out Tuner run has been enabled.
# When only the single TorchTrainer run was executed, fall back to its
# `result` so this cell (and the plot that follows) still works.
try:
    best_result = results.get_best_result(metric='rmse_val', mode='min')
except NameError:
    best_result = result
print(best_result)

In [None]:
from utils import plot_ray_results
# Plot train/validation RMSE for the selected best result.
best_metrics = ['rmse_train', 'rmse_val']
plot_ray_results(best_result, metrics_names=best_metrics)

In [None]:
!tensorboard --logdir=~/ray_results

In [None]:

from utils import estimate_test_set 
# Build the test-set estimator from the checkpoint of the training run, the
# held-out test split and the training configuration.
test_estimator = estimate_test_set(result.checkpoint, test_set, train_loop_config)
# NOTE(review): presumably runs the restored model over test_set -- confirm in utils.
test_estimator.fit()
# NOTE(review): meaning of the argument 10 is not visible here -- confirm in utils.
test_estimator.peek_z(10)

In [None]:
# NOTE(review): presumably a 3D view of the estimate; units of `length` are
# not visible here -- confirm in utils.
test_estimator.peek_3D(length=0.15)

## Old version of training loop

In [None]:
from Neural_network import Generative_net, Generative_net_test, ResidualEMNSBlock_3d, BigBlock, weight_init, eMNS_Dataset
from Training_loop_v2 import train_GM
from tqdm import tqdm

# construct dataset
# Uses the same `x=` / `y=` keywords as the other eMNS_Dataset call in this
# notebook (this cell previously passed `train_x=` / `train_y=`).
dataset = eMNS_Dataset(
    x=current,
    y=Bfield
)

config = {
    'epochs': 350,
    'lr_max': 1e-4,
    'lr_min': 2.5e-6,
    'batch_size': 8,
    'L2_norm': 0,
    'verbose': False,
    'DF': False,
    'schedule': [],
    'grid_space': 16**3,
    'learning_rate_decay': 0.5,
    'skip_spacing': 2,
    'num_repeat': 2,
    'num_block': 3,
    'device': device,
    'num_input': 12,
}

# One summary slot per entry of train_percents (currently the single value 1.0).
train_percents = np.arange(1.0, 1.01, 0.1)
RMSE_history_end = np.zeros(len(train_percents))
RMSE_val_history_end = np.zeros(len(train_percents))
loss_history_end = np.zeros(len(train_percents))
iter_history_end = np.zeros(len(train_percents))
mse_history_end = np.zeros(len(train_percents))
mse_val_history_end = np.zeros(len(train_percents))
train_stop_epoch = np.zeros(len(train_percents))

################################################
# Train the neural network
################################################
# NOTE(review): train_percent is only printed; the split below always uses
# 90% / 10% of the full dataset regardless of its value.
for index, train_percent in enumerate(train_percents):
    epoch_stop = 0
    print('train_percent', train_percent)

    # split the dataset into train and validation
    train_set, valid_set = torch.utils.data.random_split(dataset, [0.9, 0.1])

    # normalization, computed from the training indices only
    extremes = dataset.train_norm(train_indices=train_set.indices)

    config['maxB'] = extremes[2]
    config['minB'] = extremes[3]
    config['train_set'] = train_set
    config['valid_set'] = valid_set

    print("----------------------------")

    print("----------------------------")
    # test_loader = torch.utils.data.DataLoader(dataset=test_set,batch_size=batch_size,shuffle=True)

    # NOTE(review): this expects train_GM to return eight values when called
    # directly; the same function is also used as the Ray worker loop
    # elsewhere in this notebook -- confirm it still returns them.
    (RMSE_history, RMSE_val_history, loss_history, iter_history,
     mse_history, mse_val_history, epoch_stop, Rsquare) = train_GM(config=config)

    # save RMSE and loss at the early-stopping epoch
    RMSE_history_end[index] = RMSE_history[epoch_stop]
    RMSE_val_history_end[index] = RMSE_val_history[epoch_stop]
    loss_history_end[index] = loss_history[epoch_stop]
    iter_history_end[index] = iter_history[epoch_stop]
    mse_history_end[index] = mse_history[epoch_stop]
    mse_val_history_end[index] = mse_val_history[epoch_stop]
    # record the stop epoch too (the buffer was allocated but never written)
    train_stop_epoch[index] = epoch_stop
    print('training stop at epoch:', epoch_stop)
    print('Rsquare:', Rsquare)


In [None]:
# Pickle the whole `Generative_network` module object to 'EMS_CNN_ETH.pt'.
# NOTE(review): within this notebook `Generative_network` is the instance built
# in the network-configuration cell; it is never passed to train_GM or
# TorchTrainer, so this looks like it saves the untrained network rather than
# the trained model -- confirm and save the trained weights instead if so.
torch.save(Generative_network, 'EMS_CNN_ETH.pt')	# original intent: store the parameters of the best model so far

In [None]:

import matplotlib.pyplot as plt
import numpy as np
def _plot_pair(x_values, first_curve, second_curve):
    """Draw two curves over the same x values: circles for the first, stars for the second."""
    plt.plot(x_values, first_curve, '-o')
    plt.plot(x_values, second_curve, '-*')


# Moving-average kernel of width `ave_site` used to smooth the loss curve.
ave_site = 5
ave_kernel = np.full(ave_site, 1/ave_site)
loss_history_conv = np.convolve(loss_history.numpy(), ave_kernel, mode='same')

# Figure 1: raw and smoothed loss over all recorded iterations.
plt.title('loss')
_plot_pair(iter_history, loss_history, loss_history_conv)
plt.legend(['loss', 'loss_conv'])
plt.xlabel('iterations')
plt.ylabel('loss')
plt.ylim([0, 10])
plt.show()

# Only the part of each history before the stop epoch is plotted below.
until_stop = slice(0, epoch_stop)

# Figure 2: train vs. validation RMSE.
plt.title('Train and Val RMSE(sample_num=1000)')
_plot_pair(iter_history[until_stop], RMSE_history[until_stop], RMSE_val_history[until_stop])
# plt.plot(2e-5*np.arange(epoch_stop),RMSE_history[0:epoch_stop]*1000,'-o')
# plt.plot(2e-5*np.arange(epoch_stop),RMSE_val_history[0:epoch_stop]*1000,'-*')
# plt.ylim([15,20])
plt.legend(['train CNN', 'val CNN'])
plt.xlabel('iterations')
plt.ylabel('RMSE(mT)')
plt.ylim([0, 100])
plt.grid()
plt.show()

# Figure 3: train vs. validation MSE, scaled by 1e6.
plt.title('Train and Val loss(sample_num=1000)')
_plot_pair(
    iter_history[until_stop],
    mse_history[until_stop]*1e6,
    mse_val_history[until_stop]*1e6,
)
plt.legend(['train CNN', 'val CNN'])
plt.xlabel('iterations')
plt.ylabel('mse(mT^2)')
plt.grid()
plt.show()
print(epoch_stop)

