# Race to low rms

Import latin-hypercube test set

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import netcdf_read_write as nrw
import training_data_generation as tdg
import utils_optimizers as uopt
import optimize as opt
%matplotlib inline
# Switch matplotlib to interactive mode.
plt.ion()
# Directory name handed to tdg.define_system_params when loading the initial data set.
run_dir = "Data_input"
# File name stored under sys_params["trainingdata_filename"] before each data import.
trainingdata_filename = "flipped_training_data_and_labels.nc"
# Number of modes: sets the x-extent of the mode plot and is passed to the optimizer setup.
num_modes = 30
# Number of inputs: passed to the optimizer setup and used as the row count of X_all.
num_inputs = 16
# Seed forwarded to uopt.define_optimizer_parameters.
random_seed = 12345

In [None]:
# Build the system parameters for run_dir, point them at the training-data
# file, and import the data set.
sys_params = tdg.define_system_params(run_dir)
sys_params["trainingdata_filename"] = trainingdata_filename
# Later cells index these as X_all[:, example] and Y_all[:, example], i.e. one
# column per example; avg_powers_all is only forwarded to the optimizers here.
X_all, Y_all, avg_powers_all = nrw.import_training_data(sys_params)

In [None]:
# Root-sum-square of the mode amplitudes, one value per example (column of Y_all).
target_all = np.sqrt(np.sum(np.square(Y_all), axis=0))

# Summary of the target over the data set, printed in percent.
# NOTE: despite its name, target_variance holds the standard deviation.
target_mean = np.mean(target_all)
target_variance = np.std(target_all)
print(100.0 * target_mean, 100.0 * target_variance)

# Per-input mean and standard deviation across all examples, printed in percent.
input_means = np.mean(X_all, axis=1)
input_standard_deviation = np.std(X_all, axis=1)
print(100.0 * input_means, 100.0 * input_standard_deviation)

In [None]:
# Shapes of a single input vector and a single output (mode) vector.
print(np.shape(X_all[:, 0]), np.shape(Y_all[:, 0]))

# Report two candidate "best" examples in turn: the one with the lowest mean
# amplitude, then the one with the lowest target. The loop variable is left
# holding the second candidate (argmin of target_all), which later cells use.
for mindex in (np.argmin(np.mean(Y_all, axis=0)), np.argmin(target_all)):
    print(mindex)
    print(np.sum(Y_all[:, mindex]))
    print(target_all[mindex])

# Number of examples in the initial data set (columns of X_all).
num_init_examples = np.shape(X_all)[1]
print(num_init_examples)

In [None]:
# Plot the mode amplitudes (in percent) of the example selected by mindex.
fig = plt.figure()
ax = plt.axes()

mode_numbers = np.arange(num_modes)
tick_step = int(num_modes / 5)  # five tick intervals across the mode range

ax.plot(mode_numbers, Y_all[:, mindex] * 100.0)
ax.set_xticks(range(0, num_modes + 1, tick_step))
ax.set_xlim([0, num_modes])
ax.set_title("Unweighted Modes")
ax.set_xlabel("l mode")
ax.set_ylabel(r"amplitude ($\%$)");

## Evaluate Minimisation

In [None]:
# Runs to compare. The three lists are index-aligned: entry k of
# optimization_label (and colours) describes entry k of dirs_for_comparison.
dirs_for_comparison = [
    "Data_221122a_ga_1kex_bo_gd",
    "Data_221125d_all_gradientdescent_1000ex",
    "Data_221116c_ga_10kex_100x100_fewer_parents",
    "Data_221201a_all_bayesian_5000ex_gd_1000ex",
    # disabled: "Data_221125c_all_bayesian_5000ex",
]
# One line colour per run (one spare entry for the disabled run).
colours = [
    "b",
    "g",
    "r",
    "m",
    "c",
]
optimization_label = [
    "Mixed",
    "All gradient descent",
    "All genetic algorithm",
    "Bayesian with gradient descent",
    # disabled: "All bayesian",
]

In [None]:
def _moving_window_stats(values, window_size):
    """Return the mean and minimum of every full sliding window of `values`.

    Parameters
    ----------
    values : 1-D array-like
        Sequence to scan, in the order the samples were produced.
    window_size : int
        Number of consecutive samples in each window.

    Returns
    -------
    (means, minima) : tuple of 1-D arrays
        Each has ``len(values) - window_size + 1`` entries; both are empty
        when `values` is shorter than one window.
    """
    values = np.asarray(values)
    if values.size < window_size:
        # sliding_window_view raises when the window exceeds the data length;
        # return empty results instead so the caller can still plot/label.
        return np.array([]), np.array([])
    windows = np.lib.stride_tricks.sliding_window_view(values, window_size)
    return windows.mean(axis=1), windows.min(axis=1)


# The window size is embedded in the y-axis label, so it has to be defined
# before the label is built. (It used to be assigned only inside the loop,
# which raised NameError when this cell ran on a fresh kernel.)
window_size = 100

plt.figure(figsize=(8, 6), dpi=80)
ax = plt.axes()

plt.xlabel("Number of simulations")
plt.ylabel(r"RMS Statistics for moving window size: " + str(window_size) + r" ($\%$)")

# NOTE(review): run_dir, sys_params, X_all, Y_all, avg_powers_all and the
# target_*/input_* statistics are reassigned as notebook globals on every
# iteration, exactly as before. After this cell they describe the LAST
# directory in dirs_for_comparison, and later cells read X_all, Y_all,
# avg_powers_all and target_mean - confirm that this is intended.
for run_dir, colour, label in zip(dirs_for_comparison, colours, optimization_label):
    sys_params = tdg.define_system_params(run_dir)
    sys_params["trainingdata_filename"] = trainingdata_filename
    X_all, Y_all, avg_powers_all = nrw.import_training_data(sys_params)

    # Root-sum-square of the mode amplitudes, one value per example.
    target_all = np.sqrt(np.sum(Y_all**2, axis=0))

    target_mean = np.mean(target_all)
    target_variance = np.sqrt(np.var(target_all))

    input_means = np.mean(X_all, axis=1)
    input_standard_deviation = np.sqrt(np.var(X_all, axis=1))

    # Moving mean and moving minimum of the target over window_size examples.
    moving_averages, moving_min = _moving_window_stats(target_all, window_size)

    # Same colour for both curves of a run: dotted = mean, solid = minimum.
    plt.semilogy(moving_averages * 100.0, linestyle="dotted", color=colour, label=label + " (mean)")
    plt.semilogy(moving_min * 100.0, linestyle="solid", color=colour, label=label + " (min)")

plt.legend()
# The file extension comes from the sys_params of the last run loaded above.
plt.savefig("Compare_optimization_techniques" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight')

## Method 1, Brute force

In [None]:
##

## Method 2, Gradient descent

The partial derivatives are estimated from a stencil of 2*16 = 32 points (2 points in every dimension) around the current minimum. These points can be evaluated with either a NN or Ifriit, depending on speed.

In [None]:
# Gradient-descent run: configure the optimizer, wrap the current data, and
# hand both to the gradient-descent wrapper.
root_dir = "Data_output"
# NOTE(review): 11 here, whereas the other optimizer cells use 10 - confirm intentional.
num_parallel = 11
n_iter = 10
optimizer_params = uopt.define_optimizer_parameters(root_dir, num_inputs, num_modes,
                                                    num_init_examples, n_iter, num_parallel, random_seed)
# NOTE(review): X_all, Y_all, avg_powers_all and num_init_examples are notebook
# globals, so they hold whatever the most recently executed cells assigned
# (the comparison-plot cell reloads X_all/Y_all per directory) - confirm they
# refer to the intended starting data set before running.
dataset = uopt.define_optimizer_dataset(X_all, Y_all, avg_powers_all)
gd_params = uopt.define_gradient_descent_params(num_parallel)
# The wrapper's return value replaces the dataset object built above.
dataset = opt.wrapper_gradient_descent(dataset, gd_params, optimizer_params)

## Method 3, Use surrogate NN to pick low RMS from random inputs

## Method 4, Use inverse NN to identify low RMS by inputting other low RMS cases

## Method 5, Genetic algorithm

An iterative procedure that keeps the best features of the first generation: inputs are mutated and mixed between the best candidates to produce each subsequent generation.

In [None]:
# Settings for the genetic-algorithm run.
root_dir = "Data_output"
n_iter = 10
init_points = 10
num_parallel = 10

# Start from an empty data set: zero examples (columns), with one row per
# input in X_all and one row per mode in Y_all.
X_all = np.empty((num_inputs, 0), dtype=np.int64)
Y_all = np.empty((num_modes, 0), dtype=np.int64)
avg_powers_all = np.empty(0, dtype=np.int64)

# A tenth of the initial population, rounded down to an even number and
# never fewer than two.
num_parents_mating = int(init_points / 10.0)
num_parents_mating -= num_parents_mating % 2
num_parents_mating = max(num_parents_mating, 2)

In [None]:
# Genetic-algorithm run: configure the optimizer with no initial examples,
# wrap the (empty) data arrays, and hand both to the genetic-algorithm wrapper.
num_init_examples = 0 # genetic algorithm generates its own initial data
opt_params = uopt.define_optimizer_parameters(root_dir, num_inputs, num_modes,
                                              num_init_examples, n_iter, num_parallel, random_seed)
dataset = uopt.define_optimizer_dataset(X_all, Y_all, avg_powers_all)
ga_params = uopt.define_genetic_algorithm_params(init_points, num_parents_mating)
# The wrapper's return value replaces the dataset object built above.
dataset = opt.wrapper_genetic_algorithm(dataset, ga_params, opt_params)

## Method 6, Bayesian optimization

A Gaussian-process surrogate and Bayesian optimization are used with multiple sources of information. First we create a Bayesian model from the true data points and select new simulations based on it. The model (Kriging method?) could use a "Gaussian process approximation" to reduce computational expense.

Future extension: multi-source Bayesian optimization (Ifriit is the high-quality source and the NN is the low-quality source).

In [None]:
# Bayesian-optimisation run: configure the optimizer, wrap the current data,
# and hand both to the Bayesian-optimisation wrapper.
root_dir = "Data_output"
n_iter = 10
num_parallel = 10
# Minus half of the mean target; passed to define_bayesian_optimisation_params.
# NOTE(review): target_mean is a notebook global set by earlier cells - confirm
# it was computed from the intended data set.
target_set_undetermined = - target_mean / 2.0
# NOTE(review): num_init_examples, X_all, Y_all and avg_powers_all are also
# notebook globals; if the genetic-algorithm cells ran last they are 0 and
# empty arrays respectively - re-run the data-loading cells first if this run
# should start from existing data.
opt_params = uopt.define_optimizer_parameters(root_dir, num_inputs, num_modes,
                                              num_init_examples, n_iter, num_parallel, random_seed)
dataset = uopt.define_optimizer_dataset(X_all, Y_all, avg_powers_all)
bo_params = uopt.define_bayesian_optimisation_params(target_set_undetermined)
# The wrapper's return value replaces the dataset object built above.
dataset = opt.wrapper_bayesian_optimisation(dataset, bo_params, opt_params)

## Method 7, Grid search algorithm

Split the entire search space into a grid (start coarse, with 2 or 3 cells per dimension; 3 cells per dimension gives 3^16 ≈ 43M cells). Evaluate each cell based on the data points inside it or on the 8 nearest neighbours.

## Method 8, Network search algorithm

Find a gradient between all data points, use this information to initialize gradient descent

## Method 9, Principal Component Analysis (PCA)

Combine with gradient descent for faster convergence? Enables plotting of dataset in 2D

## Method 10, Transfer Learning 

Generate low quality large dataset (1-50M examples?) using surrogate NN and use this for transfer learning. This might help to evaluate at what stage transfer learning becomes effective (can we use it with a dataset of 1000 or 10000?)