# Race to low rms

Import the Latin-hypercube test set.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import netcdf_read_write as nrw
import training_data_generation as tdg
import utils_optimizers as uopt
import optimize as opt
import healpy as hp
import utils_intensity_map as uim
import utils_healpy as uhp
import utils_deck_generation as idg
%matplotlib inline
# Switch matplotlib to interactive mode.
plt.ion()
# Element-wise constructor that builds complex values from (real, imag) arrays.
np_complex = np.vectorize(complex)
# Directory of the run analysed in the first part of this notebook.
run_dir = "Data"
# NOTE(review): num_modes, num_inputs and random_seed are not referenced in the
# cells visible here - confirm whether later cells or helper modules rely on them.
num_modes = 30
num_inputs = 16
random_seed = 12345
# When True the drive map is shown in W/sr instead of W/cm^2 (see the map-plot cell).
display_steradians = False

# System parameters (paths, file names, ...) for run_dir.
sys_params = tdg.define_system_params(run_dir)

In [None]:
# Load the dataset and its accompanying parameter dictionaries for this run.
dataset, dataset_params, deck_gen_params, facility_spec = idg.load_data_dicts_from_file(sys_params)

# Optimizer parameters; reused by every fitness evaluation in this notebook.
opt_params = uopt.define_optimizer_parameters(
    run_dir, 0, 0, dataset_params, facility_spec, sys_params
)

# The leading dimension of "rms" is taken as the number of simulations.
rms_shape = np.shape(dataset["rms"])
print(rms_shape)
num_sims = rms_shape[0]

In [None]:
# --- Outputs missing from the dataset ------------------------------------
# An rms entry equal to zero is counted as "no value stored".
num_evaluated = dataset["num_evaluated"]
zero_rms_mask = dataset["rms"] == 0
num_zero_vals = np.count_nonzero(zero_rms_mask)
ind = np.where(zero_rms_mask)

total_outputs = np.shape(dataset["rms"])[0] * np.shape(dataset["rms"])[1]
num_nonzero_outputs = total_outputs - num_zero_vals

# Two outputs are expected per evaluated configuration.
num_failed = (2 * num_evaluated) - num_nonzero_outputs
print("Number of failed outputs in the dataset (2 per config): ", num_failed)
print(ind[0])

# --- Outputs flagged as unphysical ---------------------------------------
# Entries whose second avg_flux column exceeds 100 are reported as bugs.
ind_bug = np.where(dataset["avg_flux"][:, 1] > 100.0)
num_failed = ind_bug[0].size
print("Number of bugs occured in intensity/drive output (caused by CBET convergence): ", num_failed)
print(ind_bug[0])

## Evaluate distribution

In [None]:
# Column of "rms" / "avg_flux" that the per-profile statistics below refer to.
ind_profile = 0

# Per-input statistics: both are reduced over axis 0 so that the mean and the
# standard deviation have matching shapes (one value per input parameter).
inputs_mean = np.mean(dataset["input_parameters"], axis=0)
input_standard_deviation = np.std(dataset["input_parameters"], axis=0)
#print(inputs_mean, input_standard_deviation)
fitness = uopt.fitness_function(dataset, opt_params)

# Root-mean-square of the rms values across axis 1 of dataset["rms"].
# This single, general definition is used for both the printed summaries and
# the plot below (it equals sqrt((rms0**2 + rms1**2) / 2) for two columns).
number_of_timesteps = np.shape(dataset["rms"])[1]
rms_of_rms = np.sqrt(np.sum(dataset["rms"][:, :]**2, axis=1) / float(number_of_timesteps))

# Simulation with the lowest rms in the selected profile column.
argmin_rms = np.argmin(dataset["rms"][:, ind_profile])
print(argmin_rms, fitness[argmin_rms], rms_of_rms[argmin_rms], dataset["avg_flux"][argmin_rms, ind_profile])
rms_mean = np.mean(dataset["rms"][:, ind_profile])
rms_standard_deviation = np.std(dataset["rms"][:, ind_profile])
print(rms_mean*100.0, rms_standard_deviation*100.0)

argmax_fitness = np.argmax(fitness)
print("Max fitness: ", argmax_fitness, fitness[argmax_fitness], rms_of_rms[argmax_fitness], dataset["avg_flux"][argmax_fitness, ind_profile])

argmax_drive = np.argmax(dataset["avg_flux"][:, ind_profile])
print("Max drive: ", argmax_drive, fitness[argmax_drive], rms_of_rms[argmax_drive], dataset["avg_flux"][argmax_drive, ind_profile])

# From here on argmin_rms refers to the minimum of the combined rms_of_rms.
argmin_rms = np.argmin(rms_of_rms)
print("Min RMS: ", argmin_rms, fitness[argmin_rms], rms_of_rms[argmin_rms], dataset["avg_flux"][argmin_rms, ind_profile])

argmin_rms0 = np.argmin(dataset["rms"][:, 0])
print("Min RMS0: ", argmin_rms0, fitness[argmin_rms0], rms_of_rms[argmin_rms0], dataset["avg_flux"][argmin_rms0, ind_profile])

# Indices of the num_of_max largest fitness values (argpartition does not
# return them in sorted order).
num_of_max = 3
ind = np.argpartition(fitness, -num_of_max)[-num_of_max:]
print("Max 3 fitnesses: ", ind)
print(fitness[ind])
print(dataset["rms"][ind, ind_profile])
print(dataset["avg_flux"][ind, ind_profile])

# Plot every quantity normalised to its value at the best-fitness simulation.
arg_norm = argmax_fitness
start_ind = 0
# One integer x value per simulation index; np.arange (rather than linspace
# with an inclusive end point) keeps the markers aligned with the indices.
x_markers = np.arange(start_ind, num_sims)
plt.plot(x_markers, fitness[start_ind:]/fitness[arg_norm], label="fitness")
plt.plot(x_markers, rms_of_rms[start_ind:]/rms_of_rms[arg_norm], label="rms")
# NOTE(review): the sixth power applied to the avg_flux ratio is kept as in the
# original; confirm the scaling intended for the "ablation pressure" curve.
plt.plot(x_markers, (dataset["avg_flux"][start_ind:, ind_profile]/dataset["avg_flux"][arg_norm, ind_profile])**6, label="ablation pressure")
plt.legend()

fitness_mean = np.mean(fitness)
fitness_standard_deviation = np.std(fitness)
print(fitness_mean, fitness_standard_deviation, np.max(fitness), np.min(fitness))
argmin_fitness = np.argmin(fitness)
print(argmin_fitness, dataset["rms"][argmin_fitness, ind_profile], dataset["avg_flux"][argmin_fitness, ind_profile])

In [None]:
# Index of the simulation to display (here: the one with the highest fitness).
arg_plot = argmax_fitness

#print(dataset["input_parameters"][arg_plot,:])
#print(deck_gen_params["sim_params"][arg_plot,:])

avg_flux = dataset["avg_flux"][arg_plot, ind_profile]
real_modes = dataset["real_modes"][arg_plot, ind_profile, :]
imag_modes = dataset["imag_modes"][arg_plot, ind_profile, :]
rms = dataset["rms"][arg_plot, ind_profile]

# Rebuild the normalised map from its modes once, then scale it by the mean
# flux; both maps are reused below instead of being recomputed.
intensity_map_normalized = uhp.modes2imap(real_modes, imag_modes, dataset_params["imap_nside"])
intensity_map_sr = (intensity_map_normalized+1)*avg_flux

#print('Mean intensity per steradian, {:.2e}W/sr'.format(avg_flux))
print("Initial rms : ", dataset["rms"][arg_plot, 0]*100.0, "%")
print("Ablation pressure rms: ", dataset["rms"][arg_plot, 1]*100.0, "%")

if ind_profile == 0:
    print('Mean intensity per steradian, {:.2e}W/sr'.format(avg_flux))
    # Integrate the mean flux over the full sphere (4*pi sr) and express it in TW.
    print('Power deposited {:.2f}TW, '.format(avg_flux * 4.0 * np.pi / 1.0e12))
    #print('Power emitted {:.2f}TW, '.format(facility_spec['default_power'] * facility_spec['nbeams']))
    if display_steradians:
        drive_map = intensity_map_sr
        drive_units = r"$\rm{W/sr}$"
    else:
        # presumably target_radius is in microns, so /10000 converts it to cm
        # before dividing by r^2 - TODO confirm against facility_spec.
        drive_map = intensity_map_sr / (facility_spec['target_radius'] / 10000.0)**2
        drive_units = r"$\rm{W/cm^2}$"
else:
    drive_map = intensity_map_sr
    drive_units = r"$\rm{Mbar}$"
    print('Mean ablation pressure: {:.2f}Mbar, '.format(avg_flux))

hp.mollview(drive_map, unit=drive_units, flip="geo")
hp.graticule()

In [None]:
# Combine the real/imaginary mode arrays of the selected simulation and
# convert them into a power spectrum.
LMAX = dataset_params["LMAX"]
complex_modes = np_complex(real_modes, imag_modes)
power_spectrum = uhp.alms2power_spectrum(complex_modes, LMAX)
print("The rms is: ", np.sqrt(np.sum(power_spectrum))*100.0, "%")

fig = plt.figure()
ax = plt.axes()
# NOTE(review): assumes power_spectrum has exactly LMAX entries - confirm
# against uhp.alms2power_spectrum.
plt.plot(np.arange(LMAX), np.sqrt(power_spectrum) * 100.0)
# Roughly five major ticks; the step is clamped to 1 so that LMAX < 5 does not
# produce a zero step (which range() rejects with ValueError).
tick_step = max(1, int(LMAX // 5))
ax.set_xticks(range(0, LMAX+1, tick_step))
plt.xlim([0, LMAX])
plt.title("Modes")
plt.xlabel("l mode")
plt.ylabel(r"amplitude ($\%$)");

## Compare Optimization Techniques

In [None]:
# Run directories to compare, with the legend label used for each of them
# (the two lists are matched by position).
dirs_for_comparison = ["Data", "Data2"]
optimization_label = ["Data label", "Data2 label"]

# Line colours are picked from matplotlib's Tableau palette by position.
colours = mcolors.TABLEAU_COLORS
colour_keys = list(colours)

# Upper bound on the number of window positions drawn per dataset.
ind_cutoff = 100_000

In [None]:
# Figure 1: sliding-window minimum rms; figure 2: sliding-window maximum fitness.
fig1 = plt.figure(figsize=(6, 6), dpi=80)
ax1 = plt.axes()

fig2 = plt.figure(figsize=(6, 6), dpi=80)
ax2 = plt.axes()

# Number of consecutive simulations in each sliding window. Defined before the
# loop because the axis labels below use it even if no directory is processed.
window_size = 100

for data_set, run_dir in enumerate(dirs_for_comparison):
    sys_params = tdg.define_system_params(run_dir)

    dataset = nrw.read_general_netcdf(sys_params["data_dir"] + "/" + sys_params["trainingdata_filename"])
    # NOTE(review): opt_params comes from the first run directory loaded in this
    # notebook, so every dataset is scored with the same optimizer parameters.
    fitness = uopt.fitness_function(dataset, opt_params)

    # Slide a window of window_size simulations one step at a time and record
    # the best (minimum) rms and best (maximum) fitness inside each window.
    num_windows = max(len(fitness) - window_size + 1, 0)
    rms_profile = dataset["rms"][:, ind_profile]
    moving_min = np.array([np.min(rms_profile[i:i + window_size]) for i in range(num_windows)])
    moving_max = np.array([np.max(fitness[i:i + window_size]) for i in range(num_windows)])

    # 1-based window counter used as the x axis.
    num_examples = len(moving_min)
    array_examples = np.arange(1, num_examples + 1)

    # Wrap around the palette so more datasets than colours cannot overflow it.
    line_colour = colours[colour_keys[data_set % len(colour_keys)]]
    line_label = optimization_label[data_set]

    ax1.semilogy(array_examples[:ind_cutoff], moving_min[:ind_cutoff] * 100.0, linestyle="solid", color=line_colour, label=line_label)
    ax2.plot(array_examples[:ind_cutoff], moving_max[:ind_cutoff], linestyle="solid", color=line_colour, label=line_label)
ax1.legend();
ax1.set_ylim(0.9, 30)
ax1.set_xlabel("Number of simulations")
ax1.set_ylabel(r"Minimum RMS for moving window size: " + str(window_size) + r" ($\%$)");
# The file extension comes from the sys_params of the last directory processed.
fig1.savefig("Compare_optimization_techniques_rms" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight')
ax2.legend();
ax2.set_xlabel("Number of simulations")
ax2.set_ylabel(r"Fitness function for moving window size: " + str(window_size))
fig2.savefig("Compare_optimization_techniques_fitness" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight');
#fig2.savefig("Compare_optimization_techniques" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight')

## Method 1, Brute force

In [None]:
##

## Method 2, Gradient descent

The partial derivatives are estimated from 2 × 16 = 32 points (two points along each of the 16 input dimensions) around the current minimum. These points can be evaluated with either a NN or Ifriit, depending on speed.

## Method 3, Use surrogate NN to pick low RMS from random inputs

## Method 4, Use inverse NN to identify low rms by inputting other low rms cases

## Method 5, Genetic algorithm

Iterative procedure that takes the best features of the first generation: mutate and mix the inputs of the best candidates to produce each subsequent generation.

## Method 6, Bayesian optimization

A Gaussian process surrogate and Bayesian optimization, used with multiple sources of information. First we create a Bayesian model from the true data points and select new simulations based on it. The model (Kriging method?) could use a "Gaussian process approximation" to reduce computational expense.

Future extension: multi-source Bayesian optimization (Ifriit is the high-quality source and the NN is the low-quality source).

## Method 7, Grid search algorithm

Split the entire search space into a grid (start coarse, with 2 or 3 cells per dimension; 3^16 ≈ 43M cells). Evaluate each cell using the data points inside it or its 8 nearest neighbours.

## Method 8, Network search algorithm

Find a gradient between all data points, use this information to initialize gradient descent

## Method 9, Principal Component Analysis (PCA)

Combine with gradient descent for faster convergence? Enables plotting of dataset in 2D

## Method 10, Transfer Learning 

Generate low quality large dataset (1-50M examples?) using surrogate NN and use this for transfer learning. This might help to evaluate at what stage transfer learning becomes effective (can we use it with a dataset of 1000 or 10000?)