# Race to low rms

Import the Latin-hypercube test set.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import netcdf_read_write as nrw
import training_data_generation as tdg
import utils_optimizers as uopt
import optimize as opt
import healpy as hp
import utils_intensity_map as uim
import utils_healpy as uhp
%matplotlib inline
# Switch matplotlib to interactive mode.
plt.ion()

# Run configuration.
run_dir = "Data"
random_seed = 12345
num_inputs = 16
num_modes = 30

# Element-wise builder of complex values from (real, imaginary) pairs.
np_complex = np.vectorize(complex)

# System parameters (paths, file names, ...) for the chosen run directory.
sys_params = tdg.define_system_params(run_dir)

In [None]:
# Read the dataset description, the facility specification and the training
# data; all three files sit directly under the run's root directory.
dataset_params = nrw.read_general_netcdf(
    "/".join((sys_params["root_dir"], sys_params["dataset_params_filename"]))
)
facility_spec = nrw.read_general_netcdf(
    "/".join((sys_params["root_dir"], sys_params["facility_spec_filename"]))
)
dataset = nrw.read_general_netcdf(
    "/".join((sys_params["root_dir"], sys_params["trainingdata_filename"]))
)

# Optimizer settings, built from the number of inputs and the random seed
# stored with the dataset.
opt_params = uopt.define_optimizer_parameters(
    run_dir,
    dataset_params["num_input_params"],
    0,
    0,
    dataset_params["random_seed"],
    facility_spec,
)

# Quick sanity check on the amount of data loaded.
print(np.shape(dataset["rms"]))

## Evaluate distribution

In [None]:
# Per-parameter statistics of the inputs. Rows of "input_parameters" are
# selected by example index elsewhere in this notebook, so axis 0 runs over
# the examples; reduce over it for both the mean and the standard deviation
# so the two printed quantities describe the same thing.
inputs_mean = np.mean(dataset["input_parameters"], axis=0)
input_standard_deviation = np.std(dataset["input_parameters"], axis=0)
print(inputs_mean, input_standard_deviation)

# np.argmin without an axis returns an index into the *flattened* array, but
# "rms" is indexed as [example, profile] in this notebook. Convert the flat
# index back to coordinates and keep the example (row) index, so the lookup
# below stays valid when there is more than one profile per example.
argmin_rms = np.unravel_index(np.argmin(dataset["rms"]), np.shape(dataset["rms"]))[0]
print(argmin_rms, dataset["rms"][argmin_rms])
rms_mean = np.mean(dataset["rms"])
rms_standard_deviation = np.std(dataset["rms"])
# Reported as percentages.
print(rms_mean*100.0, rms_standard_deviation*100.0)

fitness = uopt.fitness_function(dataset, opt_params)
# NOTE(review): the search for the best fitness is restricted to the first
# 1100 entries - presumably the size of the initial sample; confirm.
argmax_fitness = np.argmax(fitness[:1100])
print(argmax_fitness, dataset["rms"][argmax_fitness])
fitness_mean = np.mean(fitness)
fitness_standard_deviation = np.std(fitness)
print(fitness_mean, fitness_standard_deviation, np.max(fitness), np.min(fitness))


In [None]:
# Profile of the best-fitness example to inspect.
ind_profile = 0

# Input parameters of the best-fitness example.
print(dataset["input_parameters"][argmax_fitness, :])

# Real/imaginary mode coefficients, then the scalar mean flux and rms, for
# the selected example and profile.
real_modes, imag_modes = (
    dataset[key][argmax_fitness, ind_profile, :] for key in ("real_modes", "imag_modes")
)
avg_flux, rms = (
    dataset[key][argmax_fitness, ind_profile] for key in ("avg_flux", "rms")
)

print('Mean intensity per steradian, {:.2e}W/sr'.format(avg_flux))
print("The LLE quoted rms cumulative over all modes is: ", rms*100.0, "%")

# Rebuild the normalized map from its modes, then rescale it to W/sr by
# shifting by one and multiplying by the mean flux.
intensity_map_normalized = uhp.modes2imap(
    real_modes,
    imag_modes,
    dataset_params["imap_nside"],
)
intensity_map_sr = (intensity_map_normalized+1)*avg_flux

# Mollweide projection with a graticule overlay.
hp.mollview(
    intensity_map_sr,
    unit=r"$\rm{W/sr}$",
    flip="geo",
)
hp.graticule()

In [None]:
# Power spectrum of the selected profile, printed as a total rms and plotted
# per l mode (both in percent).
LMAX = dataset_params["LMAX"]  # read once and reused for the spectrum and the axes

complex_modes = np_complex(real_modes, imag_modes)
power_spectrum = uhp.alms2power_spectrum(complex_modes, LMAX)
print("The rms is: ", np.sqrt(np.sum(power_spectrum))*100.0, "%")

fig, ax = plt.subplots()
ax.plot(np.arange(LMAX), np.sqrt(power_spectrum) * 100.0)
# Roughly five tick intervals across the axis. The step is clamped to at
# least 1 because range() raises ValueError for a step of 0 (LMAX < 5).
ax.set_xticks(range(0, LMAX+1, max(1, int(LMAX/5))))
ax.set_xlim([0, LMAX])
ax.set_title("Modes")
ax.set_xlabel("l mode")
ax.set_ylabel(r"amplitude ($\%$)");

## Compare Optimization Techniques

In [None]:
# Run directories to compare, with the legend label for each one
# (the two lists are matched by position).
dirs_for_comparison = ["Data", "Data2"]
optimization_label = ["Data label", "Data2 label"]

# Line colours are taken, in order, from matplotlib's Tableau palette.
colours = mcolors.TABLEAU_COLORS
colour_keys = list(colours)

# Only the first `ind_cutoff` points of each curve are plotted.
ind_cutoff = 100_000

In [None]:
# Compare the runs listed in `dirs_for_comparison`: for each run, slide a
# window over the simulations in stored order and plot the minimum rms
# (figure 1, log scale, percent) and the maximum fitness (figure 2) found
# inside each window.

# Number of consecutive simulations in each sliding window. Defined before
# the loop because it is loop-invariant and also used in the axis labels.
window_size = 100

fig1 = plt.figure(figsize=(6, 6), dpi=80)
ax1 = plt.axes()

fig2 = plt.figure(figsize=(6, 6), dpi=80)
ax2 = plt.axes()

for data_set, run_dir in enumerate(dirs_for_comparison):
    sys_params = tdg.define_system_params(run_dir)

    dataset = nrw.read_general_netcdf(sys_params["root_dir"] + "/" + sys_params["trainingdata_filename"])
    fitness = uopt.fitness_function(dataset, opt_params)

    # One window per start index, shifted right by one position each time;
    # no windows at all when the run has fewer than `window_size` entries.
    num_windows = max(len(fitness) - window_size + 1, 0)
    moving_min = np.array(
        [np.min(dataset["rms"][i:i + window_size]) for i in range(num_windows)]
    )
    moving_max = np.array(
        [np.max(fitness[i:i + window_size]) for i in range(num_windows)]
    )

    # 1-based window counter used as the x axis.
    num_examples = len(moving_min)
    array_examples = np.arange(1, num_examples + 1)

    colour = colours[colour_keys[data_set]]
    label = optimization_label[data_set]
    ax1.semilogy(
        array_examples[:ind_cutoff],
        moving_min[:ind_cutoff] * 100.0,
        linestyle="solid",
        color=colour,
        label=label,
    )
    ax2.plot(
        array_examples[:ind_cutoff],
        moving_max[:ind_cutoff],
        linestyle="solid",
        color=colour,
        label=label,
    )

ax1.legend()
ax1.set_xlabel("Number of simulations")
ax1.set_ylabel(r"Minimum RMS for moving window size: " + str(window_size) + r" ($\%$)")
# The file extension comes from the system parameters of the last run loaded.
fig1.savefig("Compare_optimization_techniques" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight')
ax2.legend()
ax2.set_xlabel("Number of simulations")
ax2.set_ylabel(r"Fitness function for moving window size: " + str(window_size));
# NOTE: enabling the line below would overwrite the file saved from fig1,
# because it uses the same file name.
#fig2.savefig("Compare_optimization_techniques" + sys_params["plot_file_type"], dpi=300, bbox_inches='tight')

## Method 1, Brute force

In [None]:
##

## Method 2, Gradient descent

The partial derivatives are estimated from 2*16 = 32 points (2 points along each dimension) around the current minimum. These points can be evaluated with either a NN or Ifriit, depending on speed.

## Method 3, Use surrogate NN to pick low RMS from random inputs

## Method 4, Use inverse NN to identify low rms by inputting other low rms cases

## Method 5, Genetic algorithm

Iterative procedure taking best features of first generation. Mutate and mix inputs between the best and produce subsequent generation.

## Method 6, Bayesian optimization

A Gaussian process surrogate and Bayesian optimization, used with multiple sources of information. First we create a Bayesian model from the true data points and select new simulations based on it. The model (Kriging method?) could use a "Gaussian process approximation" to reduce computational expense.

Future extension: multi-source Bayesian optimization (Ifriit is the high-quality source and the NN is the low-quality source).

## Method 7, Grid search algorithm

Split the entire search space into a grid (start coarse, with 2 or 3 cells per dimension; 3^16 ≈ 43M cells). Evaluate each cell using the data points inside it or its 8 nearest neighbours.

## Method 8, Network search algorithm

Find a gradient between all data points, use this information to initialize gradient descent

## Method 9, Principal Component Analysis (PCA)

Combine with gradient descent for faster convergence? Enables plotting of dataset in 2D

## Method 10, Transfer Learning 

Generate low quality large dataset (1-50M examples?) using surrogate NN and use this for transfer learning. This might help to evaluate at what stage transfer learning becomes effective (can we use it with a dataset of 1000 or 10000?)