In [None]:
import numpy as np
import utils_deck_generation as idg
import healpy_pointings as hpoint
import netcdf_read_write as nrw
import utils_intensity_map as uim
from nn_plots import figure_location
from scipy.stats import qmc
import os
import subprocess

# Parameter containers; both start empty and are populated by later cells.
dataset_params = dict()
sys_params = dict()

In [None]:
# Size of the NN training set (number of sampled examples)
dataset_params["num_examples"] = 1
sys_params.update({
    "num_processes": 10,
    "num_ex_checkpoint": 1000,
})

# One seeded generator is created here and shared by the sampling code.
dataset_params["random_seed"] = 12345
rng = np.random.default_rng(dataset_params["random_seed"])

# Ranges of the sampled simulation parameters
dataset_params["surface_cover_radians"] = np.radians(45.0)  # pointings
dataset_params["defocus_range"] = 20.0  # mm
dataset_params["min_power"] = 0.5  # fraction of full power

# Tally of sampled parameters, one entry per group above
num_sim_params = sum((
    2,  # pointings
    1,  # defocus
    1,  # power
))
dataset_params["num_sim_params"] = num_sim_params


dataset_params["imap_nside"] = 256
# Switches for the individual pipeline stages
sys_params.update({
    "run_gen_deck": True,
    "run_sims": True,
    "run_compression": True,
    "run_clean": False,
})

dataset_params["run_type"] = "nif"  # the original comment also lists "test"
run_data = idg.import_nif_config()

# Output locations and file names
sys_params.update({
    "root_dir": "Data",
    "sim_dir": "run_",
    "trainingdata_filename": "training_data_and_labels.nc",
})

In [None]:
dataset_params["LMAX"] = 30
# (LMAX + 2)(LMAX + 1) / 2 -- the product of consecutive integers is even, so
# floor division gives exactly the same integer as the float division did.
dataset_params["num_coeff"] = (dataset_params["LMAX"] + 2) * (dataset_params["LMAX"] + 1) // 2
# Assume symmetry: only half of the cones contribute independent outputs
dataset_params["num_output"] = dataset_params["num_sim_params"] * int(run_data['num_cones']/2)
print(
    dataset_params["num_coeff"]*2,
    dataset_params["num_examples"],
    dataset_params["num_output"],
)

# Generate and Save Training Data

In [None]:
# Latin hypercube sample with one dimension per network output, drawn from the
# notebook-wide seeded generator.
sampler = qmc.LatinHypercube(d=dataset_params["num_output"], seed=rng)
sample = sampler.random(n=dataset_params["num_examples"])
# Transpose so that each column holds one example.
Y_train = np.transpose(sample)
print(Y_train.shape)

In [None]:
sim_params = idg.create_run_files(Y_train, dataset_params, sys_params, run_data)

# One column per example in X_train, one entry per example in avg_powers.
X_train = np.zeros((dataset_params["num_coeff"] * 2, dataset_params["num_examples"]))
avg_powers = np.zeros(dataset_params["num_examples"])

min_parallel = 0
max_parallel = -1
run_location = sys_params["root_dir"] + "/" + sys_params["sim_dir"]
# Built before the simulation loop: the checkpoint save inside the loop needs
# this path. It used to be assigned only after the loop, so the first
# checkpoint raised NameError on a fresh kernel.
filename_trainingdata = sys_params["root_dir"] + "/" + sys_params["trainingdata_filename"]
chkp_marker = 1.0


def _run_batch(first, last):
    """Simulate examples ``first``..``last`` (inclusive) and store their results.

    Copies the executable into each run directory, launches the parallel run
    script once for the whole index range, then reads each result into the
    matching column of ``X_train`` and entry of ``avg_powers``.
    """
    for iex in range(first, last + 1):
        idg.copy_ifriit_exc(run_location, iex)
    subprocess.check_call(["./bash_parallel_ifriit", run_location, str(first), str(last)])
    for iex in range(first, last + 1):
        X_train1, avg_power1 = nrw.retrieve_xtrain_and_delete(run_location, run_data['Beam'], iex, dataset_params, sys_params)
        X_train[:,iex] = X_train1
        avg_powers[iex] = avg_power1


if sys_params["run_sims"]:
    # Full batches of num_processes examples each.
    num_parallel_runs = int(dataset_params["num_examples"] / sys_params["num_processes"])
    for ir in range(num_parallel_runs):
        min_parallel = ir * sys_params["num_processes"]
        max_parallel = (ir + 1) * sys_params["num_processes"] - 1
        _run_batch(min_parallel, max_parallel)
        if sys_params["run_compression"]:
            # Save a partial data set each time another num_ex_checkpoint
            # examples have been completed.
            if ((max_parallel + 1) >= (chkp_marker * sys_params["num_ex_checkpoint"])):
                print("Save training data checkpoint at run: " + str(max_parallel))
                nrw.save_training_data(X_train[:,:max_parallel+1], Y_train[:,:max_parallel+1], avg_powers[:max_parallel+1], filename_trainingdata)
                chkp_marker +=1

    # Remainder that does not fill a whole batch.
    if max_parallel != (dataset_params["num_examples"] - 1):
        min_parallel = max_parallel + 1
        max_parallel = dataset_params["num_examples"] - 1
        _run_batch(min_parallel, max_parallel)

if sys_params["run_compression"]:
    nrw.save_training_data(X_train, Y_train, avg_powers, filename_trainingdata)
print("\n")

## Diagnostic

In [None]:
import matplotlib.pyplot as plt
import healpy as hp
import utils_intensity_map as uim

In [None]:
iex = 0

run_location = sys_params["root_dir"] + "/" + sys_params["sim_dir"] + str(iex)
# Hard-coded strided row selections: every 4th row starting at rows 0, 1 and 3.
# NOTE(review): the stride and stop values look tied to num_sim_params == 4 and
# a fixed number of cones -- confirm before changing either.
theta_slice = slice(0,29,4)
phi_slice   = slice(1,30,4)
power_slice = slice(3,32,4)

# NOTE(review): "sim_params", "theta_pointings" and "phi_pointings" are not
# stored in dataset_params by any cell visible in this notebook; presumably
# idg.create_run_files adds them -- confirm.
cone_theta_offset = dataset_params["sim_params"][theta_slice,iex]
cone_phi_offset = dataset_params["sim_params"][phi_slice,iex]
cone_powers = dataset_params["sim_params"][power_slice,iex]

# Beam-weighted mean of the cone powers. The previous beams_prev / beams_tot
# accumulators were never read and have been removed.
total_power = 0
for icone in range(run_data['num_cones']):
    total_power += cone_powers[icone] * run_data['beams_per_cone'][icone]
mean_power_fraction = total_power / run_data['nbeams']

intensity_map = nrw.read_intensity(run_location, dataset_params["imap_nside"])
intensity_map_rms_spatial = uim.readout_intensity(run_data, intensity_map, mean_power_fraction)

hp.mollview(intensity_map,unit=r"$\rm{W/cm^2}$",flip="geo")
hp.graticule()
port_theta = run_data["Port_centre_theta"]
port_phi = run_data["Port_centre_phi"]
# Overlay the port centres and this example's pointings on the map.
hp.projscatter(port_theta, port_phi)
hp.projscatter(dataset_params["theta_pointings"][:,iex], dataset_params["phi_pointings"][:,iex])
plt.savefig(figure_location+'/intensity_mollweide.png', dpi=300, bbox_inches='tight')

In [None]:
hp.mollview(intensity_map_rms_spatial, unit="Deviation from mean (%)",flip="geo")
# Draw the graticule before saving so the PNG matches what is shown inline
# (it used to be added after savefig and was missing from the saved file).
hp.graticule()
plt.savefig(figure_location+'/deviation_from_mean_mollweide.png', dpi=300, bbox_inches='tight')

In [None]:
LMAX = dataset_params["LMAX"]
power_spectrum_unweighted, power_spectrum_weighted = uim.power_spectrum(intensity_map, LMAX)
x_max = 20


def _plot_mode_spectrum(spectrum, title, filename):
    """Plot one mode spectrum as a percentage against l mode and save it.

    ``spectrum`` is plotted against ``np.arange(LMAX)``, so it must have LMAX
    entries. The figure is written to ``figure_location + filename``.
    """
    fig, ax = plt.subplots()
    ax.plot(np.arange(LMAX), spectrum * 100.0)
    # Keep the tick step at 1 or more; int(LMAX/5) is 0 for LMAX < 5, which
    # would make range() raise.
    ax.set_xticks(range(0, LMAX+1, max(1, int(LMAX/5))))
    ax.set_xlim([0, x_max])
    ax.set_title(title)
    ax.set_xlabel("l mode")
    ax.set_ylabel(r"rms amplitude ($\%$)")
    fig.savefig(figure_location+filename, dpi=300, bbox_inches='tight')
    return fig, ax


_plot_mode_spectrum(power_spectrum_unweighted, "Unweighted Modes LLE", "/unweighted_modes.png")
_plot_mode_spectrum(power_spectrum_weighted, "Weighted Modes", "/weighted_modes.png");

In [None]:
# Round-trip check: map -> network-input vector -> map. The two mollweide
# views can then be compared by eye.
intensity_map_normalized, avg_power = uim.imap_norm(intensity_map)
hp.mollview(intensity_map_normalized, unit="Deviation from Mean", flip="geo")

# Stored under its own name: this used to be assigned to X_train, which
# overwrote the full training matrix with a single example's vector.
# LMAX is read from dataset_params so this cell does not depend on the
# power-spectrum cell having been run.
xtrain_example = uim.imap2xtrain(intensity_map_normalized, dataset_params["LMAX"], avg_power)
intensity_map_normalized2 = uim.xtrain2imap(xtrain_example, dataset_params["LMAX"], dataset_params["imap_nside"], avg_power)
hp.mollview(intensity_map_normalized2, unit="Deviation from Mean",flip="geo")

# Combine Datasets

In [None]:
from netCDF4 import Dataset
from os import path
import os
import numpy as np

In [None]:
dataset1 = "Data1"
dataset2 = "Data2"

new_file = "Data3"


def _load_training_data(filename):
    """Return ``(X_train, Y_train, avg_powers)`` read from the netCDF file ``filename``.

    The file is opened in a ``with`` block so the handle is closed even if one
    of the reads raises.
    """
    with Dataset(filename) as training_data:
        return (
            training_data.variables["X_train"][:],
            training_data.variables["Y_train"][:],
            training_data.variables["avg_powers"][:],
        )


X_1, Y_1, avg_powers_1 = _load_training_data(dataset1 + "/" + sys_params["trainingdata_filename"])
X_2, Y_2, avg_powers_2 = _load_training_data(dataset2 + "/" + sys_params["trainingdata_filename"])

# Examples are stored along the second axis, so the sets are joined column-wise.
X_train = np.hstack((X_1, X_2))
Y_train = np.hstack((Y_1, Y_2))
avg_powers = np.hstack((avg_powers_1, avg_powers_2))
print(np.shape(X_train), np.shape(Y_train), np.shape(avg_powers))

filename_trainingdata = new_file + "/" + sys_params["trainingdata_filename"]
nrw.save_training_data(X_train, Y_train, avg_powers, filename_trainingdata)

# Create data structure for data set and label

## Load Data Set

In [None]:
from netCDF4 import Dataset
from os import path
import os
import healpy as hp
import numpy as np

In [None]:
filename_trainingdata = sys_params["root_dir"] + "/" + sys_params["trainingdata_filename"]
# The with-block closes the file even if one of the reads raises.
with Dataset(filename_trainingdata) as training_data:
    X_all = training_data.variables["X_train"][:]
    Y_all = training_data.variables["Y_train"][:]
    avg_powers_all = training_data.variables["avg_powers"][:]

print(np.shape(X_all), np.shape(Y_all))
# Rows are features/labels, columns are examples.
num_examples = np.shape(X_all)[1]
input_size = np.shape(X_all)[0]
output_size = np.shape(Y_all)[0]

run_shuffle = False
random_seed = 12345
rng = np.random.default_rng(random_seed)

# Hold out 1% of the examples for testing, but never fewer than one.
test_size = max(1, int(num_examples / 100))

## Shuffle and Separate Data

In [None]:
if run_shuffle:
    index_shuf = list(range(num_examples))
    rng.shuffle(index_shuf)
    index_shuf = np.array(index_shuf)
    # The first test_size shuffled indices form the test set, the rest the
    # training set.
    test_indices = index_shuf[:test_size]
    train_indices = index_shuf[test_size:]

    test_avg_powers = [avg_powers_all[i] for i in test_indices]
    train_avg_powers = [avg_powers_all[i] for i in train_indices]
    # One row per example after stacking the selected columns.
    X_test = np.array([X_all[:,i] for i in test_indices])
    Y_test = np.array([Y_all[:,i] for i in test_indices])
    X_train = np.array([X_all[:,i] for i in train_indices])
    Y_train = np.array([Y_all[:,i] for i in train_indices])
else:
    # No shuffling: transpose to one row per example and split in file order.
    X_by_example = X_all.T
    Y_by_example = Y_all.T

    X_test, X_train = X_by_example[:test_size,:], X_by_example[test_size:,:]
    Y_test, Y_train = Y_by_example[:test_size,:], Y_by_example[test_size:,:]
    test_avg_powers, train_avg_powers = avg_powers_all[:test_size], avg_powers_all[test_size:]

print(np.shape(X_train), np.shape(Y_train), np.shape(X_test), np.shape(Y_test))

## Check That the Shuffle Maintained the Labelling

In [None]:
if run_shuffle:
    ind = 0
    # Position of original example `ind` within the shuffled ordering.
    ind_first = np.squeeze(np.where(index_shuf == ind)[0])

    # Reference: label and reconstructed map taken straight from the unshuffled data.
    print(Y_all[:,ind])
    intensity_map_normalized2 = uim.xtrain2imap(X_all[:,ind], dataset_params["LMAX"], dataset_params["imap_nside"], avg_powers_all[ind])
    hp.mollview(intensity_map_normalized2, unit="Deviation from Mean",flip="geo")

    # The same example as found in whichever split it ended up in.
    if ind_first < test_size:
        i = ind_first
        labels_split, data_split, powers_split = Y_test, X_test, test_avg_powers
    else:
        i = ind_first-test_size
        labels_split, data_split, powers_split = Y_train, X_train, train_avg_powers
    print(labels_split[i,:])
    cdata = data_split[i,:]
    apwrs = powers_split[i]
    intensity_map_normalized2 = uim.xtrain2imap(cdata, dataset_params["LMAX"], dataset_params["imap_nside"], apwrs)
    hp.mollview(intensity_map_normalized2, unit="Deviation from Mean",flip="geo")

## Normalize

In [None]:
# Mean and standard deviation over the whole training matrix; the same two
# scalars are applied to both splits.
mu, sigma = np.mean(X_train), np.std(X_train)

X_train, X_test = ((split - mu) / sigma for split in (X_train, X_test))

print(np.std(X_train), np.std(X_test))

# Neural Network

In [None]:
import tf_neural_network as tfnn
import matplotlib.pyplot as plt
import nn_plots as nnp

In [None]:
dir_nn_weights = "neural_network_weights"

# Learning rate and number of epochs
lr, num_epochs = 0.001, 100
# Hidden-layer unit counts passed to the model wrapper
hidden_units1, hidden_units2 = 25, 20
# Empty training histories
costs, train_acc, test_acc, epochs = [], [], [], []

In [None]:
# Train the network, then save the resulting weights under index 0.
parameters, costs, train_acc, test_acc, epochs = tfnn.model_wrapper(
    X_train,
    Y_train,
    X_test,
    Y_test,
    learning_rate=lr,
    num_epochs=num_epochs,
    hidden_units1=hidden_units1,
    hidden_units2=hidden_units2,
)
filename_nn_weights = dir_nn_weights + "/NN" + str(0)
nrw.save_nn_weights(parameters, filename_nn_weights)

In [None]:
# Pass the recorded epochs, costs and train/test accuracies, plus the learning
# rate, to the plotting helper (presumably draws the training curves -- see nn_plots).
nnp.plotting(epochs, costs, train_acc, test_acc, lr)

## Open NN Weights

In [None]:
# Replace `parameters` with whatever is read back from filename_nn_weights,
# the path used by the most recent save.
parameters = nrw.read_nn_weights(filename_nn_weights)

## Restart Training

In [None]:
# Resume from the epoch count reached so far and train up to a new total.
# NOTE(review): this cell is not idempotent -- re-running it sets start_epoch
# to 1000 and appends the histories a second time.
start_epoch = num_epochs
num_epochs = 1000

parameters, costs2, train_acc2, test_acc2, epochs2 = tfnn.model_wrapper(
    X_train,
    Y_train,
    X_test,
    Y_test,
    learning_rate=lr,
    num_epochs=num_epochs,
    start_epoch=start_epoch,
    nn_weights=parameters,
)
filename_nn_weights = dir_nn_weights + "/NN" + str(0)
nrw.save_nn_weights(parameters, filename_nn_weights)

# Extend each history with the values from the resumed run.
costs, train_acc, test_acc, epochs = (
    np.append(previous, resumed)
    for previous, resumed in (
        (costs, costs2),
        (train_acc, train_acc2),
        (test_acc, test_acc2),
        (epochs, epochs2),
    )
)

In [None]:
# Same plotting call as before, now with the histories extended by the resumed run.
nnp.plotting(epochs, costs, train_acc, test_acc, lr)

## Apply NN

In [None]:
ie = 0

# Single test example reshaped to a (1, input_size) batch.
x_test = np.reshape(X_test[ie,:], (-1,input_size))
y_true = np.squeeze(Y_test[ie,:])

y_pred = np.squeeze(tfnn.apply_network(x_test, parameters))
print(y_pred)
print(y_true)
# Per-output error relative to the true value, in percent, followed by its mean.
percent_error = (np.abs(y_true - y_pred)) / y_true * 100
print(percent_error)
print(np.mean(percent_error))

## Training Multiple Models

In [None]:
num_nn = 3
# NOTE(review): assumes model_wrapper returns int(num_epochs / 10) + 1 history
# entries per run -- confirm against tf_neural_network.
num_dumps = int(num_epochs / 10) + 1

# One column of history per trained network.
costs, train_acc, test_acc, epochs = (np.zeros((num_dumps, num_nn)) for _ in range(4))
for inn in range(num_nn):
    # The network index doubles as the initialisation seed.
    parameters, costs1, train_acc1, test_acc1, epochs1 = tfnn.model_wrapper(
        X_train,
        Y_train,
        X_test,
        Y_test,
        learning_rate=lr,
        num_epochs=num_epochs,
        hidden_units1=hidden_units1,
        hidden_units2=hidden_units2,
        initialize_seed=inn,
        print_cost=False,
    )
    filename_nn_weights = dir_nn_weights + "/NN" + str(inn)
    nrw.save_nn_weights(parameters, filename_nn_weights)
    for history, column in (
        (costs, costs1),
        (train_acc, train_acc1),
        (test_acc, test_acc1),
        (epochs, epochs1),
    ):
        history[:,inn] = column
    print("Trained neural network index/seed: ", inn)
nnp.plotting(epochs, costs, train_acc, test_acc, lr)