# M.Lytova, M.Spanner, I.Tamblyn. *Deep learning and high harmonic generation* (2020)
## Codes for Section IV.D : *Distinguishing diatomic and triatomic molecules by the dipole moment*

## Headers and constants

In [None]:
from google.colab import files
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from tensorflow.keras import initializers
from keras import objectives
from keras.losses import binary_crossentropy, sparse_categorical_crossentropy
from keras.callbacks import TensorBoard
from keras import backend as K
import argparse
import matplotlib.pyplot as plt
import time

In [None]:
PI = 3.14159265359

t_n_points = 4096   # number of nodes in time
# Time grid: 800 a.u. divided by 41.341 (a.u. per fs), i.e. expressed in fs.
t_n = np.linspace(0, 800, t_n_points)/41.341
# Right end of the time grid, 800 a.u. = 19.35 fs.  It is used by the
# sin(PI*t_n/Tmax) factor applied to every signal and by the plot ticks,
# so it has to be defined before the loading cells run.
Tmax = t_n[-1]

n_train = 5000  # training set size - 1000 for each species
n_test = 500    # testing set size - 100 for each species

## Loading a training set

In [None]:
# Build the training set: n_train signals of t_n_points samples each, stored
# species after species, together with an integer class label per signal.
y_train = np.zeros((n_train, t_n_points))
label_train = np.empty(n_train, dtype = 'i')
path2load_di0 = "/hhg_reduced/hhg"
path2load_diA0 = "/hhg_di_red/hhg"
path2load_tri0 = "/hhg_tri_red/hhg"
path2load_tri10 = "/hhg_tri_isos_red/hhg"
path2load_tri20 = "/hhg_tri_vers_red/hhg"

tic = time.perf_counter()

# One file prefix per species; the position in the tuple is the class label
# (same labels as in the paper).
train_prefixes = (path2load_di0, path2load_diA0, path2load_tri0,
                  path2load_tri10, path2load_tri20)
n_train_per_class = n_train // len(train_prefixes)

# The sine factor does not depend on the file, so compute it once instead of
# once per loaded signal.
train_window = np.sin(PI*t_n/Tmax)

for i in range(n_train_per_class):
    for label, prefix in enumerate(train_prefixes):
        row = label*n_train_per_class + i
        label_train[row] = label
        # Files are numbered from 1: <prefix>1.dat, <prefix>2.dat, ...
        signal = np.loadtxt(prefix + str(i+1) + '.dat')
        y_train[row] = signal[0:t_n_points] * train_window
    if i % 100 == 0:   # progress report every 100 files per species
        print(i)

toc = time.perf_counter()
print(f"Training set preparation time {toc - tic:0.4f} seconds")

In [None]:
def plot_train_example(i):
    """Plot the i-th row of ``y_train`` against the time grid ``t_n``.

    Reads the module-level ``t_n``, ``y_train`` and ``Tmax``; the figure is
    shown and then closed, nothing is returned.
    """
    _, axis = plt.subplots(figsize=(12,4), constrained_layout=False)
    axis.plot(t_n, y_train[i], color='green')
    axis.set_xlabel('$t, fs$', fontsize=14)
    axis.set_ylabel('$y(t)$', fontsize=14)
    # One tick every 2 units of the time axis, from 0 up to (not including) Tmax.
    axis.set_xticks(np.arange(0, Tmax, 2.0))
    axis.grid()
    plt.show()
    plt.close()

### Drawing of a randomly chosen $d_k(t)$

In [None]:
# The upper bound of np.random.randint is exclusive, so passing n_train makes
# every training index 0 .. n_train-1 eligible (n_train-1 would never pick the
# last sample).
i_show = np.random.randint(0, n_train)
plot_train_example(i_show)

## Loading a testing set

In [None]:
# Build the testing set: n_test signals of t_n_points samples each, stored
# species after species, together with an integer class label per signal.
y_test = np.zeros((n_test, t_n_points))
label_test = np.empty(n_test, dtype = 'i')
path2load_di0 = "/hhg_reduced/hhg"
path2load_diA0 = "/hhg_di_red/hhg"
path2load_tri0 = "/hhg_tri_red/hhg"
path2load_tri10 = "/hhg_tri_isos_red/hhg"
path2load_tri20 = "/hhg_tri_vers_red/hhg"

# One file prefix per species; the position in the tuple is the class label
# (same labels as in the paper).
test_prefixes = (path2load_di0, path2load_diA0, path2load_tri0,
                 path2load_tri10, path2load_tri20)
n_test_per_class = n_test // len(test_prefixes)

# Test files start right after the file numbers used for training
# (1 .. n_train/5 per species), so the two sets do not share files.
first_test_file = n_train // len(test_prefixes) + 1

# The sine factor does not depend on the file, so compute it once instead of
# once per loaded signal.
test_window = np.sin(PI*t_n/Tmax)

for i in range(n_test_per_class):
    for label, prefix in enumerate(test_prefixes):
        row = label*n_test_per_class + i
        label_test[row] = label
        signal = np.loadtxt(prefix + str(i + first_test_file) + '.dat')
        y_test[row] = signal[0:t_n_points] * test_window
    

## Normalizing before training

In [None]:
# Scale both data sets by the same fixed constant so that training and testing
# inputs stay on a common scale.
# NOTE(review): 0.3 is presumably an upper bound of |y| in the data - confirm.
y_max = 0.3
y_train_norm, y_test_norm = (signals/y_max for signals in (y_train, y_test))

## Model

In [None]:
# Fully connected classifier: one input per time node, three ReLU hidden
# layers, and 5 raw (un-normalized) output scores - one per class label.
inputs = Input(shape=(t_n_points,))

x = Dense(128, activation='relu')(inputs)
x = Dense(64, activation='relu')(x)
x = Dense(16, activation='relu')(x)
outputs = Dense(5)(x)   # no activation: the loss below works on logits

ModelClass = Model(inputs, outputs)
# `learning_rate` is the current keyword; the old `lr` alias is deprecated.
opt = Adam(learning_rate=0.001, amsgrad=True)
# Labels are plain integers (not one-hot) and the outputs are logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
ModelClass.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would append a stray "None" line).
ModelClass.summary()

## Training

*   Training set: 5,000
*   Testing set: 500

In [None]:
def plot_losses2():
    """Show the training history as two side-by-side panels.

    Left panel: log10 of the module-level ``loss`` sequence; right panel: the
    module-level ``accur`` sequence with the y-axis limited to [0.4, 1.0].
    Both are plotted against the epoch index.
    """
    _, (loss_axis, accuracy_axis) = plt.subplots(1,2,figsize=(12,5),constrained_layout=False)

    loss_axis.plot(np.log10(loss),color='blue')
    loss_axis.set_ylabel('log(Loss)', fontsize=16)
    loss_axis.set_xlabel('Epoch', fontsize=16)
    loss_axis.grid()

    accuracy_axis.plot(accur,color='red')
    accuracy_axis.set_ylabel('Accuracy', fontsize=16)
    accuracy_axis.set_xlabel('Epoch', fontsize=16)
    accuracy_axis.set_ylim(0.4, 1.0)
    accuracy_axis.grid()

    plt.show()

In [None]:
tic = time.perf_counter()

batch_size = 32
n_rounds = 4              # number of fit() calls
epochs_per_round = 500    # epochs per fit() call
path = "/model_classify/model_1"

# Per-epoch history accumulated over all rounds; plot_losses2() reads these
# two module-level names.  They must start empty here: `loss` may still hold
# the loss object passed to compile(), and `accur` does not exist yet.
loss = np.empty(0)
accur = np.empty(0)

for n in range(1, n_rounds + 1):  # in a loop - to see the results every 500 epochs

      history = ModelClass.fit(y_train_norm, label_train,
                               epochs=epochs_per_round,
                               batch_size=batch_size,
                               shuffle=True,
                               verbose=0)

      # Save after every round so an interrupted run keeps the latest weights.
      ModelClass.save(path)

      loss_save = history.history['loss']
      accur_save = history.history['accuracy']

      loss = np.concatenate((loss, loss_save), axis = 0)
      accur = np.concatenate((accur, accur_save), axis = 0)

      print("loss: ", loss[-1])
      print("accuracy: ", accur[-1])
      plot_losses2()


toc = time.perf_counter()
print(f"Execution time {toc - tic:0.4f} seconds")
    

## Making a prediction

In [None]:
# The classifier outputs logits; appending a softmax layer turns them into
# per-class probabilities for every test signal.
softmax_head = tf.keras.layers.Softmax()
probability_model = tf.keras.Sequential([ModelClass, softmax_head])
prediction = probability_model.predict(y_test_norm)

### Print a random example

In [None]:
# The upper bound of np.random.randint is exclusive, so passing n_test makes
# every test index 0 .. n_test-1 eligible (n_test-1 would never pick the last
# sample).
i_show = np.random.randint(0, n_test)
print(i_show)
print(prediction[i_show])

## Function that plots the results

In [None]:
def plot_histograms():
    """Plot the mean predicted class probabilities for each true label.

    Reads the module-level ``prediction`` (one probability vector per test
    signal), ``label_test`` and ``n_test``.  For every true label the
    predicted vectors are summed (the raw sums are printed), averaged over
    the number of test signals carrying that label, and drawn as one bar
    chart per label.  Nothing is returned.
    """
    n_classes = 5
    bar_colors = ("blue", "red", "green", "magenta", "cyan")

    # Row k accumulates the predictions of all signals whose true label is k.
    summed = np.zeros((n_classes, n_classes))
    counts = np.zeros(n_classes)
    for i in range(n_test):
        true_label = label_test[i]
        # Any label outside 0..3 is counted as the last class.
        row = true_label if 0 <= true_label < n_classes - 1 else n_classes - 1
        summed[row] += prediction[i]
        counts[row] += 1

    for row_sum in summed:
        print(row_sum)

    # Average over the actual number of signals per label (n_test/5 for the
    # balanced set); a label with no signals keeps an all-zero row instead of
    # dividing by zero.
    mean_probability = summed / np.maximum(counts, 1)[:, np.newaxis]

    _, axes = plt.subplots(1,n_classes,figsize=(20,4),constrained_layout=False)
    for label, (axis, color) in enumerate(zip(axes, bar_colors)):
        plt.sca(axis)
        plt.title(f"True label: {label}", fontsize=15)
        plt.bar(range(n_classes), mean_probability[label], color=color, width=0.5)
        plt.xticks(range(n_classes), fontsize=12)
        plt.yticks(np.arange(0, 1.2, 0.2), fontsize=12)
        plt.ylim([0, 1.2])
        if label == 0:
            # Only the left-most panel carries the shared y-axis label.
            plt.ylabel("Probability", fontsize=15)
    plt.show()

## Predicted and True

In [None]:
# Print the per-label prediction sums and draw one bar chart per true label.
plot_histograms() 