In [1]:
# Import

#! /usr/bin/env python

'''
Trains 7D QuaLiKiz-NN with a single output (efeETG_GB)
'''

from __future__ import print_function

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, adam, Adam
from keras.initializers import TruncatedNormal
from keras import regularizers
from keras import backend as K
import pandas
import numpy
import sys
import os

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

from copy import deepcopy
from keras.models import load_model

Using TensorFlow backend.


In [2]:
# Custom Keras metric: RMSE (Root Mean Square Error)
def rmse(y_true, y_pred):
    """Return the root-mean-square error between targets and predictions.

    Args:
        y_true: backend tensor holding the reference values.
        y_pred: backend tensor holding the model predictions.

    Returns:
        Backend tensor: the square root of the mean (taken with no axis
        argument) of the squared element-wise differences.
    """
    residual = y_true - y_pred
    mean_squared_error = K.mean(K.square(residual))
    return K.sqrt(mean_squared_error)

# Base name of the running script without its extension. Useful for
# procedurally generating output/log files.
# os.path.splitext only removes a real extension; slicing off the last three
# characters would corrupt any sys.argv[0] that does not end in ".py".
file_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]

# Define neural network parameters
batch_size = 10
#num_classes = 1
epochs = 100

# Load Data (which is in HDF5 or .h5 format)
# mode="r": the data set is only read here. The default mode ("a") would
# silently create an empty file when the path is wrong.
# The with-block guarantees the file handle is released even if a key is
# missing; `store` stays defined afterwards and calling store.close() again
# is a harmless no-op.
with pandas.HDFStore("../unstable_training_gen2_7D_nions0_flat_filter7.h5",
                     mode="r") as store:
    target_df = store['efeETG_GB'].to_frame()  # This one is relatively easy to train
    input_df = store['input']

In [3]:
# Puts inputs and outputs in the same pandas dataframe.
# NOTE(review): DataFrame.join defaults to how='left', so every row of
# target_df is kept; rows are restricted to overlapping entries only if each
# target index is also present in input_df.
joined_dataFrame = target_df.join(input_df)

# Make an un-normalized copy of joined_dataFrame for later use
joined_dataFrame_original = deepcopy(joined_dataFrame)

# Normalize data: mean-center each column and scale it to unit standard
# deviation. A single (x - mean) / std pass is sufficient; dividing by the
# standard deviation beforehand does not change the result.
# NOTE(review): mean/std are computed over the full data set, i.e. before the
# train/test split below, so the test rows contribute to the statistics.
columns_to_normalize = ['efeETG_GB', 'Ati', 'Ate', 'An',
                        'qx', 'smag', 'x', 'Ti_Te']
for column_name in columns_to_normalize:
    raw_values = joined_dataFrame[column_name]
    joined_dataFrame[column_name] = (
        (raw_values - raw_values.mean()) / raw_values.std())

# Shuffles dataset
# NOTE(review): the permutation uses numpy's global, unseeded RNG, so the
# train/test split differs from run to run.
shuffled_joined_dataFrame = joined_dataFrame.reindex(numpy.random.permutation(
                                                joined_dataFrame.index))

# Creates a pandas Series holding the output (target) column
shuffled_clean_output_df = shuffled_joined_dataFrame['efeETG_GB']

# Creates a pandas dataframe for the inputs
shuffled_clean_input_df = shuffled_joined_dataFrame.drop('efeETG_GB', axis=1)

# Row position separating training data (first 90%) from testing data
# (last 10%); inputs and outputs share it because they have the same length.
train_row_count = int(numpy.round(len(shuffled_joined_dataFrame)*0.9))

# Creates training dataset (90% of total data) for outputs
y_train = shuffled_clean_output_df.iloc[:train_row_count]

# Creates training dataset (90% of total data) for inputs
x_train = shuffled_clean_input_df.iloc[:train_row_count]

# Creates testing dataset (10% of total data) for outputs
y_test = shuffled_clean_output_df.iloc[train_row_count:]

# Creates testing dataset (10% of total data) for inputs
x_test = shuffled_clean_input_df.iloc[train_row_count:]

# Deletes pandas dataframes that are no longer needed
del target_df, input_df

# Closes the HDFStore. This is good practice.
store.close()

In [4]:
# Summary statistics of the normalized frame; as a sanity check, every
# column should report a mean of ~0 and a standard deviation of 1.
joined_dataFrame.describe(include='all')

Unnamed: 0,efeETG_GB,Ati,Ate,An,qx,smag,x,Ti_Te
count,638880.0,638880.0,638880.0,638880.0,638880.0,638880.0,638880.0,638880.0
mean,1.86562e-14,-6.174258e-12,1.61655e-12,-2.162388e-12,1.051146e-12,-4.408851e-13,1.82764e-12,-3.604733e-12
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.136272,-1.848058,-2.151526,-4.17772,-0.873515,-1.264894,-1.375825,-1.666559
25%,-0.8023415,-0.6589535,-0.6642361,-0.6349019,-0.6749461,-0.3146967,-0.956749,-0.9260135
50%,-0.301849,-0.1493374,-0.1684729,-0.04443218,-0.4385544,-0.05555202,-0.118598,-0.06698017
75%,0.5578201,0.6150866,0.4925446,0.5460375,0.1524246,0.4627374,0.719553,0.42178
max,3.965472,2.908359,1.81458,2.317447,2.516341,3.918,1.767242,1.665897
