# Sequential neural network for prediction of continuous values

Import Libraries

In [40]:
# Importing the libraries
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

Helpful Functions

In [77]:
#function that retrieves the input variable columns from the CSV and returns a pandas dataframe containing the columns
def retrieve_X_df(df, verbose=True):
  """
  :param df: Required, the dataframe containing the input and output variables. The first 900 values for each observation are assumed the input variables.
  :param verbose: Optional, default==True, prints the shape of the dataframe after retrieving the input columns.

  :return: dataframe containing the input data. 
  """
  df = df.iloc[:,0:900]
  if verbose:
    print("df shape: ", df.shape)
  return df

#function that retrieves the target columns from the CSV and returns a pandas dataframe containing the columns
def retrieve_y_df(df, n_last_cols = 3, verbose=True):
  """
  :param df: Required, the dataframe containing the input and target variables. 
  :param n_last_cols: Optional, default==3, The number of column values for each observation that are the target variables.
  :param verbose: Optional, default==True, prints the shape of the dataframe after retrieving the target columns.

  :return: dataframe containing the target data. 
  """
  df = df.iloc[:,-n_last_cols:]
  df.columns = ['amplitude','X','Y']
  if verbose:
    print("df shape: ", df.shape)
  return df

#function that converts the input dataframe into 2-D matrix
def turn_rows_into_np_matrix(df, width=30, height=30,verbose=True):
  """
  :param df: Required, the dataframe containing the input variables. 
  :param width: Optional, default==30, The width to use when transforming a 1-D representation of data to 2-D representation.
  :param height: Optional, default==30, The height to use when transforming a 1-D representation of data to 2-D representation.
  :param verbose: Optional, default==True, opperate in verbose mode.

  :return: numpy ndarray shape(n_observations, width, height)
  """
  # save the dataframe as a numpy arrray
  np_img_matrices = df.to_numpy()
  if (verbose):
    print("len numpy array: ", len(np_img_matrices))

  # for each row, turn into a width x height matrix using the shape method
  # set the shape
  shape = (len(np_img_matrices),width,height)
  # re shape the array
  np_img_matrices = np_img_matrices.reshape(shape)

  if verbose:
    # print the first "image" matrix
    print(np_img_matrices[0])

  return np_img_matrices

Read csv-files and retrieve the train and test datasets. Split train dataset into train and validation datasets with ratio 90%

In [78]:
source = "https://userweb.jlab.org/~tbritton/Hackathon2021_DataSets/Problem1/"

#Read the train csv-file
df_train = pd.read_csv(os.path.join(source,"train.csv"), na_values=['NA', '?'], header=None)

#Set up a splitting ratio
train_pct = 0.9
train_cut = int(len(df_train) * train_pct)

#Retrieve the train X and train y datasets
df_tr = df_train[0:train_cut]
df_train_X = retrieve_X_df(df_tr)
df_train_y = retrieve_y_df(df_tr).to_numpy()

#Retrieve the validation X and train y datasets
df_val = df_train[train_cut:]
df_val_X = retrieve_X_df(df_val)
df_val_y = retrieve_y_df(df_val).to_numpy()

#Read the test csv-file
df_test = pd.read_csv(os.path.join(source,"test.csv"), na_values=['NA', '?'], header=None)

#Retrieve the test X and test y datasets
df_test_X = retrieve_X_df(df_test)
df_test_y = retrieve_y_df(df_test).to_numpy()

df_train_y

df shape:  (9000, 900)
df shape:  (9000, 3)
df shape:  (1000, 900)
df shape:  (1000, 3)
df shape:  (2000, 900)
df shape:  (2000, 3)


array([[   4.2439,  702.5571, -118.9555],
       [   2.7615,  -67.0084,  747.8185],
       [   3.162 , -831.5248,  691.4152],
       ...,
       [   3.9983,  724.0626,  239.1365],
       [   1.9245,  233.1289, -277.3137],
       [   1.8139, -425.4246, -232.5975]])

Turn the rows into matrices

In [26]:
train_X = turn_rows_into_np_matrix(df_train_X, 30,30, False)
print(train_X.shape)
val_X = turn_rows_into_np_matrix(df_val_X, 30,30, False)
print(val_X.shape)

(9000, 30, 30)
(1000, 30, 30)


Build the model

In [37]:
# sequential neural network with selected parameters
def create_sequential_dnn(width, num_nodes = (16, 16, 16), activations = ('relu','relu','relu'), outputs = 3):

  model = Sequential()

  if len(num_nodes) != len(activations):
    print('The length of the number of nodes array and activations array must be the same.')
    sys.exit()

  # loop over the number of nodes
  for (i, n) in enumerate(num_nodes):
    if i == 0:
      model.add(Dense(n, activation = activations[i], input_dim = width))
    else:
      model.add(Dense(n, activation = activations[i]))

  # output
  model.add(Dense(outputs, activation = "linear"))
  
  # return the model
  return model

Set up the target names and create the model

In [38]:
target_names = ['amplitude','X','Y']
model = create_sequential_dnn(900, outputs = len(target_names))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Compile the model

In [73]:
# Adam optimizer
opt = Adam(learning_rate = 1e-3)
model.compile(loss = "mse", optimizer = opt)

Fit the model with train dataset and validate it with validation dataset

In [54]:
history = model.fit(x = df_train_X, y = df_train_y[:], 
        validation_data = (df_test_X, df_test_y[:]),
        epochs = 50, batch_size = 64)

Train on 9000 samples, validate on 2000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Predict the outputs for test dataset and evaluate the model

In [55]:
preds = model.predict(df_test_X)

In [76]:
model.evaluate(x=df_test_X, y=df_test_y)



220.25632720947266

Predict the outputs for judge dataset 

In [64]:
#Read judge csv 
df_judge = pd.read_csv(os.path.join(source,"judge.csv"), na_values=['NA', '?'], header=None)

#Get inputs
df_judge_X = retrieve_X_df(df_judge)

#Predict the outputs 
preds_judge = model.predict(df_judge_X)

#Put the predictions into the dataframe
output_judge = pd.DataFrame({'amplitude':preds_judge[:,0].flatten(),
                      'X':preds_judge[:,1].flatten(),'Y':preds_judge[:,2].flatten()})
output_judge

df shape:  (2000, 900)


Unnamed: 0,amplitude,X,Y
0,4.020967,290.168945,-77.878807
1,3.368213,172.946609,-844.473511
2,3.151659,-24.089024,32.475533
3,3.134604,550.524414,-175.967468
4,4.053262,-281.069336,-353.006653
...,...,...,...
1995,1.519884,-903.620300,368.979401
1996,3.874111,305.609619,-245.970673
1997,2.829836,285.808899,-33.228050
1998,3.899166,-449.398224,147.527237


In [None]:
#Create the csv-file with outputs

In [65]:
path = "C:/Users/matsiuk/Documents/Python Scripts/DATA/Problem1"
output_judge.to_csv(os.path.join(path,"output_judge_Pr1_seq_model_with_3_outputs.csv"),index=False,header=False)