# 0. Introduction

This notebook uses neural networks to predict the deal price of startups.

The results can then be compared with the machine learning models from the modelling notebook.

In [None]:
pip install keras-tuner --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 15.2 MB/s 
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.1.2 kt-legacy-1.0.4


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from google.colab import drive

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
import keras_tuner as kt

In [None]:
# record the Keras version in use (keras is imported from tensorflow)
keras.__version__

'2.8.0'

In [None]:
# record the TensorFlow version in use
tf.__version__

'2.8.2'

In [None]:
# record the KerasTuner version in use
kt.__version__

'1.1.2'

**Google Drive must be mounted at `/content/drive` before the data can be loaded.**

In [None]:
# Mount Google Drive so the CSV path below resolves on a fresh Colab runtime;
# if the drive is already mounted this only prints a notice.
drive.mount("/content/drive")

# Load the modelling data and drop the "Unnamed: 0" column
# (presumably a leftover index column from an earlier to_csv - TODO confirm).
# The drop is chained instead of done in place so re-running the cell is safe.
df = pd.read_csv(
    r"/content/drive/MyDrive/Colab Notebooks/ML in M&A/modelling_data.csv"
).drop(columns="Unnamed: 0")

# 1. Preprocessing

In [None]:
# target: the column at position 1, described in this notebook as the log deal price
y = df.iloc[:, 1]
# independent variables: every column from position 2 onwards
X = df.iloc[:, 2:]

In [None]:
# hold out 20% of the rows as the test set; the fixed seed makes the split repeatable
X_train_dev, X_test, y_train_dev, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# split the remaining rows again: 20% of them become the development set
X_train, X_dev, y_train, y_dev = train_test_split(
    X_train_dev,
    y_train_dev,
    test_size=0.2,
    random_state=42,
)

In [None]:
# standardize the features to zero mean and unit variance; the scaler is fitted
# on the training rows only and then applied unchanged to every split
ss = StandardScaler().fit(X_train)
X_train_trans, X_dev_trans, X_test_trans = (
    ss.transform(split) for split in (X_train, X_dev, X_test)
)

# 2. Model

In [None]:
# define a function that builds a NN with specified layers, neurons, regularization, learning rate and optimizer

def build_model(hp, n_features=38):
  """Build and compile a feed-forward regression network for KerasTuner.

  Args:
    hp: hyperparameter container supplied by the tuner. The search space
      ('regularization', 'num_layers', 'units', 'lr', 'optimizer') is
      declared on it while the model is assembled.
    n_features: number of input columns. Defaults to 38, the value that was
      previously hard-coded, so ``build_model(hp)`` keeps working unchanged.

  Returns:
    A compiled ``keras.models.Sequential`` with one to three ReLU hidden
    layers and a single linear output unit, using mean absolute error as loss.
  """
  model = keras.models.Sequential()
  model.add(keras.layers.InputLayer(input_shape=(n_features,)))

  # L2 penalty strength applied to the kernel of every hidden layer
  l2_strength = hp.Float('regularization', min_value=0.00001, max_value=10, sampling='log')

  # hidden layers
  # 'units' is registered under one name, so all hidden layers share the same width
  for _ in range(hp.Int('num_layers', 1, 3)):
    model.add(
        keras.layers.Dense(
            units=hp.Int('units', min_value=10, max_value=50, step=1),
            activation="relu",
            kernel_regularizer=regularizers.l2(l2_strength)
        )
    )

  # output layer: linear activation for regression. A ReLU here clamps the
  # prediction at zero and passes no gradient once the pre-activation turns
  # negative, which can leave the network stuck predicting 0.
  model.add(keras.layers.Dense(units=1, activation='linear'))

  # optimizer: the learning rate and the optimizer type are both tuned
  learning_rate = hp.Float('lr', min_value=1e-4, max_value=1e-1, sampling='log')
  optimizer_classes = {
      'sgd': keras.optimizers.SGD,
      'rmsprop': keras.optimizers.RMSprop,
      'adam': keras.optimizers.Adam,
  }
  hp_optimizer = hp.Choice('optimizer', values=list(optimizer_classes))
  optimizer = optimizer_classes[hp_optimizer](learning_rate=learning_rate)

  # the loss includes the L2 penalty; the 'mae' metric reports the plain error
  model.compile(
      optimizer=optimizer,
      loss='mae',
      metrics=['mae']
  )
  return model

In [None]:
# smoke test: build the model once with a fresh HyperParameters container
# to check that build_model runs without errors
build_model(kt.HyperParameters())

<keras.engine.sequential.Sequential at 0x7f11f1b97f90>

In [None]:
# tune the model using bayesian optimization
# Trials are ranked on the validation MAE ('val_mae'), not the training MAE:
# the search is given validation data, and selecting on the training error
# would favour configurations that overfit. The seed makes the sequence of
# sampled hyperparameters repeatable. overwrite=True discards any earlier
# results stored under the same directory/project_name.
tuner = kt.BayesianOptimization(
    hypermodel=build_model,
    objective=kt.Objective('val_mae', direction='min'),
    max_trials=50,
    seed=42,
    overwrite=True,
    directory='/content/drive/MyDrive/Colab Notebooks/ML in M&A',
    project_name='NN_keras_tuner'
)

In [None]:
# search the best model configuration

# directory for the TensorBoard logs written during the search
log_dir = "/content/drive/MyDrive/Colab Notebooks/ML in M&A/NN_keras_tuner_logs"

# Train on the standardized features (X_train_trans) so that training and
# validation inputs are on the same scale; the validation set is already the
# standardized X_dev_trans.
# EarlyStopping monitors val_loss by default and stops a trial after 10 epochs
# without improvement. The checkpoint path is shared by all trials, so the file
# ends up holding a model from whichever trial wrote it last, not necessarily
# the best trial overall.
tuner.search(
    X_train_trans,
    y_train,
    epochs=600,
    validation_data=(X_dev_trans, y_dev),
    callbacks=[
        keras.callbacks.EarlyStopping(patience=10),
        keras.callbacks.ModelCheckpoint(
            "/content/drive/MyDrive/Colab Notebooks/ML in M&A/NN_keras_tuned.h5",
            monitor='val_loss',
            save_best_only=True,
        ),
        keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
    ],
)

Trial 50 Complete [00h 00m 34s]
mae: 376688448.0

Best mae So Far: 372363680.0
Total elapsed time: 00h 40m 42s
INFO:tensorflow:Oracle triggered exit


In [None]:
# show the tuner's summary of the completed trials
tuner.results_summary()

In [None]:
# take the top-ranked hyperparameter set (num_trials=1 is the default, spelled out here)
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
# rebuild the network for that configuration; this is a freshly built model,
# so it has to be fitted before it is used for predictions
model = tuner.hypermodel.build(best_hp)

In [None]:
# print the layers, output shapes and parameter counts of the rebuilt model
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 37)                1443      
                                                                 
 dense_5 (Dense)             (None, 1)                 38        
                                                                 
Total params: 1,481
Trainable params: 1,481
Non-trainable params: 0
_________________________________________________________________


# 3. Model results

The structure of the optimal model can be found below.

In [None]:
# number of hidden layers in the best configuration
best_hp.get('num_layers')

1

In [None]:
# number of units per hidden layer in the best configuration
best_hp.get('units')

37

In [None]:
# L2 kernel-regularization strength of the hidden layers in the best configuration
best_hp.get('regularization')

1e-05

In [None]:
# optimizer type ('sgd', 'rmsprop' or 'adam') in the best configuration
best_hp.get('optimizer')

'rmsprop'

In [None]:
# learning rate passed to the optimizer in the best configuration
best_hp.get('lr')

0.1