In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import sklearn as skl
import seaborn as sns
import config
from sqlalchemy import create_engine
import math
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import MeanSquaredLogarithmicError



In [2]:
#set up SQL connection

protocol = 'postgresql'
username = config.username
password = config.password
host = 'localhost'
port = 5432
database_name = config.database_name
rds_connection_string = f'{protocol}://{username}:{password}@{host}:{port}/{database_name}'
engine = create_engine(rds_connection_string)
con = engine.connect()

In [3]:
#read in cleaned X table from SQL

X = pd.read_sql('Select * from "X"',con)
X.head()

Unnamed: 0,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,netProfitMargin,pretaxProfitMargin,grossProfitMargin,operatingProfitMargin,returnOnAssets,returnOnCapitalEmployed,...,Rating Agency Name_Standard & Poor's Ratings Services,Sector_Basic Industries,Sector_Capital Goods,Sector_Consumer Non-Durables,Sector_Consumer Services,Sector_Energy,Sector_Health Care,Sector_Other,Sector_Public Utilities,Sector_Technology
0,0.945894,0.426395,0.09969,44.203245,0.03748,0.049351,0.176631,0.06151,0.041189,0.091514,...,0,0,0,0,0,0,0,1,0,0
1,1.033559,0.498234,0.20312,38.991156,0.044062,0.048857,0.175715,0.066546,0.053204,0.1048,...,0,0,0,0,0,0,0,1,0,0
2,0.963703,0.451505,0.122099,50.841385,0.032709,0.044334,0.170843,0.059783,0.032497,0.075955,...,0,0,0,0,0,0,0,1,0,0
3,1.019851,0.510402,0.176116,41.161738,0.020894,-0.012858,0.138059,0.04243,0.02569,-0.027015,...,0,0,0,0,0,0,0,1,0,0
4,0.957844,0.495432,0.141608,47.761126,0.042861,0.05377,0.17772,0.065354,0.046363,0.096945,...,1,0,0,0,0,0,0,1,0,0


In [4]:
#Pull the numerically encoded categories as a target variable
cleaned_y_df = pd.read_sql("Select * from y",con)
cleaned_y_df.head()

Unnamed: 0,Rating
0,2
1,3
2,3
3,3
4,3


In [5]:
#One-hot encode the target variable
y = pd.get_dummies(cleaned_y_df, columns = ['Rating'])
y.head()

Unnamed: 0,Rating_0,Rating_1,Rating_2,Rating_3,Rating_4,Rating_5,Rating_6,Rating_7,Rating_8,Rating_9
0,0,0,1,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0


In [6]:
#Set up train/test data
X_train, X_test, y_train, y_test = train_test_split(X.values,y.values,random_state = 48)
scaler=StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [7]:
X_train.shape

(1521, 39)

In [8]:
#Building Tuner
import kerastuner as kt
msle = MeanSquaredLogarithmicError()


def build_model(hp):
  model = tf.keras.Sequential()
  
  # Tune the number of units in the first Dense layer
  # Choose an optimal value between 32-512
  hp_units1 = hp.Int('units1', min_value=32, max_value=512, step=32)
  hp_units2 = hp.Int('units2', min_value=32, max_value=512, step=32)
  hp_units3 = hp.Int('units3', min_value=32, max_value=512, step=32)
  #hp_units4 = hp.Int('units4', min_value=32, max_value=512, step=32)
  model.add(Dense(units=hp_units1, activation='relu'))
  model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))
  model.add(tf.keras.layers.Dense(units=hp_units3, activation='sigmoid'))
  #model.add(tf.keras.layers.Dense(units=hp_units4, activation='relu'))
  model.add(Dense(10, kernel_initializer='normal', activation='softmax'))

  # Tune the learning rate for the optimizer
  # Choose an optimal value from 0.01, 0.001, or 0.0001
  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
      loss=msle,
      metrics=[msle]
  )

  return model

# HyperBand algorithm from keras tuner
tuner = kt.Hyperband(
    build_model,
    objective='val_mean_squared_logarithmic_error',
    max_epochs=10,
    directory='keras_tuner_dir',
    project_name='keras_tuner_demo'
)

tuner.search(X_train_scaled, y_train, epochs=10, validation_split=0.2)

Trial 30 Complete [00h 00m 01s]
val_mean_squared_logarithmic_error: 0.03421960771083832

Best val_mean_squared_logarithmic_error So Far: 0.033935945481061935
Total elapsed time: 00h 00m 36s
INFO:tensorflow:Oracle triggered exit


In [13]:
for h_param in [f"units{i}" for i in range(1,4)] + ['learning_rate']:
  print(h_param, tuner.get_best_hyperparameters()[0].get(h_param))

units1 288
units2 224
units3 352
learning_rate 0.001




In [19]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_layer = len(X_train_scaled[0])
hidden_nodes_L1= 288
hidden_nodes_L2 = 224
hidden_nodes_L3 = 352


nn2 = tf.keras.models.Sequential()

# First hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes_L1, activation="relu", input_dim=input_layer))

# Second hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes_L2, activation="relu"))

# Third hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes_L3, activation="sigmoid"))

# Output layer
nn2.add(tf.keras.layers.Dense(units=10, activation="softmax"))

# Check the structure of the model
nn2.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 288)               11520     
                                                                 
 dense_9 (Dense)             (None, 224)               64736     
                                                                 
 dense_10 (Dense)            (None, 352)               79200     
                                                                 
 dense_11 (Dense)            (None, 10)                3530      
                                                                 
Total params: 158,986
Trainable params: 158,986
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [25]:
# Train the model
fit_model = nn2.fit(X_train_scaled, y_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [26]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.4565 - accuracy: 0.5276 - 23ms/epoch - 1ms/step
Loss: 0.4564847946166992, Accuracy: 0.5275590419769287
