In [None]:
# !pip install keras-tuner

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import kerastuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from kerastuner.tuners import Hyperband, BayesianOptimization
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np



  import kerastuner as kt


In [3]:
# df = pd.read_csv('Resources/funding_processed.csv')
df = pd.read_csv('funding_processed.csv')

df

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,...,ORGANIZATION_Trust,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,5000,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,6692,1,0,1,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
3,1,142590,1,0,1,0,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0
4,1,5000,1,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34293,1,5000,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34294,1,5000,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34295,1,5000,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34296,1,5000,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
def split_data(data, X, y, seed=5):
    """splits feature and target data into test and train sets and scales feature data

    Args:
        model (class): model class name
        X (array): array containing feature column values
        y (1D array): array containing target variable
        seed (int): random seed value; default=5
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled

def make_model(hp):

    model = Sequential()

    
    hp_activation = hp.Choice('activation', ['relu'])
    
    
    model.add(Dense(units = hp.Int('first_dense', min_value=2, max_value=27),
                    activation = hp_activation,
                    input_dim=36
    ))
    
    for i in range(hp.Int('n_hidden', 1, 2)):
        model.add(Dense(units=hp.Int('dense_' + str(i), min_value=1, max_value=9),
                        activation=hp_activation
    ))
        
        model.add(Dropout(hp.Choice('dropout_' + str(i), [0.0, 0.01, 0.1])
    ))

    
    model.add(Dense(units=1, activation='sigmoid'))
    
    hp_learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2)

    
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(learning_rate = hp_learning_rate),
                  metrics=['accuracy'])
    
    return model

In [5]:
#split preprocessed data into features and target arrays
X = df.drop(columns=['IS_SUCCESSFUL'])
#target array
y = df['IS_SUCCESSFUL'].values

In [6]:
X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled = split_data(df,X,y)

In [7]:
X_train_scaled.shape

(25723, 36)

In [8]:
tuner = BayesianOptimization(
    hypermodel = make_model,
    objective='val_accuracy',
    max_trials=30,
    num_initial_points=15,
    seed=5,
    tune_new_entries=True
)

In [10]:
tuner.search(X_train_scaled, y_train, epochs=200, validation_data=(X_test_scaled,y_test))

Trial 30 Complete [00h 08m 23s]
val_accuracy: 0.7269970774650574

Best val_accuracy So Far: 0.7313119769096375
Total elapsed time: 04h 05m 46s


In [11]:
#pull best hyperparameters
best_hp = tuner.get_best_hyperparameters(1)[0]
best_hp.values


{'activation': 'relu',
 'first_dense': 19,
 'n_hidden': 1,
 'dense_0': 5,
 'dropout_0': 0.01,
 'learning_rate': 0.006662753910077288,
 'dense_1': 9,
 'dropout_1': 0.1}

In [12]:
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.5571 - accuracy: 0.7313 - 543ms/epoch - 2ms/step
Loss: 0.5570906400680542, Accuracy: 0.7313119769096375


In [14]:
best_model.save('hp_model')
best_model.save('hp_model/hp_model.h5')


In [15]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
