In [64]:
# Imports
import os

import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from kerastuner.tuners import BayesianOptimization

# Load the cleaned training and test tables from the data directory
df_train = pd.read_csv('data/trainClean.csv')
df_test = pd.read_csv('data/testClean.csv')

# Separate the label column from the remaining (feature) columns
y_train = df_train['Transported']
X_train = df_train.drop('Transported', axis=1)

# Standardize the features; the fitted scaler is kept so the same
# transformation can be applied to the test set later on
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Define hypermodel for hyperparameter tuning
def build_model(hp):
    """Build and compile a dense binary classifier from tuner hyperparameters.

    Hyperparameters requested from ``hp`` (names and order matter to the
    tuner, so they are requested in a fixed sequence):

    * ``input_units``       -- width of the first Dense layer (50..200, step 10)
    * ``num_hidden_layers`` -- number of hidden Dense+Dropout pairs (1..5)
    * ``layer_{i}_units``   -- width of hidden layer ``i`` (50..200, step 10)
    * ``dropout_rate``      -- dropout rate (0.0..0.5, step 0.1); the same
      hyperparameter name is requested for every hidden layer
    * ``learning_rate``     -- Adam learning rate, one of 1e-4, 1e-3, 1e-2

    The input width is taken from the module-level ``X_train``.

    Args:
        hp: Hyperparameter container supplied by the tuner; must provide
            ``Int``, ``Float`` and ``Choice``.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        binary cross-entropy loss and an accuracy metric.
    """
    n_features = X_train.shape[1]

    # First layer: also declares the input shape of the network
    first_width = hp.Int('input_units', min_value=50, max_value=200, step=10)
    stack = [Dense(units=first_width, activation='relu', input_shape=(n_features,))]

    # Variable number of hidden Dense layers, each followed by Dropout
    depth = hp.Int('num_hidden_layers', 1, 5)
    for idx in range(depth):
        width = hp.Int(f'layer_{idx}_units', min_value=50, max_value=200, step=10)
        stack.append(Dense(units=width, activation='relu'))
        drop = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)
        stack.append(tf.keras.layers.Dropout(drop))

    # Single sigmoid unit for the binary target
    stack.append(Dense(units=1, activation='sigmoid'))
    network = Sequential(stack)

    step_size = hp.Choice('learning_rate', values=[1e-4, 1e-3, 1e-2])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=step_size),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Instantiate the BayesianOptimization tuner.
# NOTE: `overwrite` is left at its default, so an existing project of the
# same name under `directory` is reloaded instead of starting a fresh search.
# Delete the log directory (or pass overwrite=True) after changing the
# search space in build_model.
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='keras_tuner_logs',
    project_name='my_hyperparameter_search'
)

# Perform the hyperparameter search; 20% of the training data is held out
# to compute the val_accuracy objective
tuner.search(X_train_scaled, y_train, epochs=20, validation_split=0.2)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters()[0]

# Build a fresh model using the best hyperparameters
best_model = build_model(best_hps)

# Train the best model using all training data (no validation split)
best_model.fit(X_train_scaled, y_train, epochs=20)

# Prepare test data for predictions: select exactly the columns the scaler
# was fitted on, in the same order, so extra or reordered columns in the
# test file cannot be fed to the scaler/model by accident. A missing
# training column raises a KeyError here.
X_test = df_test[X_train.columns]
X_test_scaled = scaler.transform(X_test)

# Predict using the best model on test data and threshold at 0.5
predictions = best_model.predict(X_test_scaled)
pred = (predictions > 0.5).astype(int)

# Map the 0/1 class labels to the boolean values written to the CSV
tfMap = {0: False, 1: True}

kaggleCSV = pd.DataFrame({
    'PassengerId': df_test['PassengerId'],
    'Transported': [tfMap[label] for label in pred.ravel()],
})

# to_csv does not create missing directories, so make sure it exists
os.makedirs('results', exist_ok=True)
kaggleCSV.to_csv('results/nn_results.csv', index=False)





Reloading Tuner from keras_tuner_logs\my_hyperparameter_search\tuner0.json
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
