In [1]:
# Preprocessing - Scale data
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from kerastuner.tuners import BayesianOptimization

# Retrieve Training and Testing Data
df_train = pd.read_csv('data/trainClean.csv')
df_test = pd.read_csv('data/testClean.csv')

X_train = df_train.drop(columns='Transported')
y_train = df_train['Transported']

# Preprocessing - Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Define hypermodel for hyperparameter tuning
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=13, activation='relu', input_shape=(X_train.shape[1],)))
    
    # Add variable number of hidden layers and their sizes
    for i in range(hp.Int('num_hidden_layers', 1, 10)):
        model.add(Dense(units=hp.Int(f'layer_{i}_units', min_value=2, max_value=512, step=2), activation='relu'))
        model.add(tf.keras.layers.Dropout(hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.01)))
    
    model.add(Dense(units=1, activation='sigmoid'))
    
    # Compile the model

    model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])
    
    return model

# Instantiate the BayesianOptimization tuner
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=1000,
    directory='keras_tuner_logs',
    project_name='my_hyperparameter_search',
    distribution_strategy=tf.distribute.MirroredStrategy()
)


# Perform the hyperparameter search with Early Stopping callback
tuner.search(X_train_scaled, y_train, epochs=30, validation_split=0.2, batch_size=32)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters()[0]

#Print the best hyperparameters
print(best_hps.get_config())

# Build the model using the best hyperparameters
best_model = build_model(best_hps)

# Train the best model using all training data
best_model.fit(X_train_scaled, y_train, epochs=30, batch_size=32)

# Prepare test data for predictions
X_test_scaled = scaler.transform(df_test)

# Predict using the best model on test data
predictions = best_model.predict(X_test_scaled)
pred = (predictions > 0.5).astype(int)

tfMap = {0: False, 1: True}

kaggleCSV = pd.DataFrame(columns=['PassengerId', 'Transported'])
kaggleCSV['PassengerId'] = df_test['PassengerId']
kaggleCSV['Transported'] = [tfMap[i[0]] for i in pred]

# Save predictions to a CSV file
kaggleCSV.to_csv('results/nn_results.csv', index=False)





Trial 1000 Complete [00h 00m 14s]
val_accuracy: 0.7809085845947266

Best val_accuracy So Far: 0.8113858699798584
Total elapsed time: 05h 40m 32s
{'space': [{'class_name': 'Int', 'config': {'name': 'num_hidden_layers', 'default': None, 'conditions': [], 'min_value': 1, 'max_value': 10, 'step': 1, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'layer_0_units', 'default': None, 'conditions': [], 'min_value': 2, 'max_value': 512, 'step': 2, 'sampling': 'linear'}}, {'class_name': 'Float', 'config': {'name': 'dropout_rate', 'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.01, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'layer_1_units', 'default': None, 'conditions': [], 'min_value': 2, 'max_value': 512, 'step': 2, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'layer_2_units', 'default': None, 'conditions': [], 'min_value': 2, 'max_value': 512, 'step': 2, 'sampling': 'linear'}}, {'class_name': 'Int', 'con

In [4]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Retrieve Training and Testing Data 
df_train = pd.read_csv('data/trainClean.csv')
df_test = pd.read_csv('data/testClean.csv')

X_train = df_train.drop(columns='Transported')
y_train = df_train['Transported']

nn = MLPClassifier()
nn.fit(X_train, y_train)

pred = nn.predict(df_test)
pred

tfMap = { 0: False, 1: True }

kaggleCSV = pd.DataFrame(columns=['PassengerId', 'Transported'])
kaggleCSV['PassengerId'] = df_test['PassengerId']
kaggleCSV['Transported'] = [tfMap[i] for i in pred]

kaggleCSV.to_csv('results/nn_default_results.csv', index=False)


