In [25]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from scipy.stats import expon, reciprocal
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow import keras
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import deap
import skopt
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import save_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l1, l2, l1_l2
from sklearn.model_selection import ParameterGrid

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
import joblib

def preprocess_data(file_path, is_train=True):
    df = pd.read_csv(file_path)
    
    # Convert binary categorical features to 0 and 1
    binary_features = ['CryoSleep', 'VIP']
    df[binary_features] = df[binary_features].astype(bool).astype(int)
    
    # Feature Engineering
    df['TotalSpending'] = df[['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']].sum(axis=1)
    
    # Conditionally set spending-related features to 0 for passengers in cryosleep
    spending_features = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
    df.loc[df['CryoSleep'] == 1, spending_features] = 0
    
    # Create interaction features
    df['HomePlanet_TotalSpending'] = df['HomePlanet'].astype(str) + '_' + df['TotalSpending'].astype(str)
    df['Destination_TotalSpending'] = df['Destination'].astype(str) + '_' + df['TotalSpending'].astype(str)
    
    # Extract components from 'Cabin'
    if 'Cabin' in df.columns:
        df[['Cabin_Deck', 'Cabin_Number', 'Cabin_Side']] = df['Cabin'].str.split('/', expand=True)
        df['Cabin_Side'] = df['Cabin_Side'].map({'P': 1, 'S': 0})
        df['Cabin_Number'] = pd.to_numeric(df['Cabin_Number'], errors='coerce')
        df.drop('Cabin', axis=1, inplace=True)
    
    # One-hot encode multi-category features
    multi_cat_features = ['HomePlanet', 'Destination']
    if is_train:
        one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
        encoded_features = one_hot_encoder.fit_transform(df[multi_cat_features])
        encoded_feature_names = one_hot_encoder.get_feature_names_out(multi_cat_features)
        joblib.dump(one_hot_encoder, 'one_hot_encoder.pkl')
    else:
        one_hot_encoder = joblib.load('one_hot_encoder.pkl')
        encoded_features = one_hot_encoder.transform(df[multi_cat_features])
        encoded_feature_names = one_hot_encoder.get_feature_names_out(multi_cat_features)
    
    encoded_features_df = pd.DataFrame(encoded_features.toarray(), columns=encoded_feature_names)
    df = pd.concat([df, encoded_features_df], axis=1)
    df.drop(multi_cat_features, axis=1, inplace=True)
    
    # Imputation and Scaling
    numeric_features = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Cabin_Number', 'Cabin_Side', 'TotalSpending']
    if is_train:
        imputer = KNNImputer(n_neighbors=5)
        scaler = StandardScaler()
        df[numeric_features] = imputer.fit_transform(df[numeric_features])
        df[numeric_features] = scaler.fit_transform(df[numeric_features])
        joblib.dump(imputer, 'imputer.pkl')
        joblib.dump(scaler, 'scaler.pkl')
    else:
        imputer = joblib.load('imputer.pkl')
        scaler = joblib.load('scaler.pkl')
        df[numeric_features] = imputer.transform(df[numeric_features])
        df[numeric_features] = scaler.transform(df[numeric_features])
    
    if is_train:
        # Convert 'Transported' to integer (True=1, False=0) for modeling
        df['Transported'] = df['Transported'].astype(int)
        
        # Save the list of features used for training
        train_features = [col for col in df.columns if col not in ['PassengerId', 'Name', 'Transported', 'Cabin_Deck', 'HomePlanet_TotalSpending', 'Destination_TotalSpending']]
        joblib.dump(train_features, 'train_features.pkl')
    
    return df

In [29]:
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args

train_df = preprocess_data('csv_files/train.csv', is_train=True)
test_df = preprocess_data('csv_files/test.csv', is_train=False)

# Function to create the model
def create_model(input_shape, layers, activation, dropout_rate, learning_rate):
    model = Sequential()
    model.add(Dense(layers[0], activation=activation, input_shape=(input_shape,)))
    model.add(Dropout(dropout_rate))
    
    for layer_size in layers[1:]:
        model.add(Dense(layer_size, activation=activation))
        model.add(Dropout(dropout_rate))
    
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

def train_and_evaluate(model, X_train, y_train, X_val, y_val, batch_size, epochs, class_weight):
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, 
              validation_data=(X_val, y_val), class_weight=class_weight, 
              callbacks=[early_stopping])
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    return val_loss, val_acc

# Prepare features and target for the model
features = [col for col in train_df.columns if col not in ['PassengerId', 'Name', 'Transported', 'Cabin_Deck', 'HomePlanet_TotalSpending', 'Destination_TotalSpending']]
X = train_df[features]
y = train_df['Transported']

# Splitting the dataset
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Handling class imbalance
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_smote),
    y=y_train_smote)
class_weight_dict = dict(enumerate(class_weights))

# Define the hyperparameter search space
dim_layers = Integer(low=1, high=4, name='layers')
dim_units = Integer(low=32, high=512, name='units')
dim_activation = Categorical(categories=['relu', 'tanh'], name='activation')
dim_dropout = Real(low=0.1, high=0.5, prior='uniform', name='dropout_rate')
dim_learning_rate = Real(low=1e-4, high=1e-2, prior='log-uniform', name='learning_rate')

dimensions = [dim_layers, dim_units, dim_activation, dim_dropout, dim_learning_rate]

@use_named_args(dimensions=dimensions)
def fitness(layers, units, activation, dropout_rate, learning_rate):
    layers = [units] * layers
    model = create_model(input_shape=X_train_smote.shape[1], layers=layers, 
                         activation=activation, dropout_rate=dropout_rate, learning_rate=learning_rate)
    val_loss, val_acc = train_and_evaluate(model, X_train_smote, y_train_smote, X_val, y_val, 
                                           batch_size=32, epochs=50, class_weight=class_weight_dict)
    return -val_acc  # Minimize the negative of validation accuracy

# Perform Bayesian Optimization
n_calls = 20
best_result = gp_minimize(func=fitness, dimensions=dimensions, n_calls=n_calls, 
                          random_state=42, verbose=True)

# Retrieve the best hyperparameters
best_params = {
    'layers': [best_result.x[1]] * best_result.x[0],
    'activation': best_result.x[2],
    'dropout_rate': best_result.x[3],
    'learning_rate': best_result.x[4]
}
best_val_acc = -best_result.fun

# Train the final model with the best hyperparameters
model_enhanced = create_model(input_shape=X_train_smote.shape[1], **best_params)

# Combine the original training and validation sets for final training
X_full, y_full = smote.fit_resample(X, y)
model_enhanced.fit(X_full, y_full, batch_size=32, epochs=100, class_weight=class_weight_dict)

print(f"Best Validation Accuracy: {best_val_acc}")
print(f"Best Hyperparameters: {best_params}")

Iteration No: 1 started. Evaluating function at random point.
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944us/step - accuracy: 0.7050 - loss: 0.5458 - val_accuracy: 0.7671 - val_loss: 0.4743
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 623us/step - accuracy: 0.7865 - loss: 0.4575 - val_accuracy: 0.7734 - val_loss: 0.4645
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 609us/step - accuracy: 0.7736 - loss: 0.4714 - val_accuracy: 0.7809 - val_loss: 0.4599
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 604us/step - accuracy: 0.7759 - loss: 0.4653 - val_accuracy: 0.7717 - val_loss: 0.4713
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 774us/step - accuracy: 0.7862 - loss: 0.4517 - val_accuracy: 0.7775 - val_loss: 0.4651
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 721us/step - accuracy: 0.7760 - loss: 0.4586 - val_accuracy: 0.7700 - val_loss: 0.4639
Epoch 7/50
[1m219/219[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597us/step - accuracy: 0.5174 - loss: 0.7565 - val_accuracy: 0.7303 - val_loss: 0.5704
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348us/step - accuracy: 0.6795 - loss: 0.5973 - val_accuracy: 0.7568 - val_loss: 0.5128
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step - accuracy: 0.7352 - loss: 0.5359 - val_accuracy: 0.7596 - val_loss: 0.4886
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341us/step - accuracy: 0.7638 - loss: 0.4947 - val_accuracy: 0.7585 - val_loss: 0.4768
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345us/step - accuracy: 0.7684 - loss: 0.4941 - val_accuracy: 0.7608 - val_loss: 0.4714
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step - accuracy: 0.7615 - loss: 0.4858 - val_accuracy: 0.7648 - val_loss: 0.4678
Epoch 7/50
[1m219/219[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - accuracy: 0.6888 - loss: 0.5863 - val_accuracy: 0.7556 - val_loss: 0.4771
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 503us/step - accuracy: 0.7841 - loss: 0.4585 - val_accuracy: 0.7711 - val_loss: 0.4704
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 485us/step - accuracy: 0.7751 - loss: 0.4595 - val_accuracy: 0.7700 - val_loss: 0.4643
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492us/step - accuracy: 0.7910 - loss: 0.4490 - val_accuracy: 0.7688 - val_loss: 0.4626
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469us/step - accuracy: 0.7935 - loss: 0.4354 - val_accuracy: 0.7723 - val_loss: 0.4623
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471us/step - accuracy: 0.7852 - loss: 0.4527 - val_accuracy: 0.7711 - val_loss: 0.4618
Epoch 7/50
[1m219/219[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7242 - loss: 0.6294 - val_accuracy: 0.7775 - val_loss: 0.5156
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7508 - loss: 0.5714 - val_accuracy: 0.7338 - val_loss: 0.6498
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7456 - loss: 0.5890 - val_accuracy: 0.7757 - val_loss: 0.4665
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7473 - loss: 0.5934 - val_accuracy: 0.7395 - val_loss: 0.5008
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7395 - loss: 0.5887 - val_accuracy: 0.7746 - val_loss: 0.5960
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7711 - loss: 0.5421 - val_accuracy: 0.7619 - val_loss: 0.4995
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7436 - loss: 0.5163 - val_accuracy: 0.7769 - val_loss: 0.4463
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7820 - loss: 0.4492 - val_accuracy: 0.7815 - val_loss: 0.4437
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7969 - loss: 0.4362 - val_accuracy: 0.7867 - val_loss: 0.4366
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7937 - loss: 0.4283 - val_accuracy: 0.7780 - val_loss: 0.4385
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7893 - loss: 0.4386 - val_accuracy: 0.7740 - val_loss: 0.4377
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8028 - loss: 0.4236 - val_accuracy: 0.7763 - val_loss: 0.4282
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 749us/step - accuracy: 0.5614 - loss: 0.6783 - val_accuracy: 0.7568 - val_loss: 0.5261
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 494us/step - accuracy: 0.7666 - loss: 0.5010 - val_accuracy: 0.7579 - val_loss: 0.4817
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 483us/step - accuracy: 0.7921 - loss: 0.4558 - val_accuracy: 0.7637 - val_loss: 0.4699
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501us/step - accuracy: 0.7820 - loss: 0.4556 - val_accuracy: 0.7671 - val_loss: 0.4672
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step - accuracy: 0.7907 - loss: 0.4374 - val_accuracy: 0.7677 - val_loss: 0.4648
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 514us/step - accuracy: 0.7813 - loss: 0.4480 - val_accuracy: 0.7677 - val_loss: 0.4635
Epoch 7/50
[1m219/219[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6619 - loss: 0.6088 - val_accuracy: 0.7660 - val_loss: 0.4821
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7542 - loss: 0.4997 - val_accuracy: 0.7688 - val_loss: 0.4706
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7602 - loss: 0.4912 - val_accuracy: 0.7711 - val_loss: 0.4749
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7630 - loss: 0.4963 - val_accuracy: 0.7740 - val_loss: 0.4694
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7801 - loss: 0.4702 - val_accuracy: 0.7717 - val_loss: 0.4710
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7739 - loss: 0.4809 - val_accuracy: 0.7740 - val_loss: 0.4692
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7383 - loss: 0.5242 - val_accuracy: 0.7545 - val_loss: 0.4889
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7771 - loss: 0.4765 - val_accuracy: 0.7757 - val_loss: 0.4786
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7689 - loss: 0.4884 - val_accuracy: 0.7711 - val_loss: 0.4878
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7813 - loss: 0.4702 - val_accuracy: 0.7723 - val_loss: 0.4698
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7876 - loss: 0.4659 - val_accuracy: 0.7752 - val_loss: 0.4748
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7833 - loss: 0.4667 - val_accuracy: 0.7723 - val_loss: 0.4681
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7140 - loss: 0.7850 - val_accuracy: 0.7269 - val_loss: 0.5417
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7447 - loss: 0.5488 - val_accuracy: 0.7148 - val_loss: 0.6008
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7553 - loss: 0.5331 - val_accuracy: 0.7706 - val_loss: 0.4610
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7587 - loss: 0.5311 - val_accuracy: 0.7637 - val_loss: 0.4783
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7560 - loss: 0.5204 - val_accuracy: 0.6774 - val_loss: 0.6868
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7465 - loss: 0.5701 - val_accuracy: 0.7240 - val_loss: 0.5618
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7658 - loss: 0.4887 - val_accuracy: 0.7665 - val_loss: 0.4744
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7823 - loss: 0.4581 - val_accuracy: 0.7665 - val_loss: 0.4627
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7891 - loss: 0.4494 - val_accuracy: 0.7780 - val_loss: 0.4668
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7922 - loss: 0.4450 - val_accuracy: 0.7665 - val_loss: 0.4653
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7960 - loss: 0.4312 - val_accuracy: 0.7711 - val_loss: 0.4817
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7976 - loss: 0.4273 - val_accuracy: 0.7786 - val_loss: 0.4510
Epoch 7/50
[1m219/219[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 619us/step - accuracy: 0.7553 - loss: 0.4942 - val_accuracy: 0.7775 - val_loss: 0.4609
Epoch 2/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 374us/step - accuracy: 0.7948 - loss: 0.4438 - val_accuracy: 0.7723 - val_loss: 0.4653
Epoch 3/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367us/step - accuracy: 0.7966 - loss: 0.4394 - val_accuracy: 0.7763 - val_loss: 0.4618
Epoch 4/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370us/step - accuracy: 0.7927 - loss: 0.4393 - val_accuracy: 0.7746 - val_loss: 0.4625
Epoch 5/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375us/step - accuracy: 0.7881 - loss: 0.4367 - val_accuracy: 0.7752 - val_loss: 0.4566
Epoch 6/50
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363us/step - accuracy: 0.7890 - loss: 0.4485 - val_accuracy: 0.7786 - val_loss: 0.4555
Epoch 7/50
[1m219/219[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 360us/step - accuracy: 0.7552 - loss: 0.4879
Epoch 2/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328us/step - accuracy: 0.7830 - loss: 0.4603
Epoch 3/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423us/step - accuracy: 0.7850 - loss: 0.4599
Epoch 4/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327us/step - accuracy: 0.7780 - loss: 0.4569
Epoch 5/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322us/step - accuracy: 0.7853 - loss: 0.4473
Epoch 6/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320us/step - accuracy: 0.7885 - loss: 0.4440
Epoch 7/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322us/step - accuracy: 0.7942 - loss: 0.4416
Epoch 8/100
[1m274/274[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319us/step - accuracy: 0.7945 - loss: 0.4341
Epoch 9/100
[1m274/274[0m 

Run 1:
Best Validation Accuracy: 0.7998849749565125
Best Hyperparameters: {'layers': [379], 'activation': 'tanh', 'dropout_rate': 0.10031150633640573, 'learning_rate': 0.009647685075720105}

In [30]:
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
import joblib


# Preprocess the test data using the same steps as the training data
test_df = preprocess_data('csv_files/test.csv', is_train=False)

# Load the features used during training
train_features = joblib.load('train_features.pkl')

# Select the features used by the model, ensuring they are the same as those used in training
features = [col for col in train_features if col in test_df.columns]

# Ensure that X_test is a DataFrame with the correct numerical types
X_test = test_df[features].copy()

# Convert X_test to a NumPy array, which is the expected format for TensorFlow models
X_test_np = X_test.values

# Predict using the enhanced model
y_pred_test_proba = model_enhanced.predict(X_test_np)
y_pred_test = (y_pred_test_proba > 0.5).astype(int).flatten()

# Prepare the submission dataframe
submission_df = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Transported': y_pred_test
})

# Convert predictions back to boolean (True/False) if necessary
submission_df['Transported'] = submission_df['Transported'].astype(bool)

# Save the submission file
submission_df.to_csv('neuralnetbaye.csv', index=False)

[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293us/step
