In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.utils import get_custom_objects
import joblib

# Custom F1 metric for Keras (registered with Keras right after the definition)
def f1_metric(y_true, y_pred):
    """Compute an F1 score from rounded predictions, for use as a Keras metric.

    Predictions are rounded to 0/1, the true-positive / false-positive /
    false-negative counts are summed over axis 0 (one count per output
    column), and the resulting per-column F1 values are averaged.

    Args:
        y_true: Tensor of ground-truth labels (expected to hold 0/1 values).
        y_pred: Tensor of predicted scores; rounded to 0/1 before counting.

    Returns:
        Scalar tensor holding the mean F1 over the output columns.
    """
    # Cast first so integer or boolean labels can be multiplied with the
    # float predictions without a dtype mismatch.
    y_true = K.cast(y_true, 'float')
    y_pred = K.round(K.cast(y_pred, 'float'))

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # K.epsilon() keeps the divisions finite when a count is zero.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2 * p * r / (p + r + K.epsilon())
    return K.mean(f1)

# Register the custom metric in Keras' global custom-object registry
# under the name "f1_metric".
get_custom_objects()["f1_metric"] = f1_metric

# Global settings shared by the rest of the script
dropout_value, epochs_value = 0.5, 10000
patience_value, random_state_value = 100, 50

# Read the training set from disk
print("Loading training data...")
train_file_path = 'train.csv'  # adjust to the location of your training CSV
train_data = pd.read_csv(train_file_path)
print("Training data loaded.")

# Split into the label column and the remaining feature columns
y_train = train_data['Target']
X_train = train_data.drop('Target', axis=1)

# Fill missing feature values with the mean of their column
X_train = X_train.fillna(X_train.mean())

# Standardize the features; the fitted scaler is reused for the test data
print("Normalizing features...")
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
print("Features normalized.")

# The model factory is defined next
print("Building neural network model...")
def build_nn_model(input_dim, dropout_rate=None, learning_rate=0.001):
    """Build and compile the feed-forward binary classifier.

    The network stacks Dense(128) -> Dense(64) -> Dense(32), each followed by
    batch normalization and dropout, then Dense(16) with batch normalization,
    and a single sigmoid output unit. It is compiled with binary
    cross-entropy, the Adam optimizer and the custom ``f1_metric``.

    Args:
        input_dim: Number of input features per sample.
        dropout_rate: Dropout rate for the three dropout layers. When
            ``None`` (the default) the module-level ``dropout_value`` is used.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled ``Sequential`` model.
    """
    if dropout_rate is None:
        dropout_rate = dropout_value

    model = Sequential()
    # Declare the input explicitly; passing `input_dim` to the first Dense
    # layer is deprecated in recent Keras releases.
    model.add(tf.keras.Input(shape=(input_dim,)))

    # Hidden blocks that use dropout
    for units in (128, 64, 32):
        model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    # Last hidden block has no dropout before the output layer
    model.add(Dense(16, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=[f1_metric],
    )
    return model

# Initialize the neural network model
nn_model = build_nn_model(X_train_scaled.shape[1])

# Train the neural network model
print("Training neural network model...")
early_stopping = EarlyStopping(monitor='val_loss', patience=patience_value, restore_best_weights=True, min_delta=0.001)
model_checkpoint = ModelCheckpoint('best_nn_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001, verbose=1)

# validation_split=0.2 holds out part of the training data to compute val_loss,
# which all three callbacks monitor.
history = nn_model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=epochs_value,
    batch_size=32,
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
)

# Load the best neural network model
# The checkpoint file holds the weights saved by ModelCheckpoint
# (save_best_only=True, monitoring val_loss).
print("Loading best neural network model...")
nn_model.load_weights('best_nn_model.h5')

# Save the neural network model
print("Saving neural network model...")
nn_model_path = "neural_network_model.h5"
save_model(nn_model, nn_model_path)

# Initialize the BaggingClassifier with best parameters
print("Initializing BaggingClassifier with best parameters...")
# The wrapped estimator is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4),
# while the first positional slot works across those versions.
bagging_clf = BaggingClassifier(
    RandomForestClassifier(
        criterion='gini',
        max_depth=8,
        max_features='log2',
        n_estimators=100,
        random_state=random_state_value,
    ),
    n_estimators=10,
    random_state=random_state_value,
)

# Train the BaggingClassifier
print("Training BaggingClassifier...")
bagging_clf.fit(X_train_scaled, y_train)

# Save the BaggingClassifier model
print("Saving BaggingClassifier model...")
bagging_model_path = "bagging_classifier_model.pkl"
joblib.dump(bagging_clf, bagging_model_path)

# Load the test data
print("Loading test data...")
test_file_path = 'test.csv'  # replace with your actual file path
test_data = pd.read_csv(test_file_path)
print("Test data loaded.")

# Handle missing values in the test data
# Select exactly the training feature columns, in training order, so the
# scaler and the classifier see the layout they were fitted on.
# NOTE(review): X_train keeps every column except 'Target'; if the training
# file also carries an 'ID' column it is being used as a feature -- confirm
# that this is intended.
X_test = test_data.loc[:, X_train.columns]
# Impute with the training-set column means (not the test set's own means) so
# the test data goes through the same preprocessing the models were fitted with.
X_test = X_test.fillna(X_train.mean())

# Normalize the test data
print("Normalizing test data...")
X_test_scaled = scaler.transform(X_test)
print("Test data normalized.")

# Make predictions with the BaggingClassifier model
print("Making predictions with the BaggingClassifier model...")
y_pred_bagging = bagging_clf.predict(X_test_scaled)

# Save predictions to a CSV file
print("Saving predictions...")
output = pd.DataFrame({'ID': test_data['ID'], 'Target': y_pred_bagging})
output.to_csv('predictions.csv', index=False)
print("Predictions saved to predictions.csv.")


Loading training data...
Training data loaded.
Normalizing features...
Features normalized.
Building neural network model...
Training neural network model...
Epoch 1/10000
Epoch 1: val_loss improved from inf to 0.68983, saving model to best_nn_model.h5
Epoch 2/10000

  saving_api.save_model(



Epoch 2: val_loss did not improve from 0.68983
Epoch 3/10000
Epoch 3: val_loss did not improve from 0.68983
Epoch 4/10000
Epoch 4: val_loss did not improve from 0.68983
Epoch 5/10000
Epoch 5: val_loss did not improve from 0.68983
Epoch 6/10000
Epoch 6: val_loss did not improve from 0.68983

Epoch 6: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 7/10000
Epoch 7: val_loss did not improve from 0.68983
Epoch 8/10000
Epoch 8: val_loss did not improve from 0.68983
Epoch 9/10000
Epoch 9: val_loss did not improve from 0.68983
Epoch 10/10000
Epoch 10: val_loss did not improve from 0.68983
Epoch 11/10000
Epoch 11: val_loss did not improve from 0.68983

Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 12/10000
Epoch 12: val_loss did not improve from 0.68983
Epoch 13/10000
Epoch 13: val_loss did not improve from 0.68983
Epoch 14/10000
Epoch 14: val_loss did not improve from 0.68983
Epoch 15/10000
Epoch 15: val_loss did not improve from 0.68983
Epoch 

Epoch 30/10000
Epoch 30: val_loss did not improve from 0.68983
Epoch 31/10000
Epoch 31: val_loss did not improve from 0.68983
Epoch 32/10000
Epoch 32: val_loss did not improve from 0.68983
Epoch 33/10000
Epoch 33: val_loss did not improve from 0.68983
Epoch 34/10000
Epoch 34: val_loss did not improve from 0.68983
Epoch 35/10000
Epoch 35: val_loss did not improve from 0.68983
Epoch 36/10000
Epoch 36: val_loss did not improve from 0.68983
Epoch 37/10000
Epoch 37: val_loss did not improve from 0.68983
Epoch 38/10000
Epoch 38: val_loss did not improve from 0.68983
Epoch 39/10000
Epoch 39: val_loss did not improve from 0.68983
Epoch 40/10000
Epoch 40: val_loss did not improve from 0.68983
Epoch 41/10000
Epoch 41: val_loss did not improve from 0.68983
Epoch 42/10000
Epoch 42: val_loss did not improve from 0.68983
Epoch 43/10000
Epoch 43: val_loss did not improve from 0.68983
Epoch 44/10000
Epoch 44: val_loss improved from 0.68983 to 0.68982, saving model to best_nn_model.h5
Epoch 45/10000
Ep

Epoch 57/10000
Epoch 57: val_loss did not improve from 0.68783
Epoch 58/10000
Epoch 58: val_loss improved from 0.68783 to 0.68761, saving model to best_nn_model.h5
Epoch 59/10000
Epoch 59: val_loss did not improve from 0.68761
Epoch 60/10000
Epoch 60: val_loss improved from 0.68761 to 0.68721, saving model to best_nn_model.h5
Epoch 61/10000
Epoch 61: val_loss improved from 0.68721 to 0.68633, saving model to best_nn_model.h5
Epoch 62/10000
Epoch 62: val_loss improved from 0.68633 to 0.68630, saving model to best_nn_model.h5
Epoch 63/10000
Epoch 63: val_loss improved from 0.68630 to 0.68611, saving model to best_nn_model.h5
Epoch 64/10000
Epoch 64: val_loss improved from 0.68611 to 0.68564, saving model to best_nn_model.h5
Epoch 65/10000
Epoch 65: val_loss improved from 0.68564 to 0.68534, saving model to best_nn_model.h5
Epoch 66/10000
Epoch 66: val_loss improved from 0.68534 to 0.68518, saving model to best_nn_model.h5
Epoch 67/10000
Epoch 67: val_loss improved from 0.68518 to 0.68476

Epoch 83/10000
Epoch 83: val_loss did not improve from 0.68225
Epoch 84/10000
Epoch 84: val_loss improved from 0.68225 to 0.68208, saving model to best_nn_model.h5
Epoch 85/10000
Epoch 85: val_loss did not improve from 0.68208
Epoch 86/10000
Epoch 86: val_loss improved from 0.68208 to 0.68194, saving model to best_nn_model.h5
Epoch 87/10000
Epoch 87: val_loss did not improve from 0.68194
Epoch 88/10000
Epoch 88: val_loss did not improve from 0.68194
Epoch 89/10000
Epoch 89: val_loss improved from 0.68194 to 0.68187, saving model to best_nn_model.h5
Epoch 90/10000
Epoch 90: val_loss improved from 0.68187 to 0.68129, saving model to best_nn_model.h5
Epoch 91/10000
Epoch 91: val_loss improved from 0.68129 to 0.68031, saving model to best_nn_model.h5
Epoch 92/10000
Epoch 92: val_loss improved from 0.68031 to 0.67929, saving model to best_nn_model.h5
Epoch 93/10000
Epoch 93: val_loss improved from 0.67929 to 0.67926, saving model to best_nn_model.h5
Epoch 94/10000
Epoch 94: val_loss did not

Epoch 109/10000
Epoch 109: val_loss improved from 0.67720 to 0.67649, saving model to best_nn_model.h5
Epoch 110/10000
Epoch 110: val_loss improved from 0.67649 to 0.67626, saving model to best_nn_model.h5
Epoch 111/10000
Epoch 111: val_loss improved from 0.67626 to 0.67564, saving model to best_nn_model.h5
Epoch 112/10000
Epoch 112: val_loss did not improve from 0.67564
Epoch 113/10000
Epoch 113: val_loss did not improve from 0.67564
Epoch 114/10000
Epoch 114: val_loss did not improve from 0.67564
Epoch 115/10000
Epoch 115: val_loss improved from 0.67564 to 0.67546, saving model to best_nn_model.h5
Epoch 116/10000
Epoch 116: val_loss did not improve from 0.67546
Epoch 117/10000
Epoch 117: val_loss improved from 0.67546 to 0.67529, saving model to best_nn_model.h5
Epoch 118/10000
Epoch 118: val_loss improved from 0.67529 to 0.67513, saving model to best_nn_model.h5
Epoch 119/10000
Epoch 119: val_loss improved from 0.67513 to 0.67500, saving model to best_nn_model.h5
Epoch 120/10000
Epo

Epoch 135/10000
Epoch 135: val_loss did not improve from 0.67354
Epoch 136/10000
Epoch 136: val_loss did not improve from 0.67354
Epoch 137/10000
Epoch 137: val_loss did not improve from 0.67354
Epoch 138/10000
Epoch 138: val_loss improved from 0.67354 to 0.67345, saving model to best_nn_model.h5
Epoch 139/10000
Epoch 139: val_loss improved from 0.67345 to 0.67278, saving model to best_nn_model.h5
Epoch 140/10000
Epoch 140: val_loss did not improve from 0.67278
Epoch 141/10000
Epoch 141: val_loss improved from 0.67278 to 0.67258, saving model to best_nn_model.h5
Epoch 142/10000
Epoch 142: val_loss did not improve from 0.67258
Epoch 143/10000
Epoch 143: val_loss improved from 0.67258 to 0.67222, saving model to best_nn_model.h5
Epoch 144/10000
Epoch 144: val_loss improved from 0.67222 to 0.67210, saving model to best_nn_model.h5
Epoch 145/10000
Epoch 145: val_loss improved from 0.67210 to 0.67179, saving model to best_nn_model.h5
Epoch 146/10000
Epoch 146: val_loss did not improve from 

Epoch 161/10000
Epoch 161: val_loss did not improve from 0.66771
Epoch 162/10000
Epoch 162: val_loss did not improve from 0.66771
Epoch 163/10000
Epoch 163: val_loss did not improve from 0.66771
Epoch 164/10000
Epoch 164: val_loss improved from 0.66771 to 0.66760, saving model to best_nn_model.h5
Epoch 165/10000
Epoch 165: val_loss improved from 0.66760 to 0.66759, saving model to best_nn_model.h5
Epoch 166/10000
Epoch 166: val_loss did not improve from 0.66759
Epoch 167/10000
Epoch 167: val_loss did not improve from 0.66759
Epoch 168/10000
Epoch 168: val_loss did not improve from 0.66759
Epoch 169/10000
Epoch 169: val_loss did not improve from 0.66759
Epoch 170/10000
Epoch 170: val_loss improved from 0.66759 to 0.66747, saving model to best_nn_model.h5
Epoch 171/10000
Epoch 171: val_loss did not improve from 0.66747
Epoch 172/10000
Epoch 172: val_loss did not improve from 0.66747
Epoch 173/10000
Epoch 173: val_loss did not improve from 0.66747
Epoch 174/10000
Epoch 174: val_loss did n

Epoch 187/10000
Epoch 187: val_loss improved from 0.66478 to 0.66457, saving model to best_nn_model.h5
Epoch 188/10000
Epoch 188: val_loss improved from 0.66457 to 0.66456, saving model to best_nn_model.h5
Epoch 189/10000
Epoch 189: val_loss improved from 0.66456 to 0.66447, saving model to best_nn_model.h5
Epoch 190/10000
Epoch 190: val_loss improved from 0.66447 to 0.66394, saving model to best_nn_model.h5
Epoch 191/10000
Epoch 191: val_loss did not improve from 0.66394
Epoch 192/10000
Epoch 192: val_loss did not improve from 0.66394
Epoch 193/10000
Epoch 193: val_loss did not improve from 0.66394
Epoch 194/10000
Epoch 194: val_loss improved from 0.66394 to 0.66379, saving model to best_nn_model.h5
Epoch 195/10000
Epoch 195: val_loss improved from 0.66379 to 0.66321, saving model to best_nn_model.h5
Epoch 196/10000
Epoch 196: val_loss improved from 0.66321 to 0.66277, saving model to best_nn_model.h5
Epoch 197/10000
Epoch 197: val_loss improved from 0.66277 to 0.66267, saving model t

Epoch 213: val_loss improved from 0.66015 to 0.65969, saving model to best_nn_model.h5
Epoch 214/10000
Epoch 214: val_loss improved from 0.65969 to 0.65938, saving model to best_nn_model.h5
Epoch 215/10000
Epoch 215: val_loss did not improve from 0.65938
Epoch 216/10000
Epoch 216: val_loss improved from 0.65938 to 0.65909, saving model to best_nn_model.h5
Epoch 217/10000
Epoch 217: val_loss did not improve from 0.65909
Epoch 218/10000
Epoch 218: val_loss did not improve from 0.65909
Epoch 219/10000
Epoch 219: val_loss did not improve from 0.65909
Epoch 220/10000
Epoch 220: val_loss did not improve from 0.65909
Epoch 221/10000
Epoch 221: val_loss did not improve from 0.65909
Epoch 222/10000
Epoch 222: val_loss did not improve from 0.65909
Epoch 223/10000
Epoch 223: val_loss did not improve from 0.65909
Epoch 224/10000
Epoch 224: val_loss did not improve from 0.65909
Epoch 225/10000
Epoch 225: val_loss improved from 0.65909 to 0.65889, saving model to best_nn_model.h5
Epoch 226/10000
Epo

Epoch 240/10000
Epoch 240: val_loss did not improve from 0.65800
Epoch 241/10000
Epoch 241: val_loss did not improve from 0.65800
Epoch 242/10000
Epoch 242: val_loss did not improve from 0.65800
Epoch 243/10000
Epoch 243: val_loss did not improve from 0.65800
Epoch 244/10000
Epoch 244: val_loss did not improve from 0.65800
Epoch 245/10000
Epoch 245: val_loss did not improve from 0.65800
Epoch 246/10000
Epoch 246: val_loss did not improve from 0.65800
Epoch 247/10000
Epoch 247: val_loss did not improve from 0.65800
Epoch 248/10000
Epoch 248: val_loss improved from 0.65800 to 0.65788, saving model to best_nn_model.h5
Epoch 249/10000
Epoch 249: val_loss improved from 0.65788 to 0.65770, saving model to best_nn_model.h5
Epoch 250/10000
Epoch 250: val_loss improved from 0.65770 to 0.65744, saving model to best_nn_model.h5
Epoch 251/10000
Epoch 251: val_loss did not improve from 0.65744
Epoch 252/10000
Epoch 252: val_loss did not improve from 0.65744
Epoch 253/10000
Epoch 253: val_loss did n

Epoch 267: val_loss did not improve from 0.65744
Epoch 268/10000
Epoch 268: val_loss did not improve from 0.65744
Epoch 269/10000
Epoch 269: val_loss did not improve from 0.65744
Epoch 270/10000
Epoch 270: val_loss did not improve from 0.65744
Epoch 271/10000
Epoch 271: val_loss did not improve from 0.65744
Epoch 272/10000
Epoch 272: val_loss did not improve from 0.65744
Epoch 273/10000
Epoch 273: val_loss did not improve from 0.65744
Epoch 274/10000
Epoch 274: val_loss did not improve from 0.65744
Epoch 275/10000
Epoch 275: val_loss did not improve from 0.65744
Epoch 276/10000
Epoch 276: val_loss did not improve from 0.65744
Epoch 277/10000
Epoch 277: val_loss did not improve from 0.65744
Epoch 278/10000
Epoch 278: val_loss did not improve from 0.65744
Epoch 279/10000
Epoch 279: val_loss did not improve from 0.65744
Epoch 280/10000
Epoch 280: val_loss did not improve from 0.65744
Epoch 281/10000
Epoch 281: val_loss did not improve from 0.65744
Epoch 282/10000
Epoch 282: val_loss did n

Epoch 295/10000
Epoch 295: val_loss did not improve from 0.65744
Epoch 296/10000
Epoch 296: val_loss did not improve from 0.65744
Epoch 297/10000
Epoch 297: val_loss did not improve from 0.65744
Epoch 298/10000
Epoch 298: val_loss did not improve from 0.65744
Epoch 299/10000
Epoch 299: val_loss did not improve from 0.65744
Epoch 300/10000
Epoch 300: val_loss did not improve from 0.65744
Epoch 301/10000
Epoch 301: val_loss did not improve from 0.65744
Epoch 302/10000
Epoch 302: val_loss did not improve from 0.65744
Epoch 303/10000
Epoch 303: val_loss did not improve from 0.65744
Epoch 304/10000
Epoch 304: val_loss did not improve from 0.65744
Epoch 305/10000
Epoch 305: val_loss did not improve from 0.65744
Epoch 306/10000
Epoch 306: val_loss did not improve from 0.65744
Epoch 307/10000
Epoch 307: val_loss did not improve from 0.65744
Epoch 308/10000
Epoch 308: val_loss did not improve from 0.65744
Epoch 309/10000
Epoch 309: val_loss did not improve from 0.65744
Epoch 310/10000
Epoch 310

Epoch 323/10000
Epoch 323: val_loss did not improve from 0.65744
Epoch 324/10000
Epoch 324: val_loss did not improve from 0.65744
Epoch 325/10000
Epoch 325: val_loss did not improve from 0.65744
Epoch 326/10000
Epoch 326: val_loss did not improve from 0.65744
Epoch 327/10000
Epoch 327: val_loss did not improve from 0.65744
Epoch 328/10000
Epoch 328: val_loss did not improve from 0.65744
Epoch 329/10000
Epoch 329: val_loss did not improve from 0.65744
Epoch 330/10000
Epoch 330: val_loss did not improve from 0.65744
Epoch 331/10000
Epoch 331: val_loss did not improve from 0.65744
Epoch 332/10000
Epoch 332: val_loss did not improve from 0.65744
Epoch 333/10000
Epoch 333: val_loss did not improve from 0.65744
Loading best neural network model...
Saving neural network model...
Initializing BaggingClassifier with best parameters...
Training BaggingClassifier...


  save_model(nn_model, nn_model_path)


Saving BaggingClassifier model...
Loading test data...
Test data loaded.
Normalizing test data...
Test data normalized.
Making predictions with the BaggingClassifier model...
Saving predictions...
Predictions saved to predictions.csv.
