In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.utils import get_custom_objects
from sklearn.base import BaseEstimator, ClassifierMixin
import joblib

# Register custom metric with Keras
def f1_metric(y_true, y_pred):
    """Return an F1 score computed from rounded predictions.

    ``y_pred`` is rounded with ``K.round`` so the counts below are based on
    hard 0/1 decisions. True positives, false positives and false negatives
    are summed along axis 0, turned into precision and recall, and the
    resulting F1 values are averaged into a single scalar.

    Args:
        y_true: Ground-truth labels. Presumably 0/1 values with a dtype and
            shape compatible with ``y_pred`` (confirm against the caller).
        y_pred: Model outputs; rounded to the nearest integer before use.

    Returns:
        The mean of the F1 values as a backend tensor.
    """
    # NOTE(review): when used as a training metric this is evaluated on
    # whatever tensors Keras passes in (likely one batch at a time), so the
    # reported value may differ from F1 over the full dataset -- confirm.
    y_pred = K.round(y_pred)
    tp = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)
    # True negatives are not part of the F1 formula, so they are not computed.

    # K.epsilon() keeps the divisions finite when a count is zero.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2 * p * r / (p + r + K.epsilon())
    return K.mean(f1)

# Make f1_metric resolvable by name through Keras' custom-object registry.
get_custom_objects()["f1_metric"] = f1_metric

# Global Variables
dropout_value = 0.5        # dropout rate used by build_nn_model
epochs_value = 10000       # upper bound on training epochs for the network
patience_value = 100       # early-stopping patience, in epochs
random_state_value = 50    # seed shared by the scikit-learn style estimators

# Load the training data
print("Loading training data...")
train_file_path = 'train.csv'  # replace with your actual file path
train_data = pd.read_csv(train_file_path)
print("Training data loaded.")

# Separate features and target variable
y_train = train_data['Target']
X_train = train_data.drop('Target', axis=1)

# Handle missing values: every gap is replaced by its column's mean
column_means = X_train.mean()
X_train = X_train.fillna(column_means)

# Normalize the features
print("Normalizing features...")
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
print("Features normalized.")

# Build the neural network model
print("Building neural network model...")
def build_nn_model(input_dim):
    """Build and compile the feed-forward network used in this script.

    The network stacks four ReLU Dense layers (128, 64, 32 and 16 units),
    each followed by BatchNormalization; the first three are additionally
    followed by Dropout at the module-level ``dropout_value`` rate. A single
    sigmoid unit forms the output.

    Args:
        input_dim: Number of input features, passed to the first Dense layer.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss,
        Adam with a learning rate of 0.001 and ``f1_metric`` as its metric.
    """
    # (units, followed_by_dropout) for each hidden layer, in order.
    hidden_plan = ((128, True), (64, True), (32, True), (16, False))

    net = Sequential()
    for position, (units, with_dropout) in enumerate(hidden_plan):
        # Only the first layer declares the input dimensionality.
        first_layer_kwargs = {'input_dim': input_dim} if position == 0 else {}
        net.add(Dense(units, activation='relu', **first_layer_kwargs))
        net.add(BatchNormalization())
        if with_dropout:
            net.add(Dropout(dropout_value))

    net.add(Dense(1, activation='sigmoid'))
    net.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=[f1_metric],
    )
    return net

# Initialize the neural network model
# Seed the Python, NumPy and TensorFlow random generators before the model is
# built so that weight initialisation, dropout and batch shuffling are
# repeatable, in line with the fixed random_state given to the other estimators.
tf.keras.utils.set_random_seed(random_state_value)
nn_model = build_nn_model(X_train_scaled.shape[1])

# Train the neural network model
print("Training neural network model...")
# Stop once val_loss has not improved by at least min_delta for
# patience_value epochs, rolling back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=patience_value, restore_best_weights=True, min_delta=0.001)
# Persist the model every time val_loss reaches a new minimum.
model_checkpoint = ModelCheckpoint('best_nn_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
# Multiply the learning rate by 0.2 after 5 stagnant epochs, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001, verbose=1)

# validation_split=0.2 holds out part of the training arrays for the
# val_loss that all three callbacks monitor.
history = nn_model.fit(X_train_scaled, y_train, validation_split=0.2, epochs=epochs_value, batch_size=32, callbacks=[early_stopping, model_checkpoint, reduce_lr])

# Load the best neural network model
print("Loading best neural network model...")
# The checkpoint has no min_delta, so it can be slightly better than the
# weights EarlyStopping restored; reload it explicitly.
nn_model.load_weights('best_nn_model.h5')

# Create a wrapper for the neural network model to use it in the StackingClassifier
class NeuralNetworkClassifier(ClassifierMixin, BaseEstimator):
    """Expose a compiled binary Keras model through the scikit-learn API.

    The mixin is listed before ``BaseEstimator`` because scikit-learn requires
    mixins to come first in the MRO; with the reverse order, recent releases
    do not pick up the classifier tag and meta-estimators such as
    ``StackingClassifier`` may refuse the wrapper.

    Args:
        model: A compiled Keras model whose ``predict`` yields one
            probability per sample (the positive-class probability).
    """

    def __init__(self, model):
        self.model = model
        # Kept in __init__ so a wrapper around an already-trained model can
        # be used for prediction without calling fit() first.
        self.classes_ = [0, 1]  # Add the classes_ attribute

    def fit(self, X, y, epochs=100, batch_size=32, verbose=0):
        """Train the wrapped model on ``X``/``y`` and return ``self``.

        Training continues from whatever weights the wrapped model currently
        holds; the model is not re-initialised here.
        """
        self.model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose)
        return self

    def predict(self, X):
        """Return hard 0/1 labels: 1 where the model output exceeds 0.5."""
        return (self.model.predict(X) > 0.5).astype(int).reshape(-1)

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array of ``[P(class 0), P(class 1)]``."""
        # Force a column vector so the stacking below yields two columns even
        # if the model returns a flat (n_samples,) array.
        probas = np.asarray(self.model.predict(X)).reshape(-1, 1)
        return np.hstack(((1 - probas), probas))

# Initialize the classifiers
print("Initializing classifiers...")
# NOTE(review): nn_model has already been trained on the full training set
# above; if StackingClassifier's internal copies keep those weights, the
# cross-validated predictions of the 'nn' member may be optimistic -- confirm.
nn_wrapper = NeuralNetworkClassifier(nn_model)

# Base learners whose predictions feed the final estimator. Every estimator
# that accepts a seed receives random_state_value.
base_classifiers = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=random_state_value)),
    ('knn', KNeighborsClassifier()),
    ('svc', SVC(probability=True, random_state=random_state_value)),
    ('adb', AdaBoostClassifier(random_state=random_state_value)),
    ('gbc', GradientBoostingClassifier(random_state=random_state_value)),
    ('etc', ExtraTreesClassifier(random_state=random_state_value)),
    ('xgb', XGBClassifier(random_state=random_state_value)),
    ('cat', CatBoostClassifier(verbose=0, random_state=random_state_value)),
    ('dt', DecisionTreeClassifier(random_state=random_state_value)),
    ('nn', nn_wrapper)
]

# Logistic regression combines the base learners' 5-fold predictions.
stacking_clf = StackingClassifier(
    estimators=base_classifiers,
    final_estimator=LogisticRegression(),
    cv=5
)

# Train the StackingClassifier on the entire training data
print("Training StackingClassifier on the entire training data...")
stacking_clf.fit(X_train_scaled, y_train)

# Save the StackingClassifier model
print("Saving StackingClassifier model...")
stacking_model_path = "stacking_classifier_model.pkl"
joblib.dump(stacking_clf, stacking_model_path)

# Load the test data
print("Loading test data...")
test_file_path = 'test.csv'  # replace with your actual file path
test_data = pd.read_csv(test_file_path)
print("Test data loaded.")

# Handle missing values in the test data
# Use exactly the columns the scaler was fitted on (in the same order) and
# fill gaps with the TRAINING means, so test rows go through the same
# preprocessing the models were fitted with rather than statistics derived
# from the test set itself. A missing feature column raises KeyError here.
# NOTE(review): 'ID' is read from test_data below; if it is also one of the
# training columns it is being used as a model feature -- confirm intended.
feature_columns = X_train.columns
X_test_final = test_data[feature_columns].fillna(X_train.mean())

# Normalize the test data
print("Normalizing test data...")
X_test_final_scaled = scaler.transform(X_test_final)
print("Test data normalized.")

# Make predictions with the StackingClassifier model
print("Making predictions with the StackingClassifier model...")
y_pred_final = stacking_clf.predict(X_test_final_scaled)

# Save predictions to a CSV file
print("Saving predictions...")
output = pd.DataFrame({'ID': test_data['ID'], 'Target': y_pred_final})
output.to_csv('predictions.csv', index=False)
print("Predictions saved to predictions.csv.")


Loading training data...
Training data loaded.
Normalizing features...
Features normalized.
Building neural network model...
Training neural network model...
Epoch 1/10000
Epoch 1: val_loss improved from inf to 0.68594, saving model to best_nn_model.h5
Epoch 2/10000
Epoch 2: val_loss improved from 0.68594 to 0.68243, saving model to best_nn_model.h5


  saving_api.save_model(


Epoch 3/10000
Epoch 3: val_loss improved from 0.68243 to 0.68173, saving model to best_nn_model.h5
Epoch 4/10000
Epoch 4: val_loss improved from 0.68173 to 0.67990, saving model to best_nn_model.h5
Epoch 5/10000
Epoch 5: val_loss improved from 0.67990 to 0.67903, saving model to best_nn_model.h5
Epoch 6/10000
Epoch 6: val_loss did not improve from 0.67903
Epoch 7/10000
Epoch 7: val_loss improved from 0.67903 to 0.67784, saving model to best_nn_model.h5
Epoch 8/10000
Epoch 8: val_loss did not improve from 0.67784
Epoch 9/10000
Epoch 9: val_loss did not improve from 0.67784
Epoch 10/10000
Epoch 10: val_loss did not improve from 0.67784
Epoch 11/10000
Epoch 11: val_loss did not improve from 0.67784
Epoch 12/10000
Epoch 12: val_loss did not improve from 0.67784

Epoch 12: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 13/10000
Epoch 13: val_loss did not improve from 0.67784
Epoch 14/10000
Epoch 14: val_loss did not improve from 0.67784
Epoch 15/10000
Epoch 15: va

Epoch 30/10000
Epoch 30: val_loss did not improve from 0.67784
Epoch 31/10000
Epoch 31: val_loss did not improve from 0.67784
Epoch 32/10000
Epoch 32: val_loss did not improve from 0.67784
Epoch 33/10000
Epoch 33: val_loss did not improve from 0.67784
Epoch 34/10000
Epoch 34: val_loss did not improve from 0.67784
Epoch 35/10000
Epoch 35: val_loss did not improve from 0.67784
Epoch 36/10000
Epoch 36: val_loss improved from 0.67784 to 0.67748, saving model to best_nn_model.h5
Epoch 37/10000
Epoch 37: val_loss improved from 0.67748 to 0.67741, saving model to best_nn_model.h5
Epoch 38/10000
Epoch 38: val_loss did not improve from 0.67741
Epoch 39/10000
Epoch 39: val_loss improved from 0.67741 to 0.67735, saving model to best_nn_model.h5
Epoch 40/10000
Epoch 40: val_loss improved from 0.67735 to 0.67717, saving model to best_nn_model.h5
Epoch 41/10000
Epoch 41: val_loss did not improve from 0.67717
Epoch 42/10000
Epoch 42: val_loss did not improve from 0.67717
Epoch 43/10000
Epoch 43: val_

Epoch 56/10000
Epoch 56: val_loss improved from 0.67456 to 0.67448, saving model to best_nn_model.h5
Epoch 57/10000
Epoch 57: val_loss did not improve from 0.67448
Epoch 58/10000
Epoch 58: val_loss improved from 0.67448 to 0.67423, saving model to best_nn_model.h5
Epoch 59/10000
Epoch 59: val_loss did not improve from 0.67423
Epoch 60/10000
Epoch 60: val_loss improved from 0.67423 to 0.67410, saving model to best_nn_model.h5
Epoch 61/10000
Epoch 61: val_loss improved from 0.67410 to 0.67389, saving model to best_nn_model.h5
Epoch 62/10000
Epoch 62: val_loss improved from 0.67389 to 0.67369, saving model to best_nn_model.h5
Epoch 63/10000
Epoch 63: val_loss improved from 0.67369 to 0.67349, saving model to best_nn_model.h5
Epoch 64/10000
Epoch 64: val_loss improved from 0.67349 to 0.67339, saving model to best_nn_model.h5
Epoch 65/10000
Epoch 65: val_loss did not improve from 0.67339
Epoch 66/10000
Epoch 66: val_loss improved from 0.67339 to 0.67309, saving model to best_nn_model.h5
Epo

Epoch 82/10000
Epoch 82: val_loss did not improve from 0.67002
Epoch 83/10000
Epoch 83: val_loss improved from 0.67002 to 0.66968, saving model to best_nn_model.h5
Epoch 84/10000
Epoch 84: val_loss improved from 0.66968 to 0.66900, saving model to best_nn_model.h5
Epoch 85/10000
Epoch 85: val_loss improved from 0.66900 to 0.66873, saving model to best_nn_model.h5
Epoch 86/10000
Epoch 86: val_loss did not improve from 0.66873
Epoch 87/10000
Epoch 87: val_loss did not improve from 0.66873
Epoch 88/10000
Epoch 88: val_loss did not improve from 0.66873
Epoch 89/10000
Epoch 89: val_loss improved from 0.66873 to 0.66848, saving model to best_nn_model.h5
Epoch 90/10000
Epoch 90: val_loss improved from 0.66848 to 0.66796, saving model to best_nn_model.h5
Epoch 91/10000
Epoch 91: val_loss improved from 0.66796 to 0.66776, saving model to best_nn_model.h5
Epoch 92/10000
Epoch 92: val_loss improved from 0.66776 to 0.66706, saving model to best_nn_model.h5
Epoch 93/10000
Epoch 93: val_loss did not

Epoch 107: val_loss improved from 0.66510 to 0.66481, saving model to best_nn_model.h5
Epoch 108/10000
Epoch 108: val_loss improved from 0.66481 to 0.66406, saving model to best_nn_model.h5
Epoch 109/10000
Epoch 109: val_loss improved from 0.66406 to 0.66381, saving model to best_nn_model.h5
Epoch 110/10000
Epoch 110: val_loss did not improve from 0.66381
Epoch 111/10000
Epoch 111: val_loss improved from 0.66381 to 0.66381, saving model to best_nn_model.h5
Epoch 112/10000
Epoch 112: val_loss did not improve from 0.66381
Epoch 113/10000
Epoch 113: val_loss improved from 0.66381 to 0.66327, saving model to best_nn_model.h5
Epoch 114/10000
Epoch 114: val_loss did not improve from 0.66327
Epoch 115/10000
Epoch 115: val_loss did not improve from 0.66327
Epoch 116/10000
Epoch 116: val_loss did not improve from 0.66327
Epoch 117/10000
Epoch 117: val_loss did not improve from 0.66327
Epoch 118/10000
Epoch 118: val_loss improved from 0.66327 to 0.66255, saving model to best_nn_model.h5
Epoch 11

Epoch 133/10000
Epoch 133: val_loss improved from 0.66060 to 0.66055, saving model to best_nn_model.h5
Epoch 134/10000
Epoch 134: val_loss improved from 0.66055 to 0.66054, saving model to best_nn_model.h5
Epoch 135/10000
Epoch 135: val_loss did not improve from 0.66054
Epoch 136/10000
Epoch 136: val_loss improved from 0.66054 to 0.66052, saving model to best_nn_model.h5
Epoch 137/10000
Epoch 137: val_loss did not improve from 0.66052
Epoch 138/10000
Epoch 138: val_loss improved from 0.66052 to 0.66008, saving model to best_nn_model.h5
Epoch 139/10000
Epoch 139: val_loss did not improve from 0.66008
Epoch 140/10000
Epoch 140: val_loss did not improve from 0.66008
Epoch 141/10000
Epoch 141: val_loss improved from 0.66008 to 0.65981, saving model to best_nn_model.h5
Epoch 142/10000
Epoch 142: val_loss did not improve from 0.65981
Epoch 143/10000
Epoch 143: val_loss did not improve from 0.65981
Epoch 144/10000
Epoch 144: val_loss improved from 0.65981 to 0.65964, saving model to best_nn_m

Epoch 159/10000
Epoch 159: val_loss did not improve from 0.65774
Epoch 160/10000
Epoch 160: val_loss did not improve from 0.65774
Epoch 161/10000
Epoch 161: val_loss did not improve from 0.65774
Epoch 162/10000
Epoch 162: val_loss did not improve from 0.65774
Epoch 163/10000
Epoch 163: val_loss did not improve from 0.65774
Epoch 164/10000
Epoch 164: val_loss did not improve from 0.65774
Epoch 165/10000
Epoch 165: val_loss improved from 0.65774 to 0.65766, saving model to best_nn_model.h5
Epoch 166/10000
Epoch 166: val_loss did not improve from 0.65766
Epoch 167/10000
Epoch 167: val_loss improved from 0.65766 to 0.65736, saving model to best_nn_model.h5
Epoch 168/10000
Epoch 168: val_loss improved from 0.65736 to 0.65712, saving model to best_nn_model.h5
Epoch 169/10000
Epoch 169: val_loss improved from 0.65712 to 0.65695, saving model to best_nn_model.h5
Epoch 170/10000
Epoch 170: val_loss improved from 0.65695 to 0.65678, saving model to best_nn_model.h5
Epoch 171/10000
Epoch 171: val

Epoch 185: val_loss did not improve from 0.65505
Epoch 186/10000
Epoch 186: val_loss did not improve from 0.65505
Epoch 187/10000
Epoch 187: val_loss did not improve from 0.65505
Epoch 188/10000
Epoch 188: val_loss improved from 0.65505 to 0.65503, saving model to best_nn_model.h5
Epoch 189/10000
Epoch 189: val_loss improved from 0.65503 to 0.65437, saving model to best_nn_model.h5
Epoch 190/10000
Epoch 190: val_loss improved from 0.65437 to 0.65394, saving model to best_nn_model.h5
Epoch 191/10000
Epoch 191: val_loss improved from 0.65394 to 0.65371, saving model to best_nn_model.h5
Epoch 192/10000
Epoch 192: val_loss did not improve from 0.65371
Epoch 193/10000
Epoch 193: val_loss did not improve from 0.65371
Epoch 194/10000
Epoch 194: val_loss did not improve from 0.65371
Epoch 195/10000
Epoch 195: val_loss improved from 0.65371 to 0.65367, saving model to best_nn_model.h5
Epoch 196/10000
Epoch 196: val_loss improved from 0.65367 to 0.65330, saving model to best_nn_model.h5
Epoch 19

Epoch 211/10000
Epoch 211: val_loss did not improve from 0.65163
Epoch 212/10000
Epoch 212: val_loss did not improve from 0.65163
Epoch 213/10000
Epoch 213: val_loss did not improve from 0.65163
Epoch 214/10000
Epoch 214: val_loss did not improve from 0.65163
Epoch 215/10000
Epoch 215: val_loss did not improve from 0.65163
Epoch 216/10000
Epoch 216: val_loss did not improve from 0.65163
Epoch 217/10000
Epoch 217: val_loss did not improve from 0.65163
Epoch 218/10000
Epoch 218: val_loss did not improve from 0.65163
Epoch 219/10000
Epoch 219: val_loss did not improve from 0.65163
Epoch 220/10000
Epoch 220: val_loss did not improve from 0.65163
Epoch 221/10000
Epoch 221: val_loss did not improve from 0.65163
Epoch 222/10000
Epoch 222: val_loss improved from 0.65163 to 0.65138, saving model to best_nn_model.h5
Epoch 223/10000
Epoch 223: val_loss improved from 0.65138 to 0.65122, saving model to best_nn_model.h5
Epoch 224/10000
Epoch 224: val_loss improved from 0.65122 to 0.65093, saving mo

Epoch 238/10000
Epoch 238: val_loss did not improve from 0.65000
Epoch 239/10000
Epoch 239: val_loss did not improve from 0.65000
Epoch 240/10000
Epoch 240: val_loss did not improve from 0.65000
Epoch 241/10000
Epoch 241: val_loss did not improve from 0.65000
Epoch 242/10000
Epoch 242: val_loss did not improve from 0.65000
Epoch 243/10000
Epoch 243: val_loss did not improve from 0.65000
Epoch 244/10000
Epoch 244: val_loss did not improve from 0.65000
Epoch 245/10000
Epoch 245: val_loss did not improve from 0.65000
Epoch 246/10000
Epoch 246: val_loss did not improve from 0.65000
Epoch 247/10000
Epoch 247: val_loss did not improve from 0.65000
Epoch 248/10000
Epoch 248: val_loss did not improve from 0.65000
Epoch 249/10000
Epoch 249: val_loss did not improve from 0.65000
Epoch 250/10000
Epoch 250: val_loss did not improve from 0.65000
Epoch 251/10000
Epoch 251: val_loss did not improve from 0.65000
Epoch 252/10000
Epoch 252: val_loss did not improve from 0.65000
Epoch 253/10000
Epoch 253

Epoch 266/10000
Epoch 266: val_loss did not improve from 0.65000
Epoch 267/10000
Epoch 267: val_loss did not improve from 0.65000
Epoch 268/10000
Epoch 268: val_loss did not improve from 0.65000
Epoch 269/10000
Epoch 269: val_loss did not improve from 0.65000
Epoch 270/10000
Epoch 270: val_loss did not improve from 0.65000
Epoch 271/10000
Epoch 271: val_loss did not improve from 0.65000
Epoch 272/10000
Epoch 272: val_loss did not improve from 0.65000
Epoch 273/10000
Epoch 273: val_loss did not improve from 0.65000
Epoch 274/10000
Epoch 274: val_loss did not improve from 0.65000
Epoch 275/10000
Epoch 275: val_loss did not improve from 0.65000
Epoch 276/10000
Epoch 276: val_loss did not improve from 0.65000
Epoch 277/10000
Epoch 277: val_loss did not improve from 0.65000
Epoch 278/10000
Epoch 278: val_loss did not improve from 0.65000
Epoch 279/10000
Epoch 279: val_loss did not improve from 0.65000
Epoch 280/10000
Epoch 280: val_loss did not improve from 0.65000
Epoch 281/10000
Epoch 281

Epoch 294/10000
Epoch 294: val_loss did not improve from 0.65000
Epoch 295/10000
Epoch 295: val_loss did not improve from 0.65000
Epoch 296/10000
Epoch 296: val_loss did not improve from 0.65000
Epoch 297/10000
Epoch 297: val_loss did not improve from 0.65000
Epoch 298/10000
Epoch 298: val_loss did not improve from 0.65000
Epoch 299/10000
Epoch 299: val_loss did not improve from 0.65000
Epoch 300/10000
Epoch 300: val_loss did not improve from 0.65000
Epoch 301/10000
Epoch 301: val_loss did not improve from 0.65000
Epoch 302/10000
Epoch 302: val_loss did not improve from 0.65000
Epoch 303/10000
Epoch 303: val_loss did not improve from 0.65000
Epoch 304/10000
Epoch 304: val_loss did not improve from 0.65000
Epoch 305/10000
Epoch 305: val_loss did not improve from 0.65000
Epoch 306/10000
Epoch 306: val_loss did not improve from 0.65000
Epoch 307/10000
Epoch 307: val_loss did not improve from 0.65000
Epoch 308/10000
Epoch 308: val_loss did not improve from 0.65000
Epoch 309/10000
Epoch 309

Epoch 322/10000
Epoch 322: val_loss did not improve from 0.65000
Epoch 323/10000
Epoch 323: val_loss did not improve from 0.65000
Epoch 324/10000
Epoch 324: val_loss did not improve from 0.65000
Epoch 325/10000
Epoch 325: val_loss did not improve from 0.65000
Epoch 326/10000
Epoch 326: val_loss did not improve from 0.65000
Epoch 327/10000
Epoch 327: val_loss did not improve from 0.65000
Epoch 328/10000
Epoch 328: val_loss did not improve from 0.65000
Epoch 329/10000
Epoch 329: val_loss did not improve from 0.65000
Epoch 330/10000
Epoch 330: val_loss did not improve from 0.65000
Epoch 331/10000
Epoch 331: val_loss did not improve from 0.65000
Loading best neural network model...
Initializing classifiers...
Training StackingClassifier on the entire training data...
Saving StackingClassifier model...
Loading test data...
Test data loaded.
Normalizing test data...
Test data normalized.
Making predictions with the StackingClassifier model...
Saving predictions...
Predictions saved to predic