# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from scipy.sparse import csr_matrix

# Train and evaluate a feed-forward binary classifier on each Kryptonite-n
# dataset. For every n the script:
#   1. loads the feature matrix and labels from disk,
#   2. augments the 0.5-thresholded features with the posterior probabilities
#      of a two-component Gaussian mixture fitted on the raw features,
#   3. scales the combined features (no centering), splits 80/20, and
#   4. fits a Dense/LeakyReLU/BatchNorm network with early stopping, then
#      reports accuracy and a classification report on the held-out split.
import os  # hoisted out of the loop body; only needed for the cwd printout

for n in [24]:
    # Load dataset
    print("Current Directory:", os.getcwd())
    Xs = np.load(f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/kryptonite-{n}-X.npy")
    Ys = np.load(f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/kryptonite-{n}-y.npy")
    df_x = pd.DataFrame(Xs)
    df_y = pd.Series(Ys)  # Use Series if Ys is 1D

    # Apply Gaussian Mixture Model to capture bimodality: each row gets the
    # posterior probability of belonging to each of the two components.
    # NOTE(review): the mixture (and the scaler below) are fitted on all rows
    # before the train/test split, so test rows influence the engineered
    # features. Consider fitting both on the training rows only.
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(df_x)
    df_x_gmm_proba = gmm.predict_proba(df_x)
    df_x_transformed = pd.DataFrame(df_x_gmm_proba, columns=['Mode_1_Prob', 'Mode_2_Prob'])

    # Binarise the raw features at 0.5 and append the GMM probabilities
    df_x_combined = pd.concat([(df_x >= 0.5).astype(int), df_x_transformed], axis=1)

    # Sparse representation for high dimensionality
    X_sparse = csr_matrix(df_x_combined.values)

    # Scale features to unit variance; with_mean=False because centering
    # would densify the sparse matrix.
    scaler = StandardScaler(with_mean=False)
    X_scaled = scaler.fit_transform(X_sparse)

    # Split data (80% train / 20% test, fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, df_y, test_size=0.2, random_state=42)

    # Build the neural network model: three hidden blocks of
    # Dense -> LeakyReLU -> BatchNorm, with dropout after the first two.
    input_layer = Input(shape=(X_train.shape[1],), sparse=True)
    x = Dense(128)(input_layer)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    x = Dense(64)(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    x = Dense(32)(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization()(x)

    # Single sigmoid unit -> probability of the positive class
    output_layer = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=input_layer, outputs=output_layer)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Set up early stopping to prevent overfitting: stop once val_loss has not
    # improved for 10 epochs and roll back to the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model. The callback must be passed explicitly; without it the
    # run always lasts the full 1000 epochs and the best weights are never
    # restored.
    history = model.fit(
        X_train, y_train,
        epochs=1000,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stopping],
        verbose=1,
    )

    # Make predictions (threshold the sigmoid output at 0.5) and evaluate
    y_pred_nn = (model.predict(X_test) > 0.5).astype(int)
    accuracy_nn = accuracy_score(y_test, y_pred_nn)
    print(f"Accuracy of the Neural Network Classifier on test set for n = {n}: {accuracy_nn:.4f}")
    print(classification_report(y_test, y_pred_nn))
