In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.mixture import GaussianMixture

# Train and evaluate a dense binary classifier on each Kryptonite-N dataset.
#
# Pipeline per dataset size n:
#   1. load the feature matrix and labels from DATA_DIR,
#   2. split into train/test partitions,
#   3. fit a 2-component Gaussian mixture on the TRAINING rows only,
#   4. build model inputs = thresholded raw features + mixture membership probabilities,
#   5. train a small MLP with early stopping and report test-set metrics.
import os

# Directory holding the kryptonite-{n}-X.npy / kryptonite-{n}-y.npy files.
# Defaults to the original location; set KRYPTONITE_DATA_DIR to run elsewhere.
DATA_DIR = os.environ.get(
    "KRYPTONITE_DATA_DIR",
    "/home/moritz/maths-for-ml/Kryptonite-N/Datasets",
)


def _build_features(fitted_gmm, raw_features):
    """Return the model input frame for ``raw_features``.

    The result concatenates, column-wise:
      * the raw features thresholded at 0.5 and cast to int (0/1), and
      * the two component-membership probabilities from ``fitted_gmm``.

    The probability frame reuses ``raw_features.index`` so the concat aligns
    row-for-row even after the rows were shuffled by the train/test split.
    """
    binarized = (raw_features >= 0.5).astype(int)
    mode_probs = pd.DataFrame(
        fitted_gmm.predict_proba(raw_features),
        columns=['Mode_1_Prob', 'Mode_2_Prob'],
        index=raw_features.index,
    )
    return pd.concat([binarized, mode_probs], axis=1)


print("Current Directory:", os.getcwd())

for n in [24]:
    # Load dataset
    Xs = np.load(f"{DATA_DIR}/kryptonite-{n}-X.npy")
    Ys = np.load(f"{DATA_DIR}/kryptonite-{n}-y.npy")
    df_x = pd.DataFrame(Xs)
    df_y = pd.Series(Ys)  # Use Series if Ys is 1D

    # Split BEFORE fitting any data-dependent transform so that no information
    # from the held-out rows reaches the training features.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        df_x, df_y, test_size=0.2, random_state=42
    )

    # Apply Gaussian Mixture Model to capture bimodality (fitted on train rows only)
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(X_train_raw)

    # Add GMM probabilities to the thresholded original data
    X_train = _build_features(gmm, X_train_raw)
    X_test = _build_features(gmm, X_test_raw)

    # Build the neural network model.
    # The features are a dense table, so the input is declared dense.
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),

        Dense(64),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),

        Dense(32),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),

        Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Set up early stopping to prevent overfitting: stop once val_loss has not
    # improved for 10 epochs and roll back to the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the model; the callback must be passed here to take effect.
    history = model.fit(
        X_train, y_train,
        epochs=150,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stopping],
        verbose=1,
    )

    # Make predictions and evaluate.
    # predict() yields one sigmoid output per row; flatten to 1-D to match y_test.
    y_pred_nn = (model.predict(X_test) > 0.5).astype(int).ravel()
    accuracy_nn = accuracy_score(y_test, y_pred_nn)
    print(f"Accuracy of the Neural Network Classifier on test set for n = {n}: {accuracy_nn:.4f}")
    print(classification_report(y_test, y_pred_nn))


Current Directory: \\wsl.localhost\Ubuntu\home\moritz\maths-for-ml
Epoch 1/150




[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.4992 - loss: 0.7394 - val_accuracy: 0.5023 - val_loss: 0.7017
Epoch 2/150
[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 887us/step - accuracy: 0.5253 - loss: 0.6943 - val_accuracy: 0.4885 - val_loss: 0.6988
Epoch 3/150
[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 915us/step - accuracy: 0.5261 - loss: 0.6921 - val_accuracy: 0.5096 - val_loss: 0.6967
Epoch 4/150
[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 880us/step - accuracy: 0.5354 - loss: 0.6902 - val_accuracy: 0.5060 - val_loss: 0.6986
Epoch 5/150
[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 877us/step - accuracy: 0.5375 - loss: 0.6882 - val_accuracy: 0.5047 - val_loss: 0.6982
Epoch 6/150
[1m1080/1080[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 879us/step - accuracy: 0.5414 - loss: 0.6881 - val_accuracy: 0.4997 - val_loss: 0.6982
Epoch 7/150
