In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback
from sklearn.mixture import GaussianMixture
from tensorflow.keras.regularizers import l2

# Load datasets and initialize models
for n in [30]:
    # Load dataset
    import os
    print("Current Directory:", os.getcwd())
    
    Xs = np.load(f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/additional-kryptonite-{n}-X.npy")
    Ys = np.load(f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/additional-kryptonite-{n}-y.npy")
    df_x = pd.DataFrame(Xs)
    df_y = pd.Series(Ys)
    df_x_transformed = pd.DataFrame()

    X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
    
    X_train_transformed = pd.DataFrame()
    X_test_transformed = pd.DataFrame()

    print(X_train.shape)
    print(X_test.shape)

    # 
    columns_to_exclude = [0, 6, 7, 12, 13, 14, 17, 19, 20, 24]
    for column in df_x.columns:
        if column not in columns_to_exclude:
            gmm = GaussianMixture(n_components=2, random_state=42)
            gmm.fit(X_train[[column]])
            proba = gmm.predict_proba(X_train[[column]])
            X_train_transformed[f'{column}_Mode_Prob'] = np.where(proba[:, 0] > proba[:, 1], -proba[:, 0], proba[:, 1])
            proba = gmm.predict_proba(X_test[[column]])
            X_test_transformed[f'{column}_Mode_Prob'] = np.where(proba[:, 0] > proba[:, 1], -proba[:, 0], proba[:, 1])
    
    print((X_train >= 0.5).astype(int).shape, X_train_transformed.shape)
    print((X_test >= 0.5).astype(int).shape, X_test_transformed.shape)
    X_train = pd.concat([(X_train >= 0.5).astype(int).reset_index(drop=True), X_train_transformed.reset_index(drop=True)], axis=1)
    X_test = pd.concat([(X_test >= 0.5).astype(int).reset_index(drop=True), X_test_transformed.reset_index(drop=True)], axis=1)

    print(X_train.shape)
    print(X_test.shape)
    # Split data
    
    # Build the neural network model
   

    model = Sequential([
    Input(shape=(X_train.shape[1],)),

    Dense(512),
    LeakyReLU(),
    BatchNormalization(),
    Dropout(0.2),

    Dense(256),
    LeakyReLU(),
    BatchNormalization(),
    Dropout(0.2),

    Dense(128),
    LeakyReLU(),
    BatchNormalization(),
    Dropout(0.2),

    
    Dense(64),
    LeakyReLU(),
    BatchNormalization(),
    Dropout(0.1),

    
    Dense(32),
    LeakyReLU(),
    BatchNormalization(),

    Dense(1, activation='sigmoid')
])


    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    # Define a callback to stop training when accuracy reaches 95%
    class EarlyStoppingByAccuracy(Callback):
        def on_epoch_end(self, epoch, logs=None):
            if logs.get('accuracy') >= 0.94:
                print("\nReached 94% accuracy, stopping training!")
                self.model.stop_training = True

    # Train the model
    history = model.fit(
        X_train, y_train,
        epochs=500,
        batch_size=32,
        validation_split=0.1, 
        verbose=1,
        callbacks=[EarlyStoppingByAccuracy()]
    )

    # Make predictions and evaluate
    y_pred_nn = (model.predict(X_test) > 0.5).astype(int)
    accuracy_nn = accuracy_score(y_test, y_pred_nn)
    print(f"Accuracy of the Neural Network Classifier on test set for n = {n}: {accuracy_nn:.4f}")
    print(classification_report(y_test, y_pred_nn))


Current Directory: \\wsl.localhost\Ubuntu\home\moritz\maths-for-ml
(200000, 30)
(50000, 30)
(200000, 30) (200000, 20)
(50000, 30) (50000, 20)
(200000, 50)
(50000, 50)
Epoch 1/500
[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 7ms/step - accuracy: 0.4993 - loss: 0.7071 - val_accuracy: 0.5024 - val_loss: 0.6943
Epoch 2/500
[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 5ms/step - accuracy: 0.5003 - loss: 0.6945 - val_accuracy: 0.5038 - val_loss: 0.6937
Epoch 3/500
[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.4987 - loss: 0.6940 - val_accuracy: 0.4929 - val_loss: 0.6936
Epoch 4/500
[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.5031 - loss: 0.6940 - val_accuracy: 0.5090 - val_loss: 0.6933
Epoch 5/500
[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.4956 - loss: 0.6941 - val_accuracy: 0.5048 - val_loss: 0.6932
Epo

In [4]:
# Save the model as moritz-n24
model.save('moritz-n30.keras')


In [8]:
# Load the hidden kryptonite 30 dataset from a .npy file
hidden_X = np.load(f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/additional-kryptonite-{n}-X.npy")
df_hidden_x = pd.DataFrame(Xs)
hidden_X_transformed = pd.DataFrame()
for column in df_x.columns:
        if column not in columns_to_exclude:
            gmm = GaussianMixture(n_components=2, random_state=42)
            gmm.fit(X_train[[column]])
            proba = gmm.predict_proba(X_train[[column]])
            X_train_transformed[f'{column}_Mode_Prob'] = np.where(proba[:, 0] > proba[:, 1], -proba[:, 0], proba[:, 1])
            proba = gmm.predict_proba(X_test[[column]])
            hidden_X_transformed[f'{column}_Mode_Prob'] = np.where(proba[:, 0] > proba[:, 1], -proba[:, 0], proba[:, 1])
# Make predictions on the hidden kryptonite 30 dataset
X_hidden = pd.concat([(df_hidden_x >= 0.5).astype(int).reset_index(drop=True), hidden_X_transformed.reset_index(drop=True)], axis=1)

y_pred_hidden_kryptonite_30 = (model.predict(X_hidden) > 0.5).astype(int)


np.save('f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/predicted_y_hidden_kryptonite_30.npy', y_pred_hidden_kryptonite_30)






[1m7813/7813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 707us/step


FileNotFoundError: [Errno 2] No such file or directory: 'f"/home/moritz/maths-for-ml/Kryptonite-N/Datasets/predicted_y_hidden_kryptonite_30.npy'

In [11]:
np.save('/home/moritz/maths-for-ml/Kryptonite-N/Datasets/predicted_y_hidden_kryptonite_30.npy', y_pred_hidden_kryptonite_30)