# N=30
Starting with Moritz code

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, EarlyStopping
from sklearn.mixture import GaussianMixture
from tensorflow.keras.regularizers import l2

### Data Processing

In [2]:
n = 30

Xs = np.load(f"Datasets/kryptonite-{n}-X.npy")
Ys = np.load(f"Datasets/kryptonite-{n}-y.npy")
df_x = pd.DataFrame(Xs)
df_y = pd.Series(Ys)  # Use Series if Ys is 1D

In [3]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)

print(X_train.shape)
print(y_train.shape)

(48000, 30)
(48000,)


### Create new GMM feature for each existing feature

In [4]:
def featureEngineering(X):
    # Apply Gaussian Mixture Model to each feature individually
    x_transformed = pd.DataFrame()
    
    for column in X.columns:
        gmm = GaussianMixture(n_components=2, random_state=42)
        gmm.fit(X[[column]])
        proba = gmm.predict_proba(X[[column]])
        x_transformed[f'{column}_Mode_Prob'] = np.where(proba[:, 0] > proba[:, 1], -proba[:, 0], proba[:, 1])
    
    x_combined = pd.concat([(X >= 0.5).astype(int), x_transformed], axis=1)
    return x_combined

x_train_combined = featureEngineering(X_train)
x_train_combined.shape

(57564, 60)

### Build and Train NN

In [11]:
# Build the neural network model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    
    Dense(256),
    LeakyReLU(),
    BatchNormalization(),
    
    Dense(128),
    LeakyReLU(),
    BatchNormalization(),
    
    Dense(64),
    LeakyReLU(),
    BatchNormalization(),
    
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Early Stopping
ESCallback = EarlyStopping(monitor='loss', patience=3)

In [13]:
# Train the model
history = model.fit(
    x_train_combined, y_train,
    epochs=150,
    batch_size=32,
    validation_split=0.1, 
    verbose=1,
    callbacks=[ESCallback]
)

(60000, 30)
(48000,)


ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 54000
'y' sizes: 48000


### Make Predictions and Evaluate

In [None]:
# Make predictions and evaluate
y_pred_nn = (model.predict(X_test) > 0.5).astype(int)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Accuracy of the Neural Network Classifier on test set for n = {n}: {accuracy_nn:.4f}")
print(classification_report(y_test, y_pred_nn))