In [2]:
import sys
import os
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
sys.path.append(os.path.abspath(".."))  #TODO: MAKE THE SRC PACKAGE WORK
from src.training.new_optimised_train import train_autoencoder, train_cellfate
from src.evaluation.evaluate import *
from src.training.loss_functions import *
from src.preprocessing.preprocessing_functions import *
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from src.models import Encoder, Decoder, Discriminator, mlp_classifier, complex_mlp_classifier
from src.utils import *
from tensorflow.keras import layers, Sequential
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight

In [5]:
# Load the pre-split track arrays and their labels from disk.
train_tracks = np.load("../data/tracks/train_tracks.npy")
test_tracks = np.load("../data/tracks/test_tracks.npy")
train_labels = np.load("../data/labels/train_labels.npy")
test_labels = np.load('../data/labels/test_labels.npy')

# Build the tabular feature matrices: for every track keep index 0 along
# axis 1 and columns 4..16 (13 columns) along axis 2.
# NOTE(review): axis 1 is presumably the time axis, so this is the first
# frame of each track — confirm against the preprocessing code.
train_tracks_features, test_tracks_features = (
    tracks[:, 0, 4:17] for tracks in (train_tracks, test_tracks)
)

In [6]:
# Sanity check: display the shapes of the train and test feature matrices.
tuple(features.shape for features in (train_tracks_features, test_tracks_features))

((1108, 13), (277, 13))

In [9]:
import numpy as np
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools


# Experiment: train a small classifier on pairs of randomly chosen tabular
# features and record the row-normalised confusion matrix of every run.

# Define config
config = {
    'batch_size': 30,
    'epochs': 50,
    'learning_rate': 0.001,
    'seed': 42,
    'n_feature_pairs': 30  # number of random feature pairs to test
}

# Set seed
# tf.keras.utils.set_random_seed seeds Python, NumPy and TensorFlow; the
# explicit NumPy call is kept so the feature sampling below is visibly seeded.
tf.keras.utils.set_random_seed(config['seed'])
np.random.seed(config['seed'])

# ---------------------------------------------------------------------------
# Loop-invariant setup: none of this depends on the selected feature pair,
# so it is computed once instead of once per run.
# ---------------------------------------------------------------------------

# Number of candidate features, derived from the data rather than hard-coded.
n_features = train_tracks_features.shape[1]

# Class labels present in the training set; also fixes the confusion-matrix
# layout so every run yields a matrix of the same shape.
classes = np.unique(train_labels)
n_classes = len(classes)

# Class weights
class_weights = compute_class_weight('balanced', classes=classes, y=train_labels)
class_weights = dict(enumerate(class_weights))

# Validation / final-test split of the held-out set. Splitting the full
# feature matrix once and selecting columns inside the loop gives the same
# rows as splitting the column subset on every iteration, because the split
# depends only on the number of samples and the fixed random_state.
val_features, test_features_final, y_val, y_test_final = train_test_split(
    test_tracks_features, test_labels, test_size=0.5, random_state=config['seed'])

# Initialize container for confusion matrices (one matrix per run)
conf_matrix_tabular = np.zeros((config['n_feature_pairs'], n_classes, n_classes))

# Loop over N random feature combinations
# NOTE(review): pairs are drawn independently per run, so the same pair can
# be sampled more than once across runs.
for i in range(config['n_feature_pairs']):
    # Pick 2 random features
    selected_features = np.random.choice(n_features, size=2, replace=False)
    print(f"Run {i+1} — Using features: {selected_features}")

    # Subset the data
    X_train = train_tracks_features[:, selected_features]
    X_val = val_features[:, selected_features]
    X_test_final = test_features_final[:, selected_features]

    # Build model
    # NOTE(review): latent_dim=2 presumably has to match the number of
    # selected features (the classifier's input width) — confirm in src.models.
    classifier = complex_mlp_classifier(latent_dim=2)
    classifier.compile(loss='sparse_categorical_crossentropy',
                       optimizer=tf.keras.optimizers.Adam(learning_rate=config['learning_rate']),
                       metrics=['accuracy'])

    # Train
    classifier.fit(X_train, train_labels,
                   batch_size=config['batch_size'],
                   epochs=config['epochs'],
                   validation_data=(X_val, y_val),
                   class_weight=class_weights,
                   verbose=0)

    # Predict
    y_pred = classifier.predict(X_test_final, verbose=0)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Confusion Matrix, normalised over the true labels (each row sums to 1).
    # Passing `labels` pins the shape to (n_classes, n_classes) even when a
    # class is missing from both the held-out labels and the predictions;
    # normalize='true' also avoids NaNs for a class with zero support.
    cm_normalized = confusion_matrix(y_test_final, y_pred_classes,
                                     labels=classes, normalize='true')

    conf_matrix_tabular[i] = cm_normalized

# Save results
np.save("confusion_matrices_random_feature_selection.npy", conf_matrix_tabular)
print(f"\nSaved confusion matrices for {config['n_feature_pairs']} random feature combinations.")


Run 1 — Using features: [11  9]
Run 2 — Using features: [ 8 10]
Run 3 — Using features: [12 10]
Run 4 — Using features: [9 1]
Run 5 — Using features: [0 5]
Run 6 — Using features: [8 9]
Run 7 — Using features: [4 2]
Run 8 — Using features: [11  2]
Run 9 — Using features: [5 1]
Run 10 — Using features: [11  9]
Run 11 — Using features: [ 1 10]
Run 12 — Using features: [10  1]
Run 13 — Using features: [1 8]
Run 14 — Using features: [7 9]
Run 15 — Using features: [1 2]
Run 16 — Using features: [11  1]
Run 17 — Using features: [2 4]
Run 18 — Using features: [9 5]
Run 19 — Using features: [8 7]
Run 20 — Using features: [9 8]
Run 21 — Using features: [11  1]
Run 22 — Using features: [5 1]
Run 23 — Using features: [11  4]
Run 24 — Using features: [11  6]
Run 25 — Using features: [4 1]
Run 26 — Using features: [4 1]
Run 27 — Using features: [ 9 10]
Run 28 — Using features: [5 9]
Run 29 — Using features: [8 3]
Run 30 — Using features: [7 5]

Saved confusion matrices for 30 random feature combinations.

In [10]:
conf_matrix_tabular

array([[[0.5045045 , 0.4954955 ],
        [0.21428571, 0.78571429]],

       [[0.57657658, 0.42342342],
        [0.42857143, 0.57142857]],

       [[0.77477477, 0.22522523],
        [0.67857143, 0.32142857]],

       [[0.53153153, 0.46846847],
        [0.28571429, 0.71428571]],

       [[0.51351351, 0.48648649],
        [0.28571429, 0.71428571]],

       [[0.54054054, 0.45945946],
        [0.21428571, 0.78571429]],

       [[0.68468468, 0.31531532],
        [0.67857143, 0.32142857]],

       [[0.51351351, 0.48648649],
        [0.32142857, 0.67857143]],

       [[0.57657658, 0.42342342],
        [0.39285714, 0.60714286]],

       [[0.5045045 , 0.4954955 ],
        [0.21428571, 0.78571429]],

       [[0.73873874, 0.26126126],
        [0.82142857, 0.17857143]],

       [[0.59459459, 0.40540541],
        [0.64285714, 0.35714286]],

       [[0.56756757, 0.43243243],
        [0.28571429, 0.71428571]],

       [[0.47747748, 0.52252252],
        [0.28571429, 0.71428571]],

       [[0.47747748,