In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Fit the encoder on the raw cell-type labels, then map every label to its
# integer class code. The fitted encoder stays available under `encoder`.
encoder = LabelEncoder().fit(cell_types)
cell_types_encoded = encoder.transform(cell_types)

# One-hot encode the integer codes; the resulting column count is what the
# classifier's softmax output layer is sized from.
cell_types_categorical = to_categorical(cell_types_encoded)


In [None]:
from sklearn.model_selection import KFold
import numpy as np

# Collects one held-out accuracy per fold.
scores = list()

# Shuffled K-fold splitter; the fixed random_state keeps the fold assignment
# the same from run to run.
n_splits = 5
kf = KFold(shuffle=True, random_state=42, n_splits=n_splits)


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GaussianNoise, Dropout, Concatenate

# Hyperparameters of the embedding pathway: one single-entry dict per layer,
# keyed by the Keras layer class whose first argument the value supplies.
emb_p0 = dict(GaussianNoise=0.1)
emb_p1 = dict(Dense=256)
emb_p2 = dict(Dropout=0.3)
emb_p3 = dict(Dense=128)


In [None]:
# Settings that are handed to `model.compile(...)` when each fold's model is built.
compliers = dict(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Start from an empty score list so that re-running this cell does not append
# a second round of fold accuracies on top of those from a previous run (which
# would silently skew the average reported "across n_splits folds").
scores = []

# K-fold cross-validation: build, train and evaluate a fresh classifier per fold.
# After the loop, `model`, `X_test_emb` and `y_test` remain bound to the LAST
# fold's objects; later cells rely on that.
for fold, (train_index, test_index) in enumerate(kf.split(embeddings), start=1):
    # Split data into training and held-out sets for this fold
    X_train_emb, X_test_emb = embeddings[train_index], embeddings[test_index]
    y_train, y_test = cell_types_categorical[train_index], cell_types_categorical[test_index]

    # Define model inputs (one feature per embedding dimension)
    input_emb = Input(shape=(X_train_emb.shape[1],), name='input_emb')

    # Build pathway for embedding inputs:
    # noise -> dense -> dropout -> dense, sized by the emb_p* hyperparameter dicts
    noisy_emb = GaussianNoise(emb_p0["GaussianNoise"])(input_emb)
    pathway_emb = Dense(emb_p1["Dense"], activation='relu')(noisy_emb)
    pathway_emb = Dropout(emb_p2["Dropout"])(pathway_emb)
    pathway_emb = Dense(emb_p3["Dense"], activation='relu')(pathway_emb)

    # Define output layer: one softmax unit per one-hot class column
    output = Dense(cell_types_categorical.shape[1], activation='softmax')(pathway_emb)

    # Assemble and compile a new model so no weights carry over between folds
    model = Model(inputs=input_emb, outputs=output)
    model.compile(optimizer=compliers['optimizer'], loss=compliers['loss'], metrics=compliers['metrics'])

    if fold == 1:
        # Only print the model summary for the first fold
        model.summary()

    print(f"\nTraining on fold {fold}...")
    # validation_split holds out part of the training fold for monitoring only
    # (Keras takes it from the end of the arrays); the test fold is never seen here.
    model.fit(X_train_emb, y_train, validation_split=0.2, epochs=30, batch_size=32, verbose=1)

    # Evaluate the trained model on this fold's held-out data
    loss, accuracy = model.evaluate(X_test_emb, y_test, verbose=1)
    print(f"Fold {fold} Test Accuracy: {accuracy}\n")
    scores.append(accuracy)


In [None]:
# Mean of the per-fold test accuracies gathered in `scores`.
average_score = np.asarray(scores).mean()
print(f"\nAverage Test Accuracy across {n_splits} folds: {average_score}")


In [None]:
# Descriptive record of the network configuration, grouped into the embedding
# pathway layers and the compile settings.
# NOTE(review): these values are written out literally here rather than read
# from the emb_p* / compliers variables -- keep them in sync by hand.
nn_configurations = dict(
    emb_path=dict(
        emb_p0=dict(GaussianNoise=0.1),
        emb_p1=dict(Dense=256, activation="relu"),
        emb_p2=dict(Dropout=0.3),
        emb_p3=dict(Dense=128, activation="relu"),
    ),
    compliers=dict(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    ),
)

def config_to_str(config_dict):
    """Flatten a two-level configuration mapping into a one-line summary.

    Args:
        config_dict: Mapping of section name -> mapping of entry name -> value.

    Returns:
        A string of ``"<entry name>: <value>"`` items joined by ``", "``, in
        iteration order. Section names themselves do not appear in the output.
    """
    return ', '.join(
        f"{name}: {setting}"
        for section in config_dict.values()
        for name, setting in section.items()
    )

# Single-line text form of the network configuration.
config_str = config_to_str(config_dict=nn_configurations)


In [None]:
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# `model`, `X_test_emb` and `y_test` are whatever the cross-validation loop
# left bound after its final iteration, so this report covers the last fold only.
predictions = model.predict(X_test_emb)

# Reduce the one-hot targets and the predicted class scores to class indices.
y_true = y_test.argmax(axis=1)
y_pred = predictions.argmax(axis=1)

# Turn the report dict into a table with one row per report entry.
report = classification_report(y_true, y_pred, output_dict=True)
statistics_NeNe = pd.DataFrame(report).T


In [None]:
# Both labels carry the same underscore-joined description of the pipeline
# settings, so the string is formatted once and shared.
applications_label = f"{subject_outlier}_{subject_autoencoder}_{subject_normalization}_{subject_dimension}"
model_label = applications_label

# Tag every row of the classification report with the configuration and
# pipeline identifiers (the same scalar value is broadcast down each column).
statistics_NeNe['Parameters'] = config_str
statistics_NeNe['Applications'] = model_label
statistics_NeNe['Applications_Condition'] = f"{subject_label}_NeNe"


In [None]:
# One-row summary table for this model.
# "Accuracy" is the mean over all folds, whereas precision/recall/F1 are
# computed from `y_true`/`y_pred`, which come from the final fold's held-out
# split only.
statistics_NeNe_DF = pd.DataFrame({
    "Applications": [applications_label],
    "Applications_Condition": [f"{subject_label}"],
    "Model": ["NeNe"],
    "Parameters": [config_str],
    "Accuracy": [average_score],
    # Support-weighted averages; zero_division=0 scores undefined cases as 0.
    **{
        column: [metric(y_true, y_pred, average='weighted', zero_division=0)]
        for column, metric in (
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1", f1_score),
        )
    },
})


In [None]:
# Bundle this notebook's artefacts under a single name: the trained model,
# its predicted class indices, the per-class report and the one-row summary.
# NOTE(review): presumably read by whichever notebook runs this one -- confirm.
Subject_Process_Dict = dict(
    Model=model,
    Predictions=y_pred,
    Statistics=statistics_NeNe,
    Statistics_DF=statistics_NeNe_DF,
)

# Completion marker printed once every cell above has run.
print("operationnn.ipynb has finished")
