In [1]:
import Models
import numpy as np
import random as rn
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy and TensorFlow random generators with the same value
# and restrict TensorFlow to one inter-op and one intra-op thread
# (presumably so that repeated runs give the same results).
for apply_seed in (rn.seed, np.random.seed, tf.random.set_seed):
    apply_seed(1)
for limit_threads in (
    tf.config.threading.set_inter_op_parallelism_threads,
    tf.config.threading.set_intra_op_parallelism_threads,
):
    limit_threads(1)

# Load data
# Features: tab-separated file with the Donor ID as first column.
X_renal_data = pd.read_csv('./x_exp_renal.csv', sep='\t')
# Labels: comma-separated file with the Donor ID in the first column and the
# label in the second column.
y_renal_data = pd.read_csv('./y_renal.csv', sep=',')

# Build the train/test splits used by the rest of the notebook. The Donor ID
# column is dropped from the features before they are handed over.
# NOTE(review): features and labels are passed as separate objects; this
# assumes both files list donors in the same row order - confirm.
(
    X_train_norm,
    X_train_swapped,
    X_test_norm,
    y_train,
    y_test,
    y_train_oh,
    y_test_oh,
    X_train_first,
    X_train_second,
    X_swapped_first,
    X_swapped_second,
    X_test_first,
    X_test_second,
) = Models.prepare_datasets(
    X_renal_data.iloc[:, 1:],
    y_renal_data["label"],
    test_size=0.2,
    swap_noise=0.15,
)


swapping: 65 rows.


In [None]:
# Reload the already-imported Models module so that changes made to its source
# since the first import are picked up without restarting the kernel.
import importlib
importlib.reload(Models)

In [None]:
### Hyperparameter tuning
# Grid search over dropout and l1/l2 regularisation strengths for the
# multi-input autoencoder. One row of metrics is appended to `scores` for
# every (dropout, l1, l2) combination.

# Map y_train from classes -1 and 1 to 0 and 1 (used for the clustering metrics below).
y_train_map = ((y_train + 1)/2).astype(int)

### VISUALIZATION WITH PCA
## ORIGINAL DATASET
X_latent_pca, X_latent_test_pca = Models.perform_PCA(X_train_norm, X_test_norm, y_train, y_test, n_components=10)

mu=0.5
dropouts = [0,0.1,0.2,0.3]
l1s = [0.000005]
l2s = [0.000005,0.000001,0.00001]
scores = []
for dropout in dropouts:
    for l1 in l1s:
        for l2 in l2s:
            ## Build and Train Autoencoder
            # Inputs are the swap-noised views; targets are the clean views.
            autoencoder, encoder, decoder, loss = Models.build_and_train_multi_autoencoder([X_swapped_first,X_swapped_second],
                                                                          [X_train_first,X_train_second],
                                                                          encoding_dim=50, 
                                                                          regularizer=tf.keras.regularizers.l1_l2(l1,l2),
                                                                          dropout=dropout,
                                                                          epochs=200,
                                                                          mu=mu)
            ## Encode datasets
            X_latent_ae = Models.encode_dataset([X_train_first,X_train_second], encoder)
            X_latent_test_ae = Models.encode_dataset([X_test_first,X_test_second], encoder)
         
            ## PCA ON AUTOENCODER LATENT SPACE
            X_latent_pca_ae, X_latent_test_pca_ae = Models.perform_PCA(X_latent_ae, X_latent_test_ae, y_train, y_test, n_components=10)

            ### CLASSIFICATION ###
            # We use the reduced dataset to train a classifier and compare it against the same classifier trained with the original dataset.
            classification_metrics = tuple(Models.classify(X_latent_ae, X_latent_test_ae, y_train, y_test, model_type="AE"))
            lr_accuracy, svm_accuracy, rf_accuracy = classification_metrics[:3]
            # The AUC values were previously appended to `scores` without ever
            # being assigned, which raised NameError on the first iteration.
            # NOTE(review): Models.classify is not visible here. If it returns
            # the three ROC-AUC values after the accuracies they are used;
            # otherwise NaN is recorded so each row keeps its 16 fields - confirm.
            if len(classification_metrics) >= 6:
                lr_auc, svm_auc, rf_auc = classification_metrics[3:6]
            else:
                lr_auc = svm_auc = rf_auc = np.nan

            ### CLUSTERING ###
            silhouette_kmeans, mutual_info_kmeans, silhouette_spectral, mutual_info_spectral, silhouette_hierarchical, mutual_info_hierarchical = Models.cluster(X_latent_ae,y_train_map, model_type="AE")

            scores.append((dropout,l1,l2,loss,lr_accuracy,svm_accuracy,rf_accuracy, lr_auc, svm_auc, rf_auc,silhouette_kmeans, mutual_info_kmeans, silhouette_spectral, mutual_info_spectral, silhouette_hierarchical, mutual_info_hierarchical))


In [None]:
# Column labels for the results table, one per field of each row held in `scores`.
score_columns = [
    "dropout",
    "l1",
    "l2",
    "loss",
    "LR_accuracy",
    "SVM_accuracy",
    "RF_accuracy",
    "LR_auc",
    "SVM_auc",
    "RF_auc",
    "kmeans_silhouette",
    "kmeans_mutual_info",
    "spectral_silhouette",
    "spectral_mutual_info",
    "hierarchical_silhouette",
    "hierarchical_mutual_info",
]
# Turn the collected rows into a labelled table and display it.
scores = pd.DataFrame(scores, columns=score_columns)
scores

Results

| Encoding dim | l1      | l2      | Dropout | Loss   | Accuracy (LR) | Accuracy (SVM) | Accuracy (RF) | ROC-AUC (LR) | ROC-AUC (SVM) | ROC-AUC (RF) | SS (K-means) | MI (K-means) | SS (Spectral) | MI (Spectral) | SS (Hierarch.) | MI (Hierarch.) |
|--------------|---------|---------|---------|--------|---------------|----------------|---------------|--------------|---------------|--------------|--------------|--------------|---------------|---------------|----------------|----------------|
| 50           | 0.000005| 0.000005| 0.00    | 0.0100 |  0.95         | 0.92           | 0.95          | 0.46         | 0.46          | 0.46         | 0.46         | 0.3263       | 0.51          | 0.0047        | 0.46           | 0.4273         |
