In [None]:
from ctc_utils.functions import def_gen_expr_for_datasets, def_labels_for_datasets, load_data
from sklearn.feature_selection import VarianceThreshold
from classifiers.DTC import dtc
from classifiers.LogisticRegression import logreg
from classifiers.LinearSVC import lsvc
from classifiers.kNN import knn
from classifiers.MLP import mlp
from classifiers.NB import gnb
from classifiers.SDGC import sdg
from classifiers.StackedClassifiers import stackedClassifier
from classifiers.hierarchical_classifier import hierarchical_classifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Load the raw inputs returned by load_data()
cell_names, gene_names, labels, gene_expr_bin = load_data()

# Gene expression datasets, one entry per dataset, in this order:
# [ge_10xv2, ge_SM2, ge_10xv3, ge_CL, ge_DR, ge_iD, ge_SW, ge_10xv2_2]
gene_expression_datasets = def_gen_expr_for_datasets(gene_expr_bin)

# Labels per dataset, in the matching order:
# [lb_10xv2, lb_SM2, lb_10xv3, lb_CL, lb_DR, lb_iD, lb_SW, lb_10xv2_2]
labels_for_datasets = def_labels_for_datasets(labels)

# Scratch matrix for one training dataset: 8 test datasets x 9 classifiers
acc_matrix = np.zeros((8, 9))

# Full result cube: 8 training datasets x 8 test datasets x 9 classifiers
all_matrix = np.zeros((8, 8, 9))

In [None]:
# Cross-dataset evaluation: every dataset serves once as the training set
# (outer loop, index i) and every classifier is then scored against every
# dataset as the test set (inner loop, index j).
for i, train_dataset in enumerate(gene_expression_datasets):
    print("outer loop :", i)

    # Feature selection: the variance filter is fitted on the training
    # dataset and the same fitted filter is reused for every test dataset,
    # so train and test always share the same feature columns.
    sel = VarianceThreshold(threshold=0.16)
    train_dataset = sel.fit_transform(train_dataset)

    for j, test_dataset in enumerate(gene_expression_datasets):
        print("inner loop :", j)

        # Feature selection (reuse the filter fitted on the training dataset)
        test_dataset = sel.transform(test_dataset)

        print("Define  test & train set")
        if i == j:
            # Same dataset on both sides: hold out 20% of it for testing
            # instead of scoring on the data the model was trained on.
            x_train, x_test, y_train, y_test = train_test_split(
                train_dataset, labels_for_datasets[i], test_size=0.2, random_state=42
            )
        else:
            x_train = train_dataset
            y_train = labels_for_datasets[i]
            x_test = test_dataset
            y_test = labels_for_datasets[j]

        print("classification...")

        # Each helper is called as (model, score); only the score is kept.
        # Apply knn
        model, acc_knn = knn(x_train, y_train, x_test, y_test)

        # Apply logistic regression
        model, acc_logreg = logreg(x_train, y_train, x_test, y_test)

        # Apply Decision Tree Classifier
        model, acc_dtc = dtc(x_train, y_train, x_test, y_test)

        # Apply linear SVC
        model, acc_lsvc = lsvc(x_train, y_train, x_test, y_test)

        # Apply multi-layer perceptron
        model, acc_mlp = mlp(x_train, y_train, x_test, y_test)

        # Apply Gaussian naive Bayes (run once; the previous duplicate call
        # produced a result that was never used)
        model, acc_gnb = gnb(x_train, y_train, x_test, y_test)

        # Apply SGD classifier
        model, acc_sdg = sdg(x_train, y_train, x_test, y_test)

        # Apply hierarchical classifier
        model, acc_hc = hierarchical_classifier(x_train, y_train, x_test, y_test)

        # Apply stacked classifier (used here as returning the score only)
        acc_stacked = stackedClassifier(x_train, y_train, x_test, y_test)

        # Column order must match the classifier labels used when plotting
        all_acc_within_dataset = np.array([acc_knn, acc_logreg, acc_dtc, acc_lsvc, acc_mlp, acc_gnb, acc_sdg, acc_hc, acc_stacked])

        # Row j = scores on test dataset j for the current training dataset
        acc_matrix[j] = all_acc_within_dataset

    # Slice assignment copies the values, so reusing acc_matrix for the next
    # training dataset does not alter what is stored here.
    all_matrix[i] = acc_matrix


# Save performance results to disk

In [None]:
import os

# np.save does not create missing directories; make sure the target exists so
# the results of the long-running evaluation are not lost to a missing folder.
os.makedirs("../results/performance_matrices", exist_ok=True)

# np.save appends the ".npy" extension, producing ".../ctc_res.npy"
np.save("../results/performance_matrices/ctc_res", all_matrix)


# Load performance results from disk

In [12]:
# numpy is imported again here so that this cell can be run on its own,
# without first executing the import cell at the top of the notebook.
import numpy as np

# Read back the stored result cube
all_matrix = np.load(file="../results/performance_matrices/ctc_res.npy")

# Plot results

In [83]:
#%matplotlib
import matplotlib.pyplot as plt
%matplotlib qt

fig, (pl_10xv2, pl_SM2, pl_10xv3, pl_CL, pl_DR, pl_iD, pl_SW, pl_10xv2_2) = plt.subplots(1, 8, sharey=True)

im = pl_10xv2.imshow(all_matrix[0])
im = pl_SM2.imshow(all_matrix[1])
im = pl_10xv3.imshow(all_matrix[2])
im = pl_CL.imshow(all_matrix[3])
im = pl_DR.imshow(all_matrix[4])
im = pl_iD.imshow(all_matrix[5])
im = pl_SW.imshow(all_matrix[6])
im = pl_10xv2_2.imshow(all_matrix[7])

y_labels = ["10xv2", "SM2", "10xv3", "CL", "DR", "iD", "SW", "10xv2_2"]
x_labels = ["knn", "Logistic Regression", "DTC", "LinearSVC", "MLP", "GaussianNB", "SDG", "Hierarchical", "Stacked" ]

pl_10xv2.set_yticks(np.arange(len(y_labels)))
pl_10xv2.set_yticklabels(y_labels, fontsize = 5)

pl_SM2.set_yticks(np.arange(len(y_labels)))
pl_SM2.set_yticklabels(y_labels, fontsize = 5)

pl_10xv3.set_yticks(np.arange(len(y_labels)))
pl_10xv3.set_yticklabels(y_labels, fontsize = 5)

pl_CL.set_yticks(np.arange(len(y_labels)))
pl_CL.set_yticklabels(y_labels, fontsize = 5)

pl_DR.set_yticks(np.arange(len(y_labels)))
pl_DR.set_yticklabels(y_labels, fontsize = 5)

pl_iD.set_yticks(np.arange(len(y_labels)))
pl_iD.set_yticklabels(y_labels, fontsize = 5)

pl_SW.set_yticks(np.arange(len(y_labels)))
pl_SW.set_yticklabels(y_labels, fontsize = 5)

pl_10xv2_2.set_yticks(np.arange(len(y_labels)))
pl_10xv2_2.set_yticklabels(y_labels, fontsize = 5)


pl_10xv2.set_title("10xv2", fontsize = 6)
pl_10xv2.set_xticks(np.arange(len(x_labels)))
pl_10xv2.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_10xv2.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

pl_SM2.set_title('SM2', fontsize = 6)
pl_SM2.set_xticks(np.arange(len(x_labels)))
pl_SM2.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_SM2.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")


pl_10xv3.set_title('10xv3', fontsize = 6)
pl_10xv3.set_xticks(np.arange(len(x_labels)))
pl_10xv3.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_10xv3.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

pl_CL.set_title('CL', fontsize = 6)
pl_CL.set_xticks(np.arange(len(x_labels)))
pl_CL.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_CL.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

pl_DR.set_title('DR', fontsize = 6)
pl_DR.set_xticks(np.arange(len(x_labels)))
pl_DR.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_DR.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")


pl_iD.set_title('iD', fontsize = 6)
pl_iD.set_xticks(np.arange(len(x_labels)))
pl_iD.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_iD.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

pl_SW.set_title('SW', fontsize = 6)
pl_SW.set_xticks(np.arange(len(x_labels)))
pl_SW.set_xticklabels(x_labels,fontsize = 5)
plt.setp(pl_SW.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

pl_10xv2_2.set_title('10xv2_2', fontsize = 6)
pl_10xv2_2.set_xticks(np.arange(len(x_labels)))
pl_10xv2_2.set_xticklabels(x_labels, fontsize = 5)
plt.setp(pl_10xv2_2.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor", fontsize = 5)

fig1 = plt.gcf()

fig.subplots_adjust(right=0.85, top= 1)
cbar_ax = fig.add_axes([0.9, 0.47, 0.01, 0.16])
cbar_ax.axes.tick_params(labelsize = 3.5)
fig1.colorbar(im, cax=cbar_ax)
cbar_ax.set_ylabel('weighted f1 score',fontsize =6)
fig1.text(0.47,0.67, "Training set", ha='center', fontsize= 6)
fig1.text(0.03, 0.55, "Test set", va='center', rotation='vertical', fontsize= 6)



fig, (pl_10xv2, pl_SM2, pl_10xv3, pl_CL, pl_DR, pl_iD, pl_SW, pl_10xv2_2) = plt.subplots(1, 8, sharey=True)

fig1.savefig("../results/report_pdf_results/report_ctc.pdf", format='pdf')

plt.show()