# Figure 6 Single-Cell Decomposition using Hierarchical Autoencoder (scDHA)

### Figure 6B Bar Graph

In [None]:
# scDHA run on gene-level data: the predictions file supplies the
# `true` and `predicted` label columns used below.
df_scDHA = pd.read_csv("predictions_scDHA.csv", sep=",", header=0, index_col=0)
Y_test = df_scDHA["true"]
scDHA_pred = df_scDHA["predicted"]

# Every metric is stored as a one-element list (the summary table later
# averages each list); precision, recall and F1 are macro-averaged.
scDHA_performance = {
    "precision": [precision_score(Y_test, scDHA_pred, average="macro")],
    "recall": [recall_score(Y_test, scDHA_pred, average="macro")],
    "f1": [f1_score(Y_test, scDHA_pred, average="macro")],
    "accuracy": [accuracy_score(Y_test, scDHA_pred)],
}

In [None]:
# scDHA run on gene-set data: the predictions file supplies the
# `true` and `predicted` label columns used below.
scDHA_gsets = pd.read_csv("predictions_scDHA_gs.csv", sep=",", header=0, index_col=0)
Y_test_gsets = scDHA_gsets["true"]
scDHA_pred_gsets = scDHA_gsets["predicted"]

# Every metric is stored as a one-element list (the summary table later
# averages each list); precision, recall and F1 are macro-averaged.
scDHAgs_performance = {
    "precision": [precision_score(Y_test_gsets, scDHA_pred_gsets, average="macro")],
    "recall": [recall_score(Y_test_gsets, scDHA_pred_gsets, average="macro")],
    "f1": [f1_score(Y_test_gsets, scDHA_pred_gsets, average="macro")],
    "accuracy": [accuracy_score(Y_test_gsets, scDHA_pred_gsets)],
}

In [None]:
# Summary table: one row per metric, one column per scDHA input type.
# The display labels are positional and rely on the insertion order of the
# performance dicts (precision, recall, f1, accuracy).
metric_labels = ["Precision", "Recall", "F1", "Accuracy"]
scDHA_means = [np.mean(scores) for scores in scDHA_performance.values()]
scDHAgs_means = [np.mean(scores) for scores in scDHAgs_performance.values()]
classification_df = pd.DataFrame(
    {
        "metric": metric_labels,
        "scDHA": scDHA_means,
        "scDHA_genesets": scDHAgs_means,
    }
)
print(classification_df)

In [None]:
# Grouped bar chart (Fig. 6B): for each metric, the gene-level bar sits next
# to the gene-set bar.
bar_width = 0.2  # single source for bar width, group offset and tick centring
ind = np.arange(len(classification_df))  # one bar group per metric row
fig, ax = plt.subplots(figsize=(25, 10))

scDHA_bar = ax.bar(ind, classification_df.scDHA,
                   width=bar_width, color='#1f78b4', align='center')
scDHAgs_bar = ax.bar(ind + bar_width, classification_df.scDHA_genesets,
                     width=bar_width, color='#33a02c', align='center')

# Axis labels and range; the y-axis is truncated to [0.5, 1].
ax.set_ylabel("Model Performance", size=38)
ax.set_xlabel("Metric", size=38)
ax.set_ylim([0.50, 1])
ax.xaxis.labelpad = 20
ax.yaxis.labelpad = 20

# Put each tick midway between the two bars of its group. Positions and
# labels are set in two calls so this also runs on matplotlib releases that
# lack the `labels=` keyword of set_xticks.
ax.set_xticks(ind + bar_width / 2)
ax.set_xticklabels(classification_df.metric, rotation=0)
ax.tick_params(axis='both', labelsize=32)

fig.suptitle('scDHA cell classification using genes vs gene sets', fontsize=38)
ax.legend((scDHA_bar[0], scDHAgs_bar[0]), ('Genes', 'Gene Sets'),
          prop={'size': 22}, bbox_to_anchor=(1.23, 1), loc='upper right')

# Horizontal grid lines only, plus a black frame around the axes background.
ax.grid(color='black', linestyle='-', linewidth=0.7)
ax.xaxis.grid(False)
ax.patch.set_edgecolor('black')
ax.patch.set_linewidth(2)  # numeric width (was the string '2')

vis = fig  # same Figure object as ax.get_figure(); the `vis` name is retained
vis.savefig("Fig6B.pdf", bbox_inches='tight')

### Figure 6C Confusion Matrices

In [None]:
# Confusion matrix for gene-level data, normalised per true class (row).
# The label set is the union of true and predicted classes, so a class that
# is present in `true` but never predicted still gets its own row and column
# instead of being dropped from the matrix.
class_labels = np.union1d(df_scDHA.true, df_scDHA.predicted)
matrix = confusion_matrix(df_scDHA.true, df_scDHA.predicted, labels=class_labels)

# Divide each row by its total; a row with no true samples (a label that is
# only ever predicted) is left at 0 rather than becoming NaN from 0/0.
row_totals = matrix.sum(axis=1, keepdims=True)
matrix = np.divide(matrix, row_totals,
                   out=np.zeros(matrix.shape, dtype=float),
                   where=row_totals != 0)
df_matrix = pd.DataFrame(matrix, index=class_labels, columns=class_labels)

sns.set(font_scale=0.65)
plt.figure()  # fresh figure so the heatmap never lands on a previously used axes
ax = sns.heatmap(df_matrix, cmap="YlGnBu", annot=True, vmin=0, vmax=1,
                 linewidth=0.5, annot_kws={"size": 6})
vis = ax.get_figure()
# Save before showing: some backends close the figure once it has been shown.
vis.savefig("confusionmtx_scDHAgenes.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Confusion matrix for gene-set data, normalised per true class (row).
# The label set is the union of true and predicted classes, so a class that
# is present in `true` but never predicted still gets its own row and column
# instead of being dropped from the matrix.
class_labels = np.union1d(scDHA_gsets.true, scDHA_gsets.predicted)
matrix = confusion_matrix(scDHA_gsets.true, scDHA_gsets.predicted, labels=class_labels)

# Divide each row by its total; a row with no true samples (a label that is
# only ever predicted) is left at 0 rather than becoming NaN from 0/0.
row_totals = matrix.sum(axis=1, keepdims=True)
matrix = np.divide(matrix, row_totals,
                   out=np.zeros(matrix.shape, dtype=float),
                   where=row_totals != 0)
df_matrix = pd.DataFrame(matrix, index=class_labels, columns=class_labels)

sns.set(font_scale=0.65)
plt.figure()  # fresh figure so the heatmap never lands on a previously used axes
ax = sns.heatmap(df_matrix, cmap="YlGnBu", annot=True, vmin=0, vmax=1,
                 linewidth=0.5, annot_kws={"size": 6})
vis = ax.get_figure()
# Save before showing: some backends close the figure once it has been shown.
vis.savefig("confusionmtx_scDHAgenesets.pdf", bbox_inches='tight')
plt.show()