In [None]:
from libraries import *
from parameters import *
from numpy import asarray
from numpy import savetxt
import matplotlib as mpl
from sklearn.preprocessing import LabelEncoder
import ot
import umap
%matplotlib inline

In [None]:
# Run the rest of the notebook from the project directory so that relative
# paths (input CSVs, saved figures) resolve against it. The previous bare
# `os.getcwd()` was dropped: it was not the last expression of the cell, so
# its value was never displayed.
os.chdir(projectDir)

In [None]:
%load_ext rpy2.ipython

In [None]:
def ot_distance(exp1, exp2, N=None):
    """Optimal-transport cost between two point sets with uniform weights.

    Parameters
    ----------
    exp1, exp2 : array-like
        Two sets of observations, one observation per row. Must expose
        ``.shape``, ``len()`` and row indexing of the form ``arr[rows, :]``.
    N : int, optional
        If given, a random subset of rows (drawn without replacement from
        NumPy's global RNG) is taken from each input before the distance is
        computed. When an input has fewer than ``N`` rows, all of its rows
        are used instead of raising ``ValueError``. ``None`` (default) uses
        every row of both inputs.

    Returns
    -------
    The value returned by ``ot.emd2`` for uniform weights on both point sets
    and the pairwise cost matrix produced by ``ot.dist`` (used unnormalised).
    """
    if N is not None:
        # Clamp the sample size per input: np.random.choice(..., replace=False)
        # cannot draw more items than the population holds.
        n1 = min(N, exp1.shape[0])
        s1 = np.random.choice(exp1.shape[0], n1, replace=False)
        exp1 = exp1[s1, :]
        n2 = min(N, exp2.shape[0])
        s2 = np.random.choice(exp2.shape[0], n2, replace=False)
        exp2 = exp2[s2, :]

    # Pairwise cost between every row of exp1 and every row of exp2.
    M = ot.dist(exp1, exp2)

    # Uniform probability mass on each (sub)sampled observation.
    a = np.ones(len(exp1)) / len(exp1)
    b = np.ones(len(exp2)) / len(exp2)

    return ot.emd2(a, b, M)

In [None]:
# Names of the per-cell obs columns that are combined into one category
# label further down: K_0 ... K_5 followed by K_CONTROL.
zs = [f"K_{idx}" for idx in range(6)] + ["K_CONTROL"]

# Load the annotated data object from the path configured in `parameters`.
adata = sc.read(par_save_filename_8)

In [None]:
# Gene signatures, one signature per CSV column.
dcGenes = pd.read_csv('./PositiveControls/DC_cellstate_genes.csv')

# Columns of unequal length are padded with NaN by pandas, and `.unique()`
# keeps NaN as a value, so drop missing entries before building gene lists.
dc1Genes = dcGenes["DC1 genes"].dropna().unique()
dc2Genes = dcGenes["DC2 genes"].dropna().unique()
mregGenes = dcGenes["mregDC genes"].dropna().unique()

# Macrophage signature score -> adata.obs["Mac"].
macGenes = dcGenes["Macrophage genes"].dropna().unique()
sc.tl.score_genes(adata=adata, gene_list=macGenes, score_name="Mac")

# Combined DC signature (DC1 + DC2 + mregDC genes) -> adata.obs["DCSig"].
allDCgenes = np.concatenate((dc1Genes, dc2Genes, mregGenes))
sc.tl.score_genes(adata=adata, gene_list=allDCgenes, score_name="DCSig")

# Standardise both scores across cells so they are on a comparable scale.
adata.obs["DCSig_zscore"] = scipy.stats.zscore(adata.obs["DCSig"])
adata.obs["Mac_zscore"] = scipy.stats.zscore(adata.obs["Mac"])

# Positive values: macrophage score dominates; negative: DC score dominates.
adata.obs["MACoverDC"] = adata.obs["Mac_zscore"] - adata.obs["DCSig_zscore"]

In [None]:
# Keep only the cells in Leiden clusters '0', '1' and '2'. Subsetting an
# AnnData object yields a view; later cells add columns to `adata.obs`, so
# take an explicit copy instead of relying on the implicit view-to-copy
# conversion (and its warning) on first write.
adata = adata[adata.obs.leiden.isin(['0', '1', '2']),].copy()

In [None]:
# Transposed view of the `zs` columns: one column per cell, one row per
# entry of `zs`.
fBarMat_grouped = adata.obs[zs].T

# For every cell, glue its non-missing entries together into one string key.
fBarMat_groupedDeneme = fBarMat_grouped.apply(
    lambda cell_values: ''.join(cell_values.dropna().astype(str))
)

# Turn the string keys into integer codes, keeping the cell index so the
# result aligns with `adata.obs` on assignment.
label_encoder = LabelEncoder()
fBarMat_encodedDF = pd.Series(
    label_encoder.fit_transform(fBarMat_groupedDeneme),
    index=fBarMat_groupedDeneme.index,
)

adata.obs["perCat"] = fBarMat_encodedDF

In [None]:
# Human-readable name for each integer `perCat` code.
perCat_names = {
    6: "K0",
    5: "K1",
    4: "K2",
    3: "K3",
    2: "K4",
    1: "K5",
    0: "CONTROL",
}

# Build a new Series instead of writing strings into the integer Series in
# place (assigning values of an incompatible dtype is deprecated in pandas).
# Values without an entry in the mapping are passed through unchanged, which
# also makes re-running this cell a no-op.
fBarMat_encodedDF = fBarMat_encodedDF.map(lambda code: perCat_names.get(code, code))

In [None]:
# Store the named category labels (the current contents of
# `fBarMat_encodedDF`) as a per-cell column next to the integer `perCat` codes.
adata.obs["perCatGroups"] = fBarMat_encodedDF

In [None]:
# UMAP plot of `adata` coloured by the `leiden` obs column, with the legend
# labels drawn on top of the data.
sc.pl.umap(adata, color='leiden', legend_loc="on data")

In [None]:
# UMAP plot of `adata` coloured by the `perCatGroups` obs column, with the
# legend labels drawn on top of the data.
sc.pl.umap(adata, color='perCatGroups', legend_loc="on data")

In [None]:
# UMAP embedding of the expression matrix, with the integer `perCat` codes
# passed as the target `y`. A fixed `random_state` makes the embedding
# reproducible across kernel restarts; without it every run yields a
# different layout.
# NOTE(review): the plotting cells below use hard-coded axis limits that were
# presumably tuned to one particular embedding — re-check them after re-running.
embedding = umap.UMAP(n_neighbors=18, random_state=0).fit_transform(
    X=pd.DataFrame(adata.X),
    y=np.array(adata.obs["perCat"]),
)

In [None]:
# Colour-bar tick labels, listed in the order of the integer `perCat` codes
# 0..6 that colour the points.
classes = ["CONTROL", "K5", "K4", "K3", "K2", "K1", "K0"]

fig, ax = plt.subplots(1, figsize=(10, 5))
points = ax.scatter(*embedding.T, s=0.9, c=adata.obs["perCat"], cmap='tab10', alpha=0.5)

# Fixed viewing window on the embedding.
ax.set_ylim(-17, -3)
ax.set_xlim(-1, 8)

# One discrete colour-bar segment per code: boundaries at -0.5, 0.5, ..., 6.5
# with a tick centred on every integer code.
cbar = fig.colorbar(points, ax=ax, boundaries=np.arange(8) - 0.5)
cbar.set_ticks(np.arange(7))
cbar.set_ticklabels(classes)

# `*embedding.T` puts the first embedding dimension on x and the second on y,
# so UMAP1 labels the x-axis and UMAP2 the y-axis (they were swapped before).
ax.set_xlabel('UMAP1', fontsize=15)
ax.set_ylabel('UMAP2', fontsize=15)

fig.savefig('Figure_3C.pdf')

In [None]:
fig, ax = plt.subplots(1, figsize=(10, 5))

# Same embedding as Figure 3C, coloured by the continuous MACoverDC score
# (macrophage z-score minus DC-signature z-score).
points = ax.scatter(*embedding.T, s=2, c=adata.obs["MACoverDC"], cmap='coolwarm', alpha=0.5)

# Fixed viewing window on the embedding.
ax.set_ylim(-17, -3)
ax.set_xlim(-1, 8)

# Colour bar split at integer steps from -6 to 3, with a tick on every step.
cbar = fig.colorbar(points, ax=ax, boundaries=np.arange(-6, 4))
cbar.set_ticks(np.arange(-6, 4))

# `*embedding.T` puts the first embedding dimension on x and the second on y,
# so UMAP1 labels the x-axis and UMAP2 the y-axis (they were swapped before).
ax.set_xlabel('UMAP1', fontsize=15)
ax.set_ylabel('UMAP2', fontsize=15)

fig.savefig('Figure_3C_supplement.pdf')

In [None]:
# 7 x 7 table of pairwise distances between groups, initialised to 0.0 and
# labelled with the group names on both axes.
real_dist_mat = pd.DataFrame(
    0.0,
    index=['CONTROL', 'K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
    columns=['CONTROL', 'K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
)


In [None]:
indNames = ['CONTROL', 'K0', 'K1', 'K2', 'K3', 'K4', 'K5']

# NOTE(review): `inDat` is not defined in any cell visible in this notebook;
# it must already exist in the kernel (or come from a star import) — confirm
# which matrix it is and define it explicitly above this cell.
# NOTE(review): `ot_distance` subsamples with NumPy's global, unseeded RNG, so
# the values below vary slightly between runs.

# Fill the upper triangle (diagonal included) of `real_dist_mat` with the
# average OT distance over 100 random subsamples of 300 cells per group.
for i in range(len(indNames)):
    # Rows of group i do not depend on j, so select them once per outer step.
    x = inDat[adata.obs["perCatGroups"] == indNames[i],]

    for j in range(i, len(indNames)):
        y = inDat[adata.obs["perCatGroups"] == indNames[j],]

        # One column (0) holding the 100 repeated distance estimates.
        otDists = pd.DataFrame([ot_distance(x, y, N=300) for _ in range(100)])

        # `np.float` was removed from NumPy; use the builtin `float` on the
        # scalar column mean. `np.round(..., -1)` rounds to the nearest ten.
        real_dist_mat.loc[indNames[i], indNames[j]] = np.round(float(otDists[0].mean()), -1)


In [None]:
# Re-apply the group names to both axes of the distance table.
real_dist_mat.index = real_dist_mat.columns = ["CONTROL", "K0", "K1", "K2", "K3", "K4", "K5"]

In [None]:
plt.figure(figsize=(8, 7))

# Mask with 1.0 strictly below the diagonal and 0.0 elsewhere: the heatmap
# hides the masked cells, leaving the diagonal and upper triangle visible.
mask = np.tril(np.ones(real_dist_mat.shape), k=-1)

# Annotated heatmap of the distance table, numbers printed without decimals.
ax = sns.heatmap(
    real_dist_mat,
    mask=mask,
    annot=True,
    fmt='.0f',
    cmap="coolwarm",
)

plt.savefig('Figure_3D.pdf')