In [4]:
import IHPF
import scanpy as sc

In [5]:
from scipy.sparse import coo_matrix
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
import numpy as np
import pandas as pd

In [30]:
datasets = ['humanpancreas','10Xmouse']
for dataset in datasets:
    adata = sc.read('../Real/scanorama/{}.h5ad'.format(dataset))
    adataobs = adata.obs.copy()
    no_cell_types = len(adataobs['actual'].unique())
    adataobs.reset_index(inplace=True)
    Xlist = list()
    for i,df in adataobs.groupby('batch'):
        batchidx = df.index
        Xlist.append(coo_matrix(adata.X[batchidx,:]))
    model = IHPF.scIHPF(no_cell_types,max_iter=500)
    model.fit(Xlist)
    adata.obsm['IHPF_{}'.format(no_cell_types)] = np.concatenate(model.cell_scores(),axis=0)
    adata.varm['IHPF_{}'.format(no_cell_types)] = model.shared_gene_scores()
    kmeans_cell = KMeans(n_clusters=no_cell_types, random_state=0).fit(normalize(adata.obsm['IHPF_{}'.format(no_cell_types)]))
    adata.obs['IHPF_{}_kmeans_normalised'.format(no_cell_types)] = kmeans_cell.labels_
    print(adata)
    

Clipping dp: was [2.054972355836071e-06, 2.5971088689402677e-05, 6.098692392697558e-05, 3.414987759242649e-07, 4.0265252465587764e-08] now [1.030316692776978e-06, 2.0419471547938884e-07, 4.648955073207617e-07, 2.013995617744513e-08, 4.222342795401346e-09]
[Iter.    0]  loss:3729.622879  pct:100.000000000
[Iter.   10]  loss:166.439517  pct:-95.537363352
[Iter.   20]  loss:139.270597  pct:-16.323599914
[Iter.   30]  loss:132.457036  pct:-4.892318137
[Iter.   40]  loss:130.263620  pct:-1.655945074
[Iter.   50]  loss:129.058297  pct:-0.925295289
[Iter.   60]  loss:128.399832  pct:-0.510207721
[Iter.   70]  loss:127.999939  pct:-0.311442905
[Iter.   80]  loss:127.715797  pct:-0.221986702
[Iter.   90]  loss:127.523018  pct:-0.150943254
[Iter.  100]  loss:127.389771  pct:-0.104488408
[Iter.  110]  loss:127.279905  pct:-0.086244184
[Iter.  120]  loss:127.194298  pct:-0.067259001
[Iter.  130]  loss:127.109281  pct:-0.066839931
[Iter.  140]  loss:127.034315  pct:-0.058977855
[Iter.  150]  loss:1

In [31]:
adata.obs

Unnamed: 0,actual,batch,IHPF_kmeans_normalised,HPF_kmeans_normalised,INMF_kmeans_normalised,IPCA_kmeans_normalised,IHPF_2_kmeans_normalised
0,293t,0,1,0,1,0,0
1,293t,0,1,0,1,0,0
2,293t,0,1,0,1,0,0
3,293t,0,1,0,1,0,0
4,293t,0,1,0,1,0,0
...,...,...,...,...,...,...,...
9525,jurkat,2,0,1,0,0,1
9526,293t,2,1,1,1,0,0
9527,293t,2,1,1,1,0,0
9528,293t,2,1,1,1,0,0


In [32]:
from sklearn.metrics import adjusted_mutual_info_score, silhouette_score
datasets = ['humanpancreas','10Xmouse','newpbmc','hpfpbmc']
batchAMI = dict()
actualAMI = dict()

In [33]:
for dataset in datasets:
    batch = dict()
    actual = dict()
    # adata = sc.read('../Real/scanorama/{}.h5ad'.format(dataset))
    for method in ['IHPF_2','IHPF','HPF','INMF','IPCA']:
        batch[method] = adjusted_mutual_info_score(adata.obs['batch'],adata.obs['{}_kmeans_normalised'.format(method)])
        actual[method] = adjusted_mutual_info_score(adata.obs['actual'],adata.obs['{}_kmeans_normalised'.format(method)])
    batch['actual'] = adjusted_mutual_info_score(adata.obs['batch'],adata.obs['actual'])
    actual['batch'] = adjusted_mutual_info_score(adata.obs['batch'],adata.obs['actual'])
    batchAMI[dataset] = batch
    actualAMI[dataset] = actual

In [34]:
pd.DataFrame(batchAMI)

Unnamed: 0,humanpancreas,10Xmouse,newpbmc,hpfpbmc
IHPF_2,0.498353,0.498353,0.498353,0.498353
IHPF,0.498353,0.498353,0.498353,0.498353
HPF,0.744954,0.744954,0.744954,0.744954
INMF,0.288737,0.288737,0.288737,0.288737
IPCA,0.73875,0.73875,0.73875,0.73875
actual,0.498373,0.498373,0.498373,0.498373


In [35]:
pd.DataFrame(actualAMI)

Unnamed: 0,humanpancreas,10Xmouse,newpbmc,hpfpbmc
IHPF_2,0.976076,0.976076,0.976076,0.976076
IHPF,0.976076,0.976076,0.976076,0.976076
HPF,-6.7e-05,-6.7e-05,-6.7e-05,-6.7e-05
INMF,0.226908,0.226908,0.226908,0.226908
IPCA,0.447701,0.447701,0.447701,0.447701
batch,0.498373,0.498373,0.498373,0.498373
