In [None]:
import os
# Opt-in flag checked by the installer of the deprecated `sklearn` PyPI alias.
# It is set in this kernel's environment, so it is only visible to this process
# and to child processes started from it (e.g. a pip invocation).
os.environ.update({"SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL": "True"})

In [None]:
# !git clone https://github.com/jianhuupenn/ItClust.git  # example data lives in ItClust/tutorial/data/pancreas
# pip install keras==2.2.4 tensorflow==1.14.0
# pip show anndata numpy pandas tensorflow keras scipy scanpy natsort sklearn

In [None]:
import ItClust as ic
import scanpy.api as sc
import os
from numpy.random import seed
from tensorflow import set_random_seed
import pandas as pd
import numpy as np
import warnings
# --- Runtime configuration for the whole notebook ---
# Restrict CUDA-based libraries to the device with index 1; adjust to the host machine.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")
#import sys
#!{sys.executable} -m pip install 'scanpy==1.4.4.post1'
# --- Seeds ---
# NumPy's legacy global RNG has a single state, so one seed call is enough.
# The former `seed(20180806)` (numpy.random.seed) was overwritten by the call
# below on the very next line and never had any effect; the effective NumPy
# seed was, and remains, 10.
np.random.seed(10)
# TensorFlow seed.
# NOTE(review): GPU kernels may still be non-deterministic even with this seed — confirm.
set_random_seed(20180806)

In [2]:
import warnings
# Suppress specific ImportWarning
warnings.filterwarnings("ignore")

## Import the data

In [None]:
# Load the expression data, then replace its observation table with the
# separately prepared label table (first CSV column becomes the index).
# NOTE(review): this assumes the CSV rows are in the same cell order as the
# .h5ad file — confirm.
adata = sc.read("./lung_atlas_public.h5ad")
obs_labels = pd.read_csv(
    "./labels_for_ItClust/missing_at_edge_lung_atlas_obs.csv",
    index_col=0,
)
adata.obs = obs_labels
# Show the resulting object.
adata

In [None]:
# Display the 'celltype' column of the observation table that was loaded from the labels CSV.
adata.obs['celltype']

## Common code: prepare the train and test AnnData objects

In [None]:
# Store the current object in its own `.raw` slot, then keep a copy of the `X`
# matrix under the "counts" layer.
# NOTE(review): the layer name suggests X holds raw counts at this point — confirm.
adata.raw = adata
adata.layers["counts"] = adata.X.copy()

In [None]:
# Print the object after `.raw` and the "counts" layer have been set.
print(adata)

In [None]:
# Turn the stored `.raw` slot back into its own object; the following cells
# rewrite the names on `raw_adata` rather than on `adata`.
raw_adata = adata.raw.to_adata()

In [None]:
# Normalise variable names to `str`: entries that are `bytes` are decoded as
# UTF-8, everything else is kept unchanged.
decoded_var_names = []
for var_name in raw_adata.var_names:
    if isinstance(var_name, bytes):
        var_name = var_name.decode("utf-8")
    decoded_var_names.append(var_name)
raw_adata.var_names = decoded_var_names

In [None]:
# Normalise observation names to `str`: entries that are `bytes` are decoded
# as UTF-8, everything else is kept unchanged.
decoded_obs_names = []
for obs_name in raw_adata.obs_names:
    if isinstance(obs_name, bytes):
        obs_name = obs_name.decode("utf-8")
    decoded_obs_names.append(obs_name)
raw_adata.obs_names = decoded_obs_names

In [None]:
# Inspect the variable names after the bytes -> str decoding step.
raw_adata.var_names

In [None]:
# Inspect the observation names after the bytes -> str decoding step.
raw_adata.obs_names

In [None]:
# Re-assign `.raw` from the object itself, after var_names/obs_names were rewritten.
# NOTE(review): presumably done so that `.raw` carries the decoded names — confirm.
raw_adata.raw = raw_adata

In [None]:
# Training object, obtained from `raw_adata.raw`; later passed as the first argument to `clf.fit`.
adata_train = raw_adata.raw.to_adata()

In [None]:
# Test object, obtained from the same `raw_adata.raw` as `adata_train`, so both
# originate from the same data.
# NOTE(review): training and predicting on the same cells looks intentional here — confirm.
adata_test = raw_adata.raw.to_adata()

In [None]:
# Populate the `.raw` slot of the training object from the object itself.
# NOTE(review): presumably because the ItClust fit step reads `.raw` — confirm.
adata_train.raw = adata_train

In [None]:
# Populate the `.raw` slot of the test object from the object itself.
# NOTE(review): presumably because the ItClust fit step reads `.raw` — confirm.
adata_test.raw = adata_test

In [None]:
# Sanity check before fitting: print the variable names held in the training object's `.raw`.
print(adata_train.raw.var_names)

In [None]:
# Sanity check before fitting: print the observation names held in the training object's `.raw`.
print(adata_train.raw.obs_names)

In [None]:
# Create the ItClust transfer-learning classifier with default arguments and
# fit it, passing `adata_train` first and `adata_test` second.
# NOTE(review): presumably the argument order is (source, target) — confirm against the ItClust docs.
clf=ic.transfer_learning_clf()
clf.fit(adata_train, adata_test)

## Predict and display cell-type annotations

In [None]:
# Build a mapping  cluster id (as string) -> [source cell-type label, score].
# The score is the share of cells finally assigned to the cluster
# ("decisy_trans_True") that were also in that cluster initially
# ("trajectory_0"); 0.0001 is added to the denominator to avoid division by zero.
# Note: `celltype_pred` is rebound by the `clf.predict()` call in the next cell.
celltype_pred = {}
test_obs = clf.adata_test.obs

# Category labels of the training 'celltype' column, addressed by position below.
source_label = pd.Series(clf.adata_train.obs["celltype"], dtype="category").cat.categories.tolist()

# One cluster per column of the probability matrix.
num_ori_ct = clf.adata_test.obsm["prob_matrix" + str(clf.save_atr)].shape[1]
target_label = list(map(str, range(num_ori_ct)))

for i, cluster_id in enumerate(target_label):
    end_cell = test_obs.index[test_obs["decisy_trans_True"] == cluster_id]
    start_cell = test_obs.index[test_obs["trajectory_0"] == cluster_id]
    overlap = len(set(end_cell) & set(start_cell))
    retained_fraction = round(overlap / (len(end_cell) + 0.0001), 3)
    celltype_pred[cluster_id] = [source_label[i], retained_fraction]

In [None]:
# Run prediction; the three returned values are unpacked as `pred`, `prob` and
# `celltype_pred` (this rebinds the `celltype_pred` dict built in the previous cell).
# NOTE(review): predict() may also write result files to disk by default — confirm against ItClust.
pred, prob, celltype_pred = clf.predict()
# Show the first rows of `pred`.
pred.head()

## Save the embeddings

In [None]:
# Container for learned embeddings, keyed by a label (filled in the next cell).
embeddings_dict = dict()

In [None]:
# The embedding is stored in `obsm` under "X_Embeded_z" followed by the
# classifier's `save_atr` value; keep it under the 'lung_atlas' key.
embedding_key = "X_Embeded_z" + str(clf.save_atr)
embeddings_dict["lung_atlas"] = clf.adata_test.obsm[embedding_key]

In [None]:
# Display the embedding stored under the 'lung_atlas' key.
embeddings_dict['lung_atlas']

In [None]:
# Combine every stored embedding into one wide table. Concatenating a dict
# along axis=1 places the dict key as the outer column level and the embedding
# dimension as the inner one; rows are labelled with the test object's
# observation names.
# NOTE(review): this assumes the embedding rows have the same count and order
# as `adata_test.obs_names` — confirm that fitting does not filter or reorder cells.
combined_embeddings = pd.concat(
    {key: pd.DataFrame(value, index=adata_test.obs_names) for key, value in embeddings_dict.items()},
    axis=1,
)

# `to_csv` does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
embeddings_csv_path = "./results/missing_at_edge_lung_atlas_itclust_embeddings.csv"
os.makedirs(os.path.dirname(embeddings_csv_path), exist_ok=True)
combined_embeddings.to_csv(embeddings_csv_path)