In [1]:
# Attach Google Drive to the Colab VM so that files under the mount point
# (project sources, requirements, data) are reachable from later cells.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Pin pip itself to version 20.1 for this runtime.
# NOTE(review): the recorded output of this cell reports that pip-tools 6.2.0
# requires pip>=20.3, so this downgrade leaves that package in a conflicting
# state — confirm the pin is still needed.
!python -m pip install pip==20.1

Collecting pip==20.1
  Downloading pip-20.1-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 3.2 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pip-tools 6.2.0 requires pip>=20.3, but you have pip 20.1 which is incompatible.[0m
Successfully installed pip-20.1


In [None]:
# Install the project's dependencies from the requirements file kept on Drive.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# may not have been run in the recorded session — confirm before relying on it.
!pip install -r '/content/drive/MyDrive/sctag/requirements.txt'

In [5]:
# Make the modules stored in the project folder on Drive importable
# (preprocess, utils, sctag, loss, graph_function are imported below).
import sys
sys.path.append('/content/drive/MyDrive/sctag')

import tensorflow as tf
from numpy.random import seed
# NOTE(review): later cells use `np`, `sc`, `prepro`, `normalize` and `get_adj`
# without importing them explicitly, so they presumably come from the wildcard
# imports in this cell — TODO confirm which module exports each name.
from preprocess import *
from utils import *
import argparse

from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from sklearn import metrics
import scipy.io as scio
# Fix the seeds of NumPy's global RNG and of TensorFlow for repeatable runs.
seed(1)
tf.random.set_seed(1)

from scipy import sparse as sp


# Silence FutureWarning messages and restrict TensorFlow logging to errors.
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Project-local model class, loss functions and graph helpers.
from sctag import SCTAG
from loss import *
from graph_function import *

In [6]:
# Compute cluster centroids, which is the mean of all points in one cluster.
def computeCentroids(data, labels):
    """
    Return one centroid per distinct label.

    # Arguments
        data: array indexable by a boolean mask along its first axis; one row
            per sample.
        labels: cluster label of every sample, array-like with one entry per
            row of `data`.
    # Return
        numpy.array with one row per distinct label, ordered by ascending
        label value. Each row is the mean (over axis 0) of the samples that
        carry that label.
    """
    # Accept lists as well as arrays; `labels == value` must yield a mask.
    labels = np.asarray(labels)
    # Iterate over the labels that actually occur rather than range(k): with
    # non-contiguous labels (e.g. {0, 2}) range(k) would average an empty
    # selection (NaN row) and silently skip a real cluster.
    return np.array([data[labels == label].mean(0) for label in np.unique(labels)])

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one matching between
    predicted cluster ids and true labels. Requires SciPy.
    # Arguments
        y_true: true labels, array-like with shape `(n_samples,)`
        y_pred: predicted labels, array-like with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    # Raises
        AssertionError if the two label vectors differ in size.
    """
    # `sklearn.utils.linear_assignment_` no longer exists in current
    # scikit-learn releases; SciPy's solver is the supported replacement.
    from scipy.optimize import linear_sum_assignment

    # Labels are used as array indices below, so force integer arrays.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size

    # Contingency table: w[p, t] = number of samples predicted p with truth t.
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    # add.at accumulates correctly when the same (p, t) pair repeats.
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises cost, so turn counts into costs via (max - count).
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size

In [7]:
# Declare the run options and their defaults.
# NOTE(review): no parse_args() call is visible in this notebook; the later
# cells pass the same values as literals — confirm whether the parser is used.
parser = argparse.ArgumentParser(
    description="train",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("--dataname", type=str, default="Quake_Smart-seq2_Limb_Muscle")
parser.add_argument("--highly_genes", type=int, default=500)
parser.add_argument("--pretrain_epochs", type=int, default=1000)
parser.add_argument("--maxiter", type=int, default=300)

_StoreAction(option_strings=['--maxiter'], dest='maxiter', nargs=None, const=None, default=300, type=<class 'int'>, choices=None, help=None, metavar=None)

In [8]:
# Load data
# Read the expression matrix `x` and the labels `y` from the dataset's HDF5 file.
filepath = '/content/drive/MyDrive/sctag/data/Quake_Smart-seq2_Limb_Muscle'
x, y = prepro(filepath + '/data.h5')
# Round counts up to whole numbers. `np.int` was only an alias of the builtin
# `int` (deprecated in NumPy 1.20, removed in 1.24), so `int` gives the same
# dtype and keeps working on current NumPy.
x = np.ceil(x).astype(int)
# Number of clusters taken as the span of the label values.
# NOTE(review): this equals the number of classes only if the labels are
# consecutive integers — TODO confirm for this dataset.
cluster_number = int(max(y) - min(y) + 1)
# Wrap the counts in an AnnData object and attach the labels as `Group`.
adata = sc.AnnData(x)
adata.obs['Group'] = y
adata = normalize(adata, copy=True, highly_genes=500, size_factors=True, normalize_input=True, logtrans_input=True)
# Matrix handed to the model-building cell.
count = adata.X

  if isinstance(data, AnnData) and data.isview:
  if adata.isview:


In [9]:
# Build model
from sklearn.cluster import SpectralClustering

adj, adj_n = get_adj(count)
model = SCTAG(count, adj=adj, adj_n=adj_n)

# Pre-training
model.pre_train(epochs=1000)
Y = model.embedding(count, adj_n)

# Spectral clustering on the precomputed adjacency gives a label per sample;
# the per-label means of the embedding `Y` are kept in `centers`.
labels = SpectralClustering(
    n_clusters=cluster_number,
    affinity="precomputed",
    assign_labels="discretize",
    random_state=0,
).fit_predict(adj)
centers = computeCentroids(Y, labels)

Epoch 10  zinb_loss: 0.4605904   A_rec_loss: 0.060504682
Epoch 20  zinb_loss: 0.44781798   A_rec_loss: 0.019720197
Epoch 30  zinb_loss: 0.422904   A_rec_loss: 0.017147386
Epoch 40  zinb_loss: 0.38029116   A_rec_loss: 0.016393337
Epoch 50  zinb_loss: 0.33837423   A_rec_loss: 0.014907937
Epoch 60  zinb_loss: 0.3189943   A_rec_loss: 0.014530956
Epoch 70  zinb_loss: 0.30431154   A_rec_loss: 0.014581355
Epoch 80  zinb_loss: 0.29286408   A_rec_loss: 0.014494008
Epoch 90  zinb_loss: 0.28329423   A_rec_loss: 0.014353795
Epoch 100  zinb_loss: 0.275151   A_rec_loss: 0.014270246
Epoch 110  zinb_loss: 0.26829273   A_rec_loss: 0.014188629
Epoch 120  zinb_loss: 0.2627482   A_rec_loss: 0.014051388
Epoch 130  zinb_loss: 0.25856382   A_rec_loss: 0.014003832
Epoch 140  zinb_loss: 0.2553891   A_rec_loss: 0.013970969
Epoch 150  zinb_loss: 0.2528679   A_rec_loss: 0.0139179705
Epoch 160  zinb_loss: 0.2508104   A_rec_loss: 0.013851836
Epoch 170  zinb_loss: 0.24907096   A_rec_loss: 0.013743291
Epoch 180  zinb

  adjacency = check_symmetric(adjacency)


In [10]:
# Clustering training
Cluster_predicted = model.alt_train(y, epochs=300, centers=centers)
if y is not None:
    # Score against local integer copies instead of rebinding `y` to a Python
    # list: after the rebinding a second run of this cell failed, because
    # cluster_acc calls `.astype` on its first argument.
    y_true = np.asarray(y).astype(np.int64)
    Cluster_predicted.y_pred = np.array(Cluster_predicted.y_pred)
    y_pred = Cluster_predicted.y_pred

    # Accuracy under best label matching, plus NMI and ARI, rounded to 5 places.
    acc = np.round(cluster_acc(y_true, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y_true, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y_true, y_pred), 5)
    print('ACC= %.4f, NMI= %.4f, ARI= %.4f'
        % (acc, nmi, ari))

Epoch 0  zinb_loss:  0.17729902  A_rec_loss:  0.013304185  cluster_loss:  0.23224849
Epoch 8  zinb_loss:  0.33651155  A_rec_loss:  0.013447595  cluster_loss:  0.19002314
Epoch 16  zinb_loss:  0.24143533  A_rec_loss:  0.013453748  cluster_loss:  0.1539921
Epoch 24  zinb_loss:  0.22269346  A_rec_loss:  0.013490005  cluster_loss:  0.12557748
Epoch 32  zinb_loss:  0.21837875  A_rec_loss:  0.013616322  cluster_loss:  0.10463761
Epoch 40  zinb_loss:  0.21877529  A_rec_loss:  0.013670526  cluster_loss:  0.08960147
Epoch 48  zinb_loss:  0.21903658  A_rec_loss:  0.013673743  cluster_loss:  0.07800763
Epoch 56  zinb_loss:  0.22038655  A_rec_loss:  0.01366682  cluster_loss:  0.069112346
Epoch 64  zinb_loss:  0.22166145  A_rec_loss:  0.013663192  cluster_loss:  0.062568314
Epoch 72  zinb_loss:  0.22251336  A_rec_loss:  0.013665533  cluster_loss:  0.057167966
Epoch 80  zinb_loss:  0.22343978  A_rec_loss:  0.013666533  cluster_loss:  0.053416457
Epoch 88  zinb_loss:  0.22400244  A_rec_loss:  0.01366