### Detailed explanation of running scSemiGCN

#### Import modules and functions

In [None]:
import enhancement
import torch
import numpy as np
from scipy.io import loadmat
import argparse
from torch.utils.data import DataLoader
from TwoLayerGCN import GCN
import argparse
from validation import eval, make_prediction
import torch.nn as nn
import enhancement
import random
import os
from PseudoLabels import knn_similarity
from contrastive_loss import contrastive_loss
from OneLayerGCN import preprocessor
import pandas as pd

#### Set random seed for reproducibility

In [None]:
def seed_torch(seed=1029):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` into the environment, and sets the cuDNN flags to
    deterministic / non-benchmark.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Prefer repeatable cuDNN kernels over auto-tuned ones.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

#### Define functions for training GCNs

In [None]:
def pretrain(args, model, feature, pseudo_labels, loss_func, optimizer):
    """Optimize ``model`` with ``loss_func`` and return its refined features.

    Runs ``args.round`` full-batch optimization steps and then evaluates the
    trained model once more on ``feature``.

    Args:
        args: Settings object; ``args.round`` (number of optimization steps)
            and ``args.tau`` (forwarded as third argument to ``loss_func``)
            are read.
        model: ``torch.nn.Module`` mapping the feature tensor to refined
            features.
        feature: Input feature tensor; it is cast to ``float32`` before use.
        pseudo_labels: Labels forwarded unchanged to ``loss_func``.
        loss_func: Callable ``loss_func(output, pseudo_labels, tau)`` that
            returns a scalar tensor supporting ``backward()``.
        optimizer: Optimizer updating ``model``'s parameters.

    Returns:
        The output of the trained model for ``feature``, computed in
        evaluation mode and without gradient tracking.
    """
    feature = feature.float()
    for i in range(args.round):
        feature_ = model(feature)
        loss = loss_func(feature_, pseudo_labels, args.tau)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('contrastive loss[%d/%d]: %.4f' % (i+1, args.round, loss))

    # The final pass produces the features handed to the next stage, so it
    # must not be perturbed by dropout and needs no autograd graph.  The
    # previous train/eval mode is restored so the caller sees no mode change.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            feature_ = model(feature)
    finally:
        model.train(was_training)

    return feature_


def train(model, features, loss, optimizer, train_dataset, num_class, epochs):
    """Train ``model`` on mini-batches of labeled rows and report metrics.

    Each batch ``x`` from ``train_dataset`` is a 2-column tensor: column 0
    holds the label and column 1 the row index into the model output.  Labels
    are shifted down by one before being passed to ``loss`` (presumably they
    are stored 1-based -- confirm against the data format).

    Args:
        model: ``torch.nn.Module`` called as ``model(features)``.
        features: Input passed to the model on every step.
        loss: Callable ``loss(predictions, targets)`` returning a scalar
            tensor.
        optimizer: Optimizer updating ``model``'s parameters.
        train_dataset: Iterable of ``(label, index)`` batches.
        num_class: Number of classes, forwarded to ``validation.eval``.
        epochs: Number of passes over ``train_dataset``.

    Returns:
        The same ``model`` object.  When ``epochs >= 1`` it is left in
        evaluation mode, ready for inference.
    """
    for i in range(epochs):
        model.train()
        losses_train = 0
        for x in train_dataset:
            pro = model(features)
            y = x.T[0]-1
            a = pro[x.T[1].long()]
            loss_ = loss(a, y.long())
            optimizer.zero_grad()
            loss_.backward()
            optimizer.step()
            losses_train += loss_.cpu().item()

        # Score the model as it stands after the epoch: a fresh forward pass
        # in eval mode (dropout off, no autograd graph) instead of reusing the
        # stale, dropout-perturbed output of the last mini-batch.
        model.eval()
        with torch.no_grad():
            pro = model(features)
        eval_train = eval(pro, train_dataset, num_class)

        print("In epoch: %d, losses: %.4f, acc_train:%.4f, f1_train:%.4f, ,auc_train:%.4f" 
              % (i+1, losses_train, eval_train[0],  eval_train[1], eval_train[2]))

    return model

#### Parameter settings
Set parameters for running scSemiGCN. Descriptions of the parameters can be found in the README file. Please change the default values to apply your own settings.

In [None]:
class Params:
    """Settings for one scSemiGCN run.

    Every setting is a keyword-only constructor argument whose default is the
    value that used to be hard-coded, so ``Params()`` behaves as before and
    e.g. ``Params(epoch=200, dir="out")`` overrides individual settings.
    The argument names ``round`` and ``dir`` deliberately mirror the
    attribute names read elsewhere in this notebook.
    """

    def __init__(self, *, Nk=18, alpha=0.5, round=10, dropout=0.5, slr=0.05,
                 weight_decay=1e-2, tau=0.5, hidden=100, glr=0.002, epoch=100,
                 batch_size=100, dir="Prediction"):
        # parameters for topological denoising
        self.Nk = Nk
        self.alpha = alpha

        # parameters for feature refinement
        self.round = round                # number of contrastive pre-training steps
        self.dropout = dropout            # dropout value passed to both GCN models
        self.slr = slr                    # learning rate of the SGD optimizer
        self.weight_decay = weight_decay  # weight decay of the SGD optimizer
        self.tau = tau                    # forwarded to the contrastive loss

        # parameters for semi-supervised cell-type annotation
        self.hidden = hidden              # hidden-size argument of the two-layer GCN
        self.glr = glr                    # learning rate of the Adam optimizer
        self.epoch = epoch                # number of training epochs
        self.batch_size = batch_size      # batch size of the data loaders
        self.dir = dir                    # output directory for predictions and model



#### Set parameters and load data in a specified format
The file seqdata.mat includes three fields. The format is explained in the README file. The gold-standard annotation of the labeled cells is assigned to the variable *labels*.

In [None]:
# Fix all random seeds, create the settings object and load the input file.
seed_torch()
args = Params()
data = loadmat("seqdata.mat")

adj = data["similarity"]
annotation = data["annotation"].ravel()
feature = data["feature"]

# Entries equal to -1 mark unannotated cells; every cell before the first -1
# is treated as a labeled cell.
# NOTE(review): this assumes labeled cells come first in the file -- confirm
# against the README format description.
unannotated = np.flatnonzero(annotation == -1)
if unannotated.size == 0:
    raise ValueError(
        "No unannotated cells found: 'annotation' must mark them with -1."
    )
break_idx = unannotated[0]
if break_idx == 0:
    raise ValueError(
        "No labeled cells found: 'annotation' must start with labeled cells."
    )
labels = annotation[:break_idx]

feat_dim = feature.shape[1]
num_class = len(np.unique(labels))

#### Generate pseudo labels with topological denoising
We first obtain a denoised similarity matrix by network enhancement, then generate pseudo labels for unannotated cells by 1-NN.

In [None]:
# Denoise the similarity matrix using the configured settings instead of
# hard-coded copies of their defaults, so changing args.Nk / args.alpha
# actually takes effect.
# NOTE(review): assumes the 3rd and 4th positional arguments of
# network_enhancement are the values mirrored by Params.Nk and Params.alpha
# (the literals previously passed here, 18 and 0.5, equal those defaults).
adj = enhancement.network_enhancement(adj, 2, args.Nk, args.alpha)

# Pseudo labels come from the denoised similarities with k = 1.
pseudo_labels = knn_similarity(labels, adj, 1)

adj = torch.from_numpy(adj).to(torch.float32)

#### Refine features with supervised contrastive learning in a denoised GCN.
We define a denoised GCN *premodel* with the denoised network structure *adj* and obtain refined features *refined_feature* with supervised contrastive learning using pseudo labels *pseudo_labels*. The *contrastive_loss* is the corresponding supervised contrastive loss function.

In [None]:
# Move the inputs of the refinement step into torch tensors.
feature = torch.from_numpy(feature)
pseudo_labels = torch.tensor(pseudo_labels, dtype=torch.int32)

# Build the model on the denoised graph and optimize it with SGD.
premodel = preprocessor(feat_dim, args.dropout, adj)
optimizer1 = torch.optim.SGD(
    params=premodel.parameters(),
    lr=args.slr,
    weight_decay=args.weight_decay,
)

# Pre-train with the contrastive loss; the result is detached because the
# refined features are only used as fixed inputs from here on.
refined_feature = pretrain(
    args=args,
    model=premodel,
    feature=feature,
    pseudo_labels=pseudo_labels,
    loss_func=contrastive_loss,
    optimizer=optimizer1,
)
refined_feature = refined_feature.detach()

#### Train a cell-type annotation GCN with gold-standard annotated cells
A two-layer denoised GCN *net* is trained on the gold-standard annotated cells with the refined features, using cross entropy as the loss function. The well-trained model is returned as *opt_model*.

In [None]:
# One row per labeled cell: (label, row index into the feature matrix).
train_idx = np.arange(break_idx)
train_data = torch.tensor(
    np.column_stack((labels, train_idx)),
    dtype=torch.float,
)
train_dataset = DataLoader(
    train_data,
    batch_size=args.batch_size,
    shuffle=True,
)

# Two-layer GCN on the denoised graph, trained with Adam and cross entropy.
net = GCN(feat_dim, args.hidden, num_class, args.dropout, adj)
optimizer2 = torch.optim.Adam(params=net.parameters(), lr=args.glr)
loss = nn.CrossEntropyLoss(reduction="mean")
opt_model = train(
    model=net,
    features=refined_feature,
    loss=loss,
    optimizer=optimizer2,
    train_dataset=train_dataset,
    num_class=num_class,
    epochs=args.epoch,
)

#### Annotate unlabeled cells by *opt_model*. 
The results are stored in the table *prediction*.

In [None]:
# Inference must run in evaluation mode: the training loop leaves the model
# in training mode, in which dropout would randomly perturb the predictions.
# no_grad avoids building an autograd graph that is never used.
opt_model.eval()
with torch.no_grad():
    pro = opt_model(refined_feature)

# Indices of the cells that follow the labeled ones in the annotation vector.
unannotated_idx = np.arange(len(labels), len(annotation))
to_anno_data = torch.tensor(unannotated_idx, dtype=torch.float)
to_anno_dataset = DataLoader(to_anno_data, batch_size=args.batch_size, shuffle=False)
prediction_ = make_prediction(pro, to_anno_dataset)
prediction = pd.DataFrame(prediction_, columns=["sampleID", "predictedLabel"])

#### Save results
Save annotation and the well-trained model to a specified directory.

In [None]:
# Create the output directory if needed; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(args.dir, exist_ok=True)

# Write the predicted labels and the trained model weights.
prediction.to_csv(os.path.join(args.dir, "make_prediction.csv"), index=False)
torch.save(opt_model.state_dict(), os.path.join(args.dir, "opt_model.pt"))