# install packages

In [1]:
install.packages('SCINA')


#Install preprocessCore if required
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("preprocessCore")

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Bioconductor version 3.10 (BiocManager 1.30.10), R 3.6.0 (2019-04-26)

Installing package(s) 'preprocessCore'

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Old packages: 'bit', 'dplyr', 'DT'



# load library

In [2]:
library('SCINA')
library('preprocessCore')

Loading required package: MASS

Loading required package: gplots


Attaching package: ‘gplots’


The following object is masked from ‘package:stats’:

    lowess




# load signatures of cell types (prior knowlege)

In [3]:
# prior knowledge: signatures (A list contains multiple signature vectors)
# using eTME signatures
load(system.file('extdata','example_signatures.RData', package = "SCINA"))

# using own signatures files:
# signatures=preprocess.signatures('your/path/to/example_signatures.csv')

In [4]:
head(signatures)

# load expression data and normalization

In [5]:
# input data: expression matrix 
# From .rds
exp <- readRDS("/stor/public/hcad/Heart_Heart_Wang2020/Heart_Heart_Wang2020.seuratobj.rds")
exp <- exp@assays$RNA@scale.data


# from .csv
# exp <- read.csv("csv",row.names=1,stringsAsFactors = F)

# normalize counts 
# exp = log(exp+1)

# Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities.
# exp[] = normalize.quantiles(exp)

In [6]:
head(exp)

Unnamed: 0,SC_92563_0_17,SC_92563_0_23,SC_92563_0_12,SC_92563_0_18,SC_92563_0_19,SC_92563_0_20,SC_92563_0_14,SC_92563_1_45,SC_92563_1_46,SC_92563_2_64,⋯,SC_97502_56_57,SC_97502_56_33,SC_97502_70_62,SC_97502_71_55,SC_97502_24_52,SC_97502_34_40,SC_97502_32_60,SC_97502_66_39,SC_97502_30_13,SC_97502_30_69
FGR,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,⋯,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015,-0.11106015
CFH,4.96971188,-0.28446614,0.87201257,7.90064217,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,1.05326192,⋯,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614,-0.28446614
SNX11,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,⋯,-0.12088841,-0.12088841,-0.12088841,-0.12088841,-0.12088841,2.04038057,-0.12088841,-0.12088841,-0.12088841,-0.12088841
TFPI,-0.28945759,-0.28945759,1.79777746,-0.28945759,-0.28945759,-0.28945759,-0.28945759,-0.28945759,-0.28945759,-0.28945759,⋯,-0.28945759,-0.28945759,-0.28945759,-0.28945759,-0.28945759,-0.28945759,0.44511547,-0.28945759,-0.28945759,-0.28945759
RBM5,-0.34809229,-0.34809229,0.03439037,-0.34809229,-0.34809229,-0.34809229,5.23763354,-0.34809229,-0.34809229,-0.34809229,⋯,-0.34809229,-0.34809229,-0.34809229,-0.21096557,-0.34809229,-0.34809229,-0.34809229,-0.34809229,1.20399877,1.3843678
CD38,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,⋯,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936,-0.09208936


# using SCINA

In [7]:
# parameters
# max_iter: max iterations (integer>0)

# covergence_n: (iteger>0)
# Stop SCINA if during the last n rounds of iterations, cell type assignment keeps steady above the convergence_rate.

# covergence_rate: (A float between 0 and 1. Default is 0.99.)
# Percentage of cells for which the type assignment remains stable for the last n rounds.

# sensitivity_cutoff: (A float between 0 and 1.)
# The cutoff to remove signatures whose cells types are deemed as non-existent at all in the data by SCINA.

# rm_overlap A binary value, default 1 (TRUE)
# denotes that shared symbols between signature lists will be removed. If 0 (FALSE) then allows different cell types to share the same identifiers.

# allow_unknown A binary value, default 1 (TRUE)
#  If 0 (FALSE) then no cell will be assigned to the 'unknown' category.

# log_file: A name string denoting a record for the SCINA running status, path may be included. Default is 'SCINA.log'.

results = SCINA(exp, signatures, max_iter = 100, convergence_n = 10, 
    convergence_rate = 0.999, sensitivity_cutoff = 0.9, rm_overlap=TRUE, allow_unknown=TRUE, log_file='SCINA.log')

In [8]:
head(results$cell_labels)

In [9]:
head(results$probabilities)

Unnamed: 0,SC_92563_0_17,SC_92563_0_23,SC_92563_0_12,SC_92563_0_18,SC_92563_0_19,SC_92563_0_20,SC_92563_0_14,SC_92563_1_45,SC_92563_1_46,SC_92563_2_64,⋯,SC_97502_56_57,SC_97502_56_33,SC_97502_70_62,SC_97502_71_55,SC_97502_24_52,SC_97502_34_40,SC_97502_32_60,SC_97502_66_39,SC_97502_30_13,SC_97502_30_69
cd14_monocytes,2.811101e-80,1.0,1.060899e-80,2.089183e-80,2.650614e-80,2.496487e-81,2.845697e-71,1.4459739999999999e-86,6.725161e-119,5.051021e-82,⋯,2.1172360000000002e-82,8.410902e-82,4.934286e-81,9.684958000000001e-82,4.690013e-81,1.42285e-81,8.283384999999999e-78,1.2068580000000001e-81,2.5234530000000003e-81,1.514548e-81
b_cells,1.498798e-12,3.345653e-176,1.246688e-11,4.958829e-13,1.776962e-11,3.031437e-14,2.600142e-11,2.621568e-25,7.534063e-37,4.401195e-12,⋯,3.031437e-14,5.31006e-12,4.794494e-14,3.031437e-14,2.620238e-13,3.354446e-13,8.732616e-14,3.525154e-14,3.031437e-14,3.031437e-14
cd56_nk,1.303675e-19,1.438805e-181,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19,4.626065e-15,1.0,1.0,1.303675e-19,⋯,1.303675e-19,1.3642099999999998e-19,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19,1.303675e-19


# save results

In [45]:
# write out the result
query_result <- data.frame(results$cell_labels)
save_anno_dir <- "" # use your dir
write.csv(query_result, paste0(save_anno_dir, "anno.csv"))

In [46]:
head(query_result)

Unnamed: 0_level_0,results.cell_labels
Unnamed: 0_level_1,<chr>
1,unknown
2,cd14_monocytes
3,unknown
4,unknown
5,unknown
6,unknown
