In [None]:
pip install cplearn #Install this pacakge.

In [None]:
# Load your data here.
# X must be an n x d numpy array, most likely the post-PCA or post-Harmony dataset.
X = None

In [None]:
from cplearn.corespect import Corespect
# Build the CoreSPECT object on the data matrix X.
corespect = Corespect(X)

# Step 1: find the core.
# Tuning q and r: larger values (such as q=40, r=20) favour larger clusters,
# while smaller values (such as {'q': 20, 'r': 10}) reveal more sub-clusters.
# r = 0.5 * q is usually a good choice.
# A new method that is able to do both simultaneously is currently being implemented.
core = corespect.find_core(
    core_fraction=0.15,
    ranking_algo_params={'q': 40, 'r': 20},
)

# Step 2: cluster the core using the 'louvain' algorithm.
cluster_core = corespect.cluster_core(
    core,
    cluster_algo='louvain',
    cluster_algo_params={"ng_num": 20, "resolution": 1},
)

# Step 3: propagate the core labels using the 'adaptive_majority' algorithm.
propagated_data = corespect.propagate_labels(
    cluster_core,
    propagate_algo='adaptive_majority',
    propagate_algo_params={"ng_num": 20},
)

In [None]:
from cplearn.visualizer import Visualizer as viz
# 4. Extract layers and labels
layers, labels_for_layer = propagated_data.get_layers_and_labels()

# The first layer is the core: these are the most separable points.
# For now, when adaptive majority is used, this method gives you three layers:
#   1. the core;
#   2. all points that can be clustered confidently;
#   3. further points that can be clustered, but less confidently.
# Some point indexes will be absent from the layers; these are the points that could not be clustered.

In [None]:
# This UMAP embedding is used as the basic skeleton (global initial embedding)
# for the subsequent visualizations.
import umap

# UMAP is stochastic: fix random_state so that re-running the notebook gives the
# same skeleton embedding. Note that umap-learn disables parallelism when a
# seed is set, so this may run slower on large datasets.
reducer = umap.UMAP(n_neighbors=15, init='spectral', random_state=42)
X_umap = reducer.fit_transform(X)

In [None]:
# 5. Visualize (Visualizer internally calls CoreMAP for embedding).
# 'three_steps' is the mode to use together with adaptive_majority.
# Set mode_choice = 'layerwise' for a more in-depth, layer-by-layer visualization;
# 'layerwise' is also the mode to use if propagate_algo is "CDNN".
mode_choice = 'three_steps'

fig = viz(
    corespect,
    global_init_embedding=X_umap,
    mode=mode_choice,
).fig

In [None]:
# In the output HTML file, click on "FlowRank" on the line that says core.
# Then click on the "louvain" button on the next line.
# Finally, click on the "adaptive majority" button.
# A scrollbar will now appear at the bottom of the image. Move it from left to right to observe the visualization from the core (central points) to the periphery (boundary points), colored using the labels found by CoreSPECT.

fig.show()   # or fig.write_html("corespect_viz.html")

In [None]:
#We also support running multiple clustering/propagation steps on the same data, accessing the results in a user-friendly manner, and viewing their visualizations together for smooth comparison. This functionality already exists in our cplearn package, and we will share a detailed description/vignette with directions soon.

In [None]:
# To visualize the same thing with your own labels for the data, run this cell.
# This currently requires re-running the whole code, which is redundant and
# will be fixed in the next version.

# Put your labels here. Currently the labels have to be integer valued;
# string labels will be supported very soon.
user_label = None

fig = viz(
    corespect,
    labels=user_label,
    global_init_embedding=X_umap,
).fig

fig.show()

In [None]:
#Now we can look at DEGs (differentially expressed genes) w.r.t. the layers and the clusters output by CoreSPECT.
#For now, we use the Wilcoxon rank-sum method to calculate DEGs.

In [None]:
# Load X_cg, the original cell x gene matrix corresponding to X.
# X_cg must be defined before this cell runs; the placeholder below is left commented out.
#X_cg=None

c1 = [3]   # Add all label numbers that you want to put in group 1
c2 = [14]  # Add all label numbers that you want to put in group 2

from scRNA_seq.layered_deg import Deg

deg = Deg(X_cg=X_cg, corespect=corespect)

# custom_labels is where you can pass your own labels instead of None.
de_results = deg.detailed_deg(
    c1,
    c2,
    top_deg_num_per_layer=10,
    custom_labels=None,
)

In [None]:
# Follow this for now: take the first entry of de_results.
# This is needed if you have run the cells as described so far.
de_results_select = de_results[0]

You can now look at the different DEG metrics (such as percentage, log fold change, and means) between c1 and c2 across the different layers.

Column 0 means that the calculation uses only the points in layer 0.

Similarly, column 1 means that the calculation uses only the points in layer 1.

On the other hand, column 1+ is for points up to layer 1 (that is, layer 0 and layer 1), and so on.

A dashed entry implies that the gene was not considered to be differentially expressed in that layer.

The key is the column number of the gene in X_cg. You may have the gene names in your adata, which should give you the names of the genes observed here.



In [None]:
# Indexes:
#   0: p_values
#   1: avg_log2FC
#   2: mean1
#   3: mean2
#   4: pct1
#   5: pct2
from open_source.degspect.deg_utils import tabulate_de_results

# 4 and 5 correspond to pct1 and pct2 in the index list above.
tabulate_de_results(de_results_select, 4, 5)