In [1]:
import pandas as pd
import scanpy as sc
import numpy as np
import torch
import scipy
import matplotlib
import matplotlib.pyplot as plt
import os
from itertools import product
from scHyper import dataprocess as dp

# Switch the working directory to ../scHyper/ so that every relative path used
# below is resolved from there.
# NOTE(review): presumably the ligand-receptor database is looked up relative
# to this directory; point it elsewhere to use another database - TODO confirm.
os.chdir('../scHyper/')

### 1. Read gene expression matrix and cell type labels

In [2]:
# Input files, resolved relative to the current working directory.
count_path = "../tutorial/data/count.csv"
meta_path = "../tutorial/data/meta.csv"

# Expression matrix; the first CSV column becomes the row index.
x = pd.read_csv(count_path, index_col=0)
# Per-cell metadata (must contain a "labels" column); first column is the index.
meta = pd.read_csv(meta_path, index_col=0)

### 2. Add the labels to adata.obs; if the data are unprocessed, normalize and log-transform them here.

In [3]:
# Pick the label of every cell. The count matrix is stored genes x cells, so its
# columns are the cell names. Whenever the metadata index covers those names the
# labels are selected by name, so a meta.csv whose rows are ordered differently
# from the count.csv columns cannot silently mislabel cells.
if meta.index.is_unique and x.columns.isin(meta.index).all():
    cell_labels = meta.loc[x.columns, ["labels"]].copy()
else:
    # Names cannot be matched one-to-one: fall back to row order, i.e. the
    # labels are assumed to be listed in the same order as the count columns.
    cell_labels = meta[["labels"]].copy()

# Build the AnnData object with cells as observations and genes as variables.
adata = sc.AnnData(X=x.T.values)
adata.obs = cell_labels
adata.var_names = x.index
adata.obs_names = x.columns
adata.obs_names_make_unique()
adata.var_names_make_unique()

# Basic filtering: keep cells with at least 10 detected genes and genes
# detected in at least 3 cells.
sc.pp.filter_cells(adata, min_genes=10)
sc.pp.filter_genes(adata, min_cells=3)

# Normalize every cell to a total of 1e4 counts, then apply log1p.
# Skip these two calls if the input matrix is already normalized and logged.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

  adata = sc.AnnData(X=x.T.values)


In [4]:
# Display the summary of the AnnData object built in the previous cell.
adata

AnnData object with n_obs × n_vars = 23 × 40
    obs: 'labels', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'

### 3. Construct intercellular communication tensor

In [5]:
# Calculate the average expression of genes in different cell types
# Please select the truncated mean or average based on your requirements
adata = dp.meanExpression(adata, type="mean", groupby="labels")
# Pair of ligand-receptor interactions, expression of ligands and receptors appear in the gene expression matrix
# Please select whether to use high-variation genes based on your needs
adata, ligand_receptor_data = dp.process_ligands_receptors(adata, "human", highly_variable=False)
# Construct intercellular communication tensor
interaction_tensor = dp.generate_tensor(adata, ligand_receptor_data)

### 4. Get the hypergraph triplets and their weights

In [6]:
# Derive the hypergraph triplets, their weights and the indices of the valid
# ligand-receptor pairs from the communication tensor.
triplets, weights, validlrindices = dp.generate_triplets_weights_validlrindices(interaction_tensor)
# Split the ligand-receptor pairs into valid (effective) and invalid
# (ineffective) ones using those indices.
validlrs, invalidlrs = dp.generate_validlrs_invalidlrs(validlrindices, ligand_receptor_data)
# Do the same for cell types: indices of valid senders/receivers first, then
# the valid and invalid sender and receiver cell types themselves.
validsenderindices, validreceiverindices = dp.generate_validsenderindices_validreceiverindices(interaction_tensor)
validsenders, invalidsenders, validreceivers, invalidreceivers = dp.generate_validsenders_validreceivers(adata, validsenderindices, validreceiverindices)
# Update the triplets and weights; both names are rebound to the updated
# versions, which are the ones used by all later cells.
triplets = dp.update_triplets(triplets)
weights = dp.update_weights(weights)

### 5. Generate the training set and test set

In [7]:
# Split the triplets and their weights into a training set and a test set.
# NOTE(review): no random seed is set in this notebook; if the split is random,
# results will differ between runs - confirm against dp.generate_train_test.
train_triplets, test_triplets, train_weights, test_weights = dp.generate_train_test(triplets, weights)
# Compute the `nums_type` arrays for both splits; they are stored alongside the
# data in the next cell.
train_nums_type, test_nums_type = dp.generate_nums_type(train_triplets, test_triplets)

In [8]:
# Save the training and test sets. Change save_path to write somewhere else.
# The default is relative to the current working directory instead of a
# machine-specific absolute path, and the directory is created when missing
# because np.savez does not create parent directories.
save_path = '../data/demo'
os.makedirs(save_path, exist_ok=True)

np.savez(os.path.join(save_path, 'train_data.npz'), nums_type=train_nums_type, train_data=train_triplets, train_weight=train_weights)
# NOTE(review): the test archive stores train_nums_type, not test_nums_type
# (which is computed earlier but never used) - confirm this is intended.
np.savez(os.path.join(save_path, 'test_data.npz'), nums_type=train_nums_type, test_data=test_triplets, test_weight=test_weights)

# Create and save the array that is used for prediction.
use_to_predict = dp.use_to_predict(triplets)
np.savez(os.path.join(save_path, 'use_to_predict.npz'), use_to_predict=use_to_predict)

### 6. Use a nonparametric test to identify significant intercellular communications

In [9]:
# Run this cell only after the model has been trained.
# Build the table of candidate communications from the valid ligand-receptor
# pairs, the valid sender/receiver cell types, the triplets and the
# prediction array.
# NOTE(review): presumably the helper reads the trained model's output
# internally - TODO confirm where the predictions come from.
df_nn, candidates = dp.genenrate_df_nn_candidates(validlrs, validsenders, validreceivers, triplets, use_to_predict)
# Nonparametric test with a p-value threshold of 0.05; plotting is disabled.
df_enriched, tensor_pval = dp.null_test(df_nn, candidates, pval=0.05, plot=False)


Total enriched: 2 / 192


### 7. Visualization preparation

In [None]:
# Output location for the results, relative to the current working directory.
# NOTE(review): it is not visible here whether dp.generate_results creates the
# directory; make sure '../results' exists before running - TODO confirm.
file_path='../results'
# Generate the results used for visualization from the expression data, the
# candidate table, the p-value tensor and the valid ligand-receptor pairs.
dp.generate_results(adata, df_nn, tensor_pval, validlrs, train_nums_type, file_path)