In [2]:
import pandas as pd
import numpy as np
import scanpy, phate
from sklearn import preprocessing

In [None]:
# Read the preprocessed AnnData object from disk.
data = scanpy.read_h5ad('./data/processed_data.h5ad')
# Keep only the observations whose `response` label is one of the two known
# values, 'NR' or 'R' (presumably non-responder / responder -- TODO confirm).
has_known_response = data.obs['response'].isin(['NR', 'R'])
data = data[has_known_response]

## GSPA patient indicator

In [None]:
# Create a GSPA operator with use_reduced=False and run its three setup calls
# in order on the response-filtered `data`. The later "GSPA QR gene signals"
# cell builds a second operator with the default constructor arguments instead.
# NOTE(review): `gspa` is not imported in the visible import cell; confirm it
# is in scope after Restart Kernel -> Run All.
gspa_op = gspa.GSPA(use_reduced=False)
gspa_op.construct_graph(data)
gspa_op.build_diffusion_operator()
gspa_op.build_wavelet_dictionary()

In [6]:
# One-hot encode the patient label of every observation. OneHotEncoder expects
# a 2-D input, hence the reshape to a single column.
patient_column = np.array(data.obs['patient']).reshape(-1, 1)
patient_onehot = preprocessing.OneHotEncoder().fit_transform(patient_column)
# Transpose so each row is one patient's 0/1 indicator over all observations,
# then convert the sparse result to a dense matrix.
patient_indicator = patient_onehot.T.todense()

In [None]:
# Embed patient indicator signals
# The call returns two values; only the first (`pt_ae`) is written to disk.
# `pt_pc` is not used in any of the visible cells.
# NOTE(review): the `results/` directory must already exist for np.save to succeed.
pt_ae, pt_pc = gspa_op.get_gene_embeddings(patient_indicator)
np.save('results/GSPA_patient.npy', pt_ae)

## Cluster proportion, CD8

In [107]:
# Per-patient cluster composition, restricted to the observations selected by
# the obs['isCD8'] column (assumed to be a boolean mask -- TODO confirm).
data_cd8 = data[data.obs['isCD8'], :]
cd8_patients = np.unique(data_cd8.obs['patient'])
cd8_clusters = np.unique(data_cd8.obs['cluster_cd8'])

# Allocate the patients x clusters table as float from the start. A DataFrame
# created without data defaults to object dtype, and relying on fillna() to
# downcast object -> float is deprecated in pandas; without that downcast
# np.save would pickle an object array instead of writing a numeric one.
patient_proportions = pd.DataFrame(index=cd8_patients, columns=cd8_clusters, dtype=float)
for patient in cd8_patients:
    pat = data_cd8[data_cd8.obs['patient'] == patient]
    # Fraction of this patient's CD8 observations that fall in each cluster.
    pat_prop = pat.obs['cluster_cd8'].value_counts() / pat.n_obs
    # Row assignment aligns on cluster label; labels missing from pat_prop stay NaN.
    patient_proportions.loc[patient] = pat_prop

# Any entry still NaN corresponds to a cluster with no cells for that patient.
patient_proportions = patient_proportions.fillna(0)
np.save('results/proportion_cd8.npy', patient_proportions.to_numpy(dtype=float))

## Cluster proportion, all

In [114]:
# Per-patient cluster composition over all (response-filtered) observations.
all_patients = np.unique(data.obs['patient'])
all_clusters = np.unique(data.obs['cluster_all'])

# Allocate the patients x clusters table as float from the start. A DataFrame
# created without data defaults to object dtype, and relying on fillna() to
# downcast object -> float is deprecated in pandas; without that downcast
# np.save would pickle an object array instead of writing a numeric one.
patient_proportions = pd.DataFrame(index=all_patients, columns=all_clusters, dtype=float)
for patient in all_patients:
    pat = data[data.obs['patient'] == patient]
    # Fraction of this patient's observations that fall in each cluster.
    pat_prop = pat.obs['cluster_all'].value_counts() / pat.n_obs
    # Row assignment aligns on cluster label; labels missing from pat_prop stay NaN.
    patient_proportions.loc[patient] = pat_prop

# Any entry still NaN corresponds to a cluster with no cells for that patient.
patient_proportions = patient_proportions.fillna(0)
np.save('results/proportion_all.npy', patient_proportions.to_numpy(dtype=float))

## Mean expression

In [63]:
# Per-patient mean expression: for each patient (in np.unique order), average
# every variable over that patient's observations.
mean_patients = []
for patient in np.unique(data.obs['patient']):
    pat = data[data.obs['patient'] == patient]
    # to_df() has observations as rows, so axis=0 averages over observations.
    mean_patients.append(pat.to_df().mean(axis=0))
# Fixed: this line previously called `p.save`, which raises NameError because
# no `p` exists in the notebook -- numpy is imported as `np`.
np.save('results/mean.npy', np.array(mean_patients))

## GSPA QR gene signals

In [None]:
# Rebuild the GSPA operator, this time with the default constructor arguments
# (the earlier "GSPA patient indicator" cell passed use_reduced=False), and run
# the same three setup calls in order. This rebinds `gspa_op`.
# NOTE(review): `gspa` is not imported in the visible import cell; confirm it
# is in scope after Restart Kernel -> Run All.
gspa_op = gspa.GSPA()
gspa_op.construct_graph(data)
gspa_op.build_diffusion_operator()
gspa_op.build_wavelet_dictionary()

In [None]:
# For every patient: take that patient's expression matrix, project it with the
# matching rows of `dictionary`, reduce the projection with gspa.embedding.svd,
# and keep the first 5 columns of the result.
# NOTE(review): `dictionary` is not defined in any visible cell. It presumably
# comes from the `gspa_op` built in the previous cell; confirm where it is
# bound before running on a fresh kernel.
all_patients_reduced = []
obs_patient = data.obs['patient']
for pt in np.unique(obs_patient):
    # Build the per-patient mask once and reuse it for both the data and the dictionary.
    is_pt = obs_patient == pt
    # to_df() is observations x variables; transpose to variables x observations.
    patient = data[is_pt].to_df().T.values
    gene_signal_projected = gspa.embedding.project(patient, dictionary[is_pt])
    gene_signal_reduced = gspa.embedding.svd(gene_signal_projected)
    all_patients_reduced.append(gene_signal_reduced[:, :5])

# Stacked in np.unique(patient) order, one entry per patient.
np.save('results/GSPA_QR_gene.npy', np.array(all_patients_reduced))