# Simple tutorial: integration with basic parameters

In [None]:

import numpy as np
import scanpy as sc
from sobolev_alignment import SobolevAlignment

## Import data
Please refer to "process_data.ipynb" for a tutorial on how to set up the AnnData for input.

In [None]:
# Load the source and target datasets from their .h5ad files.
source_path = './data/kinker.h5ad'
target_path = './data/kim.h5ad'
source_an = sc.read_h5ad(source_path)
target_an = sc.read_h5ad(target_path)

In [None]:
# Restrict both datasets to the genes present in each of them, so that the
# two objects expose the same variables in the same order.
# NOTE(review): slicing an AnnData like this presumably yields views rather
# than copies -- confirm whether downstream steps need an explicit `.copy()`.
common_genes = np.intersect1d(source_an.var.index, target_an.var.index)
source_an, target_an = source_an[:, common_genes], target_an[:, common_genes]

## Sobolev Alignment
### Train scVI

In [None]:
# Create the aligner with basic parameters: batch names are left unset
# (None) for both datasets and 10**5 is used for both sample-count settings.
sobolev_alignment_clf = SobolevAlignment(
    n_artificial_samples=10 ** 5,
    n_samples_per_sample_batch=10 ** 5,
    source_batch_name=None,
    target_batch_name=None,
    no_posterior_collapse=False,
)

In [None]:
# Cap scVI training at 100 epochs for both models so that this tutorial runs
# quickly. Comment this cell out for a full-scale run.
for side in ('source', 'target'):
    sobolev_alignment_clf.scvi_params[side]['train']['max_epochs'] = 100

In [None]:
# First pass: only the VAE fitting is enabled; the KRR approximation and the
# artificial sampling are switched off here.
sobolev_alignment_clf.fit(
    X_source=source_an,
    X_target=target_an,
    fit_vae=True,
    krr_approx=False,
    sample_artificial=False,
)

### KRR approximation

In [None]:
# Pick the best KRR model, using M=500 and an alignment threshold of 0.95.
sobolev_alignment_clf.krr_model_selection(
    M=500,
    same_model_alignment_thresh=0.95,
    X_source=source_an,
    X_target=target_an,
)

In [None]:
# Raise M to 1000 in the KRR parameters of both models before the final fit.
for side in ('source', 'target'):
    sobolev_alignment_clf.krr_params[side]['M'] = 1000

In [None]:
# Final fit with the increased M: the VAE is not re-fitted; only the
# artificial sampling and the KRR approximation are enabled.
sobolev_alignment_clf.fit(
    X_source=source_an, X_target=target_an,
    fit_vae=False, krr_approx=True, sample_artificial=True,
)

## Estimate number of shared principal vectors

In [None]:
# Permutation test (10 permutations) whose result is reused below as the
# number of similar principal vectors.
n_similar_pvs = sobolev_alignment_clf.permutation_test_number_similar_pvs(
    n_permutations=10,
)

In [None]:
# Display the principal angles stored on the fitted aligner (cell output).
sobolev_alignment_clf.principal_angles

## Project data

In [None]:
# Project both datasets on the consensus features, keeping `n_similar_pvs`
# principal vectors, and request the result as an AnnData object.
consensus_feature_an = sobolev_alignment_clf.compute_consensus_features(
    dict(source=source_an, target=target_an),
    n_similar_pv=n_similar_pvs,
    return_anndata=True,
)

In [None]:
# Build the neighbourhood graph from the 'X_sobolev_alignment' representation,
# then compute a UMAP embedding on the same object.
sc.pp.neighbors(consensus_feature_an, use_rep='X_sobolev_alignment')
sc.tl.umap(consensus_feature_an)

In [None]:
# Plot the UMAP, coloured by the 'data_source' annotation.
sc.pl.umap(consensus_feature_an, color='data_source')

## MNN projection

In [None]:
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

# Turn on rpy2's pandas conversion layer.
pandas2ri.activate()

# Load the R package 'batchelor'; the R snippet used for the MNN correction
# calls `mnnCorrect`, which presumably comes from this package -- confirm.
importr('batchelor')

In [None]:
# Extract the 'X_sobolev_alignment' embedding of the cells flagged as 'source'
# and hand it to R as a matrix with one row per embedding dimension and one
# column per cell (nrow = shape[1], ncol = shape[0]).
# NOTE(review): R fills matrices column by column -- confirm that the
# converted values land where intended.
source_sobal = np.array(consensus_feature_an[consensus_feature_an.obs['data_source'] == 'source',:].obsm['X_sobolev_alignment'])
sourcer = robjects.r.matrix(source_sobal.T, nrow=source_sobal.shape[1], ncol=source_sobal.shape[0])

# Same extraction and conversion for the cells flagged as 'target'.
target_sobal = np.array(consensus_feature_an[consensus_feature_an.obs['data_source'] == 'target',:].obsm['X_sobolev_alignment'])
targetr = robjects.r.matrix(target_sobal.T, nrow=target_sobal.shape[1], ncol=target_sobal.shape[0])

# Expose both matrices in the R session under the names used by the R code.
robjects.r.assign("source", sourcer)
robjects.r.assign("target", targetr)

# Run mnnCorrect in R on (source, target) with cosine normalisation enabled
# for both input and output, and keep the 'corrected' assay.
robjects.r('''
    interpolated_corrected.signal <- mnnCorrect(
        source,
        target,
        cos.norm.in = TRUE, 
        cos.norm.out = TRUE
    )
    corrected.frame <- interpolated_corrected.signal@assays@data$corrected
''')
# Store the transposed corrected matrix as a new representation.
# NOTE(review): this assumes the corrected cells come back ordered as
# source-then-target and that consensus_feature_an uses the same cell order --
# confirm, otherwise rows would be assigned to the wrong cells.
consensus_feature_an.obsm['X_sobolev_alignment_mnn'] = robjects.r['corrected.frame'].T

In [None]:
# Rebuild the neighbourhood graph, this time from the MNN-corrected
# representation, and recompute the UMAP embedding.
sc.pp.neighbors(consensus_feature_an, use_rep='X_sobolev_alignment_mnn')
sc.tl.umap(consensus_feature_an)

In [None]:
# Plot the recomputed UMAP, coloured by the 'data_source' annotation.
sc.pl.umap(consensus_feature_an, color='data_source')

## Consensus feature interpretation

In [None]:
# WARNING: known issue related to gene_names = None (still TO FIX).
# n_jobs is set to 5 before the analysis (presumably the number of parallel
# workers -- confirm).
sobolev_alignment_clf.n_jobs = 5
# Feature analysis restricted to max_order=1; the result is kept as the
# linear gene weights. Disabled variant of the call, with explicit gene names:
#     feature_analysis(max_order=1, gene_names=source_an.var.index)
linear_gene_weights = sobolev_alignment_clf.feature_analysis(max_order=1)

In [None]:
# Linear weights of the source PVs (principal vectors): the table is
# transposed and then sorted by the column labelled 0, i.e. the first PV.
sobolev_alignment_clf.pv_level_feature_weights_df['source'].T.sort_values(0)