# Test the HALO Downstream Pipelines for Regulation Potential Analysis

## Load the library

In [9]:
from typing import Dict, Iterable, Optional

import numpy as np
import torch
from torch.distributions import Normal, Poisson
from torch.distributions import kl_divergence as kld
from torch import tensor
import scanpy as sc
import anndata
import pandas as pd
import tools
import logging
import mira
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## For now, you might need to compute the MIRA topic models for downstream tasks

### If you need to train the MIRA model, see the downstream_pipeline_MIRA_topics.ipynb

## Load pretrained model

In [10]:
# Construct fresh topic-model objects for each modality. Both read their
# input matrix from the 'counts' layer of the AnnData they are given.
atac_model = mira.topics.AccessibilityTopicModel(
    dataset_loader_workers=3,
    counts_layer='counts',
)

rna_model = mira.topics.ExpressionTopicModel(
    counts_layer='counts',
    endogenous_key='endog',
    exogenous_key='endog',  # same key as endogenous_key
    hidden=64,  # to make the tutorial faster. In practice, use the default of 128!
    seed=0,
)

In [11]:
# Read the interim mouse-brain AnnData files (RNA first, then ATAC).
rna_data, atac_data = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in (
        "data/datasets/mouse_brain_rna_interim.h5ad",
        "data/datasets/mouse_brain_atac_interim.h5ad",
    )
)



In [12]:
## Pretrained checkpoints, when present, replace the model objects built above.
## Skip this cell if the .pth files do not exist yet.

rna_model = mira.topics.ExpressionTopicModel.load(
    'MIRA/rna_topic_model.pth'
)
atac_model = mira.topics.AccessibilityTopicModel.load(
    'MIRA/atac_topic_model.pth'
)

INFO:mira.topic_model.base:Moving model to CPU for inference.
INFO:mira.topic_model.base:Moving model to device: cpu
INFO:mira.topic_model.base:Moving model to CPU for inference.
INFO:mira.topic_model.base:Moving model to device: cpu


### Training the MIRA model
#### Training the RNA model

In [1]:
## Learning-rate bounds for the RNA model (prerequisite for Rtuner below)
rna_model.get_learning_rate_bounds(
    rna_data,
    upper_bound_lr=5,
    eval_every=1,
)
rna_model.trim_learning_rate_bounds(2.25, 1.25)
# Bind the return value to `_` so the cell shows only the plot.
_ = rna_model.plot_learning_rate_bounds()

NameError: name 'rna_model' is not defined

In [None]:
# Hyperparameter tuner wrapping the RNA topic model.
Rtuner = mira.topics.TopicModelTuner(
    rna_model,
    save_name='tuning-tutorial',
    max_topics=15,  # to speed up convergence! Leave at default of 55 when working with new data.
    iters=32,  # Recommend 30-64 iterations of tuning.
    seed=0,
)
# Split the RNA data into train/test partitions for the tuner.
Rtuner.train_test_split(rna_data)

In [None]:
# Run the hyperparameter search on the RNA data with 5 workers.
Rtuner.tune(
    rna_data,
    n_workers=5,
)

In [None]:
# Pick the best configuration found by the tuner; UMAP recording is enabled.
Rtuner.select_best_model(
    rna_data,
    record_umaps=True,
)

In [None]:
# Persist the RNA topic model to disk.
rna_model.save(
    'MIRA/rna_topic_model_MB.pth'
)


In [None]:
# Run the RNA model on the data, then derive the features used for the
# neighbor graph ('X_umap_features' is the representation read below).
rna_model.predict(rna_data)
rna_model.get_umap_features(rna_data, box_cox=0.5)

# Neighbor graph and UMAP embedding built on those features.
sc.pp.neighbors(
    rna_data,
    use_rep='X_umap_features',
    metric='manhattan',
)
sc.tl.umap(
    rna_data,
    min_dist=0.1,
    negative_sample_rate=0.05,
)

# Plot the embedding coloured by the 'celltype' annotation.
sc.pl.umap(
    rna_data,
    color='celltype',
    frameon=False,
    size=10,
    alpha=1,
    add_outline=True,
    outline_width=(0.1, 0),
)

### Training the ATAC Model


In [None]:
# Snapshot the current object in .raw, then store a copy of that snapshot's
# matrix under the 'counts' layer.
atac_data.raw = atac_data
atac_data.layers['counts'] = (
    atac_data.raw
    .to_adata()
    .X
    .copy()
)

In [None]:
# Learning-rate bounds for the ATAC model (prerequisite for Atuner below).
atac_model.get_learning_rate_bounds(
    atac_data,
    upper_bound_lr=5,
    eval_every=1,
)
atac_model.trim_learning_rate_bounds(1e-5, 0.2)
# Bind the return value to `_` so the cell shows only the plot.
_ = atac_model.plot_learning_rate_bounds()

In [None]:
# Hyperparameter tuner wrapping the ATAC topic model.
Atuner = mira.topics.TopicModelTuner(
    atac_model,
    save_name='ATAC',
    max_topics=15,  # to speed up convergence! Leave at default of 55 when working with new data.
    iters=20,  # Recommend 30-64 iterations of tuning.
    seed=0,
)
# Split the ATAC data into train/test partitions for the tuner.
Atuner.train_test_split(atac_data)

In [None]:
# Run the hyperparameter search before selecting a model. The RNA workflow
# calls tune() between train_test_split() and select_best_model(); the ATAC
# workflow previously skipped that step, so no search was run in this
# notebook before selection. n_workers mirrors the RNA tuning cell.
Atuner.tune(atac_data, n_workers=5)

# Pick the best configuration found by the tuner; UMAP recording is enabled.
Atuner.select_best_model(atac_data, record_umaps=True)

# Persist the ATAC topic model to disk.
atac_model.save('MIRA/atac_topic_model_MB.pth')


### Save the rna data and atac data

In [None]:
# Write both annotated datasets back to disk (ATAC first, then RNA).
atac_data.write_h5ad(
    "data/datasets/mouse_brain_atac_mira_interium.h5ad"
)
rna_data.write_h5ad(
    "data/datasets/mouse_brain_rna_mira_interium.h5ad"
)