
# 10x Multiome Hippocampus SEAcells Computation

## Set-up environment and download data 
We will first import the required packages and create a directory to store the data and results.

In [None]:
# Suppress warnings
import warnings
import sys
import os
import re
import os
from os import listdir
import json
import pickle
import dill
import random
from typing import Union, Dict, Sequence, Optional, List
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

os.environ["MODIN_ENGINE"] = "ray"
import ray

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# SEAcells
import scanpy as sc
import SEACells

#set some figure parameters for nice display inside jupyternotebooks.
sc.settings.set_figure_params(dpi=80, frameon=False, figsize=(5, 5), facecolor='white')

#Set up working directory
work_dir = '../../data/SCENIC/ExNeu/'
listdir(work_dir)
if not os.path.exists(os.path.join(work_dir, 'model')):
    os.makedirs(os.path.join(work_dir, 'model'))

# Some plotting aesthetics
%matplotlib inline
sns.set_style('ticks')
matplotlib.rcParams['figure.figsize'] = [4, 4]
matplotlib.rcParams['figure.dpi'] = 100

## Metacell Identification With SEAcells

Load the AnnData object that was created in R.

In [None]:
# Read the RNA AnnData object from the working directory.
rna_h5ad_path = os.path.join(work_dir, "objects/RNA.h5ad")
ad = sc.read_h5ad(rna_h5ad_path)
# Last expression in the cell, so the notebook displays the object summary.
ad

Ensure the PCA and UMAP embeddings are stored under the `obsm` keys used in the rest of this notebook (`X_pca` and `X_umap`).

In [None]:
# Convert the 'PCA' and 'UMAP' entries to plain NumPy arrays and store them
# under the 'X_pca' / 'X_umap' keys that the later cells read.
n_pcs = 11  # keep only the first 11 principal components
pca_coords = ad.obsm['PCA'].to_numpy()
umap_coords = ad.obsm['UMAP'].to_numpy()
ad.obsm['X_pca'] = pca_coords[:, :n_pcs]
ad.obsm['X_umap'] = umap_coords

#### Plot cell-types for reference

In [None]:
# Scatter plot on the 'umap' basis, coloured by the 'clusters' annotation,
# drawn without an axes frame. Serves as a cell-type reference for the
# metacell plots further down.
sc.pl.scatter(ad, basis='umap', color='clusters', frameon=False)

#### Initialise SEAcells model

In [None]:
## User defined parameters

## Core parameters
cells_per_metacell = 200 # One metacell for every 200 cells
# n_SEACells is a count and must be an integer: true division (`/`) would
# hand the model a float. Floor-divide instead, and never request fewer than
# one metacell for very small datasets.
n_SEACells = max(1, len(ad.obs.index) // cells_per_metacell)
build_kernel_on = 'X_pca' # key in ad.obsm to use for computing metacells
                          # This would be replaced by 'X_svd' for ATAC data

## Additional parameters
n_waypoint_eigs = 10 # Number of eigenvalues to consider when initializing metacells

In [None]:
# Configure the SEACells model on the RNA object using the user-defined
# parameters set in the previous cell.
model = SEACells.core.SEACells(
    ad,
    build_kernel_on=build_kernel_on,
    n_SEACells=n_SEACells,
    n_waypoint_eigs=n_waypoint_eigs,
    convergence_epsilon=1e-5,
)

# Build the kernel matrix and keep a handle to it for inspection.
model.construct_kernel_matrix()
M = model.kernel_matrix

# Initialize archetypes
model.initialize_archetypes()

In [None]:
# Plot the initialization to ensure the initial archetypes are spread across
# phenotypic space before fitting.
SEACells.plot.plot_initialization(ad, model)

#### Fit model

In [None]:
# Fit the model; min_iter / max_iter bound the number of iterations (10 to 50).
# NOTE(review): later cells read ad.obs['SEACell'], which nothing in this
# notebook writes explicitly. Presumably the model adds it during fitting;
# confirm against the installed SEACells version.
model.fit(min_iter=10, max_iter=50)

In [None]:
# Check for convergence: inspect the model's convergence plot before using
# the metacell assignments downstream.
model.plot_convergence()

#### Plot Metacell Assignments

In [None]:
# Plot the cells using the 'X_umap' embedding, with metacell colouring enabled.
SEACells.plot.plot_2D(ad, key='X_umap', colour_metacells=True)

#### Plot Purity

In [None]:
# Compute purity with respect to the 'clusters' annotation.
SEACell_purity = SEACells.evaluate.compute_celltype_purity(ad, 'clusters')

# Distribution of the purity scores.
plt.figure(figsize=(4,4))
sns.boxplot(data=SEACell_purity, y='clusters_purity')
plt.title('clusters Purity')
sns.despine()
plt.show()
plt.close()

# Save the purity table to the working directory.
SEACell_purity.to_csv(os.path.join(work_dir, "MetaCellInfo.csv")) #Save Cluster Assignments

# Kept as the final expression so the notebook actually renders the preview.
# It used to sit before the to_csv call, where its output was discarded.
SEACell_purity.head()

#### Plot Compactness

In [None]:
# Compactness scores computed on the 'X_pca' embedding.
compactness = SEACells.evaluate.compactness(ad, 'X_pca')

# Box plot of the score distribution.
plt.figure(figsize=(4, 4))
sns.boxplot(y='compactness', data=compactness)
plt.title('Compactness')
sns.despine()
plt.show()
plt.close()

# Final expression: preview the first rows in the notebook output.
compactness.head()

#### Plot Separation

In [None]:
# Separation scores computed on the 'X_pca' embedding, using nth_nbr=1.
separation = SEACells.evaluate.separation(ad, 'X_pca', nth_nbr=1)

# Box plot of the score distribution.
plt.figure(figsize=(4, 4))
sns.boxplot(y='separation', data=separation)
plt.title('Separation')
sns.despine()
plt.show()
plt.close()

# Final expression: preview the first rows in the notebook output.
separation.head()

#### Load ATAC Data and add SEAcells info

In [None]:
# Load the ATAC AnnData object from the working directory.
atac_ad = sc.read_h5ad(os.path.join(work_dir, "objects/ATAC.h5ad"))

# Transfer the SEACell labels from the RNA object, matching on cell
# barcodes (obs names). Assigning the Series directly performs the same
# index alignment, but silently: any ATAC barcode absent from the RNA object
# would just become NaN. Reindex explicitly so mismatches can be reported.
seacell_labels = ad.obs['SEACell'].reindex(atac_ad.obs_names)
n_unassigned = int(seacell_labels.isna().sum())
if n_unassigned:
    # print() rather than warnings.warn(): warnings are globally suppressed
    # at the top of this notebook.
    print(f"WARNING: {n_unassigned} of {atac_ad.n_obs} ATAC cells have no "
          "SEACell assignment (barcode not found in the RNA object).")
atac_ad.obs['SEACell'] = seacell_labels

# Final expression: display the object summary in the notebook.
atac_ad

#### Save objects

In [None]:
# Write both objects back to the working directory. These are the same paths
# the objects were loaded from, so the input files are overwritten.
for adata_obj, rel_path in ((ad, "objects/RNA.h5ad"), (atac_ad, "objects/ATAC.h5ad")):
    adata_obj.write_h5ad(os.path.join(work_dir, rel_path))