# Dependencies and files

## Install dependencies

## Imports

In [1]:
import warnings
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os

import time
from tqdm.notebook import tqdm
import requests

# Figure resolution used by scanpy plots.
sc.settings.set_figure_params(dpi=80)
#sc.set_figure_params(facecolor="white", figsize=(8, 8))
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# scanpy logging verbosity level.
sc.settings.verbosity = 3

# Global variables

In [None]:
# Directory the ANOVA result tables and ANOVA cluster labels are read from.
de_dir_path = '../../data/DE/ANOVA_mean/'

In [None]:
# Layer names in plotting order; used to order rows (`.loc[layers]`) and as x tick labels.
layers = ['L1', 'L2', 'L3', 'L4', 'L5', "L6", 'WM']

In [None]:
# Per-cluster colours. NOTE(review): reassigned with a longer list in the clustering
# section before it is first read, so this value appears to be unused.
cluster_color = ['red', 'yellow', 'orange', 'green', 'purple', 'blue', 'olive', 'pink', 'brown']

In [None]:
os.listdir(de_dir_path)

## Load files

### DE results

In [None]:
# ANOVA result table (first CSV column becomes the index) and the index labels
# of the rows whose adjusted p-value is below 0.05.
anova_res = pd.read_csv(de_dir_path + 'anova_mamm_mean.csv', index_col=0)
passes_threshold = anova_res['p_val_adj'] < 0.05
anova_genes = anova_res.loc[passes_threshold].index
anova_res.head()

In [None]:
# Cluster labels for the ANOVA genes, read from the CSV written for `n_clusters`
# clusters (column '0'), sorted by label and named 'ANOVA_labels'.
# The former `anova_labels = anova_labels` self-assignment was a no-op and is dropped.
n_clusters = 7
anova_labels = pd.read_csv(de_dir_path + f'cluster_spectr_anova_mean_{n_clusters}.csv', index_col=0)['0'].sort_values()
anova_labels.name = 'ANOVA_labels'

In [None]:
# ANCOVA cluster labels (column '0' of the CSV), sorted by label value.
# The index is used further down to select gene columns, so it presumably holds gene names.
ancova_labels = pd.read_csv('./results/human_clusters_ancova_01_cos_all_8.csv', index_col=0)['0'].sort_values()
ancova_labels

## Load normalized pseudobulks

In [None]:
# Directory of the normalised pseudobulk file; this name is reassigned later in the notebook.
pb_dir_path = 'results/'

In [None]:
# Load the normalised pseudobulk AnnData object and display its summary.
adata_pb_norm = sc.read_h5ad(pb_dir_path + 'pb_mammals_samplenorm_mean_filtered.h5ad')
#adata_pb_norm_mean.var = anova_res
adata_pb_norm

In [None]:
# All variable names of the pseudobulk object as a plain list, and how many there are.
background_genes = adata_pb_norm.var_names.tolist()
len(background_genes)

## Species-specific genes

In [None]:
# Gene-selection table; the first CSV column becomes the index. Preview the first rows.
model_res = pd.read_csv('results/gene_selection_df.csv', index_col=0)
model_res.head()

In [None]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# result files produced by this project.
# The loaded object is consumed below via `.items()`, each value having a 'p-value' column.
with open("results/ancova_perm_res.pkl", "rb") as file:
    ancova_results = pickle.load(file)

In [None]:
# import pickle
# with open("./results/specie_specific_genes_tr50.pkl", "rb") as file:
#     genes_dict77 = pickle.load(file)

In [None]:
# Threshold on the 'p-value' column of each ANCOVA result table.
pval_tr = 0.01
# For every entry of ancova_results keep the index labels of rows below the threshold.
sign_genes = {
    comparison: table.loc[table['p-value'] < pval_tr].index
    for comparison, table in ancova_results.items()
}
sign_genes

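Let us try to define human-specific genes as $(HC \cap HM) \setminus CM$, where $HC$, $HM$ and $CM$ are the sets of genes significant in the human–chimp, human–macaque and chimp–macaque comparisons, respectively.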

In [None]:
# Genes significant in both human comparisons but not in chimp-vs-macaque.
hs_genes = (
    sign_genes['human-chimp']
    .intersection(sign_genes['human-macaque'])
    .difference(sign_genes['chimp-macaque'])
)
hs_genes, hs_genes.size

# Clustering

In [None]:
# Expression matrix as a DataFrame: rows are observations, columns are variables of adata_pb_norm.
df = pd.DataFrame(adata_pb_norm.X, index=adata_pb_norm.obs_names, columns=adata_pb_norm.var_names)

In [None]:
# Per-species row order: sort sample names by their last two characters, then
# put the names containing 'WM' ahead of the names containing 'L'.
sample_order = {}
for cond in ['human', 'chimp', 'macaque']:
    samples = df.index[df.index.str.contains(cond)]
    order = sorted(samples.tolist(), key=lambda name: name[-2:])
    order_laminar = [name for name in order if 'L' in name]
    order_wm = [name for name in order if 'WM' in name]
    sample_order[cond] = order_wm + order_laminar
# Concatenate the species blocks: human, then chimp, then macaque.
sample_order['all'] = [
    name
    for cond in ('human', 'chimp', 'macaque')
    for name in sample_order[cond]
]
df = df.loc[sample_order['all']]
# Sub-table restricted to the human-specific genes.
df_hs = df[hs_genes].copy()

In [None]:
# One colour per cluster, indexed by cluster position. The list previously contained
# 'pink' twice (positions 7 and 10), which would give two clusters the same colour
# once more than ten clusters are drawn; the second occurrence is now 'teal'.
cluster_color = ['red', 'yellow', 'orange', 'green', 'purple', 'blue', 'olive', 'pink', 'brown', 'cyan', 'teal', 'violet', 'crimson', 'tab:red', 'tab:green']

In [None]:
# Gene order that keeps genes of the same cluster adjacent, and the number of distinct labels.
order = ancova_labels.sort_values().index
n_clusters = len(ancova_labels.unique())

In [None]:
# First n_clusters colours, and the cluster ids 0..n_clusters-1 used to index value_counts below.
colors = cluster_color[:n_clusters]
clusters = np.arange(n_clusters)

In [None]:
sns.set(font_scale=0.8)

In [None]:
from matplotlib.patches import bbox_artist
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import ticker

# Extent of every cluster along the gene axis: sizes in cluster-id order and
# their running totals, which become the colour-strip boundaries.
cluster_size = ancova_labels.value_counts().loc[clusters].values
cluster_pos = cluster_size.cumsum()
bounds = [0, *cluster_pos]
cmap = mpl.colors.ListedColormap(colors)
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

# Heatmap of the human-specific genes, columns ordered by cluster.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(df_hs[order], ax=ax, cmap="RdBu_r", vmax=0.3, vmin=-0.3, center=0)

# Thin strip above the heatmap showing which cluster each column block belongs to.
divider = make_axes_locatable(ax)
cax = divider.append_axes('top', size='2%', pad=0.05)
strip = mpl.cm.ScalarMappable(cmap=cmap, norm=norm)
fig.colorbar(strip, cax=cax, ticks=bounds, orientation='horizontal', spacing='proportional')

# Label each block at its midpoint with the cluster id, ticks on the top side.
block_centres = cluster_pos - cluster_size / 2
cax.xaxis.set_major_locator(ticker.FixedLocator(block_centres))
cax.xaxis.set_major_formatter(ticker.FixedFormatter(clusters))
cax.xaxis.tick_top()

## Cluster profiles

In [None]:
# Copy of the expression table with the sample annotations attached as two extra columns.
df_annotation = df.assign(
    layer=adata_pb_norm.obs.layer,
    condition=adata_pb_norm.obs.condition,
)

In [None]:
# Mean of every column per (condition, layer) pair; the result has a two-level row index.
layer_mean = df_annotation.groupby(['condition', 'layer']).mean()
layer_mean.head()

In [None]:
# fig, axes = plt.subplots(2, 2,  figsize=(14, 3.5 * 2), gridspec_kw={'hspace': 0.4})
# for gene, ax in zip(layer_mean.columns, axes.flatten()):
#     (
#             layer_mean[gene]
#             .reorder_levels(['layer', 'condition'])
#             .unstack().loc[layers]
#             .plot
#             .line(color={'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}, ax=ax, marker='.')
#     )
#     ax.grid(False)

In [None]:
# One panel per ANCOVA cluster: the cluster-average layer profile, one line per species.
cluster_ids = ancova_labels.unique()
n_rows = (n_clusters + 1) // 2
# 3.5 inches per subplot row. The former `3.5 * n_clusters // 2` floor-divided the
# product, so the height did not follow the row count when n_clusters was odd.
fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.4})
sns.set(font_scale=1)
species_colors = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}

for label, ax in zip(cluster_ids, axes.flatten()):
    genes = ancova_labels[ancova_labels == label].index.tolist()

    (
        layer_mean[genes]
        .mean(axis=1)                            # average over the cluster's genes
        .reorder_levels(['layer', 'condition'])
        .unstack().loc[layers]                   # rows: layers in plotting order, columns: species
        .plot
        .line(color=species_colors, ax=ax, marker='.')
    )
    ax.legend(fontsize=10)
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)
    # Dashed zero line across the seven layer positions (0..6).
    ax.hlines(0, xmin=0, xmax=6, colors='gray', linestyles='dashed')

# Remove the panels that received no cluster (e.g. the last one for an odd count).
for spare_ax in axes.flatten()[len(cluster_ids):]:
    fig.delaxes(spare_ax)

In [None]:
# 400 evenly spaced evaluation points over the layer positions 1..7 for the spline curves.
grid = np.linspace(1, 7, 400)
# Species names in processing order and their plot colours.
conditions = ['human', 'chimp', 'macaque']
cond_colors = ['tab:red', 'tab:orange', 'tab:green']

In [None]:
# Numeric layer coordinate (1..7) for spline fitting and scatter overlays.
# The position comes from the `layers` list rather than from the sorted values found
# in the data: the plots label ticks 1..7 with `layers`, so the two must agree even
# if a layer is absent from the data. An unknown layer name maps to NaN and makes
# astype(int) fail loudly instead of shifting positions silently.
df_cont = df_annotation.copy()
layer_position = {name: position for position, name in enumerate(layers, start=1)}
df_cont['layer'] = df_cont.layer.map(layer_position).astype(int)

In [None]:
from scipy.interpolate import UnivariateSpline

# Fit one cubic smoothing spline per (species, human-specific gene) over the numeric
# layer coordinate and evaluate it on `grid`. The result, df_spline, stacks one
# block of grid.size rows per species with a 'condition' column.
df_list = []
gene_list = hs_genes.to_list()

for specie in conditions:
    # The species subset and its layer ordering do not depend on the gene, so they
    # are computed once per species instead of once per gene.
    subs_index = df_cont[df_cont.condition == specie].index
    targ_dataset = df_cont.loc[subs_index].copy().sort_values(by='layer')
    x = targ_dataset['layer'].values

    # Fresh buffer per species so blocks collected in df_list never share memory.
    array = np.zeros((grid.size, len(gene_list)))
    for i, gene in enumerate(gene_list):
        y = targ_dataset[gene].values
        spl = UnivariateSpline(x, y, k=3)
        array[:, i] = spl(grid)

    df_specie = pd.DataFrame(array, columns=hs_genes)
    df_specie['condition'] = specie
    df_list.append(df_specie)
df_spline = pd.concat(df_list)

In [None]:
# One panel per ANCOVA cluster: mean spline curve of the cluster's genes, one line per species.
mpl.rcdefaults()
species_colors = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}
cluster_ids = ancova_labels.unique()
n_rows = (n_clusters + 1) // 2
# 3.5 inches per subplot row (the former `3.5 * n_clusters // 2` floored the product).
fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.4})

for position, (label, ax) in enumerate(zip(cluster_ids, axes.flatten())):
    genes = ancova_labels[ancova_labels == label].index.tolist()

    # Average the fitted curves over the cluster's genes, then spread species into columns.
    df_cluster_spline = df_spline[genes].mean(axis=1).to_frame()
    df_cluster_spline['condition'] = df_spline['condition']
    df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=0)
    df_cluster_spline.index = grid
    df_cluster_spline.plot.line(color=species_colors, ax=ax)

    # `prop` alone sets the legend font; a separate `fontsize` is ignored when `prop` is given.
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1), prop={"size": 12})
    ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(1, 8)))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(layers))

    # The legend is anchored outside the right edge, so keep it only on right-column
    # panels. Decide by panel position, not by the label value, so this also holds
    # when cluster labels do not start at 0.
    if position % 2 == 0:
        ax.get_legend().remove()
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)

# Remove the panels that received no cluster.
for spare_ax in axes.flatten()[len(cluster_ids):]:
    fig.delaxes(spare_ax)

## All genes

### Human

In [None]:
# Single-gene view: fitted spline per species with the underlying samples overlaid.
gene = 'NCAM2'
palette = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}

fig, ax = plt.subplots()

# Curves: one column per species, indexed by the evaluation grid.
df_cluster_spline = df_spline[[gene, 'condition']].copy()
df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=gene)
df_cluster_spline.index = grid
df_cluster_spline.plot.line(color=palette, ax=ax)

# Samples as open circles. The marker is a raw string because "\c" is not a valid
# escape sequence and triggers a warning in a normal string literal.
sns.scatterplot(data=df_cont, x='layer', y=gene, hue='condition', palette=palette, ax=ax, legend=False, alpha=0.7, marker=r"$\circ$", s=200)
ax.set_ylabel('')
ax.set_title(gene, fontsize=16)

In [None]:
# For every ANCOVA cluster, write a PDF with the cluster-average spline curve in
# the first row followed by one panel per gene (spline plus samples).
mpl.rcdefaults()

palette = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}
out_dir = f'results/clusters{pval_tr}'
# savefig does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

for label in ancova_labels.unique():
    genes = ancova_labels[ancova_labels == label].index.tolist()
    # First row holds the cluster summary; genes fill the remaining rows two per row.
    n_rows = (len(genes) + 3) // 2
    # 3.5 inches per row (the former `3.5 * (len(genes) + 3) // 2` floored the product).
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.6})
    fig.suptitle(f'Cluster {label}', fontsize=20)

    # Cluster summary panel.
    ax = axes[0][0]
    df_cluster_spline = df_spline[genes].mean(axis=1).to_frame()
    df_cluster_spline['condition'] = df_spline['condition']
    df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=0)
    df_cluster_spline.index = grid
    df_cluster_spline.plot.line(color=palette, ax=ax)

    # `prop` alone sets the legend font; `fontsize` is ignored when `prop` is given.
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1), prop={"size": 12})
    ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(1, 8)))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(layers))
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)
    fig.delaxes(axes[0][1])

    # Per-gene panels. tqdm wraps the gene list so the bar knows its total.
    gene_axes = axes.flatten()[2:]
    for i, (gene, ax) in enumerate(zip(tqdm(genes), gene_axes)):
        df_cluster_spline = df_spline[[gene, 'condition']].copy()
        df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=gene)
        df_cluster_spline.index = grid
        df_cluster_spline.plot.line(color=palette, ax=ax)

        # Raw string: "\c" is not a valid escape sequence in a normal literal.
        sns.scatterplot(data=df_cont, x='layer', y=gene, hue='condition', palette=palette, ax=ax, legend=False, alpha=1, marker=r"$\circ$", s=100)
        ax.set_ylabel('')

        ax.legend(loc="upper left", bbox_to_anchor=(1, 1), prop={"size": 12})
        ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(1, 8)))
        ax.xaxis.set_major_formatter(ticker.FixedFormatter(layers))

        # The legend sits outside the right edge; keep it on right-column panels only.
        if i % 2 == 0:
            ax.get_legend().remove()

        ax.set_title(f'{gene}', fontsize=16)
        ax.grid(False)

    # Remove the trailing panel left empty when the gene count is odd.
    for spare_ax in gene_axes[len(genes):]:
        fig.delaxes(spare_ax)
    fig.savefig(f'{out_dir}/hs_genes_splines_{pval_tr}_{n_clusters}_{label}.pdf')

In [None]:
import glob
from pypdf import PdfMerger

# Merge the per-cluster spline PDFs into one file, then delete the parts.
prefix = f'results/clusters{pval_tr}/hs_genes_splines_{pval_tr}_{n_clusters}'
merged_path = f"{prefix}_all.pdf"

# The wildcard also matches a merged file left by a previous run; without the
# filter that file would be merged into itself and then removed by the clean-up
# loop below. glob returns paths in no defined order, so sort for a stable page order.
pdf_files = sorted(
    path for path in glob.glob(f'{prefix}*')
    if os.path.abspath(path) != os.path.abspath(merged_path)
)
merger = PdfMerger()
for pdf in pdf_files:
    merger.append(pdf)
merger.write(merged_path)
merger.close()

for file in pdf_files:
    os.remove(file.replace('\\', '/'))

In [None]:
# For every ANCOVA cluster, write a PDF with the cluster summary profile in the
# first row followed by one layer-profile panel per gene.
mpl.rcdefaults()

species_colors = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}
out_dir = f'results/clusters{pval_tr}'
# savefig does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

for label in ancova_labels.unique():
    genes = ancova_labels[ancova_labels == label].index.tolist()
    # First row holds the cluster summary; genes fill the remaining rows two per row.
    n_rows = (len(genes) + 3) // 2
    # 3.5 inches per row (the former `3.5 * (len(genes) + 3) // 2` floored the product).
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.6})
    fig.suptitle(f'Cluster {label}', fontsize=20)

    # Cluster summary: median over the cluster's genes of the per-layer means.
    ax = axes[0][0]
    (
        layer_mean[genes]
        .median(axis=1)
        .reorder_levels(['layer', 'condition'])
        .unstack().loc[layers]
        .plot
        .line(color=species_colors, ax=ax, marker='.')
    )
    ax.legend(fontsize=10)
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)
    ax.hlines(0, xmin=0, xmax=6, colors='gray', linestyles='dashed')
    fig.delaxes(axes[0][1])

    # Per-gene panels.
    gene_axes = axes.flatten()[2:]
    for gene, ax in tqdm(list(zip(genes, gene_axes))):
        (
            layer_mean[gene]
            .reorder_levels(['layer', 'condition'])
            .unstack().loc[layers]
            .plot
            .line(color=species_colors, ax=ax, marker='.')
        )
        ax.legend(fontsize=10)
        ax.set_title(f'{gene}', fontsize=16)
        ax.grid(False)
        ax.hlines(0, xmin=0, xmax=6, colors='gray', linestyles='dashed')

    # Remove the trailing panel left empty when the gene count is odd.
    for spare_ax in gene_axes[len(genes):]:
        fig.delaxes(spare_ax)
    fig.savefig(f'{out_dir}/hs_genes_cluster_{pval_tr}_{n_clusters}_{label}.pdf')

In [None]:
import glob
from pypdf import PdfMerger

# Merge the per-cluster profile PDFs into one file, then delete the parts.
prefix = f'results/clusters{pval_tr}/hs_genes_cluster_{pval_tr}_{n_clusters}'
merged_path = f"{prefix}_all.pdf"

# The wildcard also matches a merged file left by a previous run; without the
# filter that file would be merged into itself and then removed by the clean-up
# loop below. glob returns paths in no defined order, so sort for a stable page order.
pdf_files = sorted(
    path for path in glob.glob(f'{prefix}*')
    if os.path.abspath(path) != os.path.abspath(merged_path)
)
merger = PdfMerger()
for pdf in pdf_files:
    merger.append(pdf)
merger.write(merged_path)
merger.close()

for file in pdf_files:
    os.remove(file.replace('\\', '/'))

## Levels of genes

In [None]:
hs_genes

In [None]:
# Hand-picked gene list. NOTE(review): only referenced from a commented-out line below
# and reassigned later in the notebook.
pretty_genes = ['PHACTR3', 'MARCKS', 'LPL', 'COTL1', 'GRM3', 'SCD5', 'EIF3F', 'EEF1G', 'NOL10', 'RAB3GAP1', 'ZMYND8', 'GNPTAB','NRXN1',
                'NRXN2', 'KIF21B', 'STAU2', 'GNAZ', 'FOXP1', 'AKT3', 'SYNGAP1', 'ZBTB18', 'DYNLL2', 'CPNE6', 'OTUD4','LINGO1', 'SCN3B',
                'ATAD1', 'PPP2R2C', 'APLP2', 'NCAM2', 'SLC38A10', 'ASAP2', 'FAM171B', 'RBM23', 'ID2', 'ETFB', 'ENC1', 'EIF1B']

In [None]:
# Rebinds pb_dir_path (previously 'results/') to the directory of the filtered pseudobulks.
pb_dir_path = '../../data/pseudobulks/'

In [None]:
# Load the filtered pseudobulk AnnData; it is normalised in the next cell.
adata_pb = sc.read_h5ad(pb_dir_path + 'pb_mammals_filtered.h5ad')
adata_pb

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place
# (the values are labelled 'Log(CP10K)' in the plots below).
sc.pp.normalize_total(adata_pb, target_sum=1e4)
sc.pp.log1p(adata_pb)

In [None]:
# Log-normalised pseudobulk matrix as a DataFrame (rows: observations, columns: variables).
df_pb = pd.DataFrame(adata_pb.X, index=adata_pb.obs_names, columns=adata_pb.var_names)

In [None]:
# Reorder rows to the same sample order used for the normalised table.
df_pb = df_pb.loc[sample_order['all']]
#df_hs = df[hs_genes].copy()

In [None]:
# Copy of the log-normalised table with the sample annotations attached as two extra columns.
df_annotation_pb = df_pb.assign(
    layer=adata_pb.obs.layer,
    condition=adata_pb.obs.condition,
)

In [None]:
# Histogram of per-column means over the human rows; iloc[:, :-2] drops the two
# annotation columns ('layer', 'condition') appended last.
sns.histplot(df_annotation_pb[df_annotation_pb.condition == 'human'].iloc[:, :-2].mean(axis=0))

In [None]:
# Mean of every column per (condition, layer) pair for the log-normalised values.
layer_mean_pb = df_annotation_pb.groupby(['condition', 'layer']).mean()
layer_mean_pb.head()

In [None]:
# For every cluster, write a PDF with the cluster summary in the first row and, per
# gene, the layer profile from layer_mean (left) next to the Log(CP10K) profile
# from layer_mean_pb (right).
mpl.rcdefaults()

gene_labels = ancova_labels
species_colors = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}
out_dir = f'results/clusters{pval_tr}'
# savefig does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

for label in gene_labels.unique():
    genes = gene_labels[gene_labels == label].index.tolist()
    n_rows = len(genes) + 1
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.6})
    fig.suptitle(f'Cluster {label}', fontsize=20)

    # Cluster summary: median over the cluster's genes of the per-layer means.
    ax = axes[0][0]
    (
        layer_mean[genes]
        .median(axis=1)
        .reorder_levels(['layer', 'condition'])
        .unstack().loc[layers]
        .plot
        .line(color=species_colors, ax=ax, marker='.')
    )
    ax.legend(fontsize=10)
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)
    fig.delaxes(axes[0][1])

    # tqdm wraps the gene list (not the enumerate iterator) so the bar knows its total.
    for i, gene in enumerate(tqdm(genes)):
        # (table, column, title, draw zero line): only the left-hand values get a zero reference line.
        panels = (
            (layer_mean, 0, f'{gene}', True),
            (layer_mean_pb, 1, f'{gene} Log(CP10K)', False),
        )
        for table, column, title, zero_line in panels:
            panel_ax = axes[i + 1, column]
            (
                table[gene]
                .reorder_levels(['layer', 'condition'])
                .unstack().loc[layers]
                .plot
                .line(color=species_colors, ax=panel_ax, marker='.')
            )
            panel_ax.legend(fontsize=10)
            panel_ax.set_title(title, fontsize=16)
            panel_ax.grid(False)
            if zero_line:
                panel_ax.hlines(0, xmin=0, xmax=6, colors='gray', linestyles='dashed')
    fig.savefig(f'{out_dir}/hs_genes_levels_{pval_tr}_cluster_{n_clusters}_{label}.pdf')

In [None]:
import glob
from pypdf import PdfMerger

# Merge the per-cluster gene-level PDFs into one file, then delete the parts.
prefix = f'results/clusters{pval_tr}/hs_genes_levels_{pval_tr}_cluster_{n_clusters}'
merged_path = f"{prefix}_all.pdf"

# The wildcard also matches a merged file left by a previous run; without the
# filter that file would be merged into itself and then removed by the clean-up
# loop below. glob returns paths in no defined order, so sort for a stable page order.
pdf_files = sorted(
    path for path in glob.glob(f'{prefix}*')
    if os.path.abspath(path) != os.path.abspath(merged_path)
)
merger = PdfMerger()
for pdf in pdf_files:
    merger.append(pdf)
merger.write(merged_path)
merger.close()

for file in pdf_files:
    os.remove(file.replace('\\', '/'))

In [None]:
# Numeric layer coordinate (1..7) for the log-normalised table.
# The position comes from the `layers` list rather than from the sorted values found
# in the data: the plots label ticks 1..7 with `layers`, so the two must agree even
# if a layer is absent from the data. An unknown layer name maps to NaN and makes
# astype(int) fail loudly instead of shifting positions silently.
df_cont_pb = df_annotation_pb.copy()
layer_position = {name: position for position, name in enumerate(layers, start=1)}
df_cont_pb['layer'] = df_cont_pb.layer.map(layer_position).astype(int)

In [None]:
from scipy.interpolate import UnivariateSpline

# Fit one cubic smoothing spline per (species, human-specific gene) on the
# log-normalised values and evaluate it on `grid`. The result, df_spline_pb, stacks
# one block of grid.size rows per species with a 'condition' column.
df_list = []
gene_list = hs_genes.to_list()

for specie in conditions:
    # The species subset and its layer ordering do not depend on the gene, so they
    # are computed once per species instead of once per gene.
    subs_index = df_cont_pb[df_cont_pb.condition == specie].index
    targ_dataset = df_cont_pb.loc[subs_index].copy().sort_values(by='layer')
    x = targ_dataset['layer'].values

    # Fresh buffer per species so blocks collected in df_list never share memory.
    array = np.zeros((grid.size, len(gene_list)))
    for i, gene in enumerate(gene_list):
        y = targ_dataset[gene].values
        spl = UnivariateSpline(x, y, k=3)
        array[:, i] = spl(grid)

    df_specie = pd.DataFrame(array, columns=hs_genes)
    df_specie['condition'] = specie
    df_list.append(df_specie)
df_spline_pb = pd.concat(df_list)

In [None]:
# For every cluster, write a PDF with the cluster-average spline in the first row
# and, per gene, the spline plus samples from the df_spline/df_cont pair (left)
# next to the Log(CP10K) pair df_spline_pb/df_cont_pb (right).
mpl.rcdefaults()
gene_labels = ancova_labels
palette = {'human': 'tab:red', 'chimp': 'tab:orange', 'macaque': 'tab:green'}
out_dir = f'results/clusters{pval_tr}'
# savefig does not create missing directories.
os.makedirs(out_dir, exist_ok=True)

for label in gene_labels.unique():
    genes = gene_labels[gene_labels == label].index.tolist()
    n_rows = len(genes) + 1
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3.5 * n_rows), gridspec_kw={'hspace': 0.6})
    fig.suptitle(f'Cluster {label}', fontsize=20)

    # Cluster summary panel: mean fitted curve over the cluster's genes.
    ax = axes[0][0]
    df_cluster_spline = df_spline[genes].mean(axis=1).to_frame()
    df_cluster_spline['condition'] = df_spline['condition']
    df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=0)
    df_cluster_spline.index = grid
    df_cluster_spline.plot.line(color=palette, ax=ax)

    # `prop` alone sets the legend font; `fontsize` is ignored when `prop` is given.
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1), prop={"size": 12})
    ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(1, 8)))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(layers))
    ax.set_title(f'Cluster {label}', fontsize=16)
    ax.grid(False)
    fig.delaxes(axes[0][1])

    # tqdm wraps the gene list (not the enumerate iterator) so the bar knows its total.
    for i, gene in enumerate(tqdm(genes)):
        # (axes, spline table, sample table, title) for the left and right panel.
        panels = (
            (axes[i + 1, 0], df_spline, df_cont, f'{gene}'),
            (axes[i + 1, 1], df_spline_pb, df_cont_pb, f'{gene} Log(CP10K)'),
        )
        for panel_ax, spline_table, sample_table, title in panels:
            df_cluster_spline = spline_table[[gene, 'condition']].copy()
            df_cluster_spline = df_cluster_spline.pivot(columns='condition', values=gene)
            df_cluster_spline.index = grid
            df_cluster_spline.plot.line(color=palette, ax=panel_ax)

            # Raw string: "\c" is not a valid escape sequence in a normal literal.
            sns.scatterplot(data=sample_table, x='layer', y=gene, hue='condition', palette=palette, ax=panel_ax, legend=False, alpha=1, marker=r"$\circ$", s=100)
            panel_ax.set_ylabel('')
            panel_ax.set_title(title, fontsize=16)
            panel_ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(1, 8)))
            panel_ax.xaxis.set_major_formatter(ticker.FixedFormatter(layers))

        # One legend per row: drop it on the left panel, place it outside the right one.
        axes[i + 1, 0].get_legend().remove()
        axes[i + 1, 1].legend(loc="upper left", bbox_to_anchor=(1, 1), prop={"size": 10})
    fig.savefig(f'{out_dir}/hs_genes_levels_splines_{pval_tr}_cluster_{n_clusters}_{label}.pdf')

In [None]:
import glob
from pypdf import PdfMerger

# Merge the per-cluster gene-level spline PDFs into one file, then delete the parts.
prefix = f'results/clusters{pval_tr}/hs_genes_levels_splines_{pval_tr}_cluster_{n_clusters}'
merged_path = f"{prefix}_all.pdf"

# The wildcard also matches a merged file left by a previous run; without the
# filter that file would be merged into itself and then removed by the clean-up
# loop below. glob returns paths in no defined order, so sort for a stable page order.
pdf_files = sorted(
    path for path in glob.glob(f'{prefix}*')
    if os.path.abspath(path) != os.path.abspath(merged_path)
)
merger = PdfMerger()
for pdf in pdf_files:
    merger.append(pdf)
merger.write(merged_path)
merger.close()

for file in pdf_files:
    os.remove(file.replace('\\', '/'))

# Slides

In [None]:
import squidpy as sq

## All spots

In [None]:
# Load the human AnnData from the raw_adata directory and de-duplicate observation names.
adata_human = sc.read_h5ad('../../data/raw_adata/human.h5ad')
adata_human.obs_names_make_unique()
adata_human

In [None]:
pretty_genes = ['PHACTR3', 'MARCKS', 'LPL', 'NRXN1', 'NRXN2']

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_human, target_sum=1e4)
sc.pp.log1p(adata_human)

In [None]:
import matplotlib
matplotlib.rcdefaults()

In [None]:
# Colormap and a colour normalisation clipped to [0.5, 2.5].
# NOTE(review): both names are reassigned at the top of each plotting cell below.
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0.5, vmax=2.5, clip=True)

In [None]:
# Human, LPL: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0.5, 2.5]. Saved as human_LPL.pdf.
gene = 'LPL'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0.5, vmax=2.5, clip=True)

sq.pl.spatial_scatter(
    adata_human,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'human_{gene}.pdf',
)

In [None]:
# Human, EIF1B: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 3]. Saved as human_EIF1B.pdf.
gene = 'EIF1B'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=3, clip=True)

sq.pl.spatial_scatter(
    adata_human,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'human_{gene}.pdf',
)

In [None]:
# Human, NCAM2: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as human_NCAM2.pdf.
gene = 'NCAM2'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_human,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'human_{gene}.pdf',
)

In [None]:
# Human, FOXP1: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2.5]. Saved as human_FOXP1.pdf.
gene = 'FOXP1'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2.5, clip=True)

sq.pl.spatial_scatter(
    adata_human,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'human_{gene}.pdf',
)

In [None]:
# Load the chimp AnnData from the raw_adata directory and de-duplicate observation names.
adata_chimp = sc.read_h5ad('../../data/raw_adata/chimp.h5ad')
adata_chimp.obs_names_make_unique()
adata_chimp

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_chimp, target_sum=1e4)
sc.pp.log1p(adata_chimp)

In [None]:
# Chimp, LPL: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0.5, 2.5]. Saved as chimp_LPL.pdf.
gene = 'LPL'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0.5, vmax=2.5, clip=True)

sq.pl.spatial_scatter(
    adata_chimp,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'chimp_{gene}.pdf',
)

In [None]:
# Chimp, EIF1B: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0.5, 2.5]. Saved as chimp_EIF1B.pdf.
gene = 'EIF1B'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0.5, vmax=2.5, clip=True)

sq.pl.spatial_scatter(
    adata_chimp,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'chimp_{gene}.pdf',
)

In [None]:
# Chimp, NCAM2: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as chimp_NCAM2.pdf.
gene = 'NCAM2'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_chimp,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'chimp_{gene}.pdf',
)

In [None]:
# Chimp, FOXP1: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as chimp_FOXP1.pdf.
gene = 'FOXP1'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_chimp,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'chimp_{gene}.pdf',
)

In [None]:
# Load the macaque AnnData from the raw_adata directory and de-duplicate observation names.
adata_macaque = sc.read_h5ad('../../data/raw_adata/macaque.h5ad')
adata_macaque.obs_names_make_unique()
adata_macaque

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_macaque, target_sum=1e4)
sc.pp.log1p(adata_macaque)

In [None]:
# Macaque, LPL: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as macaque_LPL.pdf.
gene = 'LPL'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_macaque,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'macaque_{gene}.pdf',
)

In [None]:
# Macaque, EIF1B: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 3]. Saved as macaque_EIF1B.pdf.
gene = 'EIF1B'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=3, clip=True)

sq.pl.spatial_scatter(
    adata_macaque,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'macaque_{gene}.pdf',
)

In [None]:
# Macaque, NCAM2: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as macaque_NCAM2.pdf.
gene = 'NCAM2'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_macaque,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'macaque_{gene}.pdf',
)

In [None]:
# Macaque, FOXP1: gene values next to the 'label' annotation, one row per sample_id;
# colour scale clipped to [0, 2]. Saved as macaque_FOXP1.pdf.
gene = 'FOXP1'
cmap = matplotlib.cm.viridis
norm = matplotlib.colors.Normalize(vmin=0, vmax=2, clip=True)

sq.pl.spatial_scatter(
    adata_macaque,
    color=[gene, 'label'],
    library_key='sample_id',
    ncols=2,
    img=True,
    size=1.3,
    norm=norm,
    save=f'macaque_{gene}.pdf',
)

In [None]:
pretty_genes = ['LPL', 'EIF1B', 'NCAM2', 'FOXP1']

In [None]:
import glob
from pypdf import PdfMerger

# For each gene, merge every figures/*<gene>.pdf into figures/<gene>_all.pdf.
for gene in pretty_genes:
    # glob returns paths in no defined order; sort so the page order is the same on every run.
    pdf_files = sorted(glob.glob(f'figures/*{gene}.pdf'))
    merger = PdfMerger()
    for pdf in pdf_files:
        merger.append(pdf)
    merger.write(f"figures/{gene}_all.pdf")
    merger.close()

In [None]:
# Merge the per-gene '*all.pdf' files into a single document.
merged_path = "figures/pretty_genes_all.pdf"
# The pattern also matches the merged output of a previous run; exclude it so the
# file is not appended to itself. Sort because glob order is not defined.
pdf_files = sorted(
    path for path in glob.glob('figures/*all.pdf')
    if os.path.abspath(path) != os.path.abspath(merged_path)
)
merger = PdfMerger()
for pdf in pdf_files:
    merger.append(pdf)
merger.write(merged_path)
merger.close()

## Filtered

In [None]:
# Rebind adata_human to the object from the filtered_adata directory and de-duplicate observation names.
adata_human = sc.read_h5ad('../../data/filtered_adata/human.h5ad')
adata_human.obs_names_make_unique()
adata_human

In [None]:
pretty_genes = ['PHACTR3', 'MARCKS', 'LPL', 'NRXN1', 'NRXN2']

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_human, target_sum=1e4)
sc.pp.log1p(adata_human)

In [None]:
# Filtered human data: LPL, FOXP1, NCAM2 and the 'label' annotation, one row per sample_id.
# `norm` used to be inherited from whichever plotting cell ran last; define it here so
# the figure does not depend on execution order. The value matches the last assignment
# when the notebook is run top to bottom (clipped to [0, 2]).
norm = mpl.colors.Normalize(vmin=0, vmax=2, clip=True)
sq.pl.spatial_scatter(adata_human,
                      color=['LPL', 'FOXP1', 'NCAM2', 'label'],
                      library_key='sample_id', ncols=4, img=True, size=1.3, save='human_filtered_pretty_genes.pdf', norm=norm)

In [None]:
# Rebind adata_chimp to the object from the filtered_adata directory and de-duplicate observation names.
adata_chimp = sc.read_h5ad('../../data/filtered_adata/chimp.h5ad')
adata_chimp.obs_names_make_unique()
adata_chimp

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_chimp, target_sum=1e4)
sc.pp.log1p(adata_chimp)

In [None]:
# Filtered chimp data: LPL, FOXP1, NCAM2 and the 'label' annotation, one row per sample_id.
# `norm` used to be inherited from whichever plotting cell ran last; define it here so
# the figure does not depend on execution order. The value matches the last assignment
# when the notebook is run top to bottom (clipped to [0, 2]).
norm = mpl.colors.Normalize(vmin=0, vmax=2, clip=True)
sq.pl.spatial_scatter(adata_chimp,
                      color=['LPL', 'FOXP1', 'NCAM2', 'label'],
                      library_key='sample_id', ncols=4, img=True, size=1.3, save='chimp_filtered_pretty_genes.pdf', norm=norm)

In [None]:
# Rebind adata_macaque to the object from the filtered_adata directory and de-duplicate observation names.
adata_macaque = sc.read_h5ad('../../data/filtered_adata/macaque.h5ad')
adata_macaque.obs_names_make_unique()
adata_macaque

In [None]:
# Scale each observation to a total of 1e4 and apply log1p, in place.
sc.pp.normalize_total(adata_macaque, target_sum=1e4)
sc.pp.log1p(adata_macaque)

In [None]:
# Filtered macaque data: LPL, FOXP1, NCAM2 and the 'label' annotation, one row per sample_id.
# `norm` used to be inherited from whichever plotting cell ran last; define it here so
# the figure does not depend on execution order. The value matches the last assignment
# when the notebook is run top to bottom (clipped to [0, 2]).
norm = mpl.colors.Normalize(vmin=0, vmax=2, clip=True)
sq.pl.spatial_scatter(adata_macaque,
                      color=['LPL', 'FOXP1', 'NCAM2', 'label'],
                      library_key='sample_id', ncols=4, img=True, size=1.3, save='macaque_filtered_pretty_genes.pdf', norm=norm)