In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from spatialdata import read_zarr

## LPS_10

In [None]:
Sample = 'LPS_10'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=800)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## LPS_7

In [None]:
Sample = 'LPS_7'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=900)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## LPS_8

In [None]:
Sample = 'LPS_8'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'TE_RLTR6_Mm', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=900)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## Saline_2

In [None]:
Sample = 'Saline_2'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'TE_RLTR6_Mm', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=900)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## Saline_4

In [None]:
Sample = 'Saline_4'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'TE_RLTR6_Mm', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=900)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## Saline_9

In [None]:
Sample = 'Saline_9'

In [None]:
sdata = read_zarr(f"./{Sample}/raw_sdata.zarr")
sdata

In [None]:
# Area of every polygon in the "MultiModel" shapes layer, in stored order.
areas = [polygon.area for polygon in sdata.shapes["MultiModel"]["geometry"]]

In [None]:
adata = sdata.tables['table'].copy()
adata

In [None]:
adata.obs['area'] = areas
adata.obs

In [None]:
import pandas as pd

In [None]:
NP_Pad = pd.read_csv("/media/duan/DuanLab_Data/openFISH/TE/NP_Fold_V58_TE.csv", index_col=0)

In [None]:
adata.obsm['spatial'] = adata.obs[['x', 'y']].to_numpy()

In [None]:
sc.set_figure_params(figsize = (6,6), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = 'Cck', size = 10)

In [None]:
adata.obsm["spatial"][:,1] =  0 - adata.obsm["spatial"][:,1]
sc.pl.embedding(adata, basis = 'spatial', color = 'TE_RLTR6_Mm', size = 10)

In [None]:
tmpmatrix = adata.to_df().copy()

In [None]:
tmpmatrix = tmpmatrix.astype('float32')

In [None]:
import numpy as np

# Apply the per-gene 'Fold' correction from NP_Pad to every column in one vectorized step.
# (A log2(Fold + 1) weighting was tried previously and is no longer used.)
# Factors are cast to float32 so the result keeps tmpmatrix's float32 dtype regardless of
# NumPy's scalar-promotion rules.
fold = NP_Pad.loc[tmpmatrix.columns, 'Fold'].to_numpy(dtype='float32')
if fold.shape[0] != tmpmatrix.shape[1]:
    # Duplicate gene names in NP_Pad would mis-align factors and columns.
    raise ValueError("NP_Pad must provide exactly one 'Fold' value per gene in tmpmatrix")
tmpmatrix = tmpmatrix.mul(fold, axis='columns')

In [None]:
from scipy.sparse import csr_matrix

In [None]:
adata.layers['counts'] = adata.X.copy()
adata.X = csr_matrix(tmpmatrix)
adata.layers['counts_corrected'] = adata.X.copy()

In [None]:
adata

In [None]:
adata.var["FP"] = adata.var_names.str.startswith("sFP")

In [None]:
sc.pp.calculate_qc_metrics(
    adata, inplace=True, log1p=True, percent_top=None,qc_vars=["FP"]
)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", 'pct_counts_FP'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
import numpy as np
from tqdm import tqdm

In [None]:
adata

In [None]:
adata.obs['total_counts'].mean()

In [None]:
adata.obs['total_counts'].median()

In [None]:
np.max(adata.obs['total_counts_FP'])

In [None]:
adata.obs['total_counts_FP'].mean()

In [None]:
# Filter cells according to Cell Volume
adata.obs["area"].mean()

In [None]:
np.percentile(adata.obs["area"], 1)

In [None]:
np.percentile(adata.obs["area"], 90)

In [None]:
np.percentile(adata.obs["area"], 50)

In [None]:
adata = adata[(adata.obs["area"] > 600) & (adata.obs["area"] < (3 * adata.obs["area"].mean())),:].copy()
adata

In [None]:
adata.obs['total_counts'].median()

In [None]:
adata = adata[adata.obs.pct_counts_FP < 0.1, :].copy()
sc.pp.filter_cells(adata, min_genes=2)
sc.pp.filter_cells(adata, max_counts=900)
sc.pp.filter_cells(adata, min_counts=10)

In [None]:
BLACK_LIST = ['sFP']

In [None]:
adata = adata[:, [x for x in adata.var_names if x not in BLACK_LIST]].copy()

In [None]:
ref = sc.read_h5ad("/media/duan/DuanLab_Data/openFISH/ABA110_Demo2/Rep2/annotated.h5ad")
ref

In [None]:
ref.X = ref.layers['counts_corrected'].copy()

In [None]:
adata.obs['transfer'] = 'target'
adata.obs['cell_type'] = 'target'

ref.obs['transfer'] = 'source'

In [None]:
merged = sc.concat([adata, ref], merge="same")
merged

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(merged.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `merged.X.T / cell_area` did).
area_scaled = csr_matrix(merged.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
merged.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to median total counts
sc.pp.normalize_total(merged, target_sum=109)
# Logarithmize the data
sc.pp.log1p(merged)

In [None]:
sc.tl.pca(merged, svd_solver='arpack')

In [None]:
import scanpy.external as sce

In [None]:
sce.pp.harmony_integrate(merged, ["transfer"], max_iter_harmony = 100)

In [None]:
sc.pp.neighbors(merged, use_rep = "X_pca_harmony")

In [None]:
sc.tl.umap(merged)

In [None]:
sc.pl.umap(merged, color=["transfer", 'cell_type'], size = 2)

In [None]:
from scipy.spatial import KDTree
from tqdm import tqdm
import numpy as np

In [None]:
tmp = merged[merged.obs['transfer'] == 'source'].copy()
tmp

In [None]:
# One centroid per cell type of the source cells, computed in the 'X_pca_harmony' embedding.
source_labels = tmp.obs['cell_type']
source_embedding = tmp.obsm['X_pca_harmony']

ALL_CellTypes = list(source_labels.unique())
ALL_Centers = [
    np.mean(source_embedding[source_labels == cell_type, :], axis=0)
    for cell_type in ALL_CellTypes
]

In [None]:
tree = KDTree(ALL_Centers)

In [None]:
# Give every target cell the label of its nearest source centroid; source cells keep their
# own label. One batched KD-tree query replaces the per-cell Python loop and avoids
# integer-position lookups (`Series[i]`) on the string-indexed obs column, which pandas 2.x
# deprecates.
is_target = (merged.obs["cell_type"] == "target").to_numpy()
# copy=True: never write through to merged.obs["cell_type"].
resolved_labels = merged.obs["cell_type"].to_numpy(dtype=object, copy=True)

if is_target.any():
    _, nearest_center = tree.query(merged.obsm["X_pca_harmony"][is_target])
    resolved_labels[is_target] = np.asarray(ALL_CellTypes, dtype=object)[nearest_center]

transfer_celltype = resolved_labels.tolist()

In [None]:
merged.obs["transfer_cell_type"] = transfer_celltype

In [None]:
sc.pl.umap(merged, color=["transfer_cell_type"], size = 2)

In [None]:
adata

In [None]:
adata.obs['cell_type'] = merged[merged.obs['transfer'] == 'target'].obs['transfer_cell_type'].to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type')

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', groups = 'Inh Vip', size = 20)

In [None]:
adata.write_h5ad(f'data/{Sample}_annotated.h5ad')

## Annotation

In [None]:
import scanpy as sc
import numpy as np
import cosg
import pandas as pd

In [None]:
# Load the annotated LPS samples, tag each with sample/condition, and concatenate them.
adatas = []

for sample in ['LPS_10', 'LPS_7', 'LPS_8']:
    sample_adata = sc.read_h5ad(f"./data/{sample}_annotated.h5ad")
    # Suffix every cell id with the sample name (presumably to avoid id clashes across samples).
    sample_adata.obs_names = [f"{cell_id}_{sample}" for cell_id in sample_adata.obs_names]
    sample_adata.obs['Sample'] = sample
    sample_adata.obs['Condition'] = 'LPS'
    adatas.append(sample_adata)

adata = sc.concat(adatas)
adata

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = ['Sample'], size = 10)

In [None]:
# Shift two samples along x (presumably so the sections do not overlap when plotted together).
# NOTE: uses `+=`, so re-running this cell shifts the coordinates again.
X_OFFSETS = {'LPS_8': 20000, 'LPS_10': 41000}
for sample_name, x_shift in X_OFFSETS.items():
    adata.obsm['spatial'][adata.obs['Sample'] == sample_name, 0] += x_shift

In [None]:
sc.set_figure_params(figsize = (16,6), dpi = 100)
sc.pl.embedding(adata, basis = 'spatial', color = ['Sample'], size = 10)

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = ['cell_type'], size = 10)

In [None]:
# Normalize using cell area
from scipy.sparse import csr_matrix
cell_area = np.array(adata.obs["area"])
# Divide each cell's (row's) stored counts by that cell's area directly on the CSR data,
# so the matrix is never expanded to a dense array (as `adata.X.T / cell_area` did).
area_scaled = csr_matrix(adata.X, dtype=np.float64, copy=True)
area_scaled.data /= np.repeat(cell_area, np.diff(area_scaled.indptr))
area_scaled.eliminate_zeros()
adata.X = area_scaled
# NOTE(review): the next cell calls sc.pp.normalize_total, which rescales every cell to the
# same total; that appears to cancel this per-cell area scaling. Confirm this is intended.

In [None]:
# Normalizing to library size
sc.pp.normalize_total(adata, target_sum=109)
# Logarithmize the data
sc.pp.log1p(adata)

In [None]:
sc.pl.embedding(adata, basis="spatial", color="Slc17a7", size = 10)

In [None]:
sc.tl.pca(adata)
# Integrate across samples. Every per-sample file was written with obs['transfer'] set to
# 'target', so 'transfer' is constant in this concatenated object and cannot act as a batch
# key; 'Sample' is the column that actually distinguishes the batches.
sc.external.pp.harmony_integrate(adata, ["Sample"], max_iter_harmony = 20)
sc.pp.neighbors(adata, use_rep='X_pca_harmony')
sc.tl.umap(adata)

In [None]:
sc.tl.leiden(adata, resolution=2, flavor='igraph', n_iterations=-1)

In [None]:
sc.set_figure_params(figsize = (6,6), dpi = 100)
sc.pl.umap(adata, color=['Sample', 'leiden'], size = 2)

In [None]:
# 'leiden_R' is only created by the restricted sub-clustering cell further down. On a fresh
# kernel (Restart & Run All) it does not exist yet, so fall back to the base 'leiden' labels
# instead of failing on a missing obs column.
groupby = 'leiden_R' if 'leiden_R' in adata.obs.columns else 'leiden'
# Marker genes per cluster; results are stored under adata.uns['cosg_leiden'].
cosg.cosg(adata, key_added='cosg_leiden',
          # use_raw=False, layer='log1p', ## e.g., if you want to use the log1p layer in adata
          mu=1,
          expressed_pct=0.1,
          remove_lowly_expressed=True,
          n_genes_user=10,
          groupby=groupby)

import pandas as pd
# Export the marker-gene names (one column per cluster) for manual annotation.
pd.DataFrame(adata.uns["cosg_leiden"]["names"]).to_csv('LPS_cosg.csv')

In [None]:
# Iterative sub-clustering plan: (source column, cluster to split, resolution).
# Order matters — the first step derives 'leiden_R' from 'leiden', and later
# steps split clusters (e.g. '0,0', '16,0') that earlier steps created.
lps_subcluster_plan = [
    ('leiden', '0', 0.2),
    ('leiden_R', '0,0', 0.3),
    ('leiden_R', '1', 0.3),
    ('leiden_R', '5', 0.2),
    ('leiden_R', '16', 0.3),
    ('leiden_R', '16,0', 0.3),
    ('leiden_R', '17', 0.2),
    ('leiden_R', '18', 0.2),
    ('leiden_R', '21', 0.3),
    ('leiden_R', '22', 0.2),
    ('leiden_R', '23', 0.2),
    ('leiden_R', '24', 0.2),
    ('leiden_R', '25', 0.2),
    ('leiden_R', '26', 0.2),
    ('leiden_R', '29', 0.3),
    ('leiden_R', '31', 0.2),
    ('leiden_R', '40', 0.2),
    ('leiden_R', '41', 0.2),
    ('leiden_R', '42', 0.2),
    ('leiden_R', '43', 0.2),
    ('leiden_R', '47', 0.2),
]
for source_key, parent_cluster, res in lps_subcluster_plan:
    sc.tl.leiden(adata, restrict_to=(source_key, [parent_cluster]), resolution=res)

# Show the refined clustering on the UMAP.
sc.set_figure_params(figsize=(6, 6), dpi=100)
sc.pl.umap(adata, color='leiden_R', size=20)

In [None]:
# Manual annotation of the LPS data: maps each final 'leiden_R' cluster label
# to a cell-type name. Labels containing commas are sub-clusters of the cluster
# named before the comma. Clusters mapped to 'BACKGROUND' are discarded later
# in the notebook.
annotation_dict = {
    '0,0,0': 'Fibro',
    '0,0,1': 'Mural',
    '0,0,2': 'Mural',
    '0,0,3': 'Fibro',
    '0,1': 'Fibro',
    '1,0': 'Fibro',
    '1,1': 'BACKGROUND', # choroid plexus (CHOR) tissue drifted out of place
    '1,2': 'BACKGROUND', # damaged tissue
    '1,3': 'Fibro',
    '1,4': 'BACKGROUND',
    '1,5': 'BACKGROUND',
    '2': 'Endo',
    '3': 'Microglia',
    '4': 'Astro-TE',
    '5,0': 'Endo',
    '5,1': 'Endo',
    '5,2': 'Mural',
    '6': 'Microglia',
    '7': 'Microglia',
    '8': 'Ext TH',
    '9': 'Endo',
    '10': 'Oligo',
    '11': 'Oligo',
    '12': 'Ext MEA-COA',
    '13': 'Astro-NT',
    '14': 'Astro-TE',
    '15': 'OPC',
    '16,0,0': 'Oligo',
    '16,0,1': 'Inh RT-ZI',
    '16,0,2': 'Oligo',
    '16,1': 'Inh RT-ZI',
    '17,0': 'Inh Lamp5',
    '17,1': 'Inh Vip',
    '17,2': 'Inh Vip',
    '18,0': 'Inh DMH-LHA-MEA',
    '18,1': 'Inh CEA',
    '18,2': 'Inh PAL',
    '18,3': 'Inh DMH-LHA-MEA',
    '19': 'Ext L6 CT',
    '20': 'OPC',
    '21,0': 'Epen',
    '21,1': 'Epen',
    '21,2': 'Epen',
    '21,3': 'Tanycyte',
    '22,0': 'Oligo',
    '22,1': 'Ext TH',
    '23,0': 'OPC',
    '23,1': 'OPC',
    '24,0': 'Ext L5 ET/NP',
    '24,1': 'Ext L5 ET/NP',
    '24,2': 'Ext L4 RSP-ACA',
    '25,0': 'Ext L5 ET/NP',
    '25,1': 'Ext L6 IT',
    '25,2': 'Ext CLA-EPd',
    '26,0': 'Ext L6 IT',
    '26,1': 'Ext CLA-EPd',
    '26,2': 'Ext L6 CT',
    '26,3': 'Ext L6b',
    '27': 'Ext L2/3 IT',
    '28': 'Ext L2/3 PIR-ENTl',
    '29,0': 'Ext L4 IT',
    '29,1': 'Ext L5 IT',
    '29,2': 'Ext L4 IT',
    '30': 'BACKGROUND',
    '31,0': 'Ext L2/3 IT',
    '31,1': 'Ext L4 IT',
    '31,2': 'Ext L2/3 RSP IT',
    '32': 'Inh STR D2',
    '33': 'Inh STR D1',
    '34': 'Inh Pvalb',
    '35': 'Ext L2/3 RSP IT',
    '36': 'Inh DMH-LHA-MEA',
    '37': 'Ext L5 ET/NP',
    '38': 'Inh Sst',
    '39': 'Ext L2/3 PIR-ENTl',
    '40,0': 'Ext LA-BLA-BMA-PA',
    '40,1': 'Ext MEA',
    '40,2': 'Ext LA-BLA-BMA-PA',
    '41,0': 'Ext CA3',
    '41,1': 'Ext CA2',
    '41,2': 'Ext CA2',
    '42,0': 'Ext LH-VMH-LHA',
    '42,1': 'Ext LH-VMH-LHA',
    '42,2': 'Inh TU-ARH',
    '42,3': 'Ext LH-VMH-LHA',
    '43,0': 'Inh RT-ZI',
    '43,1': 'Inh Sst',
    '44': 'CHOR',
    '45': 'Ext TH',
    '46': 'Inh STR D2',
    '47,0': 'Inh STR D2',
    '47,1': 'Inh STR D1',
    '48': 'Ext CA1',
    '49': 'Ext L6b',
    '50': 'Ext DG',
    '51': 'Ext DG',
    '52': 'Ext MH',
}

In [None]:
adata[adata.obs['leiden_R'] == '53'].obs[['cell_type']].value_counts()

In [None]:
sc.set_figure_params(figsize = (16,6), dpi = 100)
sc.pl.embedding(adata, basis = 'spatial', color = 'leiden_R', size = 20, groups=['51'])

In [None]:
sc.pl.umap(adata, color = 'Mrc1', size = 20)

In [None]:
# Translate every 'leiden_R' cluster label into its annotated cell type.
cluster_labels = adata.obs['leiden_R'].astype(str)
unmapped_clusters = sorted(set(cluster_labels) - set(annotation_dict))
if unmapped_clusters:
    # Report all offending labels at once rather than failing with a bare
    # KeyError on the first one (clustering output can change between runs).
    raise KeyError(f"leiden_R clusters missing from annotation_dict: {unmapped_clusters}")
# .to_numpy() assigns positionally, like the list the original code built.
adata.obs['cell_type'] = cluster_labels.map(annotation_dict).to_numpy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', size = 20)

In [None]:
adata.write_h5ad("./LPS_annotated.h5ad")

In [None]:
import scanpy as sc
import numpy as np
import cosg
import pandas as pd

In [None]:
# Read every annotated Saline section, tag it with sample/condition, then merge.
saline_samples = ['Saline_2', 'Saline_4', 'Saline_9']
adatas = []

for sample in saline_samples:
    section = sc.read_h5ad(f"./data/{sample}_annotated.h5ad")
    # Suffix each cell name with the sample name (presumably to keep the
    # names unique once the sections are concatenated).
    section.obs_names = section.obs_names.astype(str) + f"_{sample}"
    section.obs['Sample'] = sample
    section.obs['Condition'] = 'Saline'
    adatas.append(section)

adata = sc.concat(adatas)
adata

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = ['Sample'], size = 10)

In [None]:
# Shift two of the sections along x (column 0 of the spatial coordinates).
# NOTE: the shift is applied in place, so re-running this cell moves them again.
spatial = adata.obsm['spatial']
for sample_name, x_offset in (('Saline_4', 20000), ('Saline_9', 41000)):
    in_sample = (adata.obs['Sample'] == sample_name).to_numpy()
    spatial[in_sample, 0] += x_offset

In [None]:
sc.set_figure_params(figsize = (16,6), dpi = 100)
sc.pl.embedding(adata, basis = 'spatial', color = ['Sample'], size = 10)

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = ['cell_type'], size = 10)

In [None]:
# Normalize using cell area: every value of a cell is divided by that cell's area.
from scipy.sparse import csr_matrix
cell_area = np.array(adata.obs["area"])
# X is transposed so the per-cell areas line up with the last axis, divided,
# then transposed back and stored as CSR.
# NOTE(review): if adata.X is a scipy sparse matrix this division presumably
# yields a dense matrix before it is re-wrapped — confirm memory use is acceptable.
# NOTE(review): adata.X is overwritten, so re-running this cell divides by area again.
adata.X = csr_matrix((adata.X.T / cell_area).T)

In [None]:
# Normalizing to library size
sc.pp.normalize_total(adata, target_sum=109)
# Logarithmize the data
sc.pp.log1p(adata)

In [None]:
sc.pl.embedding(adata, basis="spatial", color="Slc17a7", size = 10)

In [None]:
# PCA -> Harmony correction -> neighbour graph on the corrected PCs -> UMAP.
sc.tl.pca(adata)
# NOTE(review): Harmony is keyed on the obs column "transfer" rather than "Sample";
# confirm that column survives sc.concat and is the intended batch variable.
sc.external.pp.harmony_integrate(adata, ["transfer"], max_iter_harmony = 20)
# The neighbour graph is built from the 'X_pca_harmony' representation.
sc.pp.neighbors(adata, use_rep='X_pca_harmony')
sc.tl.umap(adata)

In [None]:
sc.tl.leiden(adata, resolution=2, flavor='igraph', n_iterations=-1)

In [None]:
sc.set_figure_params(figsize = (6,6), dpi = 100)
sc.pl.umap(adata, color=['Sample', 'leiden'], size = 2)

In [None]:
# Run COSG marker detection per cluster (10 genes per group) and write the
# resulting gene-name table to 'Saline_cosg.csv'.
# NOTE(review): the grouping column 'leiden_R' is only produced by the
# restrict_to sub-clustering cell that follows this one; on a fresh
# top-to-bottom run it may not exist yet — confirm the intended cell order.
groupby='leiden_R'
cosg.cosg(adata, key_added='cosg_leiden',
          # use_raw=False, layer='log1p', ## e.g., if you want to use the log1p layer in adata
          mu=1,
          expressed_pct=0.1,
          remove_lowly_expressed=True,
          n_genes_user=10,
          groupby=groupby)

import pandas as pd
pd.DataFrame(adata.uns["cosg_leiden"]["names"]).to_csv('Saline_cosg.csv')

In [None]:
# Iterative sub-clustering plan: (source column, cluster to split, resolution).
# Order matters — the first step derives 'leiden_R' from 'leiden', and later
# steps split clusters (e.g. '0,1', '21,0') that earlier steps created.
saline_subcluster_plan = [
    ('leiden', '0', 0.2),
    ('leiden_R', '0,1', 0.2),
    ('leiden_R', '1', 0.2),
    ('leiden_R', '6', 0.2),
    ('leiden_R', '12', 0.2),
    ('leiden_R', '16', 0.2),
    ('leiden_R', '19', 0.2),
    ('leiden_R', '21', 0.2),
    ('leiden_R', '21,0', 0.3),
    ('leiden_R', '26', 0.2),
    ('leiden_R', '31', 0.2),
    ('leiden_R', '47', 0.2),
    ('leiden_R', '49', 0.2),
    ('leiden_R', '52', 0.2),
    ('leiden_R', '53', 0.2),
    ('leiden_R', '58', 0.2),
]
for source_key, parent_cluster, res in saline_subcluster_plan:
    sc.tl.leiden(adata, restrict_to=(source_key, [parent_cluster]), resolution=res)

# Show the refined clustering on the UMAP.
sc.set_figure_params(figsize=(6, 6), dpi=100)
sc.pl.umap(adata, color='leiden_R', size=20)

In [None]:
annotation_dict = {
    '0,0': 'Astro-TE',
    '0,1,0': 'Astro-TE',
    '0,1,1': 'Astro-TE',
    '0,2': 'Astro-TE',
    '1,0': 'Mural',
    '1,1': 'Fibro',
    '2': 'Astro-NT',
    '3': 'Ext MH',
    '4': 'Oligo',
    '5': 'Endo',
    '6,0': 'Mural',
    '6,1': 'Mural',
    '7': 'Endo',
    '8': 'Oligo',
    '9': 'Oligo',
    '10': 'Oligo',
    '11': 'OPC',
    '12,0': 'Inh Lamp5',
    '12,1': 'Inh Vip',
    '12,2': 'Inh Vip',
    '13': 'Inh Sst',
    '14': 'Oligo',
    '15': 'Microglia',
    '16,0': 'Inh DMH-LHA-MEA',
    '16,1': 'Inh DMH-LHA-MEA',
    '16,2': 'Inh PAL',
    '17': 'Inh CEA',
    '18': 'Ext TH',
    '19,0': 'Ext LH-LHA',
    '19,1': 'Ext LH-LHA',
    '19,2': 'Ext LH-LHA',
    '19,3': 'Ext LH-LHA',
    '20': 'Astro-TE',
    '21,0,0': 'Ext L5 ET/NP',
    '21,0,1': 'Ext L5 ET/NP',
    '21,0,2': 'Ext L5 ET/NP',
    '21,1': 'Ext L6 IT',
    '21,2': 'Ext L4 RSP-ACA',
    '22': 'Inh PAL',
    '23': 'Inh RT-ZI',
    '24': 'Oligo',
    '25': 'Ext L2/3 IT',
    '26,0': 'Endo',
    '26,1': 'Mural',
    '26,2': 'Endo',
    '26,3': 'Microglia',
    '26,4': 'Astro-TE',
    '27': 'Ext L6 CT',
    '28': 'Ext MEA-COA',
    '29': 'Inh DMH-LHA-MEA',
    '30': 'Epen', 
    '31,0': 'Inh DMH-LHA-MEA',
    '31,1': 'Inh PAL',
    '32': 'OPC',
    '33': 'CHOR',
    '34': 'Microglia',
    '35': 'Ext L2/3 RSP IT',
    '36': 'Inh STR D1',
    '37': 'Ext TH',
    '38': 'Ext L2/3 PIR-ENTl',
    '39': 'Endo',
    '40': 'Inh Pvalb',
    '41': 'Ext L4 IT',
    '42': 'Ext L5 IT',
    '43': 'Ext L2/3 PIR-ENTl',
    '44': 'Ext L6b',
    '45': 'Ext LA-BLA-BMA-PA',
    '46': 'Ext L2/3 IT',
    '47,0': 'Ext L6 IT',
    '47,1': 'Ext L6 CT',
    '47,2': 'Ext L6b',
    '47,3': 'Ext L5 ET/NP',
    '48': 'Inh STR D2',
    '49,0': 'Inh RT-ZI',
    '49,1': 'Inh Sst',
    '50': 'Ext L5 ET/NP',
    '51': 'Ext L5 ET/NP',
    '52,0': 'Inh STR D1',
    '52,1': 'Inh STR D2',
    '53,0': 'Ext CA3',
    '53,1': 'Ext CA2',
    '53,2': 'Ext CA3',
    '54': 'Ext CLA-EPd',
    '55': 'Ext CA1',
    '56': 'Ext DG',
    '57': 'Ext DG',
    '58,0': 'Inh STR D1',
    '58,1': 'Inh STR D2'
}

In [None]:
adata[adata.obs['leiden_R'] == '19,3'].obs[['cell_type']].value_counts()

In [None]:
sc.set_figure_params(figsize = (16,6), dpi = 100)
sc.pl.embedding(adata, basis = 'spatial', color = 'leiden_R', size = 20, groups=['58'])

In [None]:
sc.pl.umap(adata, color = 'Mrc1', size = 20)

In [None]:
# Translate every 'leiden_R' cluster label into its annotated cell type.
cluster_labels = adata.obs['leiden_R'].astype(str)
unmapped_clusters = sorted(set(cluster_labels) - set(annotation_dict))
if unmapped_clusters:
    # Report all offending labels at once rather than failing with a bare
    # KeyError on the first one (clustering output can change between runs).
    raise KeyError(f"leiden_R clusters missing from annotation_dict: {unmapped_clusters}")
# .to_numpy() assigns positionally, like the list the original code built.
adata.obs['cell_type'] = cluster_labels.map(annotation_dict).to_numpy()

In [None]:
sc.set_figure_params(figsize = (16,6), dpi = 100)
sc.pl.embedding(adata, basis = 'spatial', color = 'cell_type', size = 20)

In [None]:
adata.write_h5ad("./Saline_annotated.h5ad")

In [None]:
import scanpy as sc
import numpy as np
import pandas as  pd

In [None]:
adata1 = sc.read_h5ad("LPS_annotated.h5ad")
adata1

In [None]:
adata2 = sc.read_h5ad("Saline_annotated.h5ad")
adata2

In [None]:
adata = sc.concat([adata1,adata2])
adata

In [None]:
adata.obsm['spatial'][adata.obs['Condition'] == 'LPS',1] -= 25000

In [None]:
sc.set_figure_params(figsize = (16,10), scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = ['Sample'], size = 10)

In [None]:
annotation_dict = {
    'Ext LH-LHA': 'Ext LH-VMH-LHA',
    'Ext MEA': 'Ext MEA-COA'
}

In [None]:
# Rename the cell types listed in annotation_dict; every other label is kept
# unchanged. dict.get replaces the former try / bare `except:`, which would
# also have silently swallowed unrelated errors (even KeyboardInterrupt).
new_cell_type = [annotation_dict.get(ct, ct) for ct in adata.obs['cell_type']]

In [None]:
adata.obs['cell_type'] = new_cell_type

In [None]:
adata = adata[adata.obs['cell_type'] != 'BACKGROUND'].copy()

In [None]:
sc.pl.embedding(adata, basis = 'spatial', color = ['cell_type'], size = 10)

In [None]:
adata.write_h5ad('TE_all.h5ad')

In [None]:
# Feature names plotted in the spatial and violin cells further down.
# Stored as strings: written as bare names they are evaluated as Python
# expressions (`TE_MMVL30-int` parses as a subtraction) and raise NameError
# when the notebook is run top to bottom.
te_features_of_interest = [
    'TE_RLTR6_Mm',
    'TE_L1MdV_I',
    'TE_MMVL30-int',
    'TE_RLTR4_MM-int',
]
te_features_of_interest

In [None]:
cdf = pd.read_table("TE_CellType_Color.tsv", header=None)
cdf

In [None]:
# Pair column 0 with column 1 of the table to build the palette lookup
# (presumably cell type -> colour, given how it is passed to `palette`).
color_dict = dict(zip(cdf[0], cdf[1]))

In [None]:
sc.set_figure_params(figsize = (16,11), dpi_save=600, scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = ['cell_type'], size = 3, palette=color_dict, save='cell_type_V2.pdf')

In [None]:
#sc.set_figure_params(figsize = (16,11), dpi_save=600)
sc.pl.embedding(adata, basis = 'spatial', color = ['TE_RLTR4_MM-int'], size = 10, cmap = 'Reds', vmax = 'p99.9', vmin = 1,
               save = "RLTR4_MM-int.pdf")

In [None]:
sc.set_figure_params(figsize = (16,11), dpi_save=600, scanpy = False)
sc.pl.embedding(adata, basis = 'spatial', color = ['TE_MMVL30-int'], size = 10, cmap = 'Reds', vmax = 'p99.9', vmin = 1,
               save = "MMVL30-int.pdf")

In [None]:
#sc.set_figure_params(figsize = (16,11), dpi_save=600)
sc.pl.embedding(adata, basis = 'spatial', color = ['TE_L1MdV_I'], size = 10, cmap = 'Reds', vmax = 'p99.9', vmin = 1,
               save = "L1MdV_I.pdf")

In [None]:
# sc.set_figure_params(figsize = (16,11), dpi_save=600)
sc.pl.embedding(adata, basis = 'spatial', color = ['TE_RLTR6_Mm'], size = 10, cmap = 'Reds', vmax = 'p99.9', vmin = 1,
               save = "RLTR6_Mm.pdf")

In [None]:
adata.uns['Sample_colors'] = ['#FF0000', '#EE0000', '#CD0000', '#0000FF', '#0000EE', '#0000CD']

In [None]:
adata.obs['Sample'].cat.categories

In [None]:
from pandas.api.types import CategoricalDtype

# One mapping drives both the category order and the colours, so the two
# cannot drift apart: Saline sections in blues first, then LPS sections in reds.
sample_palette = {
    'Saline_2': '#0000FF',
    'Saline_4': '#0000EE',
    'Saline_9': '#0000CD',
    'LPS_7': '#FF0000',
    'LPS_8': '#EE0000',
    'LPS_10': '#CD0000',
}
custom_order = list(sample_palette)
cat_dtype = CategoricalDtype(categories=custom_order, ordered=True)
adata.obs['Sample'] = adata.obs['Sample'].astype(cat_dtype)

# Colours are listed in the same order as the categories above.
adata.uns['Sample_colors'] = list(sample_palette.values())

In [None]:
sc.set_figure_params(figsize = (10,5), dpi_save=600, scanpy = False)
sc.pl.violin(adata[adata.obs['cell_type'] == 'Fibro'], keys ='TE_MMVL30-int', groupby='Sample', inner = 'box', stripplot=False,
             linewidth=1,
            save = 'Fibro_MMVL30-int.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Microglia'], keys ='TE_RLTR6_Mm', groupby='Sample', inner = 'box', stripplot=False,
            save = 'Microglia_RLTR6_Mm.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Endo'], keys ='TE_RLTR6_Mm', groupby='Sample', inner = 'box', stripplot=False,
            save = 'Endo_RLTR6_Mm.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Ext DG'], keys ='TE_L1MdV_I', groupby='Sample', inner = 'box', stripplot=False,
            save = 'DG_L1MdV_I.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Fibro'], keys ='TE_RLTR4_MM-int', groupby='Sample', inner = 'box', stripplot=False,
            save = 'Fibro_RLTR4_MM-int.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Endo'], keys ='TE_RLTR4_MM-int', groupby='Sample', inner = 'box', stripplot=False,
            save = 'Endo_RLTR4_MM-int.pdf')

In [None]:
sc.pl.violin(adata[adata.obs['cell_type'] == 'Microglia'], keys ='TE_RLTR4_MM-int', groupby='Sample', inner = 'box', stripplot=False,
            save = 'Microglia_RLTR4_MM-int.pdf')

In [None]:
sc.pl.violin(adata, keys ='total_counts', groupby='Sample', inner = 'box', stripplot=False)

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
import scanpy as sc
import numpy as np
import pandas as  pd

In [None]:
adata1 = sc.read_h5ad("LPS_annotated.h5ad")
adata1

In [None]:
adata2 = sc.read_h5ad("Saline_annotated.h5ad")
adata2

In [None]:
adata = sc.concat([adata1,adata2])
adata

In [None]:
df = adata.to_df(layer='counts_corrected')

In [None]:
# Copy the per-cell metadata columns used by the models below onto the expression table.
for meta_col in ('Sample', 'Condition', 'cell_type'):
    df[meta_col] = adata.obs[meta_col]

In [None]:
df

In [None]:
# MMVL30-int
# (Kept as a comment: as code, `MMVL30-int` is evaluated as a subtraction of an
# undefined name and raises NameError when the notebook is run top to bottom.)

In [None]:
tmp = df[df['cell_type'] == 'Fibro'].copy()
tmp

In [None]:
tmp = tmp.loc[:, ['TE_MMVL30-int', 'Sample', 'Condition']].copy()
tmp.columns = ['Exp', 'Sample', 'Condition']

In [None]:
tmp['Condition'] = tmp['Condition'].cat.set_categories(['Saline', 'LPS'])

In [None]:
model = smf.mixedlm(
    formula="Exp ~ Condition",  
    data=tmp,
    groups="Sample"                      
)

result = model.fit()
print(result.summary())

In [None]:
tmp = df[df['cell_type'] == 'Endo'].copy()
tmp

In [None]:
tmp = tmp.loc[:, ['TE_RLTR4_MM-int', 'Sample', 'Condition']].copy()
tmp.columns = ['Exp', 'Sample', 'Condition']

In [None]:
tmp['Condition'] = tmp['Condition'].cat.set_categories(['Saline', 'LPS'])

In [None]:
model = smf.mixedlm(
    formula="Exp ~ Condition",  
    data=tmp,
    groups="Sample"                      
)


result = model.fit()
print(result.summary())

In [None]:
tmp = df[df['cell_type'] == 'Microglia'].copy()
tmp

In [None]:
tmp = tmp.loc[:, ['TE_RLTR6_Mm', 'Sample', 'Condition']].copy()
tmp.columns = ['Exp', 'Sample', 'Condition']

In [None]:
tmp['Condition'] = tmp['Condition'].cat.set_categories(['Saline', 'LPS'])

In [None]:
model = smf.mixedlm(
    formula="Exp ~ Condition",  
    data=tmp,
    groups="Sample"                      
)


result = model.fit()
print(result.summary())

In [None]:
adata.obs['Sample'] = adata.obs['Sample'].cat.set_categories(['Saline_2', 'Saline_4', 'Saline_9', 'LPS_7', 'LPS_8', 'LPS_10'])

In [None]:
adata.uns['Sample_colors'] = ['#0000FF', '#0000EE', '#0000CD', '#FF0000', '#EE0000', '#CD0000']

In [None]:
sc.set_figure_params(figsize = (10,5), dpi_save=600, scanpy = False)
sc.pl.violin(adata, keys ='n_genes_by_counts', groupby='Sample', inner = 'box', stripplot=False,
            save = 'n_genes_by_counts.pdf')

In [None]:
sc.set_figure_params(figsize = (10,5), dpi_save=600, scanpy = False)
sc.pl.violin(adata, keys ='total_counts', groupby='Sample', inner = 'box', stripplot=False,
            save = 'total_counts.pdf')