In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib as mpl
import matplotlib.pyplot as plt
from colorspacious import cspace_converter

In [2]:
# Scanpy verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Print the versions of scanpy and its main dependencies for the record.
sc.logging.print_header()
# Default figure appearance for all scanpy plots in this notebook.
sc.settings.set_figure_params(
    dpi=80,
    facecolor='white',
)

  @numba.jit()
  @numba.jit()
  @numba.jit()


scanpy==1.9.3 anndata==0.9.2 umap==0.5.3 numpy==1.24.3 scipy==1.11.1 pandas==1.5.3 scikit-learn==1.1.3 statsmodels==0.14.0 python-igraph==0.10.6 pynndescent==0.5.10


  @numba.jit()


In [4]:
# Read the merged, preprocessed expression matrix and transpose it right away
# so that, as the var_names output further down shows, genes end up as
# variables (columns) and the other axis as observations.
adata = sc.read_csv('/vol/ExtraVol/MergedData_Preprocessed.csv').T

In [5]:
# Read the metadata table that accompanies the preprocessed expression matrix.
# NOTE(review): no index_col is passed, so `anno` gets a default integer index
# and any row-name column in the CSV becomes an ordinary column -- confirm this
# is intended before the table is attached to adata.obs.
anno = pd.read_csv('/vol/ExtraVol/MergedData_Preprocessed_Metadata.csv')

In [6]:
# Use the metadata table as the observation annotation of the AnnData object.
# NOTE(review): nothing here verifies that the rows of `anno` are in the same
# order as the observations of `adata`. Also, `anno` appears to carry a default
# integer index, which would presumably replace the original cell names in
# adata.obs_names -- confirm, and consider indexing `anno` by its cell-name
# column and aligning it to adata.obs_names first.
adata.obs = anno

In [7]:
# Display the AnnData summary (dimensions and obs columns) as a sanity check.
adata

AnnData object with n_obs × n_vars = 62874 × 40879
    obs: 'Unnamed: 0', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'Sample_ID', 'stage', 'percent.mt', 'CellName', 'doublet_finder', 'Doublet_classification'

In [8]:
# Inspect the variable names to confirm that genes ended up on the var axis.
adata.var_names

Index(['Gm26206', 'Xkr4', 'Gm53491', 'Rp1', 'Sox17', 'Gm22307', 'Gm37323',
       'Gm39586', 'Gm46082', 'Gm29874',
       ...
       'ND4', 'TrnH', 'TrnS2', 'TrnL2', 'ND5', 'ND6', 'TrnE', 'CYTB', 'TrnT',
       'TrnP'],
      dtype='object', length=40879)

In [9]:
# Save the assembled object (expression matrix plus metadata) in .h5ad format.
adata.write_h5ad("/vol/ExtraVol/MergedData_Preprocessed.h5ad") 

In [None]:
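### FINISH HERE ###
# NOTE(review): every cell below is unexecuted (In [None]) and relies on results
# (Leiden clusters, PAGA, a UMAP embedding, a 'cell_type' column) that no cell
# shown above computes -- confirm where they come from before running past this point.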

In [None]:
# Plot the PAGA graph with nodes coloured by Leiden cluster.
# NOTE(review): needs a 'leiden' column in adata.obs and, presumably, a prior
# PAGA computation; neither is produced in the cells shown above -- TODO confirm.
sc.pl.paga(adata, color=['leiden'])

In [None]:
# Store, under adata.uns['iroot'], the positional index of the first
# observation whose 'stage' is 'embryonic'.
is_embryonic = adata.obs['stage'] == 'embryonic'
adata.uns['iroot'] = np.flatnonzero(is_embryonic)[0]

In [None]:
# Compute diffusion pseudotime on adata.
# NOTE(review): presumably uses the root cell stored in adata.uns['iroot'] by
# the previous cell -- confirm against the scanpy documentation.
sc.tl.dpt(adata)

In [None]:
# Draw UMAP panels coloured by Leiden cluster and by 'dpt_pseudotime', with the
# legend placed on the data.
# NOTE(review): 'dpt_pseudotime' is presumably the column written by sc.tl.dpt
# in the previous cell -- TODO confirm.
sc.pl.umap(adata, color=['leiden', 'dpt_pseudotime'], legend_loc='on data')

In [None]:
# Rank genes for each Leiden cluster with the Wilcoxon method, using the
# current matrix rather than adata.raw (use_raw=False).
sc.tl.rank_genes_groups(
    adata,
    'leiden',
    method='wilcoxon',
    use_raw=False,
)
# Show the top 25 genes per cluster, each panel with its own y-axis.
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
)

In [None]:
# Pairwise comparison: rank genes for Leiden cluster '15' using cluster '12'
# as the reference (Wilcoxon method, not using adata.raw).
sc.tl.rank_genes_groups(
    adata,
    'leiden',
    method='wilcoxon',
    use_raw=False,
    groups=['15'],
    reference='12',
)
# Show the top 25 genes of the comparison with an independent y-axis.
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
)

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Adamdec1+_adult_fibroblasts'
clusters_of_interest = {'0'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Ogn+_adult_fibroblasts'
clusters_of_interest = {'1'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Jam2+_cells'
clusters_of_interest = {'18'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Sox6+Bmp5+Tcf4+_adult_fibroblasts'
clusters_of_interest = {'7'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Adamdec1+_AW_fibroblasts'
clusters_of_interest = {'5'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Ogn+_AW_fibroblasts'
clusters_of_interest = {'4'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Ncam1+_newborn_cells'
clusters_of_interest = {'2'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Jam2+_newborn_cells'
clusters_of_interest = {'8'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Igf2+_newborn_cells'
clusters_of_interest = {'9'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Colec10+_embryonic_cells'
clusters_of_interest = {'3', '10'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Sox6+Bmp5+Tcf4+_embryonic_cells'
clusters_of_interest = {'17'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Assign a cell-type label to the cells of the listed Leiden cluster(s).
# NOTE(review): assumes adata.obs['cell_type'] already exists as a categorical
# column; it is not created in the cells visible above -- TODO confirm.
label = 'Igf2+_embryonic_cells'
clusters_of_interest = {'6', '12', '15'}

# add_categories raises ValueError when the category is already present, so
# register the label only if it is missing. This keeps the cell re-runnable.
if label not in adata.obs['cell_type'].cat.categories:
    adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories([label])

# Boolean mask of the cells that belong to the clusters of interest.
mask = adata.obs['leiden'].isin(clusters_of_interest)

# Write the label into 'cell_type' for the selected cells only.
adata.obs.loc[mask, 'cell_type'] = label

In [None]:
# Plot the UMAP coloured by the 'cell_type' labels assigned in the cells above.
sc.pl.umap(adata, color=['cell_type'])

In [None]:
# Save the object, including the 'cell_type' labels, to a separate .h5ad file.
adata.write_h5ad("/vol/ExtraVol/Development_CellTypeAssigned.h5ad") 