In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as pl
import seaborn as sns
import bbknn
import scvelo as scv
import anndata
import leidenalg
import loompy
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
from matplotlib.pyplot import rc_context
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import os
import random

In [None]:
# Set working directory; every relative path used below (input h5ad, saved
# figures, CSV/h5ad output) resolves against it.
os.chdir("Y:/Tolulope/Cellranger Results/ALL")
# Load the combined AnnData object from the working directory.
adata = sc.read_h5ad('combined2.h5ad')
# Bare expression: shows the AnnData summary as the cell output.
adata

In [None]:
# Per-sample count of non-null values in every obs column (quick look at sample sizes).
adata.obs.groupby('Sample').count()

In [None]:
# Drop genes detected in fewer than 10 cells.
sc.pp.filter_genes(adata, min_cells = 10)
# Snapshot X into a layer before normalisation overwrites it.
# NOTE(review): presumably X still holds raw counts here -- confirm against how combined2.h5ad was written.
adata.layers['counts'] = adata.X.copy()

In [None]:
# Library-size normalise each cell to 10,000 total counts, then log1p-transform.
sc.pp.normalize_total(adata, target_sum = 1e4)
sc.pp.log1p(adata)
# Freeze the log-normalised matrix in `.raw`. A copy is stored so that `.raw`
# cannot share its matrix with `adata.X`, which is scaled in place further down;
# without the copy a dense X would leak the scaled values into `.raw`.
adata.raw = adata.copy()
adata.obs.head()

In [None]:
# Select the top 2000 highly variable genes, ranked per batch ('Sample').
# Only `n_top_genes` drives the selection: scanpy ignores the mean/dispersion
# cut-offs whenever `n_top_genes` is given, so they are not passed here.
sc.pp.highly_variable_genes(adata, n_top_genes=2000, n_bins=20, batch_key='Sample')
sc.pl.highly_variable_genes(adata)

In [None]:
# Scale the expression matrix in place, clipping values above 10.
# From here on adata.X holds scaled values, not log-normalised expression.
sc.pp.scale(adata, max_value=10) 
# PCA with the ARPACK solver.
sc.tl.pca(adata, svd_solver='arpack')
# Plot PCA variance ratio (log scale, first 50 components)
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=50)

In [None]:
# Build the neighbour graph with BBKNN (batch-balanced kNN across 'Sample').
# BBKNN is a replacement for sc.pp.neighbors and writes the same neighbour
# slots, so a preceding sc.pp.neighbors call would be computed and then
# discarded; its n_neighbors/n_pcs settings never reached the final graph.
bbknn.bbknn(adata, batch_key="Sample")

In [None]:
# Compute the UMAP embedding, then Leiden clusters at resolution 0.7
# (written to adata.obs['leiden'], which later cells read).
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution = 0.7)

In [None]:
#Visualize the data
# Main UMAP


In [None]:

# UMAP coloured by sample and by Leiden cluster, labels drawn on the embedding.
img_tag = "_color"  # suffix appended to the saved figure name
sc.set_figure_params(figsize=(10, 10))
sc.pl.umap(
    adata,
    color=['Sample', 'leiden'],
    title='Clustering of Cells',
    size=35,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontoutline=2,
    save=f'umap_plot after integration sample and cell type{img_tag}.png',
)
pl.show()

In [None]:
# UMAP coloured by Leiden cluster only (legend beside the plot, no frame).
img_tag = "_color"  # suffix appended to the saved figure name
sc.set_figure_params(figsize=(10, 10))
sc.pl.umap(
    adata,
    color=['leiden'],
    title='Clustering of Cells',
    size=35,
    frameon=False,
    legend_fontsize=12,
    legend_fontoutline=2,
    save=f'umap_plot{img_tag}.png',
)
pl.show()

In [None]:
# UMAP coloured by sample only, drawn with a frame.
img_tag = "_color"  # suffix appended to the saved figure name
sc.set_figure_params(figsize=(10, 10))
sc.pl.umap(
    adata,
    color=['Sample'],
    title='Clustering of Cells',
    size=35,
    frameon=True,
    legend_fontsize=12,
    legend_fontoutline=2,
    save=f'umap_plot after intesample{img_tag}.png',
)
pl.show()

In [None]:
def filter_anndata_by_gene(adata, gene, lim=1) -> "anndata.AnnData":
    """Return the cells whose value for ``gene`` in ``adata.X`` exceeds ``lim``.

    Args:
        adata: 'anndata object' to filter; it is indexed as
            ``adata[:, [gene]]`` and ``adata[rows, :]``.
        gene: 'str', the gene (variable name) to filter by.
        lim: threshold; only cells with a value strictly greater than
            ``lim`` are kept. Defaults to 1.

    Returns:
        The row subset of ``adata`` for the cells that pass the threshold
        (an empty subset when no cell passes).
    """
    expr = adata[:, [gene]].X
    # Sparse matrices expose no .flatten(); densify this single column first.
    if hasattr(expr, "toarray"):
        expr = expr.toarray()
    keep = np.flatnonzero(np.asarray(expr).ravel() > lim)

    return adata[keep, :]

In [None]:
# Per-gene thresholds for the marker overlay; insertion order fixes the panel order.
limits = dict(Esam=4, Gpnmb=4, Pax7=5)
# Genes to plot, in the same order as their thresholds.
genes = list(limits)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rc_context

# Marker overlay: one UMAP panel per gene, all cells in grey with the cells
# above that gene's threshold drawn on top in red. adata.X is the scaled
# matrix at this point, so the thresholds in `limits` are in scaled units.
with rc_context({'figure.figsize': (15, 4)}):
    print('[Starting umapping]')
    # One panel per gene, so the layout follows `genes` instead of a fixed 3;
    # squeeze=False keeps `axs` an array even when there is a single gene.
    fig, axs = plt.subplots(ncols=len(genes), nrows=1, squeeze=False)
    axs = axs.flatten()

    all_xy = adata.obsm['X_umap']

    for ax, gene in zip(axs, genes):
        tmp = filter_anndata_by_gene(adata, gene, lim=limits[gene])
        hit_xy = tmp.obsm['X_umap']

        ax.scatter(x=all_xy[:, 0], y=all_xy[:, 1], s=4, c="lightgrey")
        ax.scatter(x=hit_xy[:, 0], y=hit_xy[:, 1], s=1, c="red")
        ax.set_xlabel(gene)

        ax.set_xticks([])  # Hide x-axis ticks
        ax.set_yticks([])  # Hide y-axis ticks

        ax.set_aspect('equal')  # Set aspect ratio to equal

    # plt.savefig does not create missing directories.
    os.makedirs(sc.settings.figdir, exist_ok=True)
    plt.savefig(os.path.join(sc.settings.figdir, "marker_plot.png"))
    plt.show()


In [None]:
#subcluster cluster 11

In [None]:
# --- Subcluster Leiden cluster '11' ------------------------------------------
# restrict_to needs a categorical column.
adata.obs['leiden'] = adata.obs['leiden'].astype("category")

# With restrict_to, scanpy writes a full-length labelling to `key_added`:
# cells outside cluster '11' keep their original label and cells inside it
# are relabelled '11,<k>'. That column already is the refined clustering, so
# no relabelling or per-cluster copy-back is needed.
sc.tl.leiden(adata, restrict_to=('leiden', ['11']), resolution=0.05, key_added='sub_leiden')

# Keep the refined labels as plain strings under 'leiden_R' and drop the
# temporary column.
adata.obs['leiden_R'] = adata.obs['sub_leiden'].astype(str)
del adata.obs['sub_leiden']

# Plot updated UMAP
sc.pl.umap(adata, color='leiden_R', add_outline=True, legend_loc='on data',
           legend_fontsize=12, legend_fontoutline=2, frameon=False, size=35,
           title='Leiden Clustering with Subclustered 11')

# --- Marker genes for the refined clustering ---------------------------------
sc.tl.rank_genes_groups(adata, 'leiden_R', method='wilcoxon')
# Plot the top 20 ranked genes (img_tag is set by the UMAP cells above).
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False, save=f'rankgenese{img_tag}.png')

# Long-format table of all groups, filtered to significant up-regulated genes.
markers = sc.get.rank_genes_groups_df(adata, None)
markers = markers[(markers.pvals_adj < 0.05) & (markers.logfoldchanges > .5)]
markers_df = markers

# Define the full path including the file name and extension
# NOTE(review): absolute path on a different drive than the working directory -- confirm it is intended.
csv_file_path = 'P:/Tolulope/Cellranger Results/Fry-Murach 4d OV aged/markers_outputnew.csv'

# Save the DataFrame to a CSV file
markers_df.to_csv(csv_file_path, index=False)

print(f"Output saved to {csv_file_path}")

In [None]:
# Rows of the filtered marker table for Pax7 (one per cluster where it passed the filter).
markers.loc[markers['names'] == 'Pax7']

In [None]:
# Annotation for each refined cluster label in adata.obs['leiden_R'].
# Keys must match the labels exactly; subclusters of cluster 11 use the
# comma form ('11,0', '11,1').
cell_type = {
    "0": "Mature skeletal muscle",
    "1": "Neutrophils",
    "2": "Residence Macrophages/APC I",
    "3": "Monocytes/Macrophages",
    "4": "Fibroadipogenic cells",
    "5": "Anti-inflammatory Macrophages II",
    "6": "Myo-Fibroblast",
    "7": "Lipid-Laden Macrophages",
    "8": "Residence Macrophages/APC II",
    "9": "Classical dendritic cell",
    "10": "Endothelial cells",
    "11,0": "Proliferation",  # subcluster 0 of cluster 11
    "11,1": "MuSCs",         # subcluster 1 of cluster 11
    "12": "Interferons",
    "13": "Anti-inflammatory Macrophages I",
    "14": "T-lymphocytes and NKCs",
    "15": "Tenocyte",
    "16": "Fibroblast I",
    "17": "Fibroblast II",
    "18": "Residence Macrophages/APC III",
}

# Map the 'leiden_R' column to 'cell type'. Any label missing from the
# dict (e.g. a further subcluster such as '11,2') becomes NaN.
adata.obs['cell type'] = adata.obs['leiden_R'].map(cell_type)


In [None]:
# UMAP coloured by annotated cell type.
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # Specify your image tag here
# Saved under its own name: the Leiden-cluster UMAP above is saved as
# f'umap_plot{img_tag}.png', and reusing that name would overwrite it.
sc.pl.umap(adata, color=['cell type'],  legend_fontsize=12, legend_fontoutline=2, frameon=True, size=80, title='Clustering of Cells', save=f'umap_plot_cell_type{img_tag}.png')
pl.show()

In [None]:


# Define the map_condition function
def map_condition(x):
    """Return 'tam' when ``x`` contains an uppercase 'T', otherwise 'veh'."""
    return 'tam' if 'T' in x else 'veh'

import matplotlib.pyplot as plt

# Treatment condition derived from the sample name.
adata.obs['condition'] = adata.obs.Sample.map(map_condition)

# Total cells per sample. size() counts rows directly, so the result does
# not depend on any particular obs column being present or free of NaN.
num_tot_cells = adata.obs.groupby('Sample', observed=True).size().to_dict()

# Cells per (sample, condition, cell type); observed=True keeps only the
# combinations that actually occur.
cell_type_counts = (
    adata.obs
    .groupby(['Sample', 'condition', 'cell type'], observed=True)
    .size()
    .reset_index(name='n_cells')
)

# Fraction of each sample's cells that belong to each cell type.
cell_type_counts['total_cells'] = cell_type_counts.Sample.map(num_tot_cells).astype(int)
cell_type_counts['frequency'] = cell_type_counts.n_cells / cell_type_counts.total_cells

# Per-cell-type frequency distribution across samples, split by condition.
plt.figure(figsize = (14,9))

ax = sns.boxplot(data = cell_type_counts, x = 'cell type', y = 'frequency', hue = 'condition')

plt.xticks(rotation = 35, rotation_mode = 'anchor', ha = 'right')
plt.savefig('bar_plot.png')
plt.show()


In [None]:
# Count table with one row per group and one column per cell type.
# NOTE(review): `stacked_data` was not defined anywhere in this notebook, so
# this cell failed on a fresh kernel. It is rebuilt here as a Sample x
# cell-type table -- confirm that 'Sample' (rather than 'condition') is the
# intended grouping.
stacked_data = pd.crosstab(adata.obs['Sample'], adata.obs['cell type'])
# Calculate percentages for each cell type within each group (rows sum to 100)
percentage_data = stacked_data.div(stacked_data.sum(axis=1), axis=0) * 100
# Export percentages to a CSV file
percentage_data.to_csv('cell_type_percentages new99.csv', index=True)


In [None]:
# Persist the annotated object; the relative path resolves against the current working directory.
# NOTE(review): the working directory was set to "Y:/Tolulope/Cellranger Results/ALL", while the
# later cells read this same filename after changing to a "P:/..." directory -- confirm the file
# ends up where those cells expect it.
adata.write_h5ad('integrated satellite cell depleted.h5ad')

In [None]:
# The new color map


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import seaborn as sns
from matplotlib import rc_context
import matplotlib as mpl
import pandas as pd
import os
import scanpy as sc
from scipy.sparse import csr_matrix

# Set working directory and read the AnnData object. This replaces the
# in-memory `adata` with the saved, annotated object.
os.chdir("P:/Tolulope/Cellranger Results/Fry-Murach 4d OV aged")
adata = sc.read_h5ad('integrated satellite cell depleted.h5ad')
# Set the output directory for saving plots (relative to the working directory)
sc.settings.figdir = "FIGURE_2"


In [None]:
# Fixed colour for every annotated cell type; keys must match the values of
# adata.obs['cell type'] exactly.
celltypePalette = {
    "Mature skeletal muscle": '#ff7f0e',
    "Neutrophils": '#c5b0d5',
    "Residence Macrophages/APC I": '#17becf',
    "Monocytes/Macrophages": '#8c564b',
    "Fibroadipogenic cells": '#9467bd',
    "Anti-inflammatory Macrophages II": '#e377c2', 
    "Myo-Fibroblast": '#9edae5',
    "Lipid-Laden Macrophages": '#ffbb78',
    "Residence Macrophages/APC II": '#1f77b4',
    "Classical dendritic cell": '#dbdb8d',
    "Endothelial cells": '#d62728',
    "Proliferation": '#ff9896',
    "MuSCs": '#2ca02c',
    "Interferons": '#98df8a',
    "Anti-inflammatory Macrophages I": '#ffcc00',
    "T-lymphocytes and NKCs": '#c7c7c7',
    "Tenocyte": '#f7b6d2',
    "Fibroblast I": '#bcbd22',
    "Fibroblast II": '#aec7e8', 
    "Residence Macrophages/APC III": '#c49c94'
}

# Set figure parameters
sc.set_figure_params(figsize=(10, 10))

# Plot UMAP with specified color palette for cell types; the figure is
# saved into sc.settings.figdir.
sc.pl.umap(adata, color='cell type', palette=celltypePalette, legend_fontsize=12, legend_fontoutline=2, frameon=True, size=40, title='Clustering of Cells', save='umap_plot_color.png')
plt.show()


In [None]:
# Violin 

In [None]:

# Specify the gene names for plotting (one violin row per gene)
genes_for_violin = ['Pax7', 'Megf10', 'Chrdl2', 'Cthrc1', 'Col1a1', 'Dcn', 'Ebf1', 'Pdgfra', 'Cd74', 'H2-DMb1', 
                    'Clec9a', 'H2-Ab1', 'Cd163', 'Lyve1', 'Tnf', 'Ccl4','Cd68', 'Cd14', 'Cxcl3', 'Fn1', 'Cd3e', 
                    'Nkg7', 'S100a8', 'S100a9', 'Gpnmb', 'Fabp5']

# One subplot row per gene
num_rows = len(genes_for_violin)

# squeeze=False always returns a 2-D Axes array, so a single gene needs no
# special case; ravel() gives one Axes per row.
fig, axs = plt.subplots(nrows=num_rows, ncols=1, figsize=(10, 30), squeeze=False)
axs = axs.ravel()

# Loop through genes and plot each one, grouped by cell type.
# use_raw=False plots values from adata.X rather than adata.raw.
for i, gene in enumerate(genes_for_violin):
    sc.pl.violin(adata, gene, groupby='cell type',  split=False, stripplot=False, rotation=90, 
                 use_raw=False, scale="width", ax=axs[i], show=False, fill=True, inner="box")

    # Hide x-axis labels for all but the last plot
    if i != num_rows - 1:
        axs[i].set_xticklabels([])
        axs[i].set_xlabel('')
    else:
        axs[i].set_xlabel('Cell Type', fontsize=12)
        axs[i].tick_params(axis='x', labelrotation=90)  # Rotate x-axis labels

    # The inner box keeps seaborn's default styling. A recolouring loop over
    # axs[i].collections that tested for plt.Polygon would never match,
    # because collections hold Collection artists rather than Polygon patches.

# Adjust spacing
plt.tight_layout()

# Save the figure (plt.savefig does not create missing directories)
os.makedirs(sc.settings.figdir, exist_ok=True)
output_path = os.path.join(sc.settings.figdir, "SCDEPLETEDviolinmarker_plot.png")
plt.savefig(output_path, dpi=300)

# Display all plots
plt.show()


In [None]:
# Merged cell type map

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import seaborn as sns
from matplotlib import rc_context
import matplotlib as mpl
import pandas as pd
import os
import scanpy as sc
from scipy.sparse import csr_matrix

# Set working directory and read the AnnData object again, discarding any
# in-memory changes made since the file was written (the 'cell type'
# column is reassigned from 'leiden_R' right after this).
os.chdir("P:/Tolulope/Cellranger Results/Fry-Murach 4d OV aged")
adata = sc.read_h5ad('integrated satellite cell depleted.h5ad')
# Set the output directory for saving plots (relative to the working directory)
sc.settings.figdir = "FIGURE_2"


In [None]:
# Merged cell type mapping: same 'leiden_R' keys as the detailed annotation,
# with several clusters collapsed into the shared labels "Immune cells" and
# "FAPs".
merged_cell_type = {
    "0": "Mature skeletal muscle",
    "1": "Immune cells",
    "2": "Immune cells",
    "3": "Immune cells",
    "4": "FAPs",
    "5": "Immune cells",
    "6": "FAPs",
    "7": "Immune cells",
    "8": "Immune cells",
    "9": "Immune cells",
    "10": "Endothelial cells",
    "11,0": "Proliferation",
    "11,1": "MuSCs",
    "12": "Interferons",
    "13": "Immune cells",
    "14": "Immune cells",
    "15": "Tenocyte",
    "16": "FAPs",
    "17": "FAPs",
    "18": "Immune cells",
    
}

# Overwrite 'cell type' with the merged labels derived from 'leiden_R';
# labels missing from the dict become NaN.
adata.obs['cell type'] = adata.obs['leiden_R'].map(merged_cell_type)


In [None]:
# UMAP coloured by the merged cell types (default colours).
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # Specify your image tag here
# Saved under its own name: the detailed cell-type UMAP is written to the
# same figure directory as 'umap_plot_color.png', which
# f'umap_plot{img_tag}.png' would overwrite.
sc.pl.umap(adata, color=['cell type'],  legend_fontsize=12, legend_fontoutline=2, frameon=True, size=80, title='Clustering of Cells', save=f'umap_plot_merged{img_tag}.png')
plt.show()


In [None]:
# The new color palette for the merged cell type map


In [None]:
# Fixed colour for every merged cell type; keys must match the values of
# adata.obs['cell type'] exactly.
celltypePalette = {
    "FAPs": '#9467bd',
    "Immune cells": '#17becf',
    "Endothelial cells": '#d62728',
    "Mature skeletal muscle": '#ff7f0e',
    "MuSCs": '#2ca02c',
    "Interferons": '#bcbd22',
    "Proliferation": '#ff9896',
    "Tenocyte": '#f7b6d2',
}

# Set figure parameters
sc.set_figure_params(figsize=(10, 10))

# Plot UMAP with specified color palette for the merged cell types. Saved
# under its own name so it does not overwrite the other UMAPs written to
# this figure directory as 'umap_plot_color.png'.
sc.pl.umap(adata, color='cell type', palette=celltypePalette, legend_fontsize=12, legend_fontoutline=2, frameon=True, size=40, title='Clustering of Cells', save='umap_plot_merged_palette_color.png')
plt.show()
