In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as pl
import seaborn as sns
import bbknn
import scvelo as scv
import anndata
import leidenalg
import loompy
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
from matplotlib.pyplot import rc_context
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import os
import random

In [None]:
# Work from the shared Cell Ranger results folder and load the combined dataset.
os.chdir("Y:/Tolulope/Cellranger Results/ALL")
adata = sc.read_h5ad(filename='combined1.h5ad')
# Bare expression: show the AnnData summary as the cell output.
adata

In [None]:
# Per-sample count of non-null entries in every obs column.
adata.obs.groupby(by='Sample').count()

In [None]:
# Gene-level filter with a minimum of 10 cells per gene.
sc.pp.filter_genes(adata, min_cells=10)

In [None]:
# Keep a copy of X as it is at this point, before normalisation changes it.
adata.layers["counts"] = adata.X.copy()

In [None]:
# Normalise each cell to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Store the log-normalised object in .raw before X is scaled further down.
adata.raw = adata

adata.obs.head()

In [None]:
# Highly variable gene selection, computed per 'Sample' batch.
# NOTE(review): n_top_genes is passed together with the mean/dispersion cut-offs;
# confirm which of the two scanpy actually applies in the installed version.
sc.pp.highly_variable_genes(
    adata,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
    n_bins=20,
    batch_key='Sample',
    n_top_genes=2000,
)
sc.pl.highly_variable_genes(adata)

In [None]:
# Scale X (values clipped at 10) and run PCA with the ARPACK solver.
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')

# Variance explained by the first 50 PCs, on a log axis.
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True)

In [None]:
# Neighbour graph on 50 PCs with 30 neighbours.
# NOTE(review): bbknn.bbknn(...) in the next cell builds its own neighbour graph;
# confirm whether this call is still needed.
sc.pp.neighbors(adata, n_pcs=50, n_neighbors=30)

In [None]:
# BBKNN integration, using 'Sample' as the batch key.
bbknn.bbknn(adata, batch_key='Sample')

In [None]:
# UMAP embedding followed by Leiden clustering at resolution 0.8.
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=0.8)

In [None]:
#Visualize the data
# Main UMAP


In [None]:

# UMAP coloured by sample and by Leiden cluster, labels drawn on the data.
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # suffix appended to the saved file name
sc.pl.umap(
    adata,
    color=['Sample', 'leiden'],
    legend_loc='on data',
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=False,
    size=35,
    title='Clustering of Cells',
    save=f'umap_plot after integration sample and cell type{img_tag}.png',
)
pl.show()

In [None]:
# UMAP coloured by Leiden cluster only.
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # suffix appended to the saved file name
sc.pl.umap(
    adata,
    color=['leiden'],
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=False,
    size=35,
    title='Clustering of Cells',
    save=f'umap_plot{img_tag}.png',
)
pl.show()

In [None]:
# UMAP coloured by sample only, with the axes frame kept.
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # suffix appended to the saved file name
sc.pl.umap(
    adata,
    color=['Sample'],
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save=f'umap_plot after intesample{img_tag}.png',
)
pl.show()

In [None]:
#The Celltype annotations


In [None]:
def filter_anndata_by_gene(adata, gene, lim=1) -> "AnnData object":
    """Return the cells whose value for ``gene`` is strictly greater than ``lim``.

    Args:
        adata: 'anndata object' to filter; the values are read from
            ``adata[:, [gene]].X``.
        gene: 'str', the gene to filter by.
        lim: numeric threshold; a cell is kept only when its value is > ``lim``.

    Returns:
        ``adata[rows, :]`` where ``rows`` are the positions of the kept cells.
    """
    values = adata[:, [gene]].X
    # scipy sparse matrices have no ``flatten``; densify the single column first.
    if hasattr(values, "toarray"):
        values = values.toarray()
    values = np.asarray(values).ravel()

    rows = np.flatnonzero(values > lim)
    return adata[rows, :]

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
limits = {"Esam": 4, "Gpnmb": 4, "Pax7": 5}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# The thresholds were previously keyed by genes from another panel ('Esam',
# 'Pax7'), so looking up limits['Mpz'] raised KeyError.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Mpz": 3, "Kcnj8": 6, "Nkg7": 3}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# The middle threshold was keyed 'Myoz1', which is not in this panel, so
# looking up limits['Cd163'] raised KeyError.
# NOTE(review): the 2.1 threshold was re-assigned to 'Cd163' by position -- confirm.
limits = {"Myod1": 5, "Cd163": 2.1, "Ptprc": 1.6}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# Two thresholds were keyed 'Tnmd' and 'Sox10', which are not in this panel,
# so looking up limits['Pax5'] raised KeyError.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Pdgfra": 2, "Pax5": 9, "Igkc": 9.6}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# Two thresholds were keyed 'Tnmd' and 'Sox10', which are not in this panel,
# so looking up limits['Cthrc1'] raised KeyError.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Pdgfra": 2.2, "Cthrc1": 2, "S100a8": 2.6}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# 'Fmond' was a typo for 'Fmod' (the spelling used in the violin gene list), and
# the thresholds were keyed by 'Pdgfra'/'Sox10' instead of this panel's genes.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Fmod": 1, "Tnmd": 3, "Ttn": 3.6}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# The thresholds were keyed by 'Pdgfra'/'Tnmd'/'Sox10', none of which is in this
# panel, so looking up limits['Cd74'] raised KeyError.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Cd74": 2, "Ccl8": 3, "Tnf": 1.6}
genes = list(limits)

In [None]:
# Marker panel: threshold per gene; the plotting order follows the dict order.
# The thresholds were keyed by 'Pdgfra'/'Tnmd'/'Sox10', none of which is in this
# panel, so looking up limits['Cxcl3'] raised KeyError.
# NOTE(review): values were re-assigned to this panel's genes by position -- confirm.
limits = {"Cxcl3": 2, "H2-Ab1": 1, "Clec9a": 2.6}
genes = list(limits)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rc_context

# For each gene in `genes`, draw all cells in grey on the UMAP and overlay in red
# the cells whose value for that gene exceeds its threshold in `limits`.

# Threshold used when a gene has no entry in `limits`
# (same value as the default of filter_anndata_by_gene).
DEFAULT_MARKER_LIMIT = 1

with rc_context({'figure.figsize': (15, 4)}):
    print('[Starting umapping]')
    fig, axs = plt.subplots(ncols=3, nrows=1)
    axs = axs.flatten()

    # Background coordinates are the same for every panel.
    umap_x = adata.obsm['X_umap'].T[0]
    umap_y = adata.obsm['X_umap'].T[1]

    # zip() stops at the shorter sequence, so fewer than three genes no longer
    # raises IndexError.
    for ax, gene in zip(axs, genes):
        if gene not in limits:
            # Previously a KeyError; report the fallback instead of aborting.
            print(f"[warning] no threshold for {gene!r} in `limits`; using {DEFAULT_MARKER_LIMIT}")
        tmp = filter_anndata_by_gene(adata, gene, lim=limits.get(gene, DEFAULT_MARKER_LIMIT))

        ax.scatter(x=umap_x, y=umap_y, s=4, c="lightgrey")
        ax.scatter(x=tmp.obsm['X_umap'].T[0], y=tmp.obsm['X_umap'].T[1], s=1, c="red")
        ax.set_xlabel(gene)

        ax.set_xticks([])  # Hide x-axis ticks
        ax.set_yticks([])  # Hide y-axis ticks

        ax.set_aspect('equal')  # Set aspect ratio to equal

    # Blank any panel that has no gene assigned.
    for ax in axs[len(genes):]:
        ax.axis('off')

    # The figure directory may not exist yet if nothing has been saved so far.
    os.makedirs(sc.settings.figdir, exist_ok=True)
    fig.savefig(os.path.join(sc.settings.figdir, "marker_plot.png"))
    plt.show()


In [None]:
# Rank marker genes for every Leiden cluster (Wilcoxon) and plot the top 20 per cluster.
sc.tl.rank_genes_groups(adata, groupby='leiden', method='wilcoxon')
sc.pl.rank_genes_groups(
    adata,
    n_genes=20,
    sharey=False,
    save=f'rankgenese{img_tag}.png',
)

In [None]:
# Wide table: one '<cluster>_<statistic>' column per cluster and statistic.
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
stat_keys = ['names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges']
df = pd.DataFrame(
    {f'{group}_{key[:15]}': result[key][group] for group in groups for key in stat_keys}
)

# Long table of all clusters, keeping adjusted p < 0.05 and log fold change > 0.5.
markers = sc.get.rank_genes_groups_df(adata, None)
is_significant = markers.pvals_adj < 0.05
is_upregulated = markers.logfoldchanges > .5
markers = markers[is_significant & is_upregulated]
markers

In [None]:
# Destination of the filtered marker table (full path, including file name).
csv_file_path = 'Y:/Tolulope/Cellranger Results/Yori/markers_output.csv'

# Write the markers without the row index.
markers_df = pd.DataFrame(markers)
markers_df.to_csv(csv_file_path, index=False)

print(f"Output saved to {csv_file_path}")

In [None]:
# Marker rows for Pax7: which clusters list it after filtering.
markers.loc[markers['names'] == 'Pax7']

In [None]:
# Cell-type label for each Leiden cluster; list position i belongs to cluster "i".
_cluster_labels = [
    "Fibroadipogenic cells",           # 0
    "Residence Macrophages/APC",       # 1
    "Neutrophils",                     # 2
    "Pro-inflamatory Macrophages II",  # 3
    "Endothelial cells",               # 4
    "Monocytes/Macrophages",           # 5
    "Mature skeletal muscle",          # 6
    "Pro-inflamatory Macrophages I",   # 7
    "Lipid-Laden Macrophages",         # 8
    "MuSCs and Myoblasts",             # 9
    "MuSCs and progenitors",           # 10
    "Myo-Fibroblast",                  # 11
    "Anti-inflammatory Macrophages",   # 12
    "Classical Dendritic cells",       # 13
    "Pericyte cells",                  # 14
    "T-lymphocytes and NKCs",          # 15
    "Proliferation",                   # 16
    "Bcells",                          # 17
    "Tenocyte",                        # 18
    "Schwann cells",                   # 19
]
cell_type = {str(cluster_id): label for cluster_id, label in enumerate(_cluster_labels)}

In [None]:
# Translate each cell's Leiden cluster id into its cell-type label.
adata.obs['cell type'] = adata.obs['leiden'].map(cell_type)

In [None]:
# Main UMAP

In [None]:
# UMAP coloured by annotated cell type.
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # suffix appended to the saved file name
# The file name used to be f'umap_plot{img_tag}.png', the same as the Leiden
# UMAP saved earlier, so this figure overwrote it. It now has its own name.
sc.pl.umap(
    adata,
    color=['cell type'],
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=80,
    title='Clustering of Cells',
    save=f'umap_plot_celltype{img_tag}.png',
)
pl.show()

In [None]:
# Distinct sample names present in the dataset.
adata.obs['Sample'].unique().tolist()

In [None]:
# Define the map_condition function
def map_condition(x):
    """Translate a sample name into its hyphenated condition label.

    Args:
        x: sample name (str); matched by substring, in the order listed below.

    Returns:
        One of 'YSham-GFP', 'YSham-noGFP', 'YOV-GFP', 'YOV-noGFP', 'ASham-GFP',
        'ASham-noGFP', 'AOV-GFP' or 'AOV-noGFP'. Names matching none of the
        listed tokens fall through to 'AOV-noGFP', as before.
    """
    token_to_label = (
        ('YSham_GFP', 'YSham-GFP'),
        ('YSham_noGFP', 'YSham-noGFP'),
        ('YOV_GFP', 'YOV-GFP'),
        ('YOV_noGFP', 'YOV-noGFP'),
        ('ASham_GFP', 'ASham-GFP'),
        ('ASham_noGFP', 'ASham-noGFP'),
        ('AOV_GFP', 'AOV-GFP'),
    )
    for token, label in token_to_label:
        if token in x:
            return label
    # Fix: this used to return 'AOV_noGFP' (underscore), the only label that did
    # not match the hyphenated names used by the plotting code's sample order
    # and condition mapping, so the whole group became NaN downstream.
    return 'AOV-noGFP'

# Derive each cell's condition label from its sample name.
adata.obs['condition'] = adata.obs['Sample'].map(map_condition)

In [None]:
# Persist the integrated, annotated object; later cells re-read a file of this name.
adata.write_h5ad(filename='integrated OLD.h5ad')

In [None]:
# Per-condition cell-type composition, drawn as a stacked bar chart.

# Condition and cell type of every cell. Every other condition label is
# hyphenated, so the underscore spelling 'AOV_noGFP' is normalised here;
# otherwise it is missing from sample_order and pd.Categorical turns it into NaN.
obs_labels = pd.DataFrame({
    'condition': adata.obs['condition'].astype(str).replace({'AOV_noGFP': 'AOV-noGFP'}),
    'cell type': adata.obs['cell type'],
})

# Grouping by condition and cell type to get cell type counts
cell_type_counts = obs_labels.groupby(['condition', 'cell type']).size().reset_index(name='count')

# Adding total cell counts for each condition
total_cells = obs_labels.groupby('condition').size().reset_index(name='total_cells')
total_cells_dict = dict(zip(total_cells.condition, total_cells.total_cells))
cell_type_counts['total_cells'] = cell_type_counts['condition'].map(total_cells_dict)

# Ensure the total_cells column is numeric
cell_type_counts['total_cells'] = cell_type_counts['total_cells'].astype(int)

# Calculating the frequency of each cell type in each condition
cell_type_counts['frequency'] = cell_type_counts['count'] / cell_type_counts['total_cells']

# Define the order of samples in the plot
sample_order = ['YSham-GFP', 'YSham-noGFP', 'YOV-GFP', 'YOV-noGFP', 'ASham-GFP', 'ASham-noGFP', 'AOV-GFP', 'AOV-noGFP']

# Ensure 'condition' column is categorized to maintain the specified order
cell_type_counts['condition'] = pd.Categorical(cell_type_counts['condition'], categories=sample_order, ordered=True)

# Pivot the DataFrame for a stacked bar plot
stacked_data = cell_type_counts.pivot_table(index='condition', columns='cell type', values='frequency', fill_value=0)

# DataFrame.plot opens its own figure, so the size is passed here. A separate
# plt.figure(figsize=...) call only produced an extra empty figure.
ax = stacked_data.plot(kind='bar', stacked=True, colormap='tab20', figsize=(14, 10))

# Set plot properties
ax.set_xlabel('Sample')
ax.set_ylabel('Frequency')
ax.set_title('Cell Type Frequencies in Different Samples')
plt.tight_layout(rect=[0, 0, 0.85, 1])  # Adjust the layout to make room for the legend

# Rotate x-axis labels
plt.xticks(rotation=45)

# Move the legend to the right side
ax.legend(title='Cell Type', bbox_to_anchor=(1.02, 1), loc='upper left')

# Show the plot
plt.show()

In [None]:
# Convert each row of frequencies to percentages of that row's total, then export.
row_totals = stacked_data.sum(axis=1)
percentage_data = stacked_data.div(row_totals, axis=0) * 100
percentage_data.to_csv('cell_type_percentages new99.csv', index=True)


In [None]:
# The new color map


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import seaborn as sns
from matplotlib import rc_context
import matplotlib as mpl
import pandas as pd
import os
import scanpy as sc
from scipy.sparse import csr_matrix

# Switch to the YNO results folder and reload the integrated object from disk.
os.chdir("P:/Tolulope/Cellranger Results/YNO")
adata = sc.read_h5ad(filename='integrated OLD.h5ad')

# Figures saved through scanpy from here on go to FIGURE_2 (relative to the cwd).
sc.settings.figdir = "FIGURE_2"


In [None]:
# Fixed colour for every annotated cell type; keys are the labels in obs['cell type'].
celltypePalette = dict([
    ("Fibroadipogenic cells", '#9467bd'),
    ("Residence Macrophages/APC", '#17becf'),
    ("Neutrophils", '#c5b0d5'),
    ("Pro-inflamatory Macrophages II", '#1f77b4'),
    ("Endothelial cells", '#d62728'),
    ("Monocytes/Macrophages", '#8c564b'),
    ("Mature skeletal muscle", '#ff7f0e'),
    ("Pro-inflamatory Macrophages I", '#ffcc00'),
    ("Lipid-Laden Macrophages", '#ffbb78'),
    ("MuSCs and Myoblasts", '#2ca02c'),
    ("MuSCs and progenitors", '#98df8a'),
    ("Myo-Fibroblast", '#9edae5'),
    ("Anti-inflammatory Macrophages", '#e377c2'),
    ("Classical Dendritic cells", '#dbdb8d'),
    ("Pericyte cells", '#bcbd22'),
    ("T-lymphocytes and NKCs", '#c7c7c7'),
    ("Proliferation", '#ff9896'),
    ("Bcells", '#aec7e8'),
    ("Tenocyte", '#f7b6d2'),
    ("Schwann cells", '#c49c94'),
])

sc.set_figure_params(figsize=(10, 10))

# UMAP coloured by cell type using the palette above.
sc.pl.umap(
    adata,
    color='cell type',
    palette=celltypePalette,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=40,
    title='Clustering of Cells',
    save='umap_plot_color.png',
)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Fixed colour for every annotated cell type; keys are the labels in obs['cell type'].
celltypePalette = {
    "Fibroadipogenic cells": '#9467bd',
    "Residence Macrophages/APC": '#17becf',
    "Neutrophils": '#c5b0d5',
    "Pro-inflamatory Macrophages II": '#1f77b4',
    "Endothelial cells": '#d62728',
    "Monocytes/Macrophages": '#8c564b',
    "Mature skeletal muscle": '#ff7f0e',
    "Pro-inflamatory Macrophages I": '#ffcc00',
    "Lipid-Laden Macrophages": '#ffbb78',
    "MuSCs and Myoblasts": '#2ca02c',
    "MuSCs and progenitors": '#98df8a',
    "Myo-Fibroblast": '#9edae5',
    "Anti-inflammatory Macrophages": '#e377c2',
    "Classical Dendritic cells": '#dbdb8d',
    "Pericyte cells": '#bcbd22',
    "T-lymphocytes and NKCs": '#c7c7c7',
    "Proliferation": '#ff9896',
    "Bcells": '#aec7e8',
    "Tenocyte": '#f7b6d2',
    "Schwann cells": '#c49c94'
}

# Rebuild the per-condition frequencies from adata.obs. This cell previously
# selected a 'Sample' column from a cell_type_counts table that has no such
# column (KeyError) and relied on that table surviving from an earlier cell.
# The underscore label 'AOV_noGFP' is normalised to the hyphenated form used in
# sample_order so that group is not dropped.
condition_labels = adata.obs['condition'].astype(str).replace({'AOV_noGFP': 'AOV-noGFP'})
obs_labels = pd.DataFrame({'condition': condition_labels, 'cell type': adata.obs['cell type']})
cell_type_counts = obs_labels.groupby(['condition', 'cell type']).size().reset_index(name='count')
cell_type_counts['frequency'] = (
    cell_type_counts['count'] / cell_type_counts['condition'].map(condition_labels.value_counts())
)
# .copy() so the column assignment below does not write into a slice.
cell_type_counts = cell_type_counts[['condition', 'cell type', 'frequency']].copy()

# Define the order of samples in the plot
sample_order = ['YSham-GFP', 'YSham-noGFP', 'ASham-GFP', 'ASham-noGFP', 'YOV-GFP', 'YOV-noGFP', 'AOV-GFP', 'AOV-noGFP']

# Ensure the condition column in cell_type_counts is ordered correctly
cell_type_counts['condition'] = pd.Categorical(cell_type_counts['condition'], categories=sample_order, ordered=True)

# Filter data for the specified samples
filtered_data = cell_type_counts[cell_type_counts['condition'].isin(sample_order)]

# Pivot the DataFrame for a stacked bar plot
stacked_data = filtered_data.pivot_table(index='condition', columns='cell type', values='frequency', fill_value=0)

# Sort the index according to the sample_order
stacked_data = stacked_data.reindex(sample_order)

# DataFrame.plot opens its own figure, so the size is passed here. A separate
# plt.figure(figsize=...) call only produced an extra empty figure.
ax = stacked_data.plot(
    kind='bar',
    stacked=True,
    color=[celltypePalette[col] for col in stacked_data.columns],
    figsize=(12, 8),
)

# Set plot properties
plt.xticks(rotation=45)  # Tilt the x-axis labels by 45 degrees
ax.set_xlabel('Sample')
ax.set_ylabel('Frequency')
ax.legend(title='Cell type-subclusters', bbox_to_anchor=(1, 1))  # Anchor the legend at the top-right corner of the axes
ax.set_title('Cell Type Frequencies in Different Samples')
plt.tight_layout()
plt.savefig('bar4_plot.png')
# Show the plot
plt.show()


In [None]:
# Violin 


In [None]:

# Marker genes to show, one violin row per gene, grouped by cell type.
genes_for_violin = [
    'Pax7', 'Megf10', 'Chrdl2', 'Cthrc1', 'Col1a1', 'Dcn', 'Pdgfra', 'Cd74', 'H2-DMb1',
    'Clec9a', 'H2-Ab1', 'Cd163', 'Lyve1', 'Il10', 'Cd79a', 'Igkc', 'Ccl4', 'Ccl8', 'Cxcl3', 'Tnf', 'Cd3e',
    'Nkg7', 'S100a8', 'S100a9', 'Gpnmb', 'Fabp5',
]

# One subplot row per gene.
num_rows = len(genes_for_violin)
fig, axs = plt.subplots(nrows=num_rows, ncols=1, figsize=(10, 25))

# plt.subplots returns a bare Axes for a single row; wrap it so indexing works.
if num_rows == 1:
    axs = [axs]

last_row = num_rows - 1
for i, gene in enumerate(genes_for_violin):
    ax = axs[i]
    sc.pl.violin(
        adata, gene, groupby='cell type', split=False, stripplot=False, rotation=90,
        use_raw=False, scale="width", ax=ax, show=False, fill=True, inner="box",
    )

    if i == last_row:
        # Only the bottom panel carries the cell-type tick labels.
        ax.set_xlabel('Cell Type', fontsize=12)
        ax.tick_params(axis='x', labelrotation=90)
    else:
        ax.set_xticklabels([])
        ax.set_xlabel('')

    # Recolour any Polygon artists black.
    # NOTE(review): ax.collections normally holds Collection objects rather than
    # Polygon, so this loop may never match -- confirm it has any effect.
    for patch in ax.collections:
        if isinstance(patch, plt.Polygon):
            patch.set_edgecolor('black')
            patch.set_facecolor('black')

plt.tight_layout()

# Write the figure into scanpy's figure directory, then display it.
output_path = os.path.join(sc.settings.figdir, "youngagedviolinmarker_plot.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
# Separate the umaps


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import os

# Young sham MuSC populations: UMAPs plus a pie chart of cell-type composition.

# Set working directory
working_directory = r"P:\Tolulope\Cellranger Results\YNO"
os.chdir(working_directory)

# Read the AnnData object
adata = sc.read_h5ad('integrated OLD.h5ad')
print(adata)

# Set the directory where figures will be saved
sc.settings.figdir = r"P:\Tolulope\Cellranger Results\YNO\FIGURE_2"
os.makedirs(sc.settings.figdir, exist_ok=True)  # Ensure the directory exists

# Keep only the two young sham samples
subset_adata = adata[adata.obs['Sample'].isin(['YSham_GFP', 'YSham_noGFP'])]

# Cell types to keep and the colour used for each
Mesen_colors = {
    "MuSCs and Myoblasts": '#2ca02c',
    "MuSCs and progenitors": '#98df8a',
}

# Restrict to those cell types. .copy() turns the view into an independent
# object, so the obs column added below is not written onto a view.
adata_subset = subset_adata[subset_adata.obs['cell type'].isin(Mesen_colors.keys())].copy()


def map_condition(x):
    """Return the age group encoded in a sample name: 'Young', 'Aged' or 'Other'."""
    if 'YSham' in x:
        return 'Young'
    if 'ASham' in x:
        return 'Aged'
    return 'Other'  # Default for samples containing neither 'YSham' nor 'ASham'


# Apply the map_condition function
adata_subset.obs['condition'] = adata_subset.obs['Sample'].map(map_condition)

# UMAP by cell type. Every subset cell used to save 'UMAP_by_Cell_Type.png' and
# 'UMAP_by_Sample.png', so each overwrote the previous one; the names now carry
# the subset.
sc.pl.umap(
    adata_subset,
    color='cell type',
    palette=Mesen_colors,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save='UMAP_by_Cell_Type_Young_MuSC.png'  # Provide only the file name
)

# UMAP by sample
sc.pl.umap(
    adata_subset,
    color='Sample',
    title='UMAP by Sample',
    save='UMAP_by_Sample_Young_MuSC.png'  # Provide only the file name
)

# Cell-type counts for the young cells
subset_young = adata_subset[adata_subset.obs['condition'] == 'Young']
cell_type_counts_young = subset_young.obs['cell type'].value_counts()

# Create the pie chart
fig, ax = plt.subplots(figsize=(10, 10))
wedges, texts, autotexts = ax.pie(
    cell_type_counts_young,
    autopct='%1.1f%%',  # Display percentages on the pie chart
    colors=[Mesen_colors.get(cell_type, '#d3d3d3') for cell_type in cell_type_counts_young.index]
)

# Add legend on the right
ax.legend(
    wedges,
    cell_type_counts_young.index,
    title="Cell Types",
    loc="center left",
    bbox_to_anchor=(1.05, 0, 0.3, 1)  # Adjust position to prevent cutting off
)

plt.title('Cell Type Distribution in Young')
plt.tight_layout()  # Adjust layout to prevent cutting off
plt.savefig(os.path.join(sc.settings.figdir, 'Cell_Type_Distribution_in_Young MuSC.png'))  # Save pie chart
plt.close()  # Close the plot to avoid display issues


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import os

# Aged sham MuSC populations: UMAPs plus a pie chart of cell-type composition.

# Set working directory
working_directory = r"P:\Tolulope\Cellranger Results\YNO"
os.chdir(working_directory)

# Read the AnnData object
adata = sc.read_h5ad('integrated OLD.h5ad')
print(adata)

# Set the directory where figures will be saved
sc.settings.figdir = r"P:\Tolulope\Cellranger Results\YNO\FIGURE_2"
os.makedirs(sc.settings.figdir, exist_ok=True)  # Ensure the directory exists

# Keep only the two aged sham samples
subset_adata = adata[adata.obs['Sample'].isin(['ASham_GFP', 'ASham_noGFP'])]

# Cell types to keep and the colour used for each
Mesen_colors = {
    "MuSCs and Myoblasts": '#2ca02c',
    "MuSCs and progenitors": '#98df8a',
}

# Restrict to those cell types. .copy() turns the view into an independent
# object, so the obs column added below is not written onto a view.
adata_subset = subset_adata[subset_adata.obs['cell type'].isin(Mesen_colors.keys())].copy()


def map_condition(x):
    """Return the age group encoded in a sample name: 'Young', 'Aged' or 'Other'."""
    if 'YSham' in x:
        return 'Young'
    # Fix: 'ASham' samples used to fall through to 'Other', so the
    # condition == 'Aged' filter below matched nothing and the pie was empty.
    if 'ASham' in x:
        return 'Aged'
    return 'Other'  # Default for samples containing neither 'YSham' nor 'ASham'


# Apply the map_condition function
adata_subset.obs['condition'] = adata_subset.obs['Sample'].map(map_condition)

# UMAP by cell type. Every subset cell used to save 'UMAP_by_Cell_Type.png' and
# 'UMAP_by_Sample.png', so each overwrote the previous one; the names now carry
# the subset.
sc.pl.umap(
    adata_subset,
    color='cell type',
    palette=Mesen_colors,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save='UMAP_by_Cell_Type_Aged_MuSC.png'  # Provide only the file name
)

# UMAP by sample
sc.pl.umap(
    adata_subset,
    color='Sample',
    title='UMAP by Sample',
    save='UMAP_by_Sample_Aged_MuSC.png'  # Provide only the file name
)

# Cell-type counts for the aged cells
subset_aged = adata_subset[adata_subset.obs['condition'] == 'Aged']
cell_type_counts_aged = subset_aged.obs['cell type'].value_counts()

# Create the pie chart
fig, ax = plt.subplots(figsize=(10, 10))
wedges, texts, autotexts = ax.pie(
    cell_type_counts_aged,
    autopct='%1.1f%%',  # Display percentages on the pie chart
    colors=[Mesen_colors.get(cell_type, '#d3d3d3') for cell_type in cell_type_counts_aged.index]
)

# Add legend on the right
ax.legend(
    wedges,
    cell_type_counts_aged.index,
    title="Cell Types",
    loc="center left",
    bbox_to_anchor=(1.05, 0, 0.3, 1)  # Adjust position to prevent cutting off
)

plt.title('Cell Type Distribution in Aged')
plt.tight_layout()  # Adjust layout to prevent cutting off
# NOTE(review): the output file name still says "Young" although it holds the
# aged samples; it is kept unchanged so existing references keep working.
plt.savefig(os.path.join(sc.settings.figdir, 'Cell_Type_Distribution_in_Young MuSC2.png'))  # Save pie chart
plt.close()  # Close the plot to avoid display issues


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import os

# Young sham fibroblast-lineage populations: UMAPs plus a pie chart of composition.

# Set working directory
working_directory = r"P:\Tolulope\Cellranger Results\YNO"
os.chdir(working_directory)

# Read the AnnData object
adata = sc.read_h5ad('integrated OLD.h5ad')
print(adata)

# Set the directory where figures will be saved
sc.settings.figdir = r"P:\Tolulope\Cellranger Results\YNO\FIGURE_2"
os.makedirs(sc.settings.figdir, exist_ok=True)  # Ensure the directory exists

# Keep only the two young sham samples
subset_adata = adata[adata.obs['Sample'].isin(['YSham_GFP', 'YSham_noGFP'])]

# Cell types to keep and the colour used for each
Mesen_colors = {
    "Fibroadipogenic cells": '#9467bd',
    "Myo-Fibroblast": '#9edae5',
    "Tenocyte": '#f7b6d2',
}

# Restrict to those cell types. .copy() turns the view into an independent
# object, so the obs column added below is not written onto a view.
adata_subset = subset_adata[subset_adata.obs['cell type'].isin(Mesen_colors.keys())].copy()


def map_condition(x):
    """Return the age group encoded in a sample name: 'Young', 'Aged' or 'Other'."""
    if 'YSham' in x:
        return 'Young'
    if 'ASham' in x:
        return 'Aged'
    return 'Other'  # Default for samples containing neither 'YSham' nor 'ASham'


# Apply the map_condition function
adata_subset.obs['condition'] = adata_subset.obs['Sample'].map(map_condition)

# UMAP by cell type. Every subset cell used to save 'UMAP_by_Cell_Type.png' and
# 'UMAP_by_Sample.png', so each overwrote the previous one; the names now carry
# the subset.
sc.pl.umap(
    adata_subset,
    color='cell type',
    palette=Mesen_colors,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save='UMAP_by_Cell_Type_Young_FAPs.png'  # Provide only the file name
)

# UMAP by sample
sc.pl.umap(
    adata_subset,
    color='Sample',
    title='UMAP by Sample',
    save='UMAP_by_Sample_Young_FAPs.png'  # Provide only the file name
)

# Cell-type counts for the young cells
subset_young = adata_subset[adata_subset.obs['condition'] == 'Young']
cell_type_counts_young = subset_young.obs['cell type'].value_counts()

# Create the pie chart
fig, ax = plt.subplots(figsize=(10, 10))
wedges, texts, autotexts = ax.pie(
    cell_type_counts_young,
    autopct='%1.1f%%',  # Display percentages on the pie chart
    colors=[Mesen_colors.get(cell_type, '#d3d3d3') for cell_type in cell_type_counts_young.index]
)

# Add legend on the right
ax.legend(
    wedges,
    cell_type_counts_young.index,
    title="Cell Types",
    loc="center left",
    bbox_to_anchor=(1.05, 0, 0.3, 1)  # Adjust position to prevent cutting off
)

plt.title('Cell Type Distribution in Young')
plt.tight_layout()  # Adjust layout to prevent cutting off
plt.savefig(os.path.join(sc.settings.figdir, 'Cell_Type_Distribution_in_Young FAPS1.png'))  # Save pie chart
plt.close()  # Close the plot to avoid display issues


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import os

# Aged sham fibroblast-lineage populations: UMAPs plus a pie chart of composition.

# Set working directory
working_directory = r"P:\Tolulope\Cellranger Results\YNO"
os.chdir(working_directory)

# Read the AnnData object
adata = sc.read_h5ad('integrated OLD.h5ad')
print(adata)

# Set the directory where figures will be saved
sc.settings.figdir = r"P:\Tolulope\Cellranger Results\YNO\FIGURE_2"
os.makedirs(sc.settings.figdir, exist_ok=True)  # Ensure the directory exists

# Keep only the two aged sham samples
subset_adata = adata[adata.obs['Sample'].isin(['ASham_GFP', 'ASham_noGFP'])]

# Cell types to keep and the colour used for each
Mesen_colors = {
    "Fibroadipogenic cells": '#9467bd',
    "Myo-Fibroblast": '#9edae5',
    "Tenocyte": '#f7b6d2',
}

# Restrict to those cell types. .copy() turns the view into an independent
# object, so the obs column added below is not written onto a view.
adata_subset = subset_adata[subset_adata.obs['cell type'].isin(Mesen_colors.keys())].copy()


def map_condition(x):
    """Return the age group encoded in a sample name: 'Young', 'Aged' or 'Other'."""
    if 'YSham' in x:
        return 'Young'
    # Fix: 'ASham' samples used to fall through to 'Other', so the
    # condition == 'Aged' filter below matched nothing and the pie was empty.
    if 'ASham' in x:
        return 'Aged'
    return 'Other'  # Default for samples containing neither 'YSham' nor 'ASham'


# Apply the map_condition function
adata_subset.obs['condition'] = adata_subset.obs['Sample'].map(map_condition)

# UMAP by cell type. Every subset cell used to save 'UMAP_by_Cell_Type.png' and
# 'UMAP_by_Sample.png', so each overwrote the previous one; the names now carry
# the subset.
sc.pl.umap(
    adata_subset,
    color='cell type',
    palette=Mesen_colors,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save='UMAP_by_Cell_Type_Aged_FAPs.png'  # Provide only the file name
)

# UMAP by sample
sc.pl.umap(
    adata_subset,
    color='Sample',
    title='UMAP by Sample',
    save='UMAP_by_Sample_Aged_FAPs.png'  # Provide only the file name
)

# Cell-type counts for the aged cells
subset_aged = adata_subset[adata_subset.obs['condition'] == 'Aged']
cell_type_counts_aged = subset_aged.obs['cell type'].value_counts()

# Create the pie chart
fig, ax = plt.subplots(figsize=(10, 10))
wedges, texts, autotexts = ax.pie(
    cell_type_counts_aged,
    autopct='%1.1f%%',  # Display percentages on the pie chart
    colors=[Mesen_colors.get(cell_type, '#d3d3d3') for cell_type in cell_type_counts_aged.index]
)

# Add legend on the right
ax.legend(
    wedges,
    cell_type_counts_aged.index,
    title="Cell Types",
    loc="center left",
    bbox_to_anchor=(1.05, 0, 0.3, 1)  # Adjust position to prevent cutting off
)

plt.title('Cell Type Distribution in Aged')
plt.tight_layout()  # Adjust layout to prevent cutting off
# NOTE(review): the output file name still says "Young" although it holds the
# aged samples; it is kept unchanged so existing references keep working.
plt.savefig(os.path.join(sc.settings.figdir, 'Cell_Type_Distribution_in_Young FAPS.png'))  # Save pie chart
plt.close()  # Close the plot to avoid display issues


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import os

# Young sham immune populations: UMAPs plus a pie chart of cell-type composition.

# Set working directory
working_directory = r"P:\Tolulope\Cellranger Results\YNO"
os.chdir(working_directory)

# Read the AnnData object
adata = sc.read_h5ad('integrated OLD.h5ad')
print(adata)

# Set the directory where figures will be saved
sc.settings.figdir = r"P:\Tolulope\Cellranger Results\YNO\FIGURE_2"
os.makedirs(sc.settings.figdir, exist_ok=True)  # Ensure the directory exists

# Keep only the two young sham samples
subset_adata = adata[adata.obs['Sample'].isin(['YSham_GFP', 'YSham_noGFP'])]

# Cell types to keep and the colour used for each
Mesen_colors = {
    "Residence Macrophages/APC": '#17becf',
    "Neutrophils": '#c5b0d5',
    "Pro-inflamatory Macrophages II": '#1f77b4',
    "Monocytes/Macrophages": '#8c564b',
    "Pro-inflamatory Macrophages I": '#ffcc00',
    "Lipid-Laden Macrophages": '#ffbb78',
    "Anti-inflammatory Macrophages": '#e377c2',
    "Classical Dendritic cells": '#dbdb8d',
    "T-lymphocytes and NKCs": '#c7c7c7',
    "Bcells": '#aec7e8',
}

# Restrict to those cell types. .copy() turns the view into an independent
# object, so the obs column added below is not written onto a view.
adata_subset = subset_adata[subset_adata.obs['cell type'].isin(Mesen_colors.keys())].copy()


def map_condition(x):
    """Return the age group encoded in a sample name: 'Young', 'Aged' or 'Other'."""
    if 'YSham' in x:
        return 'Young'
    if 'ASham' in x:
        return 'Aged'
    return 'Other'  # Default for samples containing neither 'YSham' nor 'ASham'


# Apply the map_condition function
adata_subset.obs['condition'] = adata_subset.obs['Sample'].map(map_condition)

# UMAP by cell type. Every subset cell used to save 'UMAP_by_Cell_Type.png' and
# 'UMAP_by_Sample.png', so each overwrote the previous one; the names now carry
# the subset.
sc.pl.umap(
    adata_subset,
    color='cell type',
    palette=Mesen_colors,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=35,
    title='Clustering of Cells',
    save='UMAP_by_Cell_Type_Young_Immune.png'  # Provide only the file name
)

# UMAP by sample
sc.pl.umap(
    adata_subset,
    color='Sample',
    title='UMAP by Sample',
    save='UMAP_by_Sample_Young_Immune.png'  # Provide only the file name
)

# Cell-type counts for the young cells
subset_young = adata_subset[adata_subset.obs['condition'] == 'Young']
cell_type_counts_young = subset_young.obs['cell type'].value_counts()

# Create the pie chart
fig, ax = plt.subplots(figsize=(10, 10))
wedges, texts, autotexts = ax.pie(
    cell_type_counts_young,
    autopct='%1.1f%%',  # Display percentages on the pie chart
    colors=[Mesen_colors.get(cell_type, '#d3d3d3') for cell_type in cell_type_counts_young.index]
)

# Add legend on the right
ax.legend(
    wedges,
    cell_type_counts_young.index,
    title="Cell Types",
    loc="center left",
    bbox_to_anchor=(1.05, 0, 0.3, 1)  # Adjust position to prevent cutting off
)

plt.title('Cell Type Distribution in Young')
plt.tight_layout()  # Adjust layout to prevent cutting off
plt.savefig(os.path.join(sc.settings.figdir, 'Cell_Type_Distribution_in_Young.png'))  # Save pie chart
plt.close()  # Close the plot to avoid display issues


In [None]:
# Merged cell type map


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import seaborn as sns
from matplotlib import rc_context
import matplotlib as mpl
import pandas as pd
import os
import scanpy as sc
from scipy.sparse import csr_matrix

# Switch to the YNO results folder and reload the integrated object from disk.
os.chdir("P:/Tolulope/Cellranger Results/YNO")
adata = sc.read_h5ad(filename='integrated OLD.h5ad')

# Figures saved through scanpy go to FIGURE_2 (relative to the cwd).
sc.settings.figdir = "FIGURE_2"


In [None]:
# Coarse cell-type label for each Leiden cluster; list position i belongs to cluster "i".
_merged_labels = [
    "Fibroadipogenic cells",   # 0
    "Immune cells",            # 1
    "Immune cells",            # 2
    "Immune cells",            # 3
    "Endothelial cells",       # 4
    "Immune cells",            # 5
    "Mature skeletal muscle",  # 6
    "Immune cells",            # 7
    "Immune cells",            # 8
    "MuSCs and progenitors",   # 9
    "MuSCs and progenitors",   # 10
    "Fibroadipogenic cells",   # 11
    "Immune cells",            # 12
    "Immune cells",            # 13
    "Pericyte cells",          # 14
    "Immune cells",            # 15
    "Proliferation",           # 16
    "Immune cells",            # 17
    "Tenocyte",                # 18
    "Schwann cells",           # 19
]
merged_cell_type = {str(cluster_id): label for cluster_id, label in enumerate(_merged_labels)}

In [None]:
# Replace the per-cluster annotation with the merged labels.
adata.obs['cell type'] = adata.obs['leiden'].map(merged_cell_type)

In [None]:
# UMAP coloured by the merged cell types (default colours).
sc.set_figure_params(figsize=(10, 10))
img_tag = "_color"  # suffix appended to the saved file name
# The file name used to be f'umap_plot{img_tag}.png', the same file the detailed
# cell-type UMAP is saved to in this figure directory, so it was overwritten.
sc.pl.umap(
    adata,
    color=['cell type'],
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=80,
    title='Clustering of Cells',
    save=f'umap_plot_merged{img_tag}.png',
)
plt.show()


In [None]:
# The new color palette for the merged map


In [None]:
# Fixed colour for every merged cell type; keys are the merged labels in obs['cell type'].
celltypePalette = {
    "Fibroadipogenic cells": '#9467bd',
    "Immune cells": '#17becf',
    "Endothelial cells": '#d62728',
    "Mature skeletal muscle": '#ff7f0e',
    "MuSCs and progenitors": '#2ca02c',
    "Pericyte cells": '#bcbd22',
    "Proliferation": '#ff9896',
    "Tenocyte": '#f7b6d2',
    "Schwann cells": '#c49c94'
}

# Set figure parameters
sc.set_figure_params(figsize=(10, 10))

# UMAP coloured by merged cell type using the palette above. The file name used
# to be 'umap_plot_color.png', the same file the detailed cell-type UMAP is
# saved to in this figure directory, so it was overwritten.
sc.pl.umap(
    adata,
    color='cell type',
    palette=celltypePalette,
    legend_fontsize=12,
    legend_fontoutline=2,
    frameon=True,
    size=40,
    title='Clustering of Cells',
    save='umap_plot_merged_palette_color.png',
)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Collapse the GFP / noGFP split so that four groups remain: YSham, YOV, ASham, AOV.
condition_mapping = {
    'YSham-GFP': 'YSham', 'YSham-noGFP': 'YSham',
    'YOV-GFP': 'YOV', 'YOV-noGFP': 'YOV',
    'ASham-GFP': 'ASham', 'ASham-noGFP': 'ASham',
    'AOV-GFP': 'AOV', 'AOV-noGFP': 'AOV',
    # The cell that assigns obs['condition'] emits this group with an underscore;
    # without this entry those cells map to NaN and are left out of 'AOV'.
    'AOV_noGFP': 'AOV',
}

# Apply the new mapping to keep four distinct categories
adata.obs['merged_condition'] = adata.obs['condition'].map(condition_mapping)

# Report any condition label the mapping does not cover instead of dropping it silently.
unmapped_conditions = adata.obs.loc[adata.obs['merged_condition'].isna(), 'condition'].unique().tolist()
if unmapped_conditions:
    print(f"[warning] conditions without a merged group (excluded from the plot): {unmapped_conditions}")

# Grouping by merged condition and cell type to get counts
cell_type_counts = adata.obs.groupby(['merged_condition', 'cell type']).size().reset_index(name='count')

# Adding total cell counts for each merged condition
total_cells = adata.obs.groupby('merged_condition').size().reset_index(name='total_cells')
total_cells_dict = dict(zip(total_cells.merged_condition, total_cells.total_cells))
cell_type_counts['total_cells'] = cell_type_counts['merged_condition'].map(total_cells_dict)

# Ensure the total_cells column is numeric
cell_type_counts['total_cells'] = cell_type_counts['total_cells'].astype(int)

# Calculating the frequency of each cell type in each condition
cell_type_counts['frequency'] = cell_type_counts['count'] / cell_type_counts['total_cells']

# Pivot the DataFrame for a stacked bar plot
stacked_data = cell_type_counts.pivot_table(index='merged_condition', columns='cell type', values='frequency', fill_value=0)

# Ensure correct order of conditions
stacked_data = stacked_data.reindex(['YSham', 'ASham', 'YOV', 'AOV'])

# DataFrame.plot opens its own figure, so the size is passed here. A separate
# plt.figure(figsize=...) call only produced an extra empty figure.
ax = stacked_data.plot(
    kind='bar',
    stacked=True,
    color=[celltypePalette[cell] for cell in stacked_data.columns],
    figsize=(10, 7),
)

# Set plot properties
ax.set_xlabel('Sample')
ax.set_ylabel('Frequency')
ax.set_title('Cell Type Frequencies in Different Samples')
plt.xticks(rotation=45)

# Move the legend to the right side
ax.legend(title='Cell Type', bbox_to_anchor=(1.05, 1), loc='upper left')

# Adjust layout for clarity
plt.tight_layout()

# Show the plot
plt.show()



In [None]:
# Same stacked bar chart, ordered young-first, with age-group tick labels.
stacked_data = stacked_data.reindex(['YSham', 'YOV', 'ASham', 'AOV'])

# DataFrame.plot opens its own figure, so the size is passed here. A separate
# plt.figure(figsize=...) call only produced an extra empty figure.
ax = stacked_data.plot(
    kind='bar',
    stacked=True,
    color=[celltypePalette[cell] for cell in stacked_data.columns],
    figsize=(10, 7),
)

# Set plot properties
ax.set_xlabel('Sample')
ax.set_ylabel('Frequency')
ax.set_title('Cell Type Frequencies in Different Samples')

# Derive each tick label from the bar it sits under. The hard-coded list
# ['Young', 'Aged', 'Young', 'Aged'] did not match the YSham, YOV, ASham, AOV
# order above, so YOV was labelled 'Aged' and ASham 'Young'.
age_labels = ['Young' if group.startswith('Y') else 'Aged' for group in stacked_data.index]
plt.xticks(ticks=range(len(age_labels)), labels=age_labels, rotation=45)

# Move the legend to the right side
ax.legend(title='Cell Type', bbox_to_anchor=(1.05, 1), loc='upper left')

# Adjust layout for clarity
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
# Violin 


In [None]:

# Marker genes to show, one violin row per gene, grouped by merged cell type.
genes_for_violin = [
    'Esam', 'Pecam1', 'Pdgfra', 'Ptprc', 'Cd68', 'Kcnj8', 'Mpz', 'Ttn', 'Ckm',
    'Pax7', 'Vcam1', 'Top2a', 'Birc5', 'Fmod',
]

# One subplot row per gene.
num_rows = len(genes_for_violin)
fig, axs = plt.subplots(nrows=num_rows, ncols=1, figsize=(10, 15))

# plt.subplots returns a bare Axes for a single row; wrap it so indexing works.
if num_rows == 1:
    axs = [axs]

last_row = num_rows - 1
for i, gene in enumerate(genes_for_violin):
    ax = axs[i]
    sc.pl.violin(
        adata, gene, groupby='cell type', split=False, stripplot=False, rotation=90,
        use_raw=False, scale="width", ax=ax, show=False, fill=True, inner="box",
    )

    if i == last_row:
        # Only the bottom panel carries the cell-type tick labels.
        ax.set_xlabel('Cell Type', fontsize=12)
        ax.tick_params(axis='x', labelrotation=90)
    else:
        ax.set_xticklabels([])
        ax.set_xlabel('')

    # Recolour any Polygon artists black.
    # NOTE(review): ax.collections normally holds Collection objects rather than
    # Polygon, so this loop may never match -- confirm it has any effect.
    for patch in ax.collections:
        if isinstance(patch, plt.Polygon):
            patch.set_edgecolor('black')
            patch.set_facecolor('black')

plt.tight_layout()

# Write the figure into scanpy's figure directory, then display it.
output_path = os.path.join(sc.settings.figdir, "youngviolinmarker_plot.png")
plt.savefig(output_path, dpi=300)
plt.show()
