In [None]:
import scanpy as sc
import seaborn as sns
import numpy as np
import pandas as pd
import random
import os
from matplotlib.pyplot import rc_context
sc.set_figure_params(dpi=100)

import warnings
# Silence the three noisy warning categories for the whole session.
for _category in (FutureWarning, UserWarning, RuntimeWarning):
    warnings.simplefilter("ignore", _category)

# All relative paths below (input .h5ad, exported CSV/PNG files) resolve
# against this working directory.
os.chdir("P:/Tolulope/Cellranger Results/YNO")

# Load the AnnData object and show its summary as the cell output.
adata = sc.read_h5ad('integrated OLD.h5ad')
adata

In [None]:
# Keep only the cells annotated as endothelial. The .copy() detaches the
# subset from `adata`, so later edits do not touch the original object.
is_endothelial = adata.obs['cell type'] == "Endothelial cells"
cell_subset = adata[is_endothelial].copy()
from scipy.sparse import csr_matrix
# Hold the expression matrix in CSR sparse form.
cell_subset.X = csr_matrix(cell_subset.X)

# Pseudobulk WITH pseudo-replicates.
# For every sample, the cells are shuffled, split into N_PSEUDOREPS groups,
# and the raw counts (layers['counts']) of each group are summed into a
# single one-row AnnData. All rows are then concatenated into `pb`.
N_PSEUDOREPS = 4      # change here for the number of pseudo-replicates desired
PSEUDOREP_SEED = 0    # fixed seed so the random split is identical on every run

# A dedicated generator keeps the split reproducible without touching the
# global `random` state.
_rng = random.Random(PSEUDOREP_SEED)

pbs = []
for sample in cell_subset.obs.Sample.unique():
    samp_cell_subset = cell_subset[cell_subset.obs['Sample'] == sample]

    # Randomly partition this sample's cell barcodes.
    # NOTE(review): a sample with fewer than N_PSEUDOREPS cells yields empty
    # groups here; confirm every sample is large enough.
    indices = list(samp_cell_subset.obs_names)
    _rng.shuffle(indices)
    indices = np.array_split(np.array(indices), N_PSEUDOREPS)

    for i, pseudo_rep in enumerate(indices):
        # Sum the raw counts straight from the 'counts' layer (no need to
        # overwrite .X on a view) and hand AnnData a plain (1, n_genes) array.
        summed_counts = samp_cell_subset[pseudo_rep].layers['counts'].sum(axis=0)
        rep_adata = sc.AnnData(X=np.asarray(summed_counts).reshape(1, -1),
                               var=samp_cell_subset.var[[]])

        # Row name is "<sample>_<replicate index>"; the heatmap cells select
        # columns by these names.
        rep_adata.obs_names = [sample + '_' + str(i)]
        rep_adata.obs['condition'] = samp_cell_subset.obs['condition'].iloc[0]
        rep_adata.obs['replicate'] = i

        pbs.append(rep_adata)

pb = sc.concat(pbs)
pb.obs


from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
# Count table for PyDESeq2: one row per pseudo-replicate, one column per gene.
# The rows are indexed by the pseudobulk sample names so that `counts` and the
# metadata (`pb.obs`) share the same index instead of relying on row order.
counts = pd.DataFrame(pb.X, index=pb.obs_names, columns=pb.var_names)

# Create DeseqDataSet object with pseudo-replicates.
# NOTE(review): 'replicate' is the arbitrary index of the random split within
# each sample; confirm it is meant to be a design factor.
dds = DeseqDataSet(
    counts=counts,
    metadata=pb.obs,
    design_factors=['condition', 'replicate'],
)
# Drop genes that have no counts in any pseudo-replicate, then fit the model.
sc.pp.filter_genes(dds, min_cells=1)
dds.deseq2()


In [None]:
#DGE Analysis of Young Endothelial Cells (YSham-GFP vs. YSham-noGFP) at Rest

In [None]:
# Test the 'condition' factor: YSham-GFP versus YSham-noGFP.
# NOTE(review): the levels are written with hyphens here while the
# pseudo-replicate names used for the heatmap contain underscores; presumably
# PyDESeq2 rewrites underscores in factor levels -- confirm.
stat_res = DeseqStats(dds, contrast=('condition', 'YSham-GFP', 'YSham-noGFP'))
stat_res.summary()

# Work on a copy so the table stored on `stat_res` is not modified in place.
de = stat_res.results_df.copy()
# Gene names (the index) exported as an upper-case 'Symbol' column.
de['Symbol'] = de.index.str.upper()

# Rank genes by the test statistic, highest first, and save with the gene
# names as the index.
de_sorted = de.sort_values('stat', ascending=False)
de_sorted.to_csv('YSham Endothelial cell1.csv')


In [None]:
# Thresholds used to call a gene significant for this contrast.
# NOTE(review): this section uses baseMean >= 100 while the ASham and
# YSham-vs-ASham sections use 80 -- confirm the difference is intended.
min_base_mean = 100
max_padj = 0.05
min_abs_log2fc = 0.5

# Copy so the results table stored on `stat_res` is left untouched.
res = stat_res.results_df.copy()
res['Symbol'] = res.index

# Drop lowly expressed genes first, then keep genes passing both the
# adjusted p-value and the absolute log2 fold-change cut-offs.
res = res[res.baseMean >= min_base_mean]
sigs = res[(res.padj < max_padj) & (res.log2FoldChange.abs() > min_abs_log2fc)]
sigs


In [None]:
import numpy as np
import seaborn as sns
# log1p-transform the normalised counts and keep the result as a layer on `dds`.
log1p_counts = np.log1p(dds.layers['normed_counts'])
dds.layers['log1p'] = log1p_counts

# Restrict to the significant genes and build a genes x samples table.
dds_sigs = dds[:, sigs.index]
grapher = pd.DataFrame(
    data=dds_sigs.layers['log1p'].T,
    index=dds_sigs.var_names,
    columns=dds_sigs.obs_names,
)

In [None]:
import matplotlib.pyplot as plt

# Pseudo-replicate columns ("<sample>_<replicate index>") of the two groups
# compared in this contrast.
conditions_of_interest = ['YSham_GFP_0', 'YSham_GFP_1', 'YSham_GFP_2', 'YSham_GFP_3',
                          'YSham_noGFP_0', 'YSham_noGFP_1', 'YSham_noGFP_2', 'YSham_noGFP_3']

# Subset grapher DataFrame for significant genes and conditions of interest
grapher_subset = grapher.loc[sigs.index.intersection(grapher.index), conditions_of_interest]

# sns.clustermap creates its own figure, so the size is passed to it directly
# (a separate plt.figure() would only leave an empty extra figure behind).
# z_score=0 standardises each row (gene).
clustermap = sns.clustermap(grapher_subset, z_score=0, cmap='RdYlBu_r', figsize=(10, 8))
clustermap.ax_heatmap.set_xlabel('Samples')
clustermap.ax_heatmap.set_ylabel('Genes')
# Title the whole figure; plt.title would attach to whichever axes is current.
clustermap.ax_heatmap.figure.suptitle(
    'Heatmap of Significant Genes between YSham_GFP vs YSham_noGFP', y=1.02)
# Save through the grid object so the clustermap figure is the one written.
clustermap.savefig('clustermap_YSham.png')
plt.show()


In [None]:
#DGE Analysis of Aged Endothelial Cells (ASham-GFP vs. ASham-noGFP) at Rest

In [None]:
# Test the 'condition' factor: ASham-GFP versus ASham-noGFP.
# NOTE(review): the levels are written with hyphens here while the
# pseudo-replicate names used for the heatmap contain underscores; presumably
# PyDESeq2 rewrites underscores in factor levels -- confirm.
stat_res = DeseqStats(dds, contrast=('condition', 'ASham-GFP', 'ASham-noGFP'))
stat_res.summary()

# Work on a copy so the table stored on `stat_res` is not modified in place.
de = stat_res.results_df.copy()
# Gene names (the index) exported as an upper-case 'Symbol' column.
de['Symbol'] = de.index.str.upper()

# Rank genes by the test statistic, highest first, and save with the gene
# names as the index.
de_sorted = de.sort_values('stat', ascending=False)
de_sorted.to_csv('ASham Endothelial cell2.csv')


In [None]:
# Thresholds used to call a gene significant for this contrast.
# NOTE(review): the YSham section uses baseMean >= 100 while this one uses
# 80 -- confirm the difference is intended.
min_base_mean = 80
max_padj = 0.05
min_abs_log2fc = 0.5

# Copy so the results table stored on `stat_res` is left untouched.
res = stat_res.results_df.copy()
res['Symbol'] = res.index

# Drop lowly expressed genes first, then keep genes passing both the
# adjusted p-value and the absolute log2 fold-change cut-offs.
res = res[res.baseMean >= min_base_mean]
sigs = res[(res.padj < max_padj) & (res.log2FoldChange.abs() > min_abs_log2fc)]
sigs


In [None]:
import numpy as np
import seaborn as sns
# log1p-transform the normalised counts and keep the result as a layer on `dds`.
log1p_counts = np.log1p(dds.layers['normed_counts'])
dds.layers['log1p'] = log1p_counts

# Restrict to the significant genes and build a genes x samples table.
dds_sigs = dds[:, sigs.index]
grapher = pd.DataFrame(
    data=dds_sigs.layers['log1p'].T,
    index=dds_sigs.var_names,
    columns=dds_sigs.obs_names,
)

In [None]:
import matplotlib.pyplot as plt

# Pseudo-replicate columns ("<sample>_<replicate index>") of the two groups
# compared in this contrast.
conditions_of_interest = ['ASham_GFP_0', 'ASham_GFP_1', 'ASham_GFP_2', 'ASham_GFP_3',
                          'ASham_noGFP_0', 'ASham_noGFP_1', 'ASham_noGFP_2', 'ASham_noGFP_3']

# Subset grapher DataFrame for significant genes and conditions of interest
grapher_subset = grapher.loc[sigs.index.intersection(grapher.index), conditions_of_interest]

# sns.clustermap creates its own figure, so the size is passed to it directly
# (a separate plt.figure() would only leave an empty extra figure behind).
# z_score=0 standardises each row (gene).
clustermap = sns.clustermap(grapher_subset, z_score=0, cmap='RdYlBu_r', figsize=(10, 8))
clustermap.ax_heatmap.set_xlabel('Samples')
clustermap.ax_heatmap.set_ylabel('Genes')
# Title the whole figure; plt.title would attach to whichever axes is current.
clustermap.ax_heatmap.figure.suptitle(
    'Heatmap of Significant Genes between ASham_GFP vs ASham_noGFP', y=1.02)
# Save through the grid object so the clustermap figure is the one written.
clustermap.savefig('clustermap_ASham.png')
plt.show()


In [None]:
#DGE Analysis of Young and Aged GFP-Positive Endothelial Cells at Rest

In [None]:
# Test the 'condition' factor: YSham-GFP versus ASham-GFP.
# NOTE(review): the levels are written with hyphens here while the
# pseudo-replicate names used for the heatmap contain underscores; presumably
# PyDESeq2 rewrites underscores in factor levels -- confirm.
stat_res = DeseqStats(dds, contrast=('condition', 'YSham-GFP', 'ASham-GFP'))
stat_res.summary()

# Work on a copy so the table stored on `stat_res` is not modified in place.
de = stat_res.results_df.copy()
# Gene names (the index) exported as an upper-case 'Symbol' column.
de['Symbol'] = de.index.str.upper()

# Rank genes by the test statistic, highest first, and save with the gene
# names as the index.
de_sorted = de.sort_values('stat', ascending=False)
de_sorted.to_csv('Y&ASham Endothelial cell3.csv')


In [None]:
# Thresholds used to call a gene significant for this contrast.
# NOTE(review): the YSham section uses baseMean >= 100 while this one uses
# 80 -- confirm the difference is intended.
min_base_mean = 80
max_padj = 0.05
min_abs_log2fc = 0.5

# Copy so the results table stored on `stat_res` is left untouched.
res = stat_res.results_df.copy()
res['Symbol'] = res.index

# Drop lowly expressed genes first, then keep genes passing both the
# adjusted p-value and the absolute log2 fold-change cut-offs.
res = res[res.baseMean >= min_base_mean]
sigs = res[(res.padj < max_padj) & (res.log2FoldChange.abs() > min_abs_log2fc)]
sigs


In [None]:
import numpy as np
import seaborn as sns
# log1p-transform the normalised counts and keep the result as a layer on `dds`.
log1p_counts = np.log1p(dds.layers['normed_counts'])
dds.layers['log1p'] = log1p_counts

# Restrict to the significant genes and build a genes x samples table.
dds_sigs = dds[:, sigs.index]
grapher = pd.DataFrame(
    data=dds_sigs.layers['log1p'].T,
    index=dds_sigs.var_names,
    columns=dds_sigs.obs_names,
)

In [None]:
import matplotlib.pyplot as plt

# Pseudo-replicate columns ("<sample>_<replicate index>") of the two groups
# compared in this contrast.
conditions_of_interest = ['YSham_GFP_0', 'YSham_GFP_1', 'YSham_GFP_2', 'YSham_GFP_3',
                          'ASham_GFP_0', 'ASham_GFP_1', 'ASham_GFP_2', 'ASham_GFP_3']

# Subset grapher DataFrame for significant genes and conditions of interest
grapher_subset = grapher.loc[sigs.index.intersection(grapher.index), conditions_of_interest]

# sns.clustermap creates its own figure, so the size is passed to it directly
# (a separate plt.figure() would only leave an empty extra figure behind).
# z_score=0 standardises each row (gene).
clustermap = sns.clustermap(grapher_subset, z_score=0, cmap='RdYlBu_r', figsize=(10, 8))
clustermap.ax_heatmap.set_xlabel('Samples')
clustermap.ax_heatmap.set_ylabel('Genes')
# Title the whole figure; plt.title would attach to whichever axes is current.
clustermap.ax_heatmap.figure.suptitle(
    'Heatmap of Significant Genes between YSham_GFP vs ASham_GFP', y=1.02)
# Own file name for this contrast: 'clustermap_ASham.png' is already written
# by the ASham-GFP vs ASham-noGFP heatmap and would be overwritten.
clustermap.savefig('clustermap_YSham_vs_ASham.png')
plt.show()


In [None]:
# The above code was reused for differential gene expression (DGE) analysis  
# of other cell types, including:  
# - Anti-inflammatory macrophages at rest  
# - Lipid-laden macrophages after mechanical overload  
# - Pro-inflammatory macrophages II during MOV  


In [None]:
# Venn diagram: Overlapping and unique differentially expressed genes (DEGs)  
# between young and aged endothelial cells (GFP+ vs. GFP−) AT REST.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import os

# Switch to the folder that holds the exported DEG tables.
os.chdir("P:/Tolulope/Cellranger Results/YNO/excel")

# Read the young (YSham) and aged (ASham) DEG tables.
df_ysham = pd.read_csv('YSham Endothelial cells.csv')
df_asham = pd.read_csv('ASham Endothelial cells.csv')

# Upper-case the gene symbols in both tables so set comparison is case-insensitive.
for deg_table in (df_ysham, df_asham):
    deg_table['Symbol'] = deg_table['Symbol'].str.upper()

# Row masks for the adjusted p-value cut-off, shared by the up and down sets.
ysham_sig = df_ysham['padj'] < 0.05
asham_sig = df_asham['padj'] < 0.05

# Significant genes split by direction of the log2 fold change.
up_genes_ysham = set(df_ysham.loc[ysham_sig & (df_ysham['log2FoldChange'] > 0.5), 'Symbol'])
down_genes_ysham = set(df_ysham.loc[ysham_sig & (df_ysham['log2FoldChange'] < -0.5), 'Symbol'])

up_genes_asham = set(df_asham.loc[asham_sig & (df_asham['log2FoldChange'] > 0.5), 'Symbol'])
down_genes_asham = set(df_asham.loc[asham_sig & (df_asham['log2FoldChange'] < -0.5), 'Symbol'])

# One figure, two side-by-side panels: (subplot position, young set, aged set,
# set labels, panel title).
plt.figure(figsize=(12, 6))
venn_panels = (
    (1, up_genes_ysham, up_genes_asham,
     ('YSham Upregulated', 'ASham Upregulated'), 'Upregulated Genes Comparison'),
    (2, down_genes_ysham, down_genes_asham,
     ('YSham Downregulated', 'ASham Downregulated'), 'Downregulated Genes Comparison'),
)
for position, young_genes, aged_genes, set_labels, panel_title in venn_panels:
    plt.subplot(1, 2, position)
    venn2([young_genes, aged_genes], set_labels)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Venn diagram: Overlapping and unique differentially expressed genes (DEGs)  
# between young and aged Anti-inflammatory Macrophages (GFP+ vs. GFP−) AT REST.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import os

# Switch to the folder that holds the exported DEG tables.
os.chdir("P:/Tolulope/Cellranger Results/YNO/excel")

# Read the young (YSham) and aged (ASham) DEG tables.
df_ysham = pd.read_csv('YSham Anti-inflammatory Macrophages.csv')
df_asham = pd.read_csv('ASham Anti-inflammatory Macrophages.csv')

# Upper-case the gene symbols in both tables so set comparison is case-insensitive.
for deg_table in (df_ysham, df_asham):
    deg_table['Symbol'] = deg_table['Symbol'].str.upper()

# Row masks for the adjusted p-value cut-off, shared by the up and down sets.
ysham_sig = df_ysham['padj'] < 0.05
asham_sig = df_asham['padj'] < 0.05

# Significant genes split by direction of the log2 fold change.
up_genes_ysham = set(df_ysham.loc[ysham_sig & (df_ysham['log2FoldChange'] > 0.5), 'Symbol'])
down_genes_ysham = set(df_ysham.loc[ysham_sig & (df_ysham['log2FoldChange'] < -0.5), 'Symbol'])

up_genes_asham = set(df_asham.loc[asham_sig & (df_asham['log2FoldChange'] > 0.5), 'Symbol'])
down_genes_asham = set(df_asham.loc[asham_sig & (df_asham['log2FoldChange'] < -0.5), 'Symbol'])

# One figure, two side-by-side panels: (subplot position, young set, aged set,
# set labels, panel title).
plt.figure(figsize=(12, 6))
venn_panels = (
    (1, up_genes_ysham, up_genes_asham,
     ('YSham Upregulated', 'ASham Upregulated'), 'Upregulated Genes Comparison'),
    (2, down_genes_ysham, down_genes_asham,
     ('YSham Downregulated', 'ASham Downregulated'), 'Downregulated Genes Comparison'),
)
for position, young_genes, aged_genes, set_labels, panel_title in venn_panels:
    plt.subplot(1, 2, position)
    venn2([young_genes, aged_genes], set_labels)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Venn diagram: Overlapping and unique differentially expressed genes (DEGs)  
# between young and aged Lipid-Laden Macrophages (GFP+ vs. GFP−) during MOV.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import os

# Set working directory (folder holding the exported DEG tables)
os.chdir("P:/Tolulope/Cellranger Results/YNO/excel")

# Load DEG data from CSV files.
# These are the YOV / AOV (overload) tables; the variable names keep the
# ysham/asham spelling used by the other Venn cells.
df_ysham = pd.read_csv('YOV Lipid-Laden Macrophages.csv')
df_asham = pd.read_csv('AOV Lipid-Laden Macrophages.csv')

# Ensure 'Symbol' column is in uppercase
df_ysham['Symbol'] = df_ysham['Symbol'].str.upper()
df_asham['Symbol'] = df_asham['Symbol'].str.upper()

# Significant genes (padj < 0.05) split by direction of change (|log2FC| > 0.5)
up_genes_ysham = set(df_ysham[(df_ysham['padj'] < 0.05) & (df_ysham['log2FoldChange'] > 0.5)]['Symbol'])
down_genes_ysham = set(df_ysham[(df_ysham['padj'] < 0.05) & (df_ysham['log2FoldChange'] < -0.5)]['Symbol'])

up_genes_asham = set(df_asham[(df_asham['padj'] < 0.05) & (df_asham['log2FoldChange'] > 0.5)]['Symbol'])
down_genes_asham = set(df_asham[(df_asham['padj'] < 0.05) & (df_asham['log2FoldChange'] < -0.5)]['Symbol'])

# Create Venn diagrams for upregulated and downregulated genes
plt.figure(figsize=(12, 6))

# Upregulated Genes -- labels name the YOV/AOV tables actually loaded above
# (they previously read 'YSham'/'ASham').
plt.subplot(1, 2, 1)
venn2([up_genes_ysham, up_genes_asham], ('YOV Upregulated', 'AOV Upregulated'))
plt.title('Upregulated Genes Comparison')

# Downregulated Genes
plt.subplot(1, 2, 2)
venn2([down_genes_ysham, down_genes_asham], ('YOV Downregulated', 'AOV Downregulated'))
plt.title('Downregulated Genes Comparison')

plt.tight_layout()
plt.show()
