In [None]:
# This notebook generates violin plots of gene expression for a single cell type, split by condition.
# To reuse it for another cell type, keep the structure and change only the cell-type label and the genes of interest.
# The edge color of the violin plot is set to match the color used for that cell type in the UMAP plot.


In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.sparse
import seaborn as sns
from matplotlib.collections import PolyCollection
from scipy.sparse import csr_matrix

# Location of the integrated dataset. The process switches into this directory,
# so any relative path used afterwards is resolved against it.
DATA_DIR = "P:/Tolulope/Cellranger Results/YNO"
H5AD_FILE = 'integrated OLD.h5ad'

os.chdir(DATA_DIR)

# Read the AnnData object from the .h5ad file in the working directory.
adata = sc.read_h5ad(H5AD_FILE)

# Bare last expression: the notebook renders the AnnData summary as the cell output.
adata

In [None]:

# Subset for endothelial cells.
# .copy() produces an independent AnnData, so assigning to .X below does not
# modify the original `adata` object.
cell_subset = adata[adata.obs['cell type'] == "Endothelial cells"].copy()
if cell_subset.n_obs == 0:
    raise ValueError("No cells are annotated as 'Endothelial cells' in adata.obs['cell type'].")

# The gene lookup below reads from .raw; fail with a clear message instead of
# an AttributeError on None when the object was saved without it.
if cell_subset.raw is None:
    raise ValueError("cell_subset.raw is not set; this notebook reads gene expression from .raw.")

# Store .X as a CSR sparse matrix.
cell_subset.X = csr_matrix(cell_subset.X)

# Color used for each condition in the violin plot.
palette = {'YSham-noGFP': 'gray', 'YSham-GFP': '#d62728'}

# Genes to plot; edit this list to look at other genes.
genes_of_interest = ['Rps19', 'Rpl13a', 'Rpl17', 'Rps6']

# Keep only the genes that are actually present in .raw, preserving the
# requested order, and report the ones that had to be dropped.
available_genes = [gene for gene in genes_of_interest if gene in cell_subset.raw.var_names]
missing_genes = [gene for gene in genes_of_interest if gene not in available_genes]
print(f"Available genes in the dataset: {available_genes}")
if missing_genes:
    print(f"Genes not found in the dataset (skipped): {missing_genes}")

if not available_genes:
    raise ValueError("None of the genes of interest are present in the dataset.")

# Build a long-format table with one row per (cell, gene) pair and the columns
# 'Condition', 'Gene' and 'Expression'.

# Slice .raw down to the genes of interest BEFORE converting to a dense array.
# Densifying the full cells x genes matrix just to read a few columns can use
# a very large amount of memory.
raw_selected = cell_subset.raw[:, available_genes].X
if scipy.sparse.issparse(raw_selected):
    expr_matrix = raw_selected.toarray()
else:
    expr_matrix = np.asarray(raw_selected)

# Column j of expr_matrix corresponds to available_genes[j].
per_gene_frames = [
    pd.DataFrame({
        'Condition': cell_subset.obs['condition'],
        'Gene': gene,
        'Expression': expr_matrix[:, column],
    })
    for column, gene in enumerate(available_genes)
]

# Stack the per-gene tables; the cell-barcode index is dropped on purpose
# because every barcode appears once per gene.
plot_data = pd.concat(per_gene_frames, ignore_index=True)

# Debugging: Check if plot_data has values
print(plot_data.head())
print(f"Number of rows in plot_data: {len(plot_data)}")

if len(plot_data) == 0:
    raise ValueError("The plot_data DataFrame is empty. Check the gene extraction and subsetting steps.")

# Plot the gene expression levels as grouped violin plots: one group per gene,
# one violin per condition.

# Conditions to draw, in left-to-right and legend order, and the condition
# whose violins are recolored and hatched.
hue_order = ['YSham-noGFP', 'YSham-GFP']
hatched_condition = 'YSham-GFP'
hatched_color = '#d62728'

# Keep only the rows that are actually drawn, so the y-axis limit below is
# derived from plotted values rather than from conditions that are not shown.
plotted = plot_data[plot_data['Condition'].isin(hue_order)]
if plotted.empty:
    raise ValueError(f"No rows in plot_data belong to the conditions {hue_order}; nothing to plot.")

fig, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(x='Gene', y='Expression', hue='Condition', data=plotted, palette=palette,
               inner='box', hue_order=hue_order, ax=ax)

# Apply the hatch pattern to the YSham-GFP violins.
# ax.collections can hold more than the violin bodies (for example the median
# markers that some seaborn versions draw for inner='box'), so select the
# PolyCollection bodies explicitly instead of relying on raw index parity.
violin_bodies = [artist for artist in ax.collections if isinstance(artist, PolyCollection)]

# Within each gene the violins follow hue_order, and a condition with no cells
# produces no violin. Map bodies to conditions using only the conditions that
# are present.
present_conditions = set(plotted['Condition'].unique())
drawn_conditions = [cond for cond in hue_order if cond in present_conditions]
expected_bodies = plotted['Gene'].nunique() * len(drawn_conditions)

if len(violin_bodies) == expected_bodies:
    for position, body in enumerate(violin_bodies):
        if drawn_conditions[position % len(drawn_conditions)] == hatched_condition:
            # NOTE(review): edge and face share one color, so the hatch may not
            # stand out visually - confirm this is the intended look.
            body.set_edgecolor(hatched_color)
            body.set_facecolor(hatched_color)
            body.set_hatch('//')
else:
    # Unexpected artist layout: leave the violins untouched rather than
    # restyle the wrong ones.
    print(f"Warning: found {len(violin_bodies)} violin bodies, expected {expected_bodies}; "
          "hatch styling was skipped.")

# Start the y-axis at 0 and leave 10% headroom above the largest plotted value.
y_max = plotted['Expression'].max()
ax.set_ylim(0, y_max * 1.1 if y_max > 0 else 1)
ax.set_title('Gene Expression Levels in Endothelial Cells')
ax.tick_params(axis='x', labelrotation=45)

# Rebuild the legend so that YSham-noGFP comes first; conditions without a
# legend handle are left out instead of raising.
handles, labels = ax.get_legend_handles_labels()
legend_labels = [cond for cond in hue_order if cond in labels]
ax.legend([handles[labels.index(cond)] for cond in legend_labels], legend_labels, title='Condition')

fig.savefig('Violin_YSham1.png')  # Save the plot as an image file
plt.show()
