In [1]:
import scanpy as sc
import scvi
import seaborn as sns
import numpy as np
import pandas as pd
import os

In [2]:
# Set working directory
# Later cells read and write files by bare name, so they resolve against this folder.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this drive exists.
PROJECT_DIR = "P:/Tolulope/Manuscript/Yuan Analysis"
os.chdir(PROJECT_DIR)

In [3]:
# Load the integrated dataset; the relative file name resolves against the
# current working directory.
integrated_h5ad = "integratedssssss.h5ad"
adata = sc.read(integrated_h5ad)

In [4]:
# Work on an independent copy so `adata` stays exactly as it was loaded.
adata_concat = adata.copy()

In [5]:
# Inspect the .raw slot of the working copy: first its summary repr, then the
# dimensions of its per-gene annotation table (.raw.var).
raw_slot = adata_concat.raw
print(raw_slot)
print(raw_slot.var.shape)


Raw AnnData with n_obs × n_vars = 45833 × 37912
    var: 'n_cells'
(37912, 1)


In [6]:
# Per-cell values of the two marker genes, taken from .raw and flattened to 1-D.
# (.toarray() implies .raw.X is held as a sparse matrix.)
gfp_expression = adata_concat.raw[:, "Gfp"].X.toarray().flatten()
pax7_expression = adata_concat.raw[:, "Pax7"].X.toarray().flatten()

# Keep only the cells where both genes are exactly 0.
# Alternative kept from the original cell - filter on Gfp alone:
#ad_filtered = adata_concat[(adata_concat.raw[: , "Gfp"].X.toarray().flatten() ==0) , :].copy()
is_gfp_pax7_negative = (gfp_expression == 0) & (pax7_expression == 0)
ad_filtered = adata_concat[is_gfp_pax7_negative, :].copy()
print(ad_filtered)

AnnData object with n_obs × n_vars = 43257 × 37912
    obs: 'batch', 'doublet', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'Sample', '_scvi_batch', '_scvi_labels', 'leiden', '_scvi_raw_norm_scaling', 'cell type', 'condition'
    var: 'n_cells'
    uns: 'Sample_colors', '_scvi_manager_uuid', '_scvi_uuid', 'cell type_colors', 'leiden', 'leiden_colors', 'log1p', 'markers', 'neighbors', 'rank_genes_groups', 'scvi_markers', 'umap'
    obsm: 'X_scVI', 'X_umap', '_scvi_extra_categorical_covs', '_scvi_extra_continuous_covs'
    layers: 'counts', 'scvi_normalized'
    obsp: 'connectivities', 'distances'


In [7]:
# Estimate embedding density separately for each value of obs['Sample'].
# `basis='umap'` is scanpy's default, written out here because the next cell
# reads the resulting obs column 'umap_density_Sample'.
sc.tl.embedding_density(ad_filtered, basis='umap', groupby='Sample')

In [8]:
# Export the per-cell density together with its leiden cluster and sample label
# (the obs index is written as the first CSV column).
density_export_columns = ['umap_density_Sample', 'leiden', 'Sample']
ad_filtered.obs[density_export_columns].to_csv('density_by_cluster_filteredscvitools1.csv')

In [9]:
import pandas as pd
import numpy as np

# STEP 1: Create DataFrame with relevant columns
# One row per cell: sample label, annotated cell type and the per-sample UMAP
# density. 'count' is a constant helper column used to tally cells per group.
df = ad_filtered.obs[['Sample', 'cell type', 'umap_density_Sample']].copy()
df['count'] = 1

# `observed=False` is passed explicitly. When the grouping columns are
# categorical, pandas emits a FutureWarning if `observed` is omitted, because
# the default is changing to True. Pinning it keeps today's result, in which
# every (Sample, cell type) combination is kept, including those with no cells.
sample_celltype_groups = df.groupby(['Sample', 'cell type'], observed=False)

# STEP 2: Count cells per (Sample, cell type) -> rows = samples, columns = cell types
cell_counts = sample_celltype_groups['count'].count().unstack(fill_value=0)

# STEP 3: Get average density per (Sample, cell type), same layout as cell_counts
density = sample_celltype_groups['umap_density_Sample'].mean().unstack(fill_value=0)

# STEP 4: Define GFP-positive samples
# NOTE(review): this list is not referenced by the code visible in this notebook section.
gfp_positive_samples = ['YSham_GFP', 'ASham_GFP', 'YOV_GFP', 'AOV_GFP']

# STEP 5: Define safe ratio computation with GFP threshold only
def compute_ratio_with_gfp_filter(gfp_sample, no_gfp_sample, min_cells=20):
    """Density ratio of a GFP sample over its noGFP counterpart, per cell type.

    Reads the module-level tables `density` and `cell_counts`, both indexed by
    sample (rows) with one column per cell type.

    Parameters
    ----------
    gfp_sample : row label of the numerator sample in `density` / `cell_counts`.
    no_gfp_sample : row label of the denominator sample in `density`.
    min_cells : cell types with fewer than this many cells in `gfp_sample`
        get a ratio of 0.

    Returns
    -------
    pandas.Series indexed by cell type.

    Notes
    -----
    The denominator is used as-is; a zero or missing noGFP density is not
    special-cased here.
    """
    ratio = density.loc[gfp_sample] / density.loc[no_gfp_sample]

    # Too few cells in the GFP sample: force the ratio to 0.
    too_few_gfp_cells = cell_counts.loc[gfp_sample] < min_cells
    ratio[too_few_gfp_cells] = 0

    return ratio

# STEP 6: Compute ratios for desired comparisons
# One column per condition: <condition>_GFP over <condition>_noGFP, using the
# function's default min_cells threshold.
conditions = ['YSham', 'ASham', 'YOV', 'AOV']
ratios = pd.DataFrame({
    condition: compute_ratio_with_gfp_filter(f'{condition}_GFP', f'{condition}_noGFP')
    for condition in conditions
})

# STEP 7: Save to CSV (cell types as rows, conditions as columns)
ratios.to_csv('GFP_communication_ratios_thresholded.csv')


  cell_counts = df.groupby(['Sample', 'cell type'])['count'].count().unstack(fill_value=0)
  density = df.groupby(['Sample', 'cell type'])['umap_density_Sample'].mean().unstack(fill_value=0)


In [17]:
import pandas as pd
import numpy as np

# STEP 1: Extract relevant columns and count per cell
# One row per cell; 'count' is a constant helper column used to tally cells.
df = ad_filtered.obs[['Sample', 'cell type', 'umap_density_Sample']].copy()
df['count'] = 1

# `observed=False` is passed explicitly. When the grouping columns are
# categorical, pandas emits a FutureWarning if `observed` is omitted, because
# the default is changing to True. Pinning it keeps today's result, in which
# every (Sample, cell type) combination is kept, including those with no cells.
sample_celltype_groups = df.groupby(['Sample', 'cell type'], observed=False)

# STEP 2: Count cells per (Sample, cell type) -> rows = samples, columns = cell types
cell_counts = sample_celltype_groups['count'].count().unstack(fill_value=0)

# STEP 3: Compute mean UMAP density per (Sample, cell type), same layout as cell_counts
density = sample_celltype_groups['umap_density_Sample'].mean().unstack(fill_value=0)

# STEP 4: Identify the minimum non-zero density value from entire dataset
# `density[density > 0]` blanks every entry that is not strictly positive; the
# two chained .min() calls then reduce over rows and over columns.
min_nonzero_density = density[density > 0].min().min()
print(f"Minimum non-zero density in dataset: {min_nonzero_density:.6f}")

# STEP 5: Ratio calculation function with logging
def compute_ratio_with_logging(gfp_sample, no_gfp_sample, min_cells=1):
    """Density ratio of a GFP sample over its noGFP counterpart, with a log.

    Reads the module-level tables `density` and `cell_counts` (rows = samples,
    columns = cell types) and the scalar `min_nonzero_density`.

    Parameters
    ----------
    gfp_sample : row label of the numerator sample in `density` / `cell_counts`.
    no_gfp_sample : row label of the denominator sample in `density`.
    min_cells : cell types with fewer than this many cells in `gfp_sample`
        get a ratio of NaN.

    Returns
    -------
    (ratio, excluded)
        ratio : pandas.Series indexed by cell type.
        excluded : pandas.DataFrame indexed by cell type with the GFP cell
            count, the untouched noGFP density and a 'Reason' column
            ('Low GFP+ cell count', 'No cells in noGFP' or 'OK').
    """
    numerator = density.loc[gfp_sample]
    denominator = density.loc[no_gfp_sample]
    gfp_counts = cell_counts.loc[gfp_sample]

    # A noGFP density of 0 or NaN cannot serve as a divisor. The same mask
    # drives both the substitution and the log, so a NaN density is no longer
    # reported as 'OK' while being replaced.
    denominator_unusable = denominator.isna() | (denominator == 0)
    denominator_safe = denominator.mask(denominator_unusable, min_nonzero_density)

    # Compute ratio
    ratio = numerator / denominator_safe

    # Mask ratios where GFP+ count is below threshold
    low_gfp_mask = gfp_counts < min_cells
    ratio[low_gfp_mask] = np.nan

    # Log exclusions; a low GFP+ count takes precedence over a missing denominator.
    excluded = pd.DataFrame({
        'GFP_count': gfp_counts,
        'noGFP_density': denominator,
        'Reason': np.where(
            low_gfp_mask, 'Low GFP+ cell count',
            np.where(denominator_unusable, 'No cells in noGFP', 'OK')
        )
    })

    return ratio, excluded

# STEP 6: Compute ratios and logs for all conditions
# Each condition compares <condition>_GFP against <condition>_noGFP and writes
# its own per-cell-type log file.
ratios = {}
logs = {}

for condition in ('YSham', 'ASham', 'YOV', 'AOV'):
    ratios[condition], logs[condition] = compute_ratio_with_logging(
        f'{condition}_GFP', f'{condition}_noGFP'
    )
    logs[condition].to_csv(f'{condition}_excluded_celltypes_log.csv')

# STEP 7: Combine ratios into single DataFrame and save
# (cell types as rows, one column per condition)
ratios_df = pd.DataFrame(ratios)
ratios_df.to_csv('GFP_communication_ratios_minDensityFill_minCells1.csv')

# Optional: Export long-form version for visualization
# One row per (cell type, condition) pair.
ratios_wide = ratios_df.reset_index()
ratios_long = ratios_wide.melt(
    id_vars='cell type',
    var_name='Condition',
    value_name='Density Ratio',
)
ratios_long.to_csv('GFP_communication_ratios_longform.csv', index=False)


Minimum non-zero density in dataset: 0.000584


  cell_counts = df.groupby(['Sample', 'cell type'])['count'].count().unstack(fill_value=0)
  density = df.groupby(['Sample', 'cell type'])['umap_density_Sample'].mean().unstack(fill_value=0)


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Set working directory
# The figure below is saved by bare file name, so it lands in this folder.
# NOTE(review): absolute, machine-specific path - the cell only runs where this drive exists.
PROJECT_DIR = "P:/Tolulope/Manuscript/Yuan Analysis"
os.chdir(PROJECT_DIR)

# Define the updated data
# Hand-entered table: one label per cell type and, for each condition, one
# value per label in the same order (np.nan marks a missing value).
# NOTE(review): presumably these are the GFP/noGFP density ratios exported by
# the earlier cells - confirm provenance before reuse.
cell_type_labels = [
    "FAPs I",
    "Residence Macrophages/APCs I",
    "Neutrophils",
    "Monocyte/Macrophages",
    "M2-like macrophages",
    "Satellite cells",
    "Mature skeletal muscle I",
    "Endothelial cells I",
    "Myo-Fibroblasts",
    "FAPs II",
    "Endothelial cells II",
    "Lipid-Laden Macrophages",
    "Mature skeletal muscle II",
    "Dendritic cells",
    "Residence Macrophages/APCs II",
    "T cells/NKCs",
    "Fusion-Competent Myoblasts",
    "Proliferation",
    "B cells",
    " Migratory immune cells",
    "Vascular APCs",
]

ratio_columns = {
    "YSham": [
        0.1262, 0.0, 0.0, 0.0, 0.3049, 11.4398, 0.1171,
        0.2834, 0.0, 0.0, 0.0, np.nan, 0.0, 0.0,
        0.0, 0.0, 0.0, np.nan, 0.0, 0.0, np.nan,
    ],
    "ASham": [
        0.0800, 2.5341, 0.0, 1.4463, 7.9571, 16.3412, 0.1683,
        0.3710, 0.0, 0.0, 0.5686, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ],
    "YOV": [
        0.0, 0.9414, 0.0664, 0.6971, 0.9417, 282.9240, 0.0,
        0.2062, 0.1060, 0.0, 0.0, 1.1610, 0.0722, 0.4215,
        0.0, 0.0, 7.5575, 0.0, 0.0, 0.0, 0.0,
    ],
    "AOV": [
        0.0, 1.1401, 0.0877, 1.2615, 2.0054, 25.7731, 0.0,
        0.2012, 0.0932, 0.0, 0.0, 2.2648, 0.0, 0.5427,
        0.2085, 0.0, np.nan, 0.6779, 0.0, 0.0, 0.0,
    ],
}

# Same mapping the cell originally defined: labels first, then one list per condition.
data_dict = {"cell type": cell_type_labels, **ratio_columns}

# Cell types become the row index (named "cell type"); conditions are the columns.
df = pd.DataFrame(ratio_columns, index=pd.Index(cell_type_labels, name="cell type"))

# Cap data at a threshold to avoid skewing heatmap color range
cap_threshold = 1
df_capped = df.clip(upper=cap_threshold)

# Plot
# Colours come from the capped values; the printed numbers are the uncapped
# values rounded to two decimals.
fig, ax = plt.subplots(figsize=(6, 8))
heatmap_kwargs = dict(
    cmap="coolwarm",
    annot=df.round(2),
    fmt="",
    linewidths=0.5,
    cbar=True,
    vmin=0,
    vmax=cap_threshold,
    ax=ax,
)
heatmap = sns.heatmap(df_capped, **heatmap_kwargs)

# Highlight the MuSCs row with white and re-annotate its values:
# each cell of the "Satellite cells" row is covered by a white square, then the
# uncapped value is printed in black on top (missing values get no text).
musc_idx = df.index.get_loc("Satellite cells")
for j, value in enumerate(df.iloc[musc_idx]):
    ax.add_patch(plt.Rectangle((j, musc_idx), 1, 1, color="white", zorder=2))
    if not pd.isna(value):
        ax.text(j + 0.5, musc_idx + 0.5, f"{value:.2f}",
                ha='center', va='center', color='black', fontsize=10)

# Formatting
ax.set_xlabel("Condition", fontsize=12)
ax.set_ylabel("Cell Type Cluster", fontsize=12)
#plt.title("Cell Type Abundance Across Conditions (Capped at 1)", fontsize=14)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10, rotation=0)

fig.tight_layout()
fig.savefig("cell_type_abundance_heatmap_updated.png", dpi=300, bbox_inches='tight')
plt.show()
