# Calculate CG surrounding count

In [None]:
import sys
from pathlib import Path
from datetime import datetime

# Add the folder containing 'funcs_extract_mC_profiles_from_BAMs.py' to sys.path
sys.path.append("/home/michalula/code/epiCausality/epiCode/utils/") # str(Path(__file__).parent / 'utils'))

# Import the module or specific functions
# from /home/michalula/code/epiCausality/epiCode/utils/funcs_extract_mC_profiles_from_BAMs.py
# from funcs_extract_mC_profiles_from_BAMs import system_info, extract_from_bam
from funcs_extract_mC_profiles_from_BAMs import (
    system_info,
    get_reference_sequence,
    create_output_directory,
    extract_from_bam,
    process_extracted_reads,
    visualize_data,
    create_padded_reads,
    plot_padded_reads,
    save_padded_reads,
    process_extracted_reads_no_fully_unmethylated,
    create_padded_reads_no_fully_unmethylated
    # main,
)


def find_cpgs(dna: str) -> list[int]:
    """Locate every CpG dinucleotide in *dna*, ignoring letter case.

    Returns the 0-based offsets of the ``C`` of each ``CG`` pair, in
    ascending order. An empty or single-base string yields ``[]``.
    """
    seq = dna.upper()
    # Pair each base with its right-hand neighbour and keep the offsets
    # where the pair spells "CG".
    return [
        offset
        for offset, (base, following) in enumerate(zip(seq, seq[1:]))
        if base == "C" and following == "G"
    ]


def count_surrounding_cpgs(dna: str, n: int) -> dict[int, int]:
    """
    For each CpG in dna, count how many *other* CpGs have their start position
    within N bases (i.e. |pos_other - pos_self| <= N, excluding self).

    The CpG start positions come from ``find_cpgs`` and are therefore sorted
    and unique, so the neighbours of each CpG form a contiguous slice that is
    located with two binary searches instead of comparing every pair.

    Parameters
    ----------
    dna : str
        DNA sequence; case is handled by ``find_cpgs``.
    n : int
        Half-width of the window, in bases. A negative value matches nothing,
        so every count is 0.

    Returns a dict mapping each CpG start position -> surrounding CpG count,
    in ascending position order.
    """
    from bisect import bisect_left, bisect_right

    cpg_positions = find_cpgs(dna)
    result = {}
    for pos in cpg_positions:
        # Slice [lo, hi) holds every CpG whose start lies in [pos - n, pos + n].
        lo = bisect_left(cpg_positions, pos - n)
        hi = bisect_right(cpg_positions, pos + n)
        # Subtract 1 to exclude the CpG itself; clamp at 0 because a negative
        # n produces an empty (inverted) window.
        result[pos] = max(hi - lo - 1, 0)
    return result



# Call the project helper first (presumably reports host/environment details —
# its output is defined in funcs_extract_mC_profiles_from_BAMs).
system_info()
# ISO date string for this run (not used further in this cell).
date_today = datetime.today().strftime('%Y-%m-%d')
ref_genome_path = Path('/home/michalula/data/ref_genomes/t2t_v2_0/up_chm13v2.0.fasta')
# NOTE(review): "reg_" looks like a typo for "ref_"; the name is left as-is
# because other cells may read it.
reg_genome_version = "t2t_v2_0"
region_chr = 'chr1'

# 6500 bp region of interest: the exact cut region, trimmed by 20 bp per side.
# Cut region: chr1:206,583,334-206,589,873
# NOTE(review): the end literal below is 206589874, one more than the
# coordinate quoted above — confirm which convention is intended.
region_start = 206583334    + 20
region_end = 206589874      - 20 
# CD55 TSS start: chr1:206586828-206606065 (+)
# (before TSS: 3474, after TSS: 3026 bps)
# region_start 206583354
# region_end 206589854
# chr1:206583354-206589854

# Region in "chr:start-end" form (same shape as e.g. 'chr1:206586162-206586192').
region_str = region_chr + ":" + str(region_start) + "-" + str(region_end)
# Simple end - start difference (6500 for the values above).
region_length = region_end - region_start
print("region_length", region_length)


# Motif specification; not used in this cell.
motifs=['CG,0']
# Fetch the reference bases for the region. The result is joined into a single
# string below, so it is presumably a sequence of strings — verify in the helper.
ref_seq_list = get_reference_sequence(ref_genome_path, region_chr, region_start, region_end)

print('region_chr', region_chr)
print('region_start', region_start)
print('region_end', region_end)
print(f'{region_chr}:{region_start}-{region_end}') 



# Concatenate the pieces returned by get_reference_sequence into one string.
dna_6500_roi_str = "".join(ref_seq_list)

dna = dna_6500_roi_str
N = 50                # window half-width, in bases, for the neighbour count
SEQ_LEN = len(dna)

# {CpG start offset within dna -> number of other CpGs within +/-N bases}
surrounding = count_surrounding_cpgs(dna, n=N)
print(f"Surrounding CpG counts within +/-{N} bases:")
print(f"{'CpG pos':>10}  {'context':>20}  {'surrounding CpGs':>16}")
print("-" * 52)
for pos, count in surrounding.items():
    # Show a small context window around the CpG (3 bases before the C and
    # up to 5 bases from it), clipped to the sequence bounds.
    start = max(0, pos - 3)
    end = min(SEQ_LEN, pos + 5)
    context = dna[start:end]
    print(f"{pos:>10}  {context:>20}  {count:>16}")

import matplotlib.pyplot as plt

# Bar plot of the surrounding-CpG count of every CpG in the region.
# The x axis is the ordinal CpG index (0, 1, 2, ...) in sequence order, not
# the base offset stored in the dict keys, so the axis is labelled as an index.
positions = list(range(len(surrounding)))
counts = list(surrounding.values())

# Create a bar plot
plt.figure(figsize=(14, 6))
plt.bar(positions, counts, width=1.0, edgecolor='none')
plt.xlabel('CpG Index')
plt.ylabel('Surrounding CpG Count')
plt.title(f'Surrounding CpG Counts within +/-{N} bases')
plt.tight_layout()
plt.show()

In [None]:
# CD55 Differential Methylation Analysis — Clean Version

# **Created:** 2026-02-14  
# **Based on:** `20251021_DIFmC_analyze.ipynb`

# Analyzes differential methylation (ΔmC) at 137 CpG sites in the CD55 gene region,  
# comparing CRISPRoff-silenced vs. unedited T cells across timepoints (Days 6, 28, 35).  
# Integrates LASSO and SHAP ML coefficients and exports annotated coordinate files.

# **Region:** chr1:206,583,354–206,589,854 (T2T v2.0) = chr1:207,318,058–207,324,558 (hg38)  
# **TSS (hg38):** 207,321,678 | **TSS (T2T):** 206,586,974

# ---
# ## Sections
# 1. Setup & Constants
# 2. Load Methylation Data
# 3. Compute Delta mC
# 4. Load ML Coefficients
# 5. Visualize Methylation Fractions & Delta mC
# 6. Visualize ML Coefficients
# 7. Generate CpG Coordinates
# 8. Merge All Data & Save Annotated CSV
# 9. Visualize Spatial Distribution of Delta mC
# 10. Export BED Files for UCSC Genome Browser
## Section 1 — Setup & Constants
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from pathlib import Path
from datetime import date

# Utility imports
sys.path.append("/home/michalula/code/epiCausality/epiCode/utils/")
from funcs_analize_forward_reverse_extracted_mC_reads import load_padded_reads
from funcs_extract_mC_profiles_from_BAMs import get_reference_sequence

# ── Genomic constants ──────────────────────────────────────────────────────────
REGION_CHR   = "chr1"
REGION_START = 206_583_354   # T2T v2.0, 1-based inclusive
REGION_END   = 206_589_854   # T2T v2.0, 1-based inclusive
# Constant shift between assemblies for this locus:
# 206_583_354 + 734_704 = 207_318_058 and 206_586_974 + 734_704 = 207_321_678.
OFFSET_HG38  = 734_704       # hg38_pos = T2T_pos + OFFSET_HG38
# NOTE(review): the first cell's comment lists the CD55 start as
# chr1:206586828 (T2T), 146 bp upstream of this value — confirm which
# annotation each number comes from.
TSS_T2T      = 206_586_974   # CD55 TSS (T2T v2.0)
TSS_HG38     = 207_321_678   # CD55 TSS (hg38) — verified correct value
PROMOTER_WIN = 1_000         # ±1,000 bp around TSS defines promoter

# ── File paths ─────────────────────────────────────────────────────────────────
# NOTE(review): the first cell reads 'up_chm13v2.0.fasta' from the same folder;
# this cell points at 'chm13v2.0.fa'. The existence check printed below shows
# whether this path is valid on the current machine.
REF_GENOME = Path("/home/michalula/data/ref_genomes/t2t_v2_0/chm13v2.0.fa")
BASE       = Path("/home/michalula/code/epiCausality/epiCode/analyze_ont_data/T2T_v2.0_mapped/T_cells")
ML_DIR     = BASE / "day_6/model_data"
OUT_DIR    = Path("/home/michalula/code/epiCausality/epiCode/analyze_ont_data/compare_conditions")
TODAY      = date.today().strftime("%Y-%m-%d")   # ISO date of this run

# Echo the resolved configuration so the run is self-describing.
print(f"Output directory: {OUT_DIR}")
print(f"Reference genome: {REF_GENOME}  (exists={REF_GENOME.exists()})")
print(f"Date: {TODAY}")
## Section 2 — Load Methylation Data
def load_condition(npy_path):
    """Load a padded-reads .npy file, validate shape, return (array, fracs).

    Parameters
    ----------
    npy_path : Path or str
        Path to the .npy file.

    Returns
    -------
    arr   : np.ndarray, shape (n_reads, n_cpgs)
    fracs : np.ndarray, shape (n_cpgs,)
        Per-CpG column sum divided by the total number of reads.  NaN
        entries contribute 0 to the sum but their reads still count in the
        denominator, so this is a fraction of *all* reads, not a mean over
        non-NaN reads only.

    Raises
    ------
    ValueError
        If the loaded array is not 2-D.  An explicit raise is used instead of
        ``assert`` so the check is not stripped when Python runs with ``-O``.
    """
    arr = np.load(npy_path)
    if arr.ndim != 2:
        raise ValueError(f"Expected 2D array, got shape {arr.shape}")
    n_reads, n_cpgs = arr.shape
    fracs = np.nansum(arr, axis=0) / n_reads
    # Short report: file name only (the full paths are very long).
    name = Path(npy_path).name
    print(f"  {name}")
    print(f"    shape={arr.shape}  reads={n_reads}  CpGs={n_cpgs}")
    return arr, fracs


# Each load_condition() call below returns (reads matrix, per-CpG fractions)
# for one timepoint / condition / mC-threshold combination.
# NOTE: the file names are literal names on disk. They spell the quality tag
# both "modBaseQ10" and "modeBaseQ10" — do not normalise them here.

# ── Day 6 ──────────────────────────────────────────────────────────────────────
print("=== Day 6 ===")
D6_UNE = BASE / "day_6/unedited/analyze_single_reads/dimelo_v2_output"
D6_CR  = BASE / "day_6/croff/analyze_single_reads/dimelo_v2_output"

CGs_D6_unedited_mc07,   CGs_D6_unedited_mc07_fracs   = load_condition(
    D6_UNE / "CG_137_padded_reads_day6_unedited_Tcells_mC0.7_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.7_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD490_numRVS644.npy")

CGs_D6_unedited_mc0995, CGs_D6_unedited_mc0995_fracs = load_condition(
    D6_UNE / "CG_137_padded_reads_day6_unedited_Tcells_mC0.995_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.995_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD489_numRVS638.npy")

CGs_D6_CRoff_mc07,      CGs_D6_CRoff_mc07_fracs      = load_condition(
    D6_CR  / "CG_137_padded_reads_day6_CRoff_Tcells_mC0.7_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.7_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD802_numRVS1480.npy")

CGs_D6_CRoff_mc0995,    CGs_D6_CRoff_mc0995_fracs    = load_condition(
    D6_CR  / "CG_137_padded_reads_day6_CRoff_Tcells_mC0.995_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.995_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD793_numRVS1449.npy")


# ── Day 28 (low coverage — warning issued in Section 3) ────────────────────────
print("\n=== Day 28 ===")
D28_UNE = BASE / "day_28/unedited/analyze_single_reads/dimelo_v2_output"
D28_CR  = BASE / "day_28/croff/analyze_single_reads/dimelo_v2_output"

CGs_D28_unedited_mc0995, CGs_D28_unedited_mc0995_fracs = load_condition(
    D28_UNE / "CG_137_padded_reads_day28_unedited_Tcells_mC0.995_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modeBaseQ10_mCthresh0.995_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD6_numRVS17.npy")

CGs_D28_unedited_mc07,   CGs_D28_unedited_mc07_fracs   = load_condition(
    D28_UNE / "CG_137_padded_reads_day28_unedited_Tcells_mC0.7_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modeBaseQ10_mCthresh0.7_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD6_numRVS18.npy")

CGs_D28_CRoff_mc07,      CGs_D28_CRoff_mc07_fracs      = load_condition(
    D28_CR  / "CG_137_padded_reads_day28_CRoff_Tcells_mC0.7_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.7_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD6_numRVS6.npy")

CGs_D28_CRoff_mc0995,    CGs_D28_CRoff_mc0995_fracs    = load_condition(
    D28_CR  / "CG_137_padded_reads_day28_CRoff_Tcells_mC0.995_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modBaseQ10_mCthresh0.995_t2t_v2_0_chr1:206583354-206589854_2025-09-29_units_combined_numFWD6_numRVS6.npy")


# ── Day 35 ─────────────────────────────────────────────────────────────────────
print("\n=== Day 35 ===")
# CRoff replica_1 (current, 2025-11-09) — same read counts as original 2025-09-29 files
D35_CR   = BASE / "day_35/croff/replica_1/analyze_single_reads/dimelo_v2_output"
# Only one unedited library ("part1_37h_sequenced") is loaded in this cell,
# hence the "_lib1" suffix on the variables below.
D35_UNE1 = BASE / "day_35/unedited/part1_37h_sequenced/dimelo_v2_output"

CGs_D35_CRoff_mc07,   CGs_D35_CRoff_mc07_fracs   = load_condition(
    D35_CR / "CG_137_padded_reads_Tcells_CRISPRoff_Day35_postEP_R9minion_threshold_mC0.7_T2Tv2_0_filterMode10_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_mCthresh0.7_T2Tv2_0_chr1:206583354-206589854_2025-11-09_units_combined_numFWD104_numRVS222.npy")

CGs_D35_CRoff_mc0995, CGs_D35_CRoff_mc0995_fracs = load_condition(
    D35_CR / "CG_137_padded_reads_Tcells_CRISPRoff_Day35_postEP_R9minion_threshold_mC0.995_T2Tv2_0_filterMode10_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_mCthresh0.995_T2Tv2_0_chr1:206583354-206589854_2025-11-09_units_combined_numFWD104_numRVS222.npy")

# Note: Day 35 unedited .npy files carry 'CRoff' in their filename — this is a
# naming artefact from the original pipeline; the data corresponds to unedited cells.
CGs_D35_unedited_mc07_lib1,   CGs_D35_unedited_mc07_fracs_lib1   = load_condition(
    D35_UNE1 / "CG_137_padded_reads_day35_CRoff_Tcells_mC0.7_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modeBaseQ10_mCthresh0.7_t2t_v2_0_chr1:206583354-206589854_2025-10-05_units_combined_numFWD86_numRVS115.npy")

CGs_D35_unedited_mc0995_lib1, CGs_D35_unedited_mc0995_fracs_lib1 = load_condition(
    D35_UNE1 / "CG_137_padded_reads_day35_CRoff_Tcells_mC0.995_T2Tv2_NoFullyUnmeth_ovrlap0.9_mismat0.7_mapQ60_modeBaseQ10_mCthresh0.995_t2t_v2_0_chr1:206583354-206589854_2025-10-05_units_combined_numFWD86_numRVS114.npy")
## Section 3 — Compute Delta mC
# All ΔmC vectors in this section are element-wise CRISPRoff − Unedited
# differences of the per-CpG fractions returned by load_condition().

# ── Primary analysis: Day 6, mC threshold 0.995 ────────────────────────────────
dif_D6_mc0995 = CGs_D6_CRoff_mc0995_fracs - CGs_D6_unedited_mc0995_fracs

# ── Day 6, mC threshold 0.7 ────────────────────────────────────────────────────
dif_D6_mc07 = CGs_D6_CRoff_mc07_fracs - CGs_D6_unedited_mc07_fracs

# ── Day 35, mC threshold 0.995 ─────────────────────────────────────────────────
# Uses the single unedited library ("lib1") loaded in Section 2.
dif_D35_mc0995 = CGs_D35_CRoff_mc0995_fracs - CGs_D35_unedited_mc0995_fracs_lib1

# ── Day 28 coverage warning ────────────────────────────────────────────────────
# No Day 28 ΔmC is computed here; the cell only warns when either condition
# has fewer than MIN_COVERAGE reads (rows of the reads matrix).
MIN_COVERAGE = 30
d28_une_n = CGs_D28_unedited_mc0995.shape[0]
d28_cr_n  = CGs_D28_CRoff_mc0995.shape[0]
if d28_une_n < MIN_COVERAGE or d28_cr_n < MIN_COVERAGE:
    print(f"WARNING: Day 28 coverage is very low "
          f"(unedited={d28_une_n} reads, CRoff={d28_cr_n} reads).")
    print("  Day 28 delta-mC values are statistically unreliable and "
          "should NOT be used for quantitative conclusions.")

# ── Normalizations of Day 6 ΔmC (for overlays with ML coefficients) ───────────
# norm01: min-max rescaling to [0, 1]; the denominator is 0 if every ΔmC value
# is identical.
dif_D6_mc0995_norm01 = (
    (dif_D6_mc0995 - dif_D6_mc0995.min())
    / (dif_D6_mc0995.max() - dif_D6_mc0995.min())
)
# max1: sign-preserving scaling so the largest |ΔmC| becomes 1.
dif_D6_mc0995_max1 = dif_D6_mc0995 / np.abs(dif_D6_mc0995).max()

print(f"Day 6 ΔmC (mc0.995): min={dif_D6_mc0995.min():.3f}  max={dif_D6_mc0995.max():.3f}")
print(f"Day 6 ΔmC (mc0.7):   min={dif_D6_mc07.min():.3f}  max={dif_D6_mc07.max():.3f}")
print(f"Day 35 ΔmC (mc0.995):min={dif_D35_mc0995.min():.3f}  max={dif_D35_mc0995.max():.3f}")

In [None]:
def plot_fracs(fracs_dict, title="Methylation Fractions",
               figsize=(16, 5), alpha=0.6, colors=None):
    """Draw overlaid bar charts of per-CpG methylation fractions.

    Every entry of ``fracs_dict`` is drawn on the same axes at the same x
    positions, so the semi-transparent bars of the conditions overlap.

    Parameters
    ----------
    fracs_dict : dict[str -> np.ndarray]
        Maps a legend label to its array of fractions; entries are drawn in
        the dict's iteration order.  The x range is taken from the length of
        the first array.
    title : str
        Axes title.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.
    alpha : float
        Bar opacity.
    colors : list of str, optional
        Bar colours, cycled when there are more conditions than colours.
        A built-in six-colour palette is used when omitted or empty.
    """
    palette = colors or ["deepskyblue", "navy", "tomato", "darkred",
                         "mediumseagreen", "darkgreen"]
    first_series = next(iter(fracs_dict.values()))
    cpg_index = np.arange(first_series.shape[0])

    _, axes = plt.subplots(figsize=figsize)
    for rank, name in enumerate(fracs_dict):
        axes.bar(cpg_index, fracs_dict[name], snap=False, alpha=alpha,
                 label=name, color=palette[rank % len(palette)])

    axes.set_title(title)
    axes.set_xlabel("CpG Index")
    axes.set_ylabel("Methylation Fraction")
    axes.legend()

    plt.tight_layout()
    plt.show()


# Overview figures: three fraction overlays (via plot_fracs) followed by two
# ΔmC bar plots. All x axes are ordinal CpG indices.

# Day 6: CRoff vs Unedited, mc0.995
plot_fracs(
    {"CRISPRoff (mc>0.995)": CGs_D6_CRoff_mc0995_fracs,
     "Unedited  (mc>0.995)": CGs_D6_unedited_mc0995_fracs},
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.995)",
    colors=["deepskyblue", "navy"])

# Day 6: CRoff vs Unedited, mc0.7
plot_fracs(
    {"CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
     "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs},
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
    colors=["skyblue", "royalblue"])

# Day 35: CRoff vs Unedited, mc0.995
plot_fracs(
    {"CRISPRoff Day 35 (mc>0.995)": CGs_D35_CRoff_mc0995_fracs,
     "Unedited  Day 35 (mc>0.995)": CGs_D35_unedited_mc0995_fracs_lib1},
    title="Day 35 — CD55 CpG Methylation Fractions (mC threshold 0.995)",
    colors=["tomato", "darkred"])

# Delta mC: Day 6 (mc0.995) — primary analysis
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc0995)), dif_D6_mc0995,
       snap=False, alpha=0.7, color="turquoise",
       label="ΔmC = CRISPRoff − Unedited (mc>0.995, Day 6)")
# Dashed zero line separates gains (above) from losses (below).
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.995")
ax.legend()
plt.tight_layout()
plt.show()

# Delta mC: Day 6 (mc0.7)
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc07)), dif_D6_mc07,
       snap=False, alpha=0.7, color="steelblue",
       label="ΔmC = CRISPRoff − Unedited (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Day 6: CRoff vs Unedited, mc0.7
# Standalone repeat of the mc0.7 overlay; it is drawn by whichever plot_fracs
# definition was executed most recently in the notebook.
plot_fracs(
    {"CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
     "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs},
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
    colors=["skyblue", "royalblue"])


In [None]:
def plot_fracs(fracs_dict, title="Methylation Fractions",
               figsize=(16, 5), alpha=0.6, colors=None):
    """Overlay methylation-fraction bars per condition, one x tick per CpG.

    Variant of the comparison plot that labels every CpG index on the x
    axis (rotated, small font) instead of relying on automatic ticks.

    Parameters
    ----------
    fracs_dict : dict[str -> np.ndarray]
        {legend label: fractions array}; drawn in iteration order.  The
        number of CpGs is read from the first array.
    title : str
        Axes title.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    alpha : float
        Bar opacity.
    colors : list of str, optional
        Colours cycled over the conditions; defaults to a six-colour palette.
    """
    fallback_palette = ["deepskyblue", "navy", "tomato", "darkred",
                        "mediumseagreen", "darkgreen"]
    palette = colors if colors else fallback_palette

    series = list(fracs_dict.items())
    cpg_ids = np.arange(series[0][1].shape[0]) if series else np.arange(
        next(iter(fracs_dict.values())).shape[0])

    _, axes = plt.subplots(figsize=figsize)
    for slot, (legend_text, heights) in enumerate(series):
        bar_colour = palette[slot % len(palette)]
        axes.bar(cpg_ids, heights, snap=False, alpha=alpha,
                 label=legend_text, color=bar_colour)
    axes.set_xlabel("CpG Index")
    axes.set_ylabel("Methylation Fraction")
    axes.set_title(title)

    # Force a tick (and label) at every CpG index.
    axes.set_xticks(cpg_ids)
    axes.set_xticklabels(cpg_ids, rotation=90, fontsize=8)

    axes.legend()
    plt.tight_layout()
    plt.show()

    


In [None]:

# Day 6: CRoff vs Unedited, mc0.7
# Same call as before; re-run so the figure is drawn by the most recently
# executed plot_fracs definition.
plot_fracs(
    {"CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
     "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs},
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
    colors=["skyblue", "royalblue"])


In [None]:

# Day 6 ΔmC (mc>0.995) with a tick at every CpG index.
# The figure is built inside this cell: the cell previously only adjusted
# whatever global `ax` an earlier cell had left behind, so it changed an
# unrelated, already-shown axes and drew nothing of its own.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc0995)), dif_D6_mc0995,
       snap=False, alpha=0.7, color="turquoise",
       label="ΔmC = CRISPRoff − Unedited (mc>0.995, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.995")

# Set x-axis ticks to show all CpG indices
ax.set_xticks(np.arange(len(dif_D6_mc0995)))
ax.set_xticklabels(np.arange(len(dif_D6_mc0995)), rotation=90, fontsize=8)

ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Display the CpG start offsets (the keys of the surrounding-count dict).
surrounding.keys()

In [None]:
# Surrounding-CpG counts as a plain list, in the dict's (sequence) order.
CG_num_surrounds = list(surrounding.values())
CG_num_surrounds

In [None]:
# Add a small constant (0.01) to every count so it can be used as a divisor
# even for CpGs with no neighbours (count 0).
CG_num_surrounds_001 = [x + 0.01 for x in CG_num_surrounds]
CG_num_surrounds_001

In [None]:
# Day 6 CRISPRoff (mc0.7) fraction of each CpG divided element-wise by
# (its surrounding CpG count + 0.01). The two sequences must have equal length.
CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001 = CGs_D6_CRoff_mc07_fracs / CG_num_surrounds_001
CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001

In [None]:

# Day 6 CRISPRoff (mc>0.7): raw methylation fraction overlaid with the same
# fraction divided by (surrounding CpG count + 0.01). Both series are
# CRISPRoff, so the legend and title name the scaling rather than "Unedited".
plot_fracs(
    {"CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
     "CRISPRoff / (surrounding CpGs + 0.01)  (mc>0.7)": CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001},
    title="Day 6 — CRISPRoff CpG Methylation Fractions, raw vs scaled by surrounding CpG count (mC threshold 0.7)",
    colors=["skyblue", "royalblue"])


In [None]:
# Display the scaled CRISPRoff fractions (fraction / (surrounding CpGs + 0.01)).
CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001

In [None]:

# Day 6 CRISPRoff (mc>0.7): methylation fraction divided by
# (surrounding CpG count + 0.01). This is a single-condition, scaled
# fraction — not a CRISPRoff − Unedited difference — and is labelled as such.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001)), CGs_D6_CRoff_mc07_fracs_by_CG_num_surrounds_001,
       snap=False, alpha=0.7, color="steelblue",
       label="CRISPRoff fraction / (surrounding CpGs + 0.01) (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Methylation Fraction / (surrounding CpGs + 0.01)")
ax.set_title("Day 6 — CRISPRoff Methylation Fraction Scaled by Surrounding CpG Count, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Delta mC: Day 6 (mc0.7)
# Reference plot of the unscaled ΔmC (CRISPRoff − Unedited) for comparison
# with the scaled variants in neighbouring cells.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc07)), dif_D6_mc07,
       snap=False, alpha=0.7, color="steelblue",
       label="ΔmC = CRISPRoff − Unedited (mc>0.7, Day 6)")
# Dashed zero line separates gains from losses.
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Display the divisor list (surrounding CpG count + 0.01 per CpG).
CG_num_surrounds_001

In [None]:
# Shape of the Day 6 CRISPRoff (mc0.7) reads matrix: (n_reads, n_cpgs).
CGs_D6_CRoff_mc07.shape

In [None]:
# The bare expression on the next line has no effect (only the cell's last
# expression is displayed); the NaN replacement happens on the line after it.
CGs_D6_CRoff_mc07 # replace nan with 0
# Copy of the reads matrix with every NaN replaced by 0.0.
CGs_D6_CRoff_mc07_no_nan = np.nan_to_num(CGs_D6_CRoff_mc07, nan=0.0)
CGs_D6_CRoff_mc07_no_nan

In [None]:
# Column sums over reads: one total per CpG (NaNs were already set to 0).
CGs_D6_CRoff_mc07_no_nan_counts =  CGs_D6_CRoff_mc07_no_nan.sum( axis=0)
CGs_D6_CRoff_mc07_no_nan_counts.shape, CGs_D6_CRoff_mc07_no_nan_counts

In [None]:
# Per-CpG read sums divided element-wise by (surrounding CpG count + 0.01).
CGs_D6_CRoff_mc07_by_CG_num_surrounds_001 = CGs_D6_CRoff_mc07_no_nan_counts / CG_num_surrounds_001
CGs_D6_CRoff_mc07_by_CG_num_surrounds_001

In [None]:

# Day 6 CRISPRoff (mc>0.7): per-CpG column sum of the reads matrix divided by
# (surrounding CpG count + 0.01). Single-condition quantity, so the labels
# describe the scaled sum instead of a CRISPRoff − Unedited difference.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_CRoff_mc07_by_CG_num_surrounds_001)), CGs_D6_CRoff_mc07_by_CG_num_surrounds_001,
       snap=False, alpha=0.7, color="steelblue",
       label="CRISPRoff per-CpG read sum / (surrounding CpGs + 0.01) (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Read sum / (surrounding CpGs + 0.01)")
ax.set_title("Day 6 — CRISPRoff Per-CpG Read Sums Scaled by Surrounding CpG Count, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# CG_num_surrounds_001 / max(CG_num_surrounds_001)
# NOTE(review): this divides the whole reads matrix (not the per-CpG sums or
# fractions) by one scalar, the largest surrounding-CpG count. The result is
# not weighted per CpG — confirm this matches the intent of the name.
CGs_D6_CRoff_mc07_by_CG_num_surrounds_norm01 = CGs_D6_CRoff_mc07 / max(CG_num_surrounds)
CGs_D6_CRoff_mc07_by_CG_num_surrounds_norm01

In [None]:
# Surrounding-CpG counts as an array, in the dict's (sequence) order.
# NOTE(review): assumes this order matches the column order of
# CGs_D6_CRoff_mc07_fracs — confirm.
CG_surrounding_counts = np.array(list(surrounding.values()))

# Scale each CRISPRoff fraction down by (surrounding CpG count + 1); the +1
# keeps the divisor non-zero for CpGs without neighbours.
# NOTE: a later cell assigns the same variable name with a different formula.
CGs_D6_CRoff_mc07_fracs_normalized = CGs_D6_CRoff_mc07_fracs / (CG_surrounding_counts + 1)

In [None]:
# Display the counts rescaled so the largest count becomes 1.
(CG_surrounding_counts)/ max(CG_surrounding_counts)

In [None]:
# Display the surrounding-CpG count array.
CG_surrounding_counts

In [None]:
# Largest surrounding-CpG count (the divisor used for the relative weights).
max(CG_surrounding_counts)

In [None]:
# Surrounding-CpG counts as an array, in the dict's (sequence) order.
CG_surrounding_counts = np.array(list(surrounding.values()))

# Weight each CRISPRoff fraction by its relative local CpG density
# (count / max count, a value in [0, 1]). CpGs with no neighbours become 0.
# This replaces any earlier value of the same variable; the divisor is 0 if
# no CpG has a neighbour.
CGs_D6_CRoff_mc07_fracs_normalized = CGs_D6_CRoff_mc07_fracs * (CG_surrounding_counts)/ max(CG_surrounding_counts)


In [None]:

# Day 6 CRISPRoff (mc>0.7): CpG-density-normalized methylation fraction.
# Only one condition is drawn, so the labels describe a normalized fraction
# rather than a CRISPRoff − Unedited difference.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.7, color="steelblue",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized Methylation Fraction")
ax.set_title("Day 6 — CRISPRoff CpG-density-normalized Methylation Fractions, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Day 6 CRISPRoff (mc>0.7): raw per-CpG methylation fraction.
# Only one condition is drawn, so the labels describe a fraction rather than
# a CRISPRoff − Unedited difference.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs)), CGs_D6_CRoff_mc07_fracs,
       snap=False, alpha=0.7, color="steelblue",
       label="CRISPRoff (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Methylation Fraction")
ax.set_title("Day 6 — CRISPRoff CD55 CpG Methylation Fractions, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Display the window half-width (in bases) used for the surrounding-CpG count.
N

In [None]:

# Day 6 CRISPRoff (mc>0.7): raw fraction (blue) overlaid with its
# CpG-density-normalized version (red). Both series are CRISPRoff fractions,
# so nothing here is a CRISPRoff − Unedited difference.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs)), CGs_D6_CRoff_mc07_fracs,
       snap=False, alpha=0.3, color="steelblue",
       label="CRISPRoff, raw (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="red",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Methylation Fraction")
ax.set_title("Day 6 — CRISPRoff Methylation Fractions, raw vs CpG-density-normalized, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Weight the Day 6 *unedited* (mc0.7) fractions by relative local CpG density
# (surrounding count / max count), mirroring the CRISPRoff normalization.
CGs_D6_unedited_mc07_fracs_normalized = CGs_D6_unedited_mc07_fracs * (CG_surrounding_counts)/ max(CG_surrounding_counts)
CGs_D6_unedited_mc07_fracs_normalized

In [None]:

# Day 6 (mc>0.7): CpG-density-normalized methylation fractions, unedited (red)
# overlaid with CRISPRoff (green). These are per-condition fractions, so the
# labels no longer call them "ΔmC" and the unedited series is not tagged
# "CRISPRoff".
fig, ax = plt.subplots(figsize=(16, 5))
# Optional raw unedited series, left disabled:
# ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs)), CGs_D6_unedited_mc07_fracs,
#        snap=False, alpha=0.3, color="steelblue",
#        label="Unedited, raw (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs_normalized)), CGs_D6_unedited_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="red",
       label="Unedited, CpG-density-normalized (mc>0.7, Day 6)")


ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="green",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized Methylation Fraction")
ax.set_title("Day 6 — CpG-density-normalized Methylation Fractions (Unedited vs CRISPRoff), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Day 6 (mc>0.7): CpG-density-normalized methylation fractions, unedited (red)
# overlaid with CRISPRoff (green). Per-condition fractions are plotted, so the
# labels describe fractions, not a ΔmC, and each series names its own condition.
fig, ax = plt.subplots(figsize=(16, 5))
# Optional raw unedited series, left disabled:
# ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs)), CGs_D6_unedited_mc07_fracs,
#        snap=False, alpha=0.3, color="steelblue",
#        label="Unedited, raw (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs_normalized)), CGs_D6_unedited_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="red",
       label="Unedited, CpG-density-normalized (mc>0.7, Day 6)")



ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="green",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized Methylation Fraction")
ax.set_title("Day 6 — CpG-density-normalized Methylation Fractions (Unedited vs CRISPRoff), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Day 6 (mc>0.7): raw methylation fractions, unedited (blue) overlaid with
# CRISPRoff (green). Per-condition fractions are plotted, so the labels
# describe fractions, not a ΔmC, and each series names its own condition.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs)), CGs_D6_unedited_mc07_fracs,
       snap=False, alpha=0.3, color="steelblue",
       label="Unedited (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs)), CGs_D6_CRoff_mc07_fracs,
       snap=False, alpha=0.3, color="green",
       label="CRISPRoff (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Methylation Fraction")
ax.set_title("Day 6 — CD55 CpG Methylation Fractions (Unedited vs CRISPRoff), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Raw Day 6 (mc0.7) difference, Unedited − CRISPRoff.
# NOTE: the sign is the opposite of dif_D6_mc07 (CRISPRoff − Unedited).
dif_CGs_D6_unedited_CRof = CGs_D6_unedited_mc07_fracs - CGs_D6_CRoff_mc07_fracs
dif_CGs_D6_unedited_CRof

In [None]:
# Same Unedited − CRISPRoff difference, computed on the CpG-density-normalized
# fractions.
dif_CGs_D6_unedited_CRof_normalized = CGs_D6_unedited_mc07_fracs_normalized - CGs_D6_CRoff_mc07_fracs_normalized
dif_CGs_D6_unedited_CRof_normalized

In [None]:

# Day 6 (mc>0.7): ΔmC computed as Unedited − CRISPRoff, CpG-density-normalized
# (blue) overlaid with the raw difference (green). The title and legend state
# the actual sign convention of the plotted arrays.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_CGs_D6_unedited_CRof_normalized)), dif_CGs_D6_unedited_CRof_normalized,
       snap=False, alpha=0.3, color="steelblue",
       label="ΔmC = Unedited − CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.bar(np.arange(len(dif_CGs_D6_unedited_CRof)), dif_CGs_D6_unedited_CRof,
       snap=False, alpha=0.3, color="green",
       label="ΔmC = Unedited − CRISPRoff, raw (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (Unedited − CRISPRoff), raw vs normalized, mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# How much the normalization changes each ΔmC value:
# (normalized Unedited − CRISPRoff) minus (raw Unedited − CRISPRoff).
dif_dif_norm_orig = dif_CGs_D6_unedited_CRof_normalized - dif_CGs_D6_unedited_CRof
dif_dif_norm_orig

In [None]:

# Day 6 (mc>0.7): per-CpG change introduced by the CpG-density normalization,
# i.e. normalized ΔmC minus raw ΔmC (both Unedited − CRISPRoff). The labels
# describe this second-order difference instead of a plain ΔmC.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_dif_norm_orig)), dif_dif_norm_orig,
       snap=False, alpha=0.3, color="steelblue",
       label="Normalized ΔmC − raw ΔmC (Unedited − CRISPRoff, mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized ΔmC − raw ΔmC")
ax.set_title("Day 6 — Effect of CpG-density Normalization on ΔmC (Unedited − CRISPRoff), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Display the raw Day 6 CRISPRoff (mc0.7) per-CpG fractions.
CGs_D6_CRoff_mc07_fracs 

In [None]:

# Genomic coordinate of every CpG in the region of interest.
seq_str = ''.join(ref_seq_list)
# Reuse find_cpgs() instead of a second hand-written scan, so these indices
# are exactly the keys of `surrounding`. find_cpgs upper-cases the sequence;
# a case-sensitive scan would skip lower-case "cg" and fall out of step with
# the per-CpG arrays.
CG_pair_idx = find_cpgs(seq_str)
# Offset of each CpG's C from the start of the region -> chromosome coordinate.
# NOTE(review): assumes seq_str[0] corresponds to region_start — confirm
# against get_reference_sequence's coordinate convention.
CG_coordinates = [region_start + idx for idx in CG_pair_idx]

In [None]:
# Display the chromosome coordinate computed for each CpG.
CG_coordinates

In [None]:
def plot_fracs(fracs_dict, title="Methylation Fractions",
               figsize=(16, 5), alpha=0.6, colors=None):
    """Compare conditions by overlaying their per-CpG fraction bars.

    This variant relies on matplotlib's automatic x ticks.

    Parameters
    ----------
    fracs_dict : dict[str -> np.ndarray]
        {legend label: fractions array}, drawn in iteration order; the first
        array determines how many CpG positions are plotted.
    title : str
        Axes title.
    figsize : tuple
        Figure size for ``plt.subplots``.
    alpha : float
        Opacity of the bars (they overlap, one set per condition).
    colors : list of str, optional
        Colours assigned to conditions in order, wrapping around if needed.
        Falls back to a six-colour default when omitted or empty.
    """
    default_palette = ["deepskyblue", "navy", "tomato", "darkred",
                       "mediumseagreen", "darkgreen"]
    chosen = colors or default_palette
    n_sites = next(iter(fracs_dict.values())).shape[0]
    xs = np.arange(n_sites)

    _, axis = plt.subplots(figsize=figsize)
    labels_and_values = enumerate(fracs_dict.items())
    for position, (condition, fractions) in labels_and_values:
        colour = chosen[position % len(chosen)]
        axis.bar(xs, fractions, snap=False, alpha=alpha,
                 label=condition, color=colour)
    axis.set_ylabel("Methylation Fraction")
    axis.set_xlabel("CpG Index")
    axis.set_title(title)
    axis.legend()

    plt.tight_layout()
    plt.show()

In [None]:

# Day 6 (mc>0.7): CpG-density-normalized methylation fractions, unedited (red)
# overlaid with CRISPRoff (green). Per-condition fractions are plotted, so the
# labels describe fractions, not a ΔmC, and each series names its own condition.
fig, ax = plt.subplots(figsize=(16, 5))
# Optional raw unedited series, left disabled:
# ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs)), CGs_D6_unedited_mc07_fracs,
#        snap=False, alpha=0.3, color="steelblue",
#        label="Unedited, raw (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs_normalized)), CGs_D6_unedited_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="red",
       label="Unedited, CpG-density-normalized (mc>0.7, Day 6)")



ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="green",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized Methylation Fraction")
ax.set_title("Day 6 — CpG-density-normalized Methylation Fractions (Unedited vs CRISPRoff), mC threshold 0.7")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:

# Day 6 (mc>0.7): CpG-density-normalized methylation fractions, unedited (red)
# overlaid with CRISPRoff (green), with a tick at every CpG index.
# Per-condition fractions are plotted, so the labels describe fractions rather
# than a ΔmC; the legend is created once, after all artists exist.
fig, ax = plt.subplots(figsize=(16, 5))
# Optional raw unedited series, left disabled:
# ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs)), CGs_D6_unedited_mc07_fracs,
#        snap=False, alpha=0.3, color="steelblue",
#        label="Unedited, raw (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_unedited_mc07_fracs_normalized)), CGs_D6_unedited_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="red",
       label="Unedited, CpG-density-normalized (mc>0.7, Day 6)")

ax.bar(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), CGs_D6_CRoff_mc07_fracs_normalized,
       snap=False, alpha=0.3, color="green",
       label="CRISPRoff, CpG-density-normalized (mc>0.7, Day 6)")

ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("Normalized Methylation Fraction")
ax.set_title("Day 6 — CpG-density-normalized Methylation Fractions (Unedited vs CRISPRoff), mC threshold 0.7")

# One tick per CpG, sized from a series that is actually plotted here rather
# than from an unrelated array that merely happens to have the same length.
ax.set_xticks(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)))
ax.set_xticklabels(np.arange(len(CGs_D6_CRoff_mc07_fracs_normalized)), rotation=90, fontsize=8)

ax.legend()
plt.tight_layout()
plt.show()


In [None]:
# CpG-density-normalized ΔmC with the CRISPRoff − Unedited sign convention,
# built from the Day 6 mc0.7 normalized fractions.
dif_CRof_unedit_norm = CGs_D6_CRoff_mc07_fracs_normalized - CGs_D6_unedited_mc07_fracs_normalized
dif_CRof_unedit_norm

In [None]:
# Day 6 normalized ΔmC (CRISPRoff − Unedited) with a tick at every CpG index.
# dif_CRof_unedit_norm is derived from the mc0.7 normalized fractions, so the
# legend and title quote the 0.7 threshold.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_CRof_unedit_norm)), dif_CRof_unedit_norm,
    snap=False, alpha=0.7, color="turquoise",
    label="ΔmC = CRISPRoff − Unedited, CpG-density-normalized (mc>0.7, Day 6)")
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — NORM Differential Methylation (CRISPRoff − Unedited), mC threshold 0.7")

# Set x-axis ticks to show all CpG indices
ax.set_xticks(np.arange(len(dif_CRof_unedit_norm)))
ax.set_xticklabels(np.arange(len(dif_CRof_unedit_norm)), rotation=90, fontsize=8)

ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Day 6 ΔmC (CRISPRoff − Unedited, mc>0.995) with a tick at every CpG index.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc0995)), dif_D6_mc0995,
    snap=False, alpha=0.7, color="turquoise",
    label="ΔmC = CRISPRoff − Unedited (mc>0.995, Day 6)")
# Dashed zero line separates gains from losses.
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.995")

# Set x-axis ticks to show all CpG indices (labels rotated to fit)
ax.set_xticks(np.arange(len(dif_D6_mc0995)))
ax.set_xticklabels(np.arange(len(dif_D6_mc0995)), rotation=90, fontsize=8)

ax.legend()
plt.tight_layout()
plt.show()

In [None]:
def plot_fracs(fracs_dict, title="Methylation Fractions",
               figsize=(16, 5), alpha=0.6, colors=None):
    """Overlay the per-CpG methylation fractions of several conditions as bars.

    Parameters
    ----------
    fracs_dict : dict[str -> np.ndarray]
        Maps a condition label to its array of fractions. Conditions are drawn
        in insertion order; the first array's length fixes the number of CpGs.
    title : str
        Figure title.
    figsize : tuple
        Figure size handed to ``plt.subplots``.
    alpha : float
        Bar opacity.
    colors : list of str, optional
        Bar colours, reused cyclically when there are more conditions than
        colours. A built-in palette is used when omitted or empty.
    """
    palette = colors or ["deepskyblue", "navy", "tomato", "darkred",
                         "mediumseagreen", "darkgreen"]
    first_fracs = next(iter(fracs_dict.values()))
    cpg_indices = np.arange(first_fracs.shape[0])

    fig, ax = plt.subplots(figsize=figsize)
    for series_no, condition in enumerate(fracs_dict):
        ax.bar(
            cpg_indices,
            fracs_dict[condition],
            color=palette[series_no % len(palette)],
            label=condition,
            alpha=alpha,
            snap=False,
        )
    ax.set(xlabel="CpG Index", ylabel="Methylation Fraction", title=title)

    # One tick per CpG index; labels are rotated so they do not overlap.
    ax.set_xticks(cpg_indices)
    ax.set_xticklabels(cpg_indices, fontsize=8, rotation=90)

    ax.legend()
    plt.tight_layout()
    plt.show()


# Day 6, mc0.7 arrays: CRISPRoff fractions overlaid on the unedited fractions
plot_fracs(
    fracs_dict={
        "CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
        "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs,
    },
    colors=["skyblue", "royalblue"],
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
)

In [None]:
fig, ax = plt.subplots(figsize=(18, 6))

# Bars sit at CG_coordinates rather than at 0..N-1 index positions
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=50,
    color="turquoise",
    edgecolor='none',
    alpha=0.7,
    snap=False,
)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# Bar chart of dif_D6_mc07 against CpG index, with a tick on every index.
fig, ax = plt.subplots(figsize=(16, 5))
ax.bar(np.arange(len(dif_D6_mc07)), dif_D6_mc07,
    snap=False, alpha=0.7, color="turquoise",
    label="ΔmC = CRISPRoff − Unedited (mc>0.7, Day 6)")
# Dashed reference line at ΔmC = 0.
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
ax.set_xlabel("CpG Index")
ax.set_ylabel("ΔmC")
# The title names the same threshold as the plotted array and its legend label.
ax.set_title("Day 6 — Differential Methylation (CRISPRoff − Unedited), mC threshold 0.7")

# Ticks are sized from the plotted array itself so they always match the bars.
ax.set_xticks(np.arange(len(dif_D6_mc07)))
ax.set_xticklabels(np.arange(len(dif_D6_mc07)), rotation=90, fontsize=8)

ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Bare expression so the notebook echoes dif_D6_mc07 below the cell.
dif_D6_mc07

In [None]:
# dif_D6_mc0995 drawn against genomic position (CG_coordinates)
fig, ax = plt.subplots(figsize=(18, 6))

# x positions are genomic coordinates, not CpG indices
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=50,
    color="turquoise",
    edgecolor='none',
    alpha=0.7,
    snap=False,
)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# dif_D6_mc0995 drawn against genomic position (CG_coordinates)
fig, ax = plt.subplots(figsize=(18, 6))

# x positions are genomic coordinates, not CpG indices
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    snap=False,
    alpha=0.7,
    edgecolor='none',
    color="turquoise",
    width=50,
)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linewidth=0.8, linestyle="--", color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# dif_D6_mc0995 at genomic positions, each bar annotated with its CpG index
fig, ax = plt.subplots(figsize=(18, 6))

ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=50,
    color="turquoise",
    edgecolor='none',
    alpha=0.7,
    snap=False,
)

# Index label just beyond the bar tip: above positive bars, below the rest
for idx, (coord, value) in enumerate(zip(CG_coordinates, dif_D6_mc0995)):
    if value > 0:
        ax.text(coord, value + 0.01, str(idx),
                ha='center', va='bottom', fontsize=7, rotation=0)
    else:
        ax.text(coord, value - 0.01, str(idx),
                ha='center', va='top', fontsize=7, rotation=0)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# dif_D6_mc0995 drawn against genomic position, with narrow (width 10) bars
fig, ax = plt.subplots(figsize=(18, 6))

# x positions are genomic coordinates, not CpG indices
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=10,
    color="turquoise",
    edgecolor='none',
    alpha=0.7,
    snap=False,
)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# Wide figure: dif_D6_mc0995 at genomic positions, narrow bars, index labels
fig, ax = plt.subplots(figsize=(22, 6))

# x positions are genomic coordinates, not CpG indices
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=10,
    color="turquoise",
    edgecolor='none',
    alpha=0.7,
    snap=False,
)


# Index label just beyond the bar tip: above positive bars, below the rest
for idx, (coord, value) in enumerate(zip(CG_coordinates, dif_D6_mc0995)):
    if value > 0:
        ax.text(coord, value + 0.01, str(idx),
                ha='center', va='bottom', fontsize=5, rotation=0)
    else:
        ax.text(coord, value - 0.01, str(idx),
                ha='center', va='top', fontsize=5, rotation=0)


# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
import numpy as np

# Zoom plot: CpG units start_idx..end_idx, both ends included
# (the slices below stop at end_idx + 1)
start_idx, end_idx = 16, 130
sel_coords = CG_coordinates[start_idx:end_idx+1]
sel_vals = dif_D6_mc0995[start_idx:end_idx+1]

# Bar width: 0.6 x the (truncated) median gap between neighbouring coordinates,
# never below 10. With a single point there is no gap to measure, so use 50.
if len(sel_coords) <= 1:
    bar_width = 50
else:
    median_spacing = int(np.median(np.diff(sel_coords)))
    bar_width = max(10, int(median_spacing * 0.6))

fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(
    sel_coords,
    sel_vals,
    edgecolor="none",
    alpha=0.85,
    color="turquoise",
    width=bar_width,
)
# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")

ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Zoom: CpG units {start_idx}–{end_idx} ({REGION_CHR}:{sel_coords[0]}-{sel_coords[-1]})",
)

# Index labels continue the global numbering (enumerate starts at start_idx);
# they go above positive bars and below the rest
for idx, (coord, value) in enumerate(zip(sel_coords, sel_vals), start=start_idx):
    if value > 0:
        ax.text(coord, value + 0.01, str(idx),
                ha='center', va='bottom', fontsize=3.5, rotation=0)
    else:
        ax.text(coord, value - 0.01, str(idx),
                ha='center', va='top', fontsize=3.5, rotation=0)

# Plain tick labels, at most 8 intervals, slanted to avoid crowding
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=8))
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
import numpy as np

# Zoom plot: CpG units start_idx..end_idx, both ends included
# (the slices below stop at end_idx + 1)
start_idx, end_idx = 17, 130
sel_coords = CG_coordinates[start_idx:end_idx+1]
sel_vals = dif_D6_mc0995[start_idx:end_idx+1]

# Bar width: 0.6 x the (truncated) median gap between neighbouring coordinates,
# never below 10. With a single point there is no gap to measure, so use 50.
if len(sel_coords) <= 1:
    bar_width = 50
else:
    median_spacing = int(np.median(np.diff(sel_coords)))
    bar_width = max(10, int(median_spacing * 0.6))

fig, ax = plt.subplots(figsize=(16, 4))
ax.bar(
    sel_coords,
    sel_vals,
    edgecolor="none",
    alpha=0.85,
    color="turquoise",
    width=bar_width,
)
# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")

ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Zoom: CpG units {start_idx}–{end_idx} ({REGION_CHR}:{sel_coords[0]}-{sel_coords[-1]})",
)

# Index labels continue the global numbering (enumerate starts at start_idx);
# they go above positive bars and below the rest
for idx, (coord, value) in enumerate(zip(sel_coords, sel_vals), start=start_idx):
    if value > 0:
        ax.text(coord, value + 0.01, str(idx),
                ha='center', va='bottom', fontsize=5, rotation=0)
    else:
        ax.text(coord, value - 0.01, str(idx),
                ha='center', va='top', fontsize=5, rotation=0)

# Plain tick labels, at most 8 intervals, slanted to avoid crowding
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=8))
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# dif_D6_mc0995 at genomic positions, each bar annotated with its CpG index
fig, ax = plt.subplots(figsize=(18, 6))

ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    snap=False,
    alpha=0.7,
    edgecolor='none',
    color="turquoise",
    width=50,
)

# Index label just beyond the bar tip: above positive bars, below the rest
for idx, (coord, value) in enumerate(zip(CG_coordinates, dif_D6_mc0995)):
    if value > 0:
        ax.text(coord, value + 0.01, str(idx),
                ha='center', va='bottom', fontsize=7, rotation=0)
    else:
        ax.text(coord, value - 0.01, str(idx),
                ha='center', va='top', fontsize=7, rotation=0)

# Dashed reference line at ΔmC = 0.
ax.axhline(0, linewidth=0.8, linestyle="--", color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:


# Day 6, mc0.7 arrays: CRISPRoff fractions overlaid on the unedited fractions
plot_fracs(
    fracs_dict={
        "CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
        "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs,
    },
    colors=["skyblue", "royalblue"],
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
)

In [None]:
# Larger figure with wide (width 200) bars: dif_D6_mc0995 at genomic positions
fig, ax = plt.subplots(figsize=(24, 8))
ax.bar(
    CG_coordinates,
    dif_D6_mc0995,
    width=200,
    color="turquoise",
    edgecolor='none',
    alpha=0.85,
    snap=False,
)
# Dashed reference line at ΔmC = 0.
ax.axhline(0, linestyle="--", linewidth=0.8, color="black")
ax.set(
    xlabel="Genomic Coordinate (T2T v2.0)",
    ylabel="ΔmC (CRISPRoff − Unedited)",
    title=f"Day 6 Differential Methylation across {REGION_CHR}:{region_start}-{region_end}",
)

# Plain (non-scientific) tick labels, at most 10 intervals on the x-axis
ax.ticklabel_format(axis='x', style='plain')
ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

plt.tight_layout()
plt.show()

In [None]:
# One row per entry of `surrounding`: running index, genomic coordinate and
# the stored surrounding-CpG count.
# NOTE(review): the rows line up only if `surrounding` iterates in the same
# order as CG_coordinates — confirm where both are built.
cpg_df = pd.DataFrame(
    {
        'CpG_Index': [*range(len(surrounding))],
        'Genomic_Coordinate': CG_coordinates,
        'Surrounding_CpGs_N50': [*surrounding.values()],
    }
)

print(cpg_df)


In [None]:
def plot_fracs_CG_coordinates(fracs_dict, title="Methylation Fractions",
               figsize=(16, 5), alpha=0.6, colors=None,
               coordinates=None, bar_width=10):
    """Bar plot comparing methylation fractions across conditions on a genomic x-axis.

    Each condition's bars are placed at the genomic coordinate of the CpG, so
    the x-axis is labelled and ticked in genomic coordinates. Forcing ticks at
    0..N-1 would stretch the axis from 0 up to the coordinates and make the
    bars invisible.

    Parameters
    ----------
    fracs_dict : dict[str -> np.ndarray]
        {condition_label: fracs_array}  — plotted in insertion order.
    title : str
        Figure title.
    figsize : tuple
        Figure size handed to ``plt.subplots``.
    alpha : float
        Bar opacity.
    colors : list of str, optional
        Bar colours, reused cyclically; a built-in palette is used when
        omitted or empty.
    coordinates : sequence, optional
        x position of each CpG. Defaults to the notebook-level
        ``CG_coordinates``; must have one entry per value in each array.
    bar_width : float
        Bar width in x-axis (coordinate) units. The default of 10 matches the
        other genomic-coordinate plots in this notebook; matplotlib's default
        of 0.8 would be sub-pixel on a multi-kilobase axis.
    """
    # Local import so the function does not depend on a notebook-level `ticker`.
    from matplotlib import ticker

    default_colors = ["deepskyblue", "navy", "tomato", "darkred",
                      "mediumseagreen", "darkgreen"]
    colors = colors or default_colors
    if coordinates is None:
        coordinates = CG_coordinates

    fig, ax = plt.subplots(figsize=figsize)
    for i, (label, fracs) in enumerate(fracs_dict.items()):
        ax.bar(coordinates, fracs, snap=False, alpha=alpha, width=bar_width,
               label=label, color=colors[i % len(colors)])

    ax.set_xlabel("Genomic Coordinate (T2T v2.0)")
    ax.set_ylabel("Methylation Fraction")
    ax.set_title(title)

    # Plain (non-scientific) tick labels and a handful of evenly chosen ticks
    ax.ticklabel_format(style='plain', axis='x')
    ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

    ax.legend()
    plt.tight_layout()
    plt.show()


# Day 6, mc0.7 arrays: CRISPRoff fractions overlaid on the unedited fractions
plot_fracs_CG_coordinates(
    fracs_dict={
        "CRISPRoff (mc>0.7)": CGs_D6_CRoff_mc07_fracs,
        "Unedited  (mc>0.7)": CGs_D6_unedited_mc07_fracs,
    },
    colors=["skyblue", "royalblue"],
    title="Day 6 — CD55 CpG Methylation Fractions (mC threshold 0.7)",
)