<a href="https://colab.research.google.com/github/QntmSeer/CardioVar/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# # CardioVar: RNA-seq–based Regulatory Variant Analysis for CVD

# This notebook demonstrates how to query AlphaGenome for the predicted RNA-seq impact of a variant in heart tissue, visualize the differential expression, identify top-shift regions, and annotate them with genes.

# Step 0: Fix torch version to avoid Colab conflict
!pip uninstall -y torch
!pip install torch==2.0.1

# Step 1: Install dependencies
!pip install alphagenome numpy==1.24.4 matplotlib seaborn pandas pyensembl
import os
os.kill(os.getpid(), 9)  # restart runtime after installation

# Step 2: Imports and setup
from alphagenome.data import genome
from alphagenome.models import dna_client
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pyensembl import EnsemblRelease

# Step 3: AlphaGenome API + prediction
import getpass
import os

# Never commit a real key to the notebook: read it from the environment and
# fall back to an interactive, non-echoing prompt when it is not set.
API_KEY = os.environ.get("ALPHA_GENOME_API_KEY") or getpass.getpass("AlphaGenome API key: ")
model = dna_client.create(API_KEY)

# Variant under study.
chrom = 'chr22'
position = 36191400
ref = 'A'
alt = 'C'

# Genomic window sent to the model (end - start == 1,048,576 bp).
INTERVAL_START = 35677410
INTERVAL_END = 36725986

# Fail fast, before the remote call, if the variant is outside the window.
if not INTERVAL_START <= position <= INTERVAL_END:
    raise ValueError(
        f"Variant {chrom}:{position} lies outside the requested interval "
        f"{chrom}:{INTERVAL_START}-{INTERVAL_END}"
    )

interval = genome.Interval(chromosome=chrom, start=INTERVAL_START, end=INTERVAL_END)
variant = genome.Variant(chromosome=chrom, position=position, reference_bases=ref, alternate_bases=alt)

# Request RNA-seq predictions for reference and alternate alleles, restricted
# to the ontology term UBERON:0000948.
outputs = model.predict_variant(
    interval=interval,
    variant=variant,
    ontology_terms=['UBERON:0000948'],
    requested_outputs=[dna_client.OutputType.RNA_SEQ]
)
print(outputs)

# Step 4: Alt-minus-ref RNA-seq difference, plotted along the interval
reference_tracks = outputs.reference.rna_seq
ref_vals = reference_tracks.values
alt_vals = outputs.alternate.rna_seq.values
meta = reference_tracks.metadata

diff_vals = alt_vals - ref_vals
# One x value per row of the prediction array (offset relative to the array start).
positions = np.arange(ref_vals.shape[0])
# Collapse axis 1 into a single mean curve (the plot title calls these "assays").
mean_delta = diff_vals.mean(axis=1)

sns.set_style("white")
fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(x=positions, y=mean_delta, errorbar=None, color="steelblue", ax=ax)
# Dashed zero line so up- and down-shifts are easy to tell apart.
ax.axhline(0, color='gray', linestyle='--', linewidth=0.6)
ax.set_title("Average Δ RNA‑seq (Alt – Ref) across assays")
ax.set_xlabel("Relative Position")
ax.set_ylabel("Δ RNA‑seq")
sns.despine()
plt.show()

# Step 5: Rank positions by mean absolute shift and keep the ten largest
assay_titles = meta["Assay title"].tolist()
df = pd.DataFrame(diff_vals, columns=assay_titles)
# Row-wise mean of |delta| over all assay columns, stored next to them.
df["Abs Delta"] = df.abs().mean(axis=1)

pos_rank = (
    df["Abs Delta"]
    .nlargest(10)
    .rename_axis("Position")  # row offset becomes the "Position" column
    .reset_index()
    # Shift the row offset by the interval start to get a genomic coordinate.
    .assign(**{"Genomic Position": lambda top: top["Position"] + interval.start})
)
pos_rank

# Step 6: Annotate genes using pyensembl
data = EnsemblRelease(105)
# Fetch and index the annotation files for this release.
data.download()
data.index()

# pyensembl is queried with the contig name without the 'chr' prefix.
contig = chrom.replace('chr', '')

# NOTE(review): pyensembl loci are presumably 1-based while `interval.start`
# may be 0-based — confirm whether the genomic positions need a +1 shift.
gene_hits = []
for pos in pos_rank["Genomic Position"]:
    genes = data.genes_at_locus(contig=contig, position=int(pos))
    names = [g.gene_name for g in genes]
    gene_hits.append(names)
    # Label with the actual chromosome rather than a hardcoded 'chr22'.
    print(f"[{chrom}:{pos}] ->", names)

# Step 7: Summary report
print("Variant:", chrom, position, ref, "→", alt)
print("\nTop Regions and Gene Hits:")
# Build the report on a copy so `pos_rank` itself is left unmodified, and
# include the gene names the heading promises ("-" when no gene overlaps).
summary = pos_rank[["Genomic Position", "Abs Delta"]].assign(
    Genes=[", ".join(names) or "-" for names in gene_hits]
)
print(summary)