# Cardiovar: Variant Effect Prediction on RNA-seq

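This notebook implements a workflow for analyzing the effect of genetic variants on RNA-seq levels, intended to be driven by `alphagenome` predictions. **Note:** the prediction call is not wired in yet — the data-loading step below generates synthetic $\Delta$ RNA-seq values as a stand-in, so the figures illustrate the workflow rather than real predictions.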

## Step 1: Setup & Configuration

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv

# Load environment variables (python-dotenv reads them from a .env file if one
# is found) so the API key never has to be hard-coded in the notebook.
load_dotenv()
API_KEY = os.getenv("ALPHAGENOME_API_KEY")

# Seed NumPy's global RNG so the synthetic demo data generated below (and
# therefore every figure) is reproducible under Restart Kernel -> Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Configure plotting
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

# Report only whether a key was found -- never print the key itself.
print(f"API Key loaded: {bool(API_KEY)}")

## Step 2: Data Loading & Processing

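We define a function that returns the $\Delta$ RNA-seq signal (alternative minus reference) in a window centered on the variant. For now it simulates this signal; the real `alphagenome` call is still a TODO.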

In [None]:
def get_delta_rna_seq(chrom, pos, ref, alt, window_size=100, seed=None):
    """
    Return Delta RNA-seq values in a window around a variant.

    NOTE: this is currently a placeholder. The values are synthetic (a
    Gaussian-shaped bump centered on the variant plus random noise) and do
    not depend on ``chrom``, ``pos``, ``ref`` or ``alt``; those arguments are
    accepted so the signature is ready for the real prediction call.

    Args:
        chrom (str): Chromosome (e.g., 'chr22')
        pos (int): Position
        ref (str): Reference allele
        alt (str): Alternative allele
        window_size (int): Number of positions on each side of the variant
            to analyze; the result covers ``2 * window_size + 1`` positions.
        seed (int | numpy.random.Generator | None): Optional seed (or
            Generator) for a reproducible result. When None, NumPy's global
            random state is used, so ``np.random.seed`` is still honored.

    Returns:
        tuple: (relative_coordinates, delta_values) -- two 1-D arrays of
        equal length; relative coordinate 0 is the variant itself.

    Raises:
        ValueError: If ``window_size`` is negative (this would otherwise
            produce empty arrays that fail later, e.g. in ``np.argmax``).
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    # TODO: Replace with actual alphagenome API call
    # Example: prediction = alphagenome.predict(chrom, pos, ref, alt)

    # --- SYNTHETIC DATA GENERATION FOR DEMO ---
    # With no seed, fall back to the np.random module itself so the draw
    # order and global-state behavior match the unseeded path exactly.
    sampler = np.random if seed is None else np.random.default_rng(seed)

    # Relative coordinates centered at 0 (the variant)
    x = np.arange(-window_size, window_size + 1)

    # A peak near the variant; its sign (increase vs. decrease) is random
    direction = sampler.choice([-1, 1])
    signal = 3.5 * direction * np.exp(-0.02 * (x)**2)

    # Additive Gaussian noise, one draw per position
    noise = sampler.normal(0, 0.3, len(x))

    delta_rna = signal + noise
    # ------------------------------------------

    return x, delta_rna

## Step 3: Visualization Logic

We implement the plotting function to visualize the genomic impact.

In [None]:
def plot_variant_effect(chrom, pos, ref, alt, window_size=100):
    """
    Analyze and plot the effect of a variant.

    Fetches the Delta RNA-seq profile around the variant via
    ``get_delta_rna_seq``, finds the position with the largest absolute
    change, and draws the profile with that position highlighted.

    Args:
        chrom (str): Chromosome (e.g., 'chr22')
        pos (int): Position
        ref (str): Reference allele
        alt (str): Alternative allele
        window_size (int): Number of positions on each side of the variant
            to plot; forwarded to ``get_delta_rna_seq``.

    Returns:
        None. The figure is shown with ``plt.show()``; nothing is returned,
        so a notebook cell ending in this call does not render it twice.
    """
    print(f"Analyzing variant: {chrom}:{pos} {ref}->{alt}...")

    # 1. Get Data
    rel_coords, delta_rna = get_delta_rna_seq(chrom, pos, ref, alt, window_size=window_size)

    # 2. Identify Top Hit (max absolute change; argmax keeps the first on ties)
    top_idx = np.argmax(np.abs(delta_rna))
    top_pos = rel_coords[top_idx]
    top_val = delta_rna[top_idx]

    # 3. Plot, using the explicit Axes interface rather than pyplot's implicit state
    fig, ax = plt.subplots(figsize=(12, 6))

    # Main signal line
    ax.plot(rel_coords, delta_rna, label='$\\Delta$ RNA-seq', color='#2c3e50', linewidth=2)

    # Reference lines: zero effect (horizontal) and the variant itself (vertical)
    ax.axhline(0, color='gray', linestyle='--', alpha=0.5)
    ax.axvline(0, color='#e74c3c', linestyle=':', label='Variant Position (0)')

    # Highlight top hit; zorder keeps the marker above the signal line
    ax.scatter(top_pos, top_val, color='#e74c3c', s=100, zorder=5, label=f'Max Effect ({top_val:.2f})')

    # Annotation
    ax.annotate(f'Pos: {top_pos} bp\nVal: {top_val:.2f}',
                (top_pos, top_val),
                xytext=(15, 15), textcoords='offset points',
                bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="#e74c3c", alpha=0.8),
                arrowprops=dict(arrowstyle='->', connectionstyle="arc3,rad=.2", color='#e74c3c'))

    # Styling. The arrow is written as an escape (U+2192 RIGHTWARDS ARROW) so
    # it cannot be corrupted by a file-encoding round trip.
    ax.set_title(f"Variant Effect Prediction: {chrom}:{pos} {ref}\u2192{alt}", fontsize=14, pad=15)
    ax.set_xlabel("Relative Genomic Coordinate (bp)", fontsize=12)
    ax.set_ylabel("$\\Delta$ RNA-seq Level", fontsize=12)
    ax.legend(loc='upper right')
    fig.tight_layout()

    plt.show()

## Step 4: Test End-to-End

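We run the full pipeline on example variants. Because the underlying data are currently synthetic, the plotted signal does not yet depend on the variant supplied; these cells verify that the workflow runs end to end.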

In [None]:
# End-to-end check with a well-formed variant: chr22, position 36191400, A -> C
plot_variant_effect(chrom="chr22", pos=36191400, ref="A", alt="C")

In [None]:
# Second example (chr1, position 12345678, G -> T), showing how the plotted signal varies between calls
plot_variant_effect(chrom="chr1", pos=12345678, ref="G", alt="T")