In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import fitz  # PyMuPDF

# STEP 1: Convert ECG PDF to high-res image using PyMuPDF
def pdf_to_image_fitz(pdf_path, output_image_path, zoom=4):
    """Render the first page of a PDF to an image file using PyMuPDF.

    Args:
        pdf_path: Path of the PDF document to open.
        output_image_path: Path the rendered page is saved to.
        zoom: Scale factor applied to both axes when rasterising the page;
            larger values give a proportionally larger image.

    Returns:
        ``output_image_path``, so the call can be chained.
    """
    # Open the document as a context manager so the file handle is released
    # even if rendering or saving fails.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(0)  # only the first page is rendered
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        pix.save(output_image_path)
    return output_image_path

# STEP 2: Plot and save grid overlay for visual inspection
def plot_grid_overlay(image_path, y_start=600, y_stop=3080, y_step=20,
                      x_start=0, x_stop=100, x_step=50):
    """Save a copy of the image with labelled pixel-coordinate guide lines.

    The overlay is written as ``grid_overlay_preview.png`` next to
    ``image_path`` and is meant for visually picking crop coordinates.

    Args:
        image_path: Path of the image to annotate.
        y_start, y_stop, y_step: Inclusive range and spacing (in pixels) of
            the horizontal guide lines.
        x_start, x_stop, x_step: Inclusive range and spacing (in pixels) of
            the vertical guide lines.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(figsize=(15, 18))
    ax.imshow(img_rgb)

    # Horizontal guide lines, each labelled with its Y pixel coordinate.
    for y in range(y_start, y_stop + 1, y_step):
        ax.axhline(y, color='orange', linestyle='--', linewidth=0.8)
        ax.text(110, y, f'{y}', color='orange', fontsize=7, verticalalignment='bottom')

    # Vertical guide lines, each labelled with its X pixel coordinate.
    for x in range(x_start, x_stop + 1, x_step):
        ax.axvline(x, color='blue', linestyle='--', linewidth=0.8)
        ax.text(x, 590, f'{x}', color='blue', fontsize=9, rotation=90)

    # Build the title from the actual ranges so it cannot drift from the
    # lines that are drawn.
    ax.set_title(
        f"Grid Overlay: Horizontal Lines (Y={y_start}–{y_stop}), "
        f"Vertical Lines (X={x_start}–{x_stop})"
    )
    grid_path = os.path.join(os.path.dirname(image_path), "grid_overlay_preview.png")
    fig.savefig(grid_path)
    plt.close(fig)
    print(f"Grid overlay saved to: {grid_path}")
# STEP 3: Crop ECG leads using precise vertical ranges (no fixed division)
def crop_12_leads_precise(image_path, output_folder):
    """Crop the twelve ECG lead strips out of a page image and save them.

    Each lead is cut using a hand-picked vertical pixel band and written to
    ``output_folder`` as ``lead_<name>.png``.

    Args:
        image_path: Path of the full-page ECG image.
        output_folder: Directory for the cropped strips; created if missing.

    Returns:
        A list of ``(lead_name, path)`` tuples in the order the leads are
        cropped.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If a lead's crop region lies outside the image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    os.makedirs(output_folder, exist_ok=True)

    # Columns left of this X offset are dropped from every strip.
    x_offset = 93

    # Manually determined Y ranges for each lead (clean signal zones)
    y_ranges = [
        (660, 780), (840, 988), (990, 1120),       # I, II, III
        (1180, 1310), (1380, 1440), (1510, 1610),  # aVR, aVL, aVF
        (1670, 1820), (1890, 2080), (2150, 2350),  # V1, V2, V3
        (2415, 2625), (2690, 2860), (2920, 3060),  # V4, V5, V6
    ]
    lead_names = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF',
                  'V1', 'V2', 'V3', 'V4', 'V5', 'V6']

    lead_paths = []
    for (y1, y2), lead_name in zip(y_ranges, lead_names):
        lead_img = img[y1:y2, x_offset:]
        if lead_img.size == 0:
            # Slicing past the image bounds silently yields an empty array;
            # fail here with a clear message rather than inside cv2.imwrite.
            raise ValueError(
                f"Crop region for lead {lead_name} (rows {y1}-{y2}, "
                f"columns {x_offset}+) is outside the image of shape "
                f"{img.shape[:2]}"
            )
        path = os.path.join(output_folder, f"lead_{lead_name}.png")
        cv2.imwrite(path, lead_img)
        lead_paths.append((lead_name, path))

    return lead_paths

# STEP 4: Estimate pixels per mm using image size and printed scale
def estimate_pixel_scale(image_path, duration_sec=10, mm_per_mv=10, mv_span=2.0,
                         mm_per_sec=25):
    """Estimate horizontal and vertical pixels-per-millimetre for an image.

    The image width is taken to represent ``duration_sec`` seconds of
    recording and the image height ``mv_span`` millivolts.
    NOTE(review): the result is only as good as those two assumptions;
    confirm they hold for the crop being passed in.

    Args:
        image_path: Path of the image to measure.
        duration_sec: Recording duration assumed to span the image width.
        mm_per_mv: Vertical gain in millimetres per millivolt.
        mv_span: Voltage range assumed to span the image height.
        mm_per_sec: Paper speed in millimetres per second.

    Returns:
        ``(pixels_per_mm_x, pixels_per_mm_y)``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the implied physical width or height is not positive.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    h, w = img.shape[:2]

    total_mm_x = duration_sec * mm_per_sec
    total_mm_y = mv_span * mm_per_mv
    if total_mm_x <= 0 or total_mm_y <= 0:
        raise ValueError(
            "duration_sec, mm_per_sec, mv_span and mm_per_mv must give a "
            f"positive extent (got {total_mm_x} mm x {total_mm_y} mm)"
        )

    pixels_per_mm_x = w / total_mm_x
    pixels_per_mm_y = h / total_mm_y
    print(f"Image size: {w}px × {h}px")
    print(f"Pixels/mm → X: {pixels_per_mm_x:.2f}, Y: {pixels_per_mm_y:.2f}")
    return pixels_per_mm_x, pixels_per_mm_y

# STEP 5: Extract ECG waveform (time in s, amplitude in mV)
def extract_signal(image_path, pixels_per_mm_x, pixels_per_mm_y, mm_per_mv=10, mm_per_sec=25):
    """Extract an ECG trace from a lead image as time and amplitude arrays.

    The image is inverted and thresholded so that dark trace pixels become
    white. For every column containing trace pixels, the median row of
    those pixels is taken as the trace position. Rows are measured relative
    to the median trace row (the baseline) and flipped so that upward
    deflections are positive.

    Args:
        image_path: Path of the cropped lead image.
        pixels_per_mm_x: Horizontal scale in pixels per millimetre.
        pixels_per_mm_y: Vertical scale in pixels per millimetre.
        mm_per_mv: Vertical gain in millimetres per millivolt.
        mm_per_sec: Paper speed in millimetres per second.

    Returns:
        ``(time_sec, mv)``: two equal-length NumPy arrays holding the time
        in seconds and amplitude in millivolts of each column that contains
        trace pixels. Both are empty when no trace is found.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Invert first so the dark trace is what survives the threshold.
    _, binary = cv2.threshold(255 - gray, 180, 255, cv2.THRESH_BINARY)

    trace_mask = binary == 255
    pixel_x = np.flatnonzero(trace_mask.any(axis=0))
    if pixel_x.size == 0:
        # No trace pixels at all: return empty arrays instead of taking the
        # median of an empty array.
        return np.array([], dtype=float), np.array([], dtype=float)

    # Median row index of the trace pixels in each populated column.
    signal_y = np.array(
        [np.median(np.flatnonzero(trace_mask[:, x])) for x in pixel_x]
    )

    mm_x = pixel_x / pixels_per_mm_x
    time_sec = mm_x / mm_per_sec

    baseline_px = np.median(signal_y)
    # Image rows grow downwards, so negate to make upward deflections positive.
    mm_y = -(signal_y - baseline_px) / pixels_per_mm_y
    # mm_per_mv is millimetres per millivolt, so millimetres are divided by
    # it to get millivolts.
    mv = mm_y / mm_per_mv
    return time_sec, mv

# STEP 6: Save extracted data and plot
def save_csv_and_plot(time, mv, lead_name, output_folder):
    """Write one lead's samples to a CSV file and save a line plot of them.

    Args:
        time: Sample times, written under the "Time (s)" column.
        mv: Sample amplitudes, paired element-wise with ``time`` and written
            under the "Amplitude (mV)" column.
        lead_name: Lead label used in the output file names and plot title.
        output_folder: Directory for both files; created if missing.

    Returns:
        ``(csv_path, plot_path)`` of the two files written.
    """
    os.makedirs(output_folder, exist_ok=True)
    csv_path = os.path.join(output_folder, f"lead_{lead_name}_data.csv")
    plot_path = os.path.join(output_folder, f"lead_{lead_name}_plot.png")

    # Tabular output: a header row followed by one row per sample.
    with open(csv_path, "w", newline="") as handle:
        rows = csv.writer(handle)
        rows.writerow(["Time (s)", "Amplitude (mV)"])
        for sample in zip(time, mv):
            rows.writerow(sample)

    # Graphical output: a single wide axes with the waveform.
    fig, ax = plt.subplots(figsize=(10, 3))
    ax.plot(time, mv, label=f"Lead {lead_name}")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Amplitude (mV)")
    ax.set_title(f"ECG Lead {lead_name}")
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(plot_path)
    plt.close(fig)

    return csv_path, plot_path

# MAIN PROCESSING FUNCTION
def process_ecg_pdf_precise(pdf_path, output_dir):
    """Run the full PDF-to-waveform pipeline for a 12-lead ECG.

    The first PDF page is rendered to ``ecg_image.png`` in ``output_dir``,
    the leads are cropped into a ``leads`` sub-folder, and each lead's
    extracted signal is written as a CSV and a plot into a ``results``
    sub-folder. The pixel scale is estimated once, from the first cropped
    lead, and reused for every lead.

    Args:
        pdf_path: Path of the ECG PDF.
        output_dir: Root directory for all generated files; created if
            missing.

    Returns:
        A list of ``(lead_name, csv_file, plot_file)`` tuples, one per lead.
    """
    os.makedirs(output_dir, exist_ok=True)

    page_image = os.path.join(output_dir, "ecg_image.png")
    pdf_to_image_fitz(pdf_path, page_image)

    leads_dir = os.path.join(output_dir, "leads")
    results_dir = os.path.join(output_dir, "results")
    cropped = crop_12_leads_precise(page_image, leads_dir)

    # Scale comes from the first lead's crop and is shared by all leads.
    _, first_lead_path = cropped[0]
    scale_x, scale_y = estimate_pixel_scale(first_lead_path)

    def _process_lead(name, path):
        # Extract one lead's waveform and persist it as CSV + plot.
        t, amplitude = extract_signal(path, scale_x, scale_y)
        csv_file, plot_file = save_csv_and_plot(t, amplitude, name, results_dir)
        return name, csv_file, plot_file

    return [_process_lead(name, path) for name, path in cropped]

# EXECUTION BLOCK
# Script entry point: render the PDF, save a grid overlay for inspection,
# run the extraction pipeline, and print a per-lead summary table.
if __name__ == "__main__":
    # Input PDF is given relative to the current working directory; outputs
    # go to a fixed Windows folder.
    pdf_input_path = "20250620-162409-5.pdf"
    output_dir = r"D:\iisc\Project\ECG_2"
    os.makedirs(output_dir, exist_ok=True)
    # Step 1: Convert PDF to image
    # NOTE(review): process_ecg_pdf_precise below renders the same page to
    # the same path again; this first render exists so the grid overlay can
    # be produced before the pipeline runs.
    image_path = os.path.join(output_dir, "ecg_image.png")
    pdf_to_image_fitz(pdf_input_path, image_path)

    # Step 2: Generate and save grid overlay
    plot_grid_overlay(image_path)
    # Step 3: Run the full pipeline and tabulate the files written per lead.
    result = process_ecg_pdf_precise(pdf_input_path, output_dir)
    df_summary = pd.DataFrame(result, columns=["Lead", "CSV File", "Plot File"])
    print(df_summary)


Grid overlay saved to: D:\iisc\Project\ECG_2\grid_overlay_preview.png
Image size: 2171px × 120px
Pixels/mm → X: 8.68, Y: 6.00
   Lead                                         CSV File  \
0     I    D:\iisc\Project\ECG_2\results\lead_I_data.csv   
1    II   D:\iisc\Project\ECG_2\results\lead_II_data.csv   
2   III  D:\iisc\Project\ECG_2\results\lead_III_data.csv   
3   aVR  D:\iisc\Project\ECG_2\results\lead_aVR_data.csv   
4   aVL  D:\iisc\Project\ECG_2\results\lead_aVL_data.csv   
5   aVF  D:\iisc\Project\ECG_2\results\lead_aVF_data.csv   
6    V1   D:\iisc\Project\ECG_2\results\lead_V1_data.csv   
7    V2   D:\iisc\Project\ECG_2\results\lead_V2_data.csv   
8    V3   D:\iisc\Project\ECG_2\results\lead_V3_data.csv   
9    V4   D:\iisc\Project\ECG_2\results\lead_V4_data.csv   
10   V5   D:\iisc\Project\ECG_2\results\lead_V5_data.csv   
11   V6   D:\iisc\Project\ECG_2\results\lead_V6_data.csv   

                                          Plot File  
0     D:\iisc\Project\ECG_2\results\lea