In [None]:
import pandas as pd
# Read the competition's metadata tables: train first, then test.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/physionet-ecg-image-digitization/train.csv",
        "/kaggle/input/physionet-ecg-image-digitization/test.csv",
    )
)

# Report (rows, columns) for both tables, then preview the training table
# as the cell's rich output.
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)
train_df.head()


In [None]:
import cv2
import matplotlib.pyplot as plt
import os

# Preview the first PNG stored for one training record.
sample_id = "735384893"
sample_path = f"/kaggle/input/physionet-ecg-image-digitization/train/{sample_id}"

# Collect the record's PNG files in a stable (sorted) order.
img_files = sorted(f for f in os.listdir(sample_path) if f.endswith(".png"))
print("Found image files:", img_files)

# Load the first image. cv2.imread does not raise on an unreadable file, it
# returns None, so both "no files" and "unreadable file" are checked before
# anything is handed to cvtColor/imshow.
img_path = None
img = None
if img_files:
    img_path = os.path.join(sample_path, img_files[0])
    img = cv2.imread(img_path)

if not img_files:
    print("Error: No image found in the specified directory. Please check the path.")
elif img is None:
    print(f"Error: Could not read image file: {img_path}")
else:
    plt.figure(figsize=(10, 4))
    # OpenCV loads channels as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(f"ECG Image - {img_files[0]}")
    plt.axis("off")
    plt.show()


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

# --- 0. IMAGE LOADING ---
# NOTE: Ensure this path correctly loads the image you are currently testing.
sample_id = "735384893"
sample_path = f"/kaggle/input/physionet-ecg-image-digitization/train/{sample_id}"
img_files = sorted([f for f in os.listdir(sample_path) if f.endswith(".png")])

if img_files:
    img_path = os.path.join(sample_path, img_files[0])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f"Error: Could not read image file: {img_path}")
else:
    print("Error: No image found in the specified directory. Please check the path.")
    img = None

if img is not None:
    # =========================================================================
    # === 1. IMAGE PREPROCESSING (Aggressive Cleaning) ===
    # =========================================================================

    # Convert to grayscale and invert so dark ink becomes bright foreground.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)

    # Fixed binary threshold: inverted pixels above 150 become 255, the rest 0.
    _, thresh = cv2.threshold(inverted, 150, 255, cv2.THRESH_BINARY)

    # 1. Morphological OPENING with a 2x2 element: removes small noise specks.
    kernel_open = np.ones((2, 2), np.uint8)
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel_open)

    # 2. Morphological CLOSING: bridges gaps in the waveform horizontally.
    # A NumPy kernel's shape is (rows, cols), so a horizontal 9-pixel element
    # is (1, 9). The previous (9, 1) kernel was a vertical line and closed
    # gaps along the wrong axis for the stated intent.
    kernel_close = np.ones((1, 9), np.uint8)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel_close)

    # =========================================================================
    # === 2. VERIFICATION PLOT ===
    # =========================================================================
    plt.figure(figsize=(10, 6))
    plt.imshow(cleaned, cmap='gray')
    plt.title('Final Cleaned ECG Image (Ready for Tracing)')
    plt.show()

    print("\nImage cleaning complete. The variable 'cleaned' now holds the binary image.")
else:
    # Define cleaned as None so later cells can test for it instead of
    # hitting a NameError.
    cleaned = None

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import re
from scipy.signal import butter, filtfilt, detrend
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

# === PATHS ===
# Competition inputs.
train_path = "/kaggle/input/physionet-ecg-image-digitization/train"
test_path = "/kaggle/input/physionet-ecg-image-digitization/test"

# Outputs: the final submission file and the directory of per-record CSVs.
output_path = "/kaggle/working/submission.csv"
refined_path = "/kaggle/working/final_digitized_signals"

# Create the CSV directory up front; exist_ok keeps re-running the cell harmless.
os.makedirs(refined_path, exist_ok=True)

# === STANDARD LEADS ===
# The twelve lead labels, in the order used for output columns and row ids.
LEAD_NAMES = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF'] + [f"V{n}" for n in range(1, 7)]

# === CPU FILTER HELPERS ===
def butter_lowpass_filter(data, cutoff=40, fs=500, order=4):
    """Low-pass filter ``data`` with a zero-phase Butterworth filter.

    Parameters
    ----------
    data : array-like
        Samples to filter (passed straight to ``scipy.signal.filtfilt``).
    cutoff : float
        Cutoff frequency, in the same units as ``fs``.
    fs : float
        Sampling rate; the cutoff is normalised by the Nyquist rate ``fs / 2``.
    order : int
        Order of the Butterworth design.

    Returns
    -------
    numpy.ndarray
        The forward-backward filtered samples.
    """
    nyquist = 0.5 * fs
    numerator, denominator = butter(order, cutoff / nyquist, btype='low')
    # filtfilt runs the filter forwards and backwards, cancelling phase delay.
    return filtfilt(numerator, denominator, data)

def refine_signal(signal):
    """Clean a raw 1-D trace and rescale it to roughly the range [0, 1].

    Steps, in order: cast to float32, replace NaN/inf with finite numbers,
    remove the trend with ``scipy.signal.detrend``, low-pass with
    ``butter_lowpass_filter`` (its defaults), then min-max normalise.

    Parameters
    ----------
    signal : array-like
        Raw samples.

    Returns
    -------
    numpy.ndarray
        The normalised samples.
    """
    finite = np.nan_to_num(np.array(signal, dtype=np.float32))
    smoothed = butter_lowpass_filter(detrend(finite))

    lowest = np.min(smoothed)
    # The tiny epsilon keeps a perfectly flat trace from dividing by zero.
    span = np.max(smoothed) - lowest + 1e-9
    return (smoothed - lowest) / span

# === IMAGE TO SIGNAL DIGITIZATION ===
def extract_signals_from_image(image_path):
    """Derive twelve 1-D traces from an ECG image file.

    The image is read as grayscale, inverted, contrast-stretched to 0..255 and
    passed through Canny edge detection. The edge map is cut into 12 equal-height
    horizontal strips (any leftover rows at the bottom are ignored); each strip's
    column-wise mean of edge pixels is then cleaned with ``refine_signal``.

    Parameters
    ----------
    image_path : str
        Path to the image file.

    Returns
    -------
    numpy.ndarray
        Array with one row per strip (12 rows) and one column per image column.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(image_path)

    inverted = cv2.bitwise_not(gray)
    stretched = cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX)
    edges = cv2.Canny(stretched, 50, 150)

    strip_count = 12
    strip_height = edges.shape[0] // strip_count
    strips = (
        edges[k * strip_height:(k + 1) * strip_height, :]
        for k in range(strip_count)
    )
    return np.array([refine_signal(np.mean(strip, axis=0)) for strip in strips])

# === DIGITIZE ALL TRAIN IMAGES ===
# For every record folder under train_path, digitize one PNG and cache the
# 12-lead result as <folder>.csv in refined_path. Folders that already have a
# CSV are skipped, so the cell can be re-run cheaply.
print("‚öôÔ∏è Digitizing and refining all ECG images (CPU)...\n")
failed_folders = []
# os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
for folder in tqdm(sorted(os.listdir(train_path))):
    folder_path = os.path.join(train_path, folder)
    if not os.path.isdir(folder_path):
        continue

    out_csv = os.path.join(refined_path, f"{folder}.csv")
    if os.path.exists(out_csv):
        continue

    # Sort so the same image is chosen on every run; earlier cells also take
    # the first file of the sorted listing.
    png_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".png"))
    if not png_files:
        continue

    img_path = os.path.join(folder_path, png_files[0])
    try:
        signals = extract_signals_from_image(img_path)
        # signals has one row per lead; transpose so each lead is a column.
        df = pd.DataFrame(signals.T, columns=LEAD_NAMES)
        df.to_csv(out_csv, index=False)
    except Exception as e:
        # Best effort: report the failure and keep going with the next record.
        failed_folders.append(folder)
        print(f"‚ùå Error {folder}: {e}")

if failed_folders:
    # Do not claim full success when some records could not be processed.
    print(f"\n{len(failed_folders)} folder(s) could not be digitized: {failed_folders[:10]}")
else:
    print("\n‚úÖ All ECG signals refined and digitized (CPU used)!")
print(f"üìÇ Output: {refined_path}")

# === RMS Helper ===
def rms(signal):
    """Return the root-mean-square of ``signal``, computed in float32.

    Parameters
    ----------
    signal : array-like
        Numeric samples.

    Returns
    -------
    numpy.float32
        Square root of the mean of the squared samples.
    """
    samples = np.array(signal, dtype=np.float32)
    mean_square = np.mean(samples * samples)
    return np.sqrt(mean_square)

# === CREATE SUBMISSION ===
# Emit one row per (test image, lead): the RMS of the matching digitized lead,
# or 0.0 when no digitized CSV matches the image's id.
# NOTE(review): the CSVs in refined_path are produced from train images only;
# no test image is digitized in this cell, so a test id gets a non-zero value
# only if one of its numeric tokens coincides with a train folder id. Confirm
# this is intended.
print("\nüß† Generating submission file...")
test_ids = sorted({
    os.path.splitext(file)[0]
    for _root, _dirs, files in os.walk(test_path)
    for file in files
    if file.endswith(".png")
})

# Index the digitized CSVs once, keyed by each run of digits in the file stem.
# Matching whole tokens (not substrings) stops a short token such as "0001"
# or "3" from matching an unrelated record whose id merely contains it.
csv_by_token = {}
for csv_file in sorted(os.listdir(refined_path)):
    if not csv_file.endswith(".csv"):
        continue
    csv_stem = os.path.splitext(csv_file)[0]
    for token in re.findall(r"\d+", csv_stem):
        # First file (in sorted order) wins if two stems share a token.
        csv_by_token.setdefault(token, os.path.join(refined_path, csv_file))

submission_rows = []
for file_id in tqdm(test_ids):
    num_match = re.findall(r"\d+", file_id)
    matched_csv = next(
        (csv_by_token[num] for num in num_match if num in csv_by_token),
        None,
    )

    if matched_csv is not None and os.path.exists(matched_csv):
        df = pd.read_csv(matched_csv)
        lead_values = [rms(df[lead]) for lead in LEAD_NAMES]
    else:
        # No digitized signal available for this id: fall back to zeros.
        lead_values = [0.0] * len(LEAD_NAMES)

    submission_rows.extend(
        {"id": f"{file_id}_{i}_{lead}", "value": value}
        for i, (lead, value) in enumerate(zip(LEAD_NAMES, lead_values))
    )

# Explicit columns keep the header present even when there are no rows.
submission = pd.DataFrame(submission_rows, columns=["id", "value"])
submission.to_csv(output_path, index=False)

print("\n‚úÖ submission.csv created successfully in full Kaggle format!")
print(f"üìÇ Saved to: {output_path}")
print(submission.head(12))
