<div style="border: 2px solid black; border-radius: 10px; padding: 15px; text-align: left; font-family: Arial, sans-serif; width: 80%; max-width: 700px; margin: auto;">
  <h1>📊 ECG Image to Time-Series Digitization</h1>
  
  <h4>Introduction</h4>
  <ul>
    <li>This notebook is designed for the <strong>PhysioNet ECG Digitization Competition</strong>.</li>
    <li>We aim to extract 12-lead ECG time series from ECG images (scans, photos, or printed outputs) using computer vision and deep learning models.</li>
    <li>The extracted time-series can then be used for cardiovascular diagnosis and AI-based analysis.</li>
    <li>We will preprocess ECG images, extract lead signals, build a CNN-based regression model, train it, and create predictions in the required submission format.</li>
  </ul>

  <h4>Key Features</h4>
  <ul>
    <li>Preprocessing: Grayscale conversion, contrast enhancement, grid removal, and binarization.</li>
    <li>Lead Extraction: Extraction of individual 12-lead ECG signals from images.</li>
    <li>Modeling: CNN-based regression to predict time-series from ECG images.</li>
    <li>Evaluation: Predictions aligned and smoothed to match ground-truth signals, ready for submission.</li>
  </ul>

  <h4>Reference Notebook – EDA</h4>
  <ul>
    <li>For exploratory data analysis and insights about the ECG dataset, check out my <a href="https://www.kaggle.com/code/abdullah0a/eda-ecg-image-digitization" target="_blank">EDA notebook</a>. This notebook helps understand the data distribution, missing values, and lead signal patterns before modeling.</li>
  </ul>
</div>


In [None]:
import os, gc
import cv2
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.amp import autocast, GradScaler
from scipy.signal import butter, filtfilt, resample
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm.notebook import tqdm

In [None]:
# Config

class Config:
    """Hyper-parameters and runtime settings read by the cells below."""

    # --- optimisation ---
    lr = 1e-4
    epochs = 3
    batch_size = 8
    num_workers = 2

    # --- data ---
    # ECGDataset passes this tuple to cv2.resize as dsize and builds its blank
    # fallback image as (img_size[1], img_size[0], 3), i.e. it is used there
    # as (width, height).
    img_size = (1240, 1024)
    target_length = 5000  # For training lead II

    # --- hardware ---
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


config = Config()

# Names of the twelve leads, in the order used throughout this notebook.
LEADS = "I II III aVR aVL aVF V1 V2 V3 V4 V5 V6".split()


In [None]:
# ECG Image Processor

class ECGImageProcessor:
    """Classical image-processing helpers that pull a rough trace out of an ECG image.

    The lead layout is hard-coded as fractions of the image size, so the
    extracted signal is only an approximation of the trace inside a fixed
    rectangular region per lead.
    """

    def __init__(self):
        # Approximate lead positions as (x, y) fractions of image width/height.
        self.lead_positions = {
            'I': (0.1, 0.15), 'II': (0.1, 0.3), 'III': (0.1, 0.45),
            'aVR': (0.1, 0.6), 'aVL': (0.1, 0.75), 'aVF': (0.1, 0.9),
            'V1': (0.55, 0.15), 'V2': (0.55, 0.3), 'V3': (0.55, 0.45),
            'V4': (0.55, 0.6), 'V5': (0.55, 0.75), 'V6': (0.55, 0.9)
        }

    def preprocess_image(self, image):
        """Convert to grayscale, enhance contrast, remove grid, binarize.

        Args:
            image: 2-D grayscale array or 3-D RGB array (uint8).

        Returns:
            2-D uint8 array produced by Otsu thresholding (values 0 or 255).
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.ndim == 3 else image
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)
        # Small morphological opening applied before thresholding.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        opened = cv2.morphologyEx(enhanced, cv2.MORPH_OPEN, kernel)
        _, binary = cv2.threshold(opened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    def extract_lead_signal(self, image, lead_name, fs=500, target_length=None):
        """Extract an approximate, z-normalised lead signal from a fixed ROI.

        For every column of the ROI the mean row index of pixels darker than
        128 is taken as the trace position; columns without dark pixels fall
        back to the ROI's vertical centre.

        Args:
            image: Image array; only its first two dimensions are used for sizing.
            lead_name: Key into ``self.lead_positions``.
            fs: Unused; kept so existing callers that pass it keep working.
            target_length: If truthy, the signal is resampled to this many samples.

        Returns:
            1-D float32 array. When the ROI is empty, zeros of length
            ``target_length`` (or an empty array if no length was requested).

        Raises:
            KeyError: If ``lead_name`` is not a known lead.
        """
        h, w = image.shape[:2]
        x_ratio, y_ratio = self.lead_positions[lead_name]
        lead_x = int(w * x_ratio)
        lead_y = int(h * y_ratio)

        # ROI is 40% of the width and 8% of the height, centred on the lead
        # position and clipped at the top/left image border.
        roi_width = int(w * 0.4)
        roi_height = int(h * 0.08)
        roi_x = max(0, lead_x - roi_width // 2)
        roi_y = max(0, lead_y - roi_height // 2)
        roi = image[roi_y:roi_y + roi_height, roi_x:roi_x + roi_width]
        if roi.size == 0:
            # `target_length` defaults to None, which np.zeros rejects; fall
            # back to an empty signal in that case instead of raising.
            return np.zeros(target_length or 0, dtype=np.float32)

        centre_row = roi.shape[0] / 2
        signal_y = []
        for col in range(roi.shape[1]):
            dark_pixels = np.where(roi[:, col] < 128)[0]
            signal_y.append(np.mean(dark_pixels) if len(dark_pixels) > 0 else centre_row)

        # Image rows grow downwards, so invert to make "up" positive.
        ecg_signal = roi_height - np.array(signal_y)
        ecg_signal = (ecg_signal - ecg_signal.mean()) / (ecg_signal.std() + 1e-8)
        if target_length:
            ecg_signal = resample(ecg_signal, target_length)
        return ecg_signal.astype(np.float32)


In [None]:
#Dataset
class ECGDataset(Dataset):
    """Dataset yielding ECG images and, in training mode, lead II signals.

    Training items are ``(img_tensor, extracted_signal, target_signal, base_id)``;
    test items are ``(img_tensor, base_id)``.

    Args:
        df: DataFrame with an ``id`` column; one item is produced per row.
        image_dir: Root directory holding the images (and target CSVs).
        transform: Optional albumentations-style callable invoked as
            ``transform(image=...)['image']``.
        is_train: Selects the training file layout and whether targets are returned.
    """

    def __init__(self, df, image_dir, transform=None, is_train=True):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        self.is_train = is_train
        self.processor = ECGImageProcessor()

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image (and, in training mode, the lead II target) for row ``idx``."""
        row = self.df.iloc[idx]
        base_id = str(row['id'])
        # Try multiple images if missing
        img = None
        img_types = ['0001', '0003', '0004', '0005'] if self.is_train else ['']
        for t in img_types:
            # Train: <image_dir>/<id>/<id>-<type>.png ; test: <image_dir>/<id>.png
            path = os.path.join(self.image_dir, base_id, f"{base_id}-{t}.png") if t else os.path.join(self.image_dir, f"{base_id}.png")
            if os.path.exists(path):
                img = cv2.imread(path)
                if img is not None:
                    break
        if img is None:
            # Fall back to a blank white canvas so the batch can still be built.
            img = np.ones((config.img_size[1], config.img_size[0], 3), dtype=np.uint8) * 255
            print(f"Missing image for {base_id}")

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img_tensor = self.transform(image=img_rgb)['image']
        else:
            img_tensor = torch.from_numpy(cv2.resize(img_rgb, config.img_size).transpose(2, 0, 1)).float() / 255.0

        if not self.is_train:
            return img_tensor, base_id

        # Load CSV target
        csv_path = os.path.join(self.image_dir, base_id, f"{base_id}.csv")
        try:
            sig_df = pd.read_csv(csv_path)
            target_signal = sig_df['II'].values.astype(np.float32)
            # Crop or zero-pad to exactly config.target_length samples.
            if len(target_signal) > config.target_length:
                target_signal = target_signal[:config.target_length]
            else:
                target_signal = np.pad(target_signal, (0, config.target_length - len(target_signal)), 'constant')
            if target_signal.std() > 0:
                target_signal = (target_signal - target_signal.mean()) / target_signal.std()
        except Exception as exc:
            # Best-effort: keep the sample with an all-zero target, but say why
            # instead of hiding the failure. `Exception` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print(f"Could not load target for {base_id} ({type(exc).__name__}: {exc}); using zeros")
            target_signal = np.zeros(config.target_length, dtype=np.float32)

        # Extract input feature from image (computed on the un-augmented image).
        extracted_signal = self.processor.extract_lead_signal(
            self.processor.preprocess_image(img_rgb), 'II', target_length=config.target_length
        )
        return img_tensor, torch.FloatTensor(extracted_signal), torch.FloatTensor(target_signal), base_id


In [None]:
#Model

class ECGNet(nn.Module):
    """CNN encoder followed by an MLP that regresses ``config.target_length`` samples."""

    # (in_channels, out_channels, followed_by_maxpool) for each conv unit.
    _CONV_PLAN = (
        (3, 64, False),
        (64, 64, True),
        (64, 128, False),
        (128, 128, True),
        (128, 256, False),
        (256, 256, False),
    )

    def __init__(self):
        super().__init__()

        # Build Conv -> BatchNorm -> ReLU units in order, with a 2x2 max-pool
        # after the units flagged in the plan.
        stages = []
        for c_in, c_out, pooled in self._CONV_PLAN:
            stages.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            stages.append(nn.BatchNorm2d(c_out))
            stages.append(nn.ReLU())
            if pooled:
                stages.append(nn.MaxPool2d(2))
        # Adaptive pooling fixes the feature map at 4x4 for any input size.
        stages.append(nn.AdaptiveAvgPool2d((4, 4)))
        self.cnn = nn.Sequential(*stages)

        head = [
            nn.Linear(256 * 4 * 4, 1024), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(512, config.target_length),
        ]
        self.regressor = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of images to a batch of 1-D signals."""
        feats = self.cnn(x)
        flat = feats.view(feats.size(0), -1)
        return self.regressor(flat)


In [None]:
#Loss

class ECGLoss(nn.Module):
    """Negative mean log-SNR plus a second-difference penalty on the prediction.

    Args:
        eps: Small constant added to the noise power and inside the logarithm.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, pred, target):
        """Return a scalar loss for ``pred`` and ``target`` (summed along dim 1)."""
        residual = target - pred
        power_signal = target.pow(2).sum(dim=1)
        power_noise = residual.pow(2).sum(dim=1)
        ratio = power_signal / (power_noise + self.eps)
        log_ratio = torch.log(ratio + self.eps)

        # Smoothness regularizer: squared discrete second difference along dim 1.
        left, centre, right = pred[:, :-2], pred[:, 1:-1], pred[:, 2:]
        curvature = (right - 2 * centre + left) ** 2

        return 0.1 * curvature.mean() - log_ratio.mean()



In [None]:
#Training
train_df = pd.read_csv('/kaggle/input/physionet-ecg-image-digitization/train.csv').head(500)  # small for demo

# NOTE(review): A.Resize takes (height, width), whereas ECGDataset uses
# config.img_size as (width, height) for cv2.resize. Left unchanged so this
# matches the test-time transform; confirm the intended orientation and fix
# both transforms together.
#
# No HorizontalFlip here: mirroring the image reverses the trace left-to-right
# while the lead II target read from the CSV is not reversed, so flipped
# samples would be trained against the wrong signal.
train_transform = A.Compose([
    A.Resize(*config.img_size),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=5, p=0.3),
    A.GridDistortion(p=0.1),
    A.GaussNoise(p=0.2),
    A.RandomBrightnessContrast(p=0.3),
    A.MotionBlur(p=0.1),
    A.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ToTensorV2()
])
train_dataset = ECGDataset(train_df, '/kaggle/input/physionet-ecg-image-digitization/train', transform=train_transform, is_train=True)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)

model = ECGNet()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.to(config.device)

criterion = ECGLoss()
optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)

# Track the lowest mean training loss and checkpoint whenever it improves.
best_loss = float('inf')
for epoch in range(config.epochs):
    model.train()
    running_loss = 0
    # The dataset also yields the classically extracted signal and the id;
    # only the image and the CSV target are used for training.
    for images, _extracted, targets, _ in tqdm(train_loader):
        images, targets = images.to(config.device), targets.to(config.device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss/len(train_loader)
    scheduler.step(epoch_loss)
    print(f"Epoch {epoch+1}/{config.epochs}, Loss={epoch_loss:.5f}")
    if epoch_loss<best_loss:
        best_loss=epoch_loss
        # Saved from the (possibly DataParallel-wrapped) model, so the keys
        # match what the same `model` object expects when reloading.
        torch.save(model.state_dict(),'best_ecg_model.pth')


In [None]:
#Test & Submission

def smooth_ecg(x, fs=500, lowcut=0.5, highcut=40):
    """Zero-phase band-pass filter a 1-D signal with a 2nd-order Butterworth design.

    Args:
        x: 1-D array-like signal.
        fs: Sampling frequency in Hz.
        lowcut: Lower band edge in Hz.
        highcut: Upper band edge in Hz; capped just below the Nyquist
            frequency when ``fs`` is too low for the requested value.

    Returns:
        Filtered signal as a float array with the same length as ``x``. The
        input is returned unfiltered (as a float copy) when it is too short
        for ``filtfilt``'s default edge padding or when the band is empty.
    """
    x = np.asarray(x, dtype=float)
    nyq = 0.5*fs
    low = lowcut/nyq
    # butter requires normalised edges strictly inside (0, 1).
    high = min(highcut/nyq, 0.99)
    if not 0 < low < high:
        return x.copy()
    b,a = butter(2,[low, high],btype='band')
    # filtfilt pads each end with 3 * max(len(a), len(b)) samples by default
    # and raises ValueError unless the input is longer than that.
    if x.shape[-1] <= 3*max(len(a), len(b)):
        return x.copy()
    return filtfilt(b,a,x)

test_df = pd.read_csv('/kaggle/input/physionet-ecg-image-digitization/test.csv')
# NOTE(review): A.Resize takes (height, width), whereas ECGDataset uses
# config.img_size as (width, height) for cv2.resize. Kept identical to the
# training transform; confirm the intended orientation and fix both together.
test_transform = A.Compose([A.Resize(*config.img_size), A.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]), ToTensorV2()])
test_dataset = ECGDataset(test_df, '/kaggle/input/physionet-ecg-image-digitization/test', transform=test_transform, is_train=False)
# batch_size=1 and shuffle=False: the i-th batch corresponds to test_df.iloc[i].
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model.load_state_dict(torch.load('best_ecg_model.pth', map_location=config.device))
model.eval()

submission_data = []
# Consecutive rows that share an image id reuse one forward pass.
cached_id, cached_pred = None, None
for row_idx, (images, base_ids) in enumerate(tqdm(test_loader)):
    base_id = base_ids[0]
    # Use the row this batch was built from. Looking the row up by id and
    # taking the first match would return the same lead / fs / number_of_rows
    # for every row that shares an id.
    row = test_df.iloc[row_idx]

    if base_id != cached_id:
        with torch.no_grad():
            cached_pred = model(images.to(config.device)).cpu().numpy().flatten()
        cached_id = base_id
    pred = cached_pred

    # Crop or edge-pad the fixed-length prediction to the requested length.
    num_rows = int(row['number_of_rows'])
    if len(pred) > num_rows:
        pred = pred[:num_rows]
    elif len(pred) < num_rows:
        pred = np.pad(pred, (0,num_rows-len(pred)),'edge')
    pred = smooth_ecg(pred, fs=row['fs'])

    submission_data.extend(
        {'id':f"{base_id}_{i}_{row['lead']}", 'value':float(pred[i])}
        for i in range(num_rows)
    )

submission_df = pd.DataFrame(submission_data)
submission_df.to_csv('submission.csv', index=False)
submission_df.head()

<div style="border: 2px solid #FFA500; border-radius: 10px; padding: 10px; background-color: #FFF5E6; text-align: center; font-family: Arial, sans-serif; width: 80%; max-width: 600px; margin: auto;">
  <h3 style="color: #FFA500;">👍 <strong>Enjoyed this guide?</strong></h3>
  <p style="color: #333333;">If you found this guide helpful, please consider giving it an upvote! Your support helps us continue to create valuable content and improve our resources.</p>
  <p style="font-size: 16px; color: #FF8C00;">Thank you! 😊</p>
</div>
