Forked from:
https://www.kaggle.com/code/ambrosm/ecg-original-explained-baseline  
Many thanks for the great base notebook.

In `convert_scanned_color`, I replaced the first and last `n_timesteps / 125` samples with their neighboring values to reduce edge artifacts caused by dark markings near the signal boundaries.

# Explained baseline for the *PhysioNet - Digitization of ECG Images* competition

This notebook contributes two innovations to the competition:
1. It shows how to find the markers in a scanned ECG (object detection in class `MarkerFinder`).
1. It visualizes how to extract the time series from images of types 3 and 11 (top-down plane sweep function `convert_scanned_color()`).

Why did I choose types 3 and 11? For two reasons: First, the scanned images have higher quality than the mobile photos, and they always have the same scale (80 pixels per mV). Second, the color makes it easy to distinguish the black ECG lines from the red gridlines.

## Training data overview

There are 977 electrocardiograms (ids) in train, accounting for 84 GByte. Every electrocardiogram has 9 PNG files and one CSV file.

There are nine image types per ECG:
- 0001 Original color ECG image generated by ECG-image-kit.
- 0003 Image printed in color and scanned in color. → processed by `convert_scanned_color()`
- 0004 Image printed in color and scanned in black and white.
- 0005 Mobile photos of color printed images.
- 0006 Mobile photos of ECGs on the screen of a laptop.
- 0009 Mobile photos of stained and soaked printed ECGs.
- 0010 Mobile photos of printed ECGs with extensive damage.
- 0011 Scans of printed ECG images with mold in color. → processed by `convert_scanned_color()`
- 0012 Scans of printed ECG images with mold in black and white.

The sampling frequencies in train are 250, 256, 500, 512, 1000, 1025 per second.


In [None]:
import pandas as pd
import numpy as np
import cv2
from glob import glob
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm import tqdm

In [None]:
# Competition metric
# From https://www.kaggle.com/code/metric/physionet-ecg-signal-extraction-metric
from typing import Tuple

import numpy as np
import pandas as pd

import scipy.optimize
import scipy.signal


# Lead names that are iterated when the score of one image is computed.
LEADS = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
# Multiplied by the sampling frequency to obtain the largest lag (in samples)
# that align_signals() may apply to a prediction.
MAX_TIME_SHIFT = 0.2
# Upper bound of the per-image SNR; its negative is the lower bound of the final score.
PERFECT_SCORE = 384


class ParticipantVisibleError(Exception):
    """Error raised by the metric when the solution or submission data is invalid."""


def compute_power(label: np.ndarray, prediction: np.ndarray) -> Tuple[float, float]:
    """Return the signal power and the noise power of one lead.

    Non-finite entries of `prediction` (NaN or inf) are treated as 0. The
    caller's `prediction` array is left untouched; the substitution is done
    on a copy.

    Parameters:
    label: 1-dimensional array of true values
    prediction: 1-dimensional array of predicted values

    Return values:
    p_signal: sum of the squared label values
    p_noise: sum of the squared differences between label and cleaned prediction

    Raises:
    ParticipantVisibleError: if an input is not 1-dimensional or if
        `prediction` contains no finite value at all
    """
    if label.ndim != 1 or prediction.ndim != 1:
        raise ParticipantVisibleError('Inputs must be 1-dimensional arrays.')
    finite_mask = np.isfinite(prediction)
    if not np.any(finite_mask):
        raise ParticipantVisibleError("The 'prediction' array contains no finite values (all NaN or inf).")

    # np.where builds a new array, so the argument is not modified in place
    cleaned = np.where(finite_mask, prediction, 0)
    noise = label - cleaned
    p_signal = np.sum(label**2)
    p_noise = np.sum(noise**2)
    return p_signal, p_noise


def compute_snr(signal: float, noise: float) -> float:
    """Return the signal-to-noise ratio, capped at PERFECT_SCORE.

    Zero noise (perfect reconstruction) yields PERFECT_SCORE; zero signal
    with non-zero noise yields 0.
    """
    if noise == 0:
        return PERFECT_SCORE
    if signal == 0:
        return 0
    return min(signal / noise, PERFECT_SCORE)


def align_signals(label: np.ndarray, pred: np.ndarray, max_shift: float = float('inf')) -> np.ndarray:
    """Shift a prediction horizontally and vertically so that it matches the label.

    The horizontal lag is the one with the highest cross-correlation of the
    mean-centered signals among all lags in [-max_shift, max_shift]; ties are
    resolved in favour of the smallest absolute lag. Positions that the
    shifted prediction does not cover are filled with NaN. Afterwards the
    constant offset that minimizes the squared error to the label is removed.

    Parameters:
    label: true values, must all be finite
    pred: predicted values, at least one must be finite
    max_shift: largest absolute lag (in samples) that may be applied

    Return value:
    aligned prediction as a new float64 array

    Raises:
    ParticipantVisibleError: if the label contains a non-finite value or the
        prediction contains no finite value
    """
    if not np.all(np.isfinite(label)):
        raise ParticipantVisibleError('values in label should all be finite')
    if not np.any(np.isfinite(pred)):
        raise ParticipantVisibleError('prediction can not all be infinite')

    reference = np.asarray(label, dtype=np.float64)
    candidate = np.asarray(pred, dtype=np.float64)
    n_ref = np.size(reference)
    n_cand = np.size(candidate)

    # Cross-correlate the mean-centered signals over all possible lags
    xcorr = scipy.signal.correlate(reference - np.mean(reference),
                                   candidate - np.mean(candidate),
                                   mode='full')
    lags = scipy.signal.correlation_lags(n_ref, n_cand, mode='full')

    # Highest correlation among the admissible lags, smallest |lag| on ties
    admissible = (lags >= -max_shift) & (lags <= max_shift)
    peak = np.nanmax(xcorr[admissible])
    peak_positions = np.flatnonzero(xcorr == peak)
    shift = lags[min(peak_positions, key=lambda pos: abs(lags[pos]))]

    # Assemble the shifted prediction: NaN padding, usable slice, NaN padding
    front_pad = np.full(max(shift, 0), np.nan)
    usable = candidate[max(-shift, 0):min(n_ref - shift, n_cand)]
    back_pad = np.full(max(n_ref - n_cand - shift, 0), np.nan)
    aligned_pred = np.concatenate((front_pad, usable, back_pad))

    # Remove the vertical offset, provided that there is any overlap at all
    if np.any(np.isfinite(reference) & np.isfinite(aligned_pred)):
        fit = scipy.optimize.minimize_scalar(
            lambda v_shift: np.nansum((reference - (aligned_pred - v_shift)) ** 2),
            method='Brent')
        aligned_pred -= fit.x
    return aligned_pred


def _calculate_image_score(group: pd.DataFrame) -> float:
    """Compute the SNR of one image from the rows of all its leads.

    The signal and noise powers of the leads in LEADS are summed before the
    ratio is taken.

    Parameters:
    group: rows of one image with the columns 'fs', 'lead', 'value_true'
        and 'value_pred'

    Raises:
    ParticipantVisibleError: if 'fs' is not constant within the group or if
        lead II does not have fs * 10 rows
    """
    fs_values = group['fs'].unique()
    if len(fs_values) != 1:
        raise ParticipantVisibleError('Sampling frequency should be consistent across each ecg')
    fs = fs_values[0]

    n_rows_lead_ii = len(group[group['lead'] == 'II'])
    if fs != int(n_rows_lead_ii / 10):
        raise ParticipantVisibleError('The sequence_length should be sampling frequency * 10s')

    # Largest lag (in samples) by which a prediction may be shifted
    max_lag = int(fs * MAX_TIME_SHIFT)

    total_signal, total_noise = 0, 0
    for lead in LEADS:
        lead_rows = group[group['lead'] == lead]
        truth = lead_rows['value_true'].values
        aligned = align_signals(truth, lead_rows['value_pred'].values, max_lag)
        lead_signal, lead_noise = compute_power(truth, aligned)
        total_signal += lead_signal
        total_noise += lead_noise
    return compute_snr(total_signal, total_noise)


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Compute the competition score for the PhysioNet 2025 ECG digitization competition.

    Every row id has the form '<image_id>_<row_id>_<lead>'. For every image, the
    signal and noise powers of all leads are summed and turned into one
    signal-to-noise ratio. The final score is 10 * log10 of the mean of these
    ratios over all images, bounded below by -PERFECT_SCORE.
    Args:
        solution: DataFrame with ground truth values. Expected columns: the row id column, 'fs' and 'value'.
        submission: DataFrame with predicted values. Expected columns: the row id column and 'value'.
        row_id_column_name: The name of the unique identifier column, typically 'id'.
    Returns:
        The final competition score.
    Raises:
        ParticipantVisibleError: if the row id column is missing or if 'value' contains NaN or infinity.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> row_id_column_name = "id"
    >>> solution = pd.DataFrame({'id': ['343_0_I', '343_1_I', '343_2_I', '343_0_III', '343_1_III','343_2_III','343_0_aVR', '343_1_aVR','343_2_aVR',\
    '343_0_aVL', '343_1_aVL', '343_2_aVL', '343_0_aVF', '343_1_aVF','343_2_aVF','343_0_V1', '343_1_V1', '343_2_V1','343_0_V2', '343_1_V2','343_2_V2',\
    '343_0_V3', '343_1_V3', '343_2_V3','343_0_V4', '343_1_V4', '343_2_V4', '343_0_V5', '343_1_V5','343_2_V5','343_0_V6', '343_1_V6','343_2_V6',\
    '343_0_II', '343_1_II','343_2_II', '343_3_II', '343_4_II', '343_5_II','343_6_II', '343_7_II','343_8_II','343_9_II','343_10_II','343_11_II'],\
    'fs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\
    'value':[0.1,0.3,0.4,0.6,0.6,0.4,0.2,0.3,0.4,0.5,0.2,0.7,0.2,0.3,0.4,0.8,0.6,0.7, 0.2,0.3,-0.1,0.5,0.6,0.7,0.2,0.9,0.4,0.5,0.6,0.7,0.1,0.3,0.4,\
    0.6,0.6,0.4,0.2,0.3,0.4,0.5,0.2,0.7,0.2,0.3,0.4]})
    >>> submission = solution.copy()
    >>> round(score(solution, submission, row_id_column_name), 4)
    25.8433
    >>> submission.loc[0, 'value'] = 0.9 # Introduce some noise
    >>> round(score(solution, submission, row_id_column_name), 4)
    13.6291
    >>> submission.loc[4, 'value'] = 0.3 # Introduce some noise
    >>> round(score(solution, submission, row_id_column_name), 4)
    13.0576

    >>> solution = pd.DataFrame({'id': ['343_0_I', '343_1_I', '343_2_I', '343_0_III', '343_1_III','343_2_III','343_0_aVR', '343_1_aVR','343_2_aVR',\
    '343_0_aVL', '343_1_aVL', '343_2_aVL', '343_0_aVF', '343_1_aVF','343_2_aVF','343_0_V1', '343_1_V1', '343_2_V1','343_0_V2', '343_1_V2','343_2_V2',\
    '343_0_V3', '343_1_V3', '343_2_V3','343_0_V4', '343_1_V4', '343_2_V4', '343_0_V5', '343_1_V5','343_2_V5','343_0_V6', '343_1_V6','343_2_V6',\
    '343_0_II', '343_1_II','343_2_II', '343_3_II', '343_4_II', '343_5_II','343_6_II', '343_7_II','343_8_II','343_9_II','343_10_II','343_11_II'],\
    'fs': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\
    'value':[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]})
    >>> round(score(solution, submission, row_id_column_name), 4)
    -384
    >>> submission = solution.copy()
    >>> round(score(solution, submission, row_id_column_name), 4)
    25.8433

    >>> # test alignment
    >>> label = np.array([0, 1, 2, 1, 0])
    >>> pred = np.array([0, 1, 2, 1, 0])
    >>> aligned = align_signals(label, pred)
    >>> expected_array = np.array([0, 1, 2, 1, 0])
    >>> np.allclose(aligned, expected_array, equal_nan=True)
    True

    >>> # Test 2: Vertical shift (DC offset) should be removed
    >>> label = np.array([0, 1, 2, 1, 0])
    >>> pred = np.array([10, 11, 12, 11, 10])
    >>> aligned = align_signals(label, pred)
    >>> expected_array = np.array([0, 1, 2, 1, 0])
    >>> np.allclose(aligned, expected_array, equal_nan=True)
    True

    >>> # Test 3: Time shift should be corrected
    >>> label = np.array([0, 0, 1, 2, 1, 0., 0.])
    >>> pred = np.array([1, 2, 1, 0, 0, 0, 0])
    >>> aligned = align_signals(label, pred)
    >>> expected_array = np.array([np.nan, np.nan, 1, 2, 1, 0, 0])
    >>> np.allclose(aligned, expected_array, equal_nan=True)
    True

    >>> # Test 4: max_shift constraint prevents optimal alignment
    >>> label = np.array([0, 0, 0, 0, 1, 2, 1]) # Peak is far
    >>> pred = np.array([1, 2, 1, 0, 0, 0, 0])
    >>> aligned = align_signals(label, pred, max_shift=10)
    >>> expected_array = np.array([ np.nan, np.nan, np.nan, np.nan, 1, 2, 1])
    >>> np.allclose(aligned, expected_array, equal_nan=True)
    True

    """
    for df in [solution, submission]:
        if row_id_column_name not in df.columns:
            raise ParticipantVisibleError(f"'{row_id_column_name}' column not found in DataFrame.")
        if df['value'].isna().any():
            raise ParticipantVisibleError('NaN exists in solution/submission')
        if not np.isfinite(df['value']).all():
            raise ParticipantVisibleError('Infinity exists in solution/submission')

    # Keep only the id and the value of the submission. The id column is
    # selected by its configured name rather than by the literal 'id'.
    submission = submission[[row_id_column_name, 'value']]
    merged_df = pd.merge(solution, submission, on=row_id_column_name, suffixes=('_true', '_pred'))

    # Split '<image_id>_<row_id>_<lead>' once and reuse the parts
    id_parts = merged_df[row_id_column_name].str.split('_')
    merged_df['image_id'] = id_parts.str[0]
    merged_df['row_id'] = id_parts.str[1].astype('int64')
    merged_df['lead'] = id_parts.str[2]

    # Sorting by row_id puts the samples of every lead into temporal order
    merged_df.sort_values(by=['image_id', 'row_id', 'lead'], inplace=True)
    image_scores = merged_df.groupby('image_id').apply(_calculate_image_score, include_groups=False)
    return max(float(10 * np.log10(image_scores.mean())), -PERFECT_SCORE)

# Reading the metadata

In [None]:
# Load the competition metadata; later cells read `id` and `fs` from the rows of `train`.
train = pd.read_csv('/kaggle/input/physionet-ecg-image-digitization/train.csv')
test = pd.read_csv('/kaggle/input/physionet-ecg-image-digitization/test.csv')


# The average ECG

For regression tasks, the average of the true labels is often a good baseline. Many public notebooks just submit this baseline with a little added noise for leaderboard scores between 0.14 and 0.18. The score difference compared to a zero submission is tiny.

We compute the mean time series per lead so that we can predict this mean for those image types which our model cannot handle.

In [None]:
def fit_mean_model(train, verbose=False, n_samples=20000):
    """Collect the label time series of all rows of `train`, resampled to a common length.

    No averaging happens here: callers take `mean_dict[lead].mean(axis=0)`
    to obtain the mean curve of a lead.

    Parameters:
    train: dataframe with a column `id`; the labels of every row are read from
        /kaggle/input/physionet-ecg-image-digitization/train/{id}/{id}.csv
    verbose: if True, plot the mean curve of every lead
    n_samples: length to which every time series is resampled by linear
        interpolation (the default 20000 is the previously hard-coded value)

    Return value:
    mean_dict: dict which maps every column name of the label files to an
        array of shape (number of series, n_samples)
    """
    mean_dict = defaultdict(list)
    for ecg_id in tqdm(train['id'], total=len(train)):
        labels = pd.read_csv(f'/kaggle/input/physionet-ecg-image-digitization/train/{ecg_id}/{ecg_id}.csv')
        for lead in labels.columns:
            # Drop the NaN entries of the column, then resample what remains
            values = labels[lead].dropna()
            resampled = np.interp(np.linspace(0, len(values) - 1, n_samples),
                                  np.arange(len(values)),
                                  values)
            mean_dict[lead].append(resampled)

    for lead in mean_dict.keys():
        # Stack all ECGs
        mean_dict[lead] = np.stack(mean_dict[lead])

        # Plot the mean ECG
        if verbose:
            m = mean_dict[lead].mean(axis=0)
            plt.figure(figsize=(12, 2))
            plt.title(f"Mean curve for {lead}")
            plt.plot(m)
            plt.axhline(0, color='gray')
            plt.ylabel('mV')
            plt.show()

    return mean_dict

def validate_mean_model(val, mean_dict):
    """Score the mean curves against the labels of `val` and print the result.

    For every row of `val`, the mean curve of every lead is resampled to the
    length of the label, aligned with align_signals() and turned into one
    signal-to-noise ratio per ECG. The printed validation score is
    10 * log10 of the mean ratio, bounded below by -PERFECT_SCORE.

    Parameters:
    val: dataframe with the columns `id` and `fs`
    mean_dict: output of fit_mean_model()
    """
    # The mean curve of a lead does not depend on the row, so it is computed
    # only once per lead instead of once per row and lead.
    mean_curves = {}

    snr_list = []
    for _, row in tqdm(val.iterrows(), total=len(val)):
        labels = pd.read_csv(f'/kaggle/input/physionet-ecg-image-digitization/train/{row.id}/{row.id}.csv')
        # Evaluate the signal-to-noise ratio
        sum_signal = 0
        sum_noise = 0
        for lead in labels.columns:
            label = labels[lead]
            label = label[~label.isna()]
            if lead not in mean_curves:
                mean_curves[lead] = mean_dict[lead].mean(axis=0)
            mean_curve = mean_curves[lead]

            # Resample the mean curve to the length of this label
            pred = np.interp(np.linspace(0, 1, len(label)), np.linspace(0, 1, len(mean_curve)), mean_curve)
            assert len(label) == len(pred)

            aligned_pred = align_signals(label, pred, int(row.fs * MAX_TIME_SHIFT))
            p_signal, p_noise = compute_power(label, aligned_pred)
            sum_signal += p_signal
            sum_noise += p_noise

        snr_list.append(compute_snr(sum_signal, sum_noise))

    snr = np.array(snr_list).mean()
    val_score = max(float(10 * np.log10(snr)), -PERFECT_SCORE)
    print(f"# Validation SNR for mean prediction: {snr:.2f} {val_score=:.2f}")

# Validate the mean model: fit on the first 780 rows of train, evaluate on the remaining rows
train_test_split_loc = 780
mean_dict = fit_mean_model(train.iloc[:train_test_split_loc], verbose=True)
validate_mean_model(train.iloc[train_test_split_loc:], mean_dict)

# Refit the mean model to the full dataset
mean_dict = fit_mean_model(train, verbose=False)


# Finding the lead endpoints

Before decoding an image, it's good to know the coordinates of the 17 lead endpoints in the ECG. The following cell defines the class `MarkerFinder`, which determines these points. 13 points are found by the pattern matching function `cv2.matchTemplate()`; the right endpoints of the four lines are found as linear combinations of the other vectors.

In [None]:
class MarkerFinder:
    """This class finds the 13 markers in scanned ecg images and guesses the 4 line ends.

    Marker numbering: for the first three lines (i = 0, 1, 2), the markers
    5*i .. 5*i+4 run from left to right; markers 15 and 16 are the left and
    right end of the fourth line. Markers 4, 9, 14 and 16 (the right line
    ends) have no template; find_markers() extrapolates them from the others.
    All points are stored as (row, column).
    """
    # From https://www.kaggle.com/code/ambrosm/ecg-original-explained-baseline

    def __init__(self, show_templates=False):
        """Cut the 13 marker templates out of three type 1 images.

        Parameters:
        show_templates: if True, plot the templates with the marker point highlighted
        """
        # Derive the templates from type 1 images
        # np.max keeps the gridlines and markers and removes the ecg lines
        ima = np.max([
            cv2.imread('/kaggle/input/physionet-ecg-image-digitization/train/4292118763/4292118763-0001.png'),
            cv2.imread('/kaggle/input/physionet-ecg-image-digitization/train/4289880010/4289880010-0001.png'),
            cv2.imread('/kaggle/input/physionet-ecg-image-digitization/train/4284351157/4284351157-0001.png'),
        ], axis=0)

        # Template points in global coordinates of type 1 images
        # (three lines 284 pixels apart with five points 492 pixels apart each,
        # plus the two ends of the fourth line at y = 1535)
        absolute_points = np.zeros((17, 2), dtype=int)
        for i in range(3):
            absolute_points[5 * i] = np.array([707 + 284 * i, 118]) # y, x
            for j in range(1, 5):
                absolute_points[5 * i + j] = np.array([707 + 284 * i, 118 + 492 * j])
        absolute_points[5 * 3] = np.array([1535, 118])
        absolute_points[5 * 3 + 1] = np.array([1535, 118 + 492 * 4])

        # Top left corner of template rectangle
        # (stays None for the points in the rightmost column, which get no template;
        # the points at the left end of a line use a different offset than the others)
        template_positions = [None] * 17
        for i in range(len(absolute_points)):
            if absolute_points[i][1] < 118 + 492 * 4:
                if i % 5 == 0:
                    template_positions[i] = (absolute_points[i][0] - 87, absolute_points[i][1] - 50) # y, x
                else:
                    template_positions[i] = (absolute_points[i][0] - 37, absolute_points[i][1] - 13)

        # Height and width of the templates
        template_sizes = np.array([(105, 60)] * 17) # height, width

        # Convert the points to relative coordinates (inside the template)
        template_points = [np.array([absolute_points[i][0] - template_positions[i][0],
                                     absolute_points[i][1] - template_positions[i][1]])
                           if template_positions[i] is not None
                           else None
                           for i in range(len(absolute_points))]

        # Save the template matrices
        templates = [None] * 17
        for i in range(len(template_positions)):
            if template_points[i] is not None:
                template = (ima[template_positions[i][0]:template_positions[i][0]+template_sizes[i][0],
                            template_positions[i][1]:template_positions[i][1]+template_sizes[i][1]])
                templates[i] = template

        # Plot the template matrices
        if show_templates:
            _, axs = plt.subplots(4, 4, figsize=(5, 7))
            for i in range(len(template_positions)):
                if template_points[i] is not None:
                    # Draw on a copy so that the stored template stays unmarked
                    template = templates[i].copy()
                    cv2.rectangle(template,
                                  (template_points[i][1]-1, template_points[i][0]-1),
                                  (template_points[i][1]+1, template_points[i][0]+1),
                                  [255, 0, 0], 2)
                    axs[i // 5, i % 5].imshow(template)
            # The last three cells of the 4x4 grid hold no template
            for i in range(13, len(axs.ravel())):
                axs.ravel()[i].axis('off')
            plt.tight_layout()
            plt.suptitle('The templates for the 13 markers', y=1.01)
            plt.show()

        self._absolute_points = absolute_points
        self._template_positions = template_positions
        self._template_sizes = template_sizes
        self._template_points = template_points
        self._templates = templates

    def find_markers(self, ima, warn=False, plot=False, title=''):
        """Return 17 markers as list of size-2 integer arrays (row, column)

        Parameters:
        ima: array of shape (1652, width, 3)
        warn: if True, print and plot every template match whose score is below 3e7
        plot: if True, show a copy of the image with a rectangle around every marker
        title: title of the plot

        Raises:
        ValueError: if the image height is not 1652
        """

        if ima.shape[0] != 1652:
            raise ValueError("Implemented only for scanned images (image types 3, 4, 11, 12)")

        markers = [None] * 17

        # Find 13 template-based markers
        for j in range(len(self._templates)):
            if self._template_points[j] is not None:
                # Search window around the template position in the type 1 image:
                # 100 pixels above, below and to the left, more to the right
                t = self._template_positions[j][0]-100
                l = max(self._template_positions[j][1]-100, 0)
                # NOTE(review): the horizontal extent adds _template_sizes[j][0]
                # (the template height) rather than [1] (the width); this only
                # widens the window - confirm whether it is intentional
                search_range = (ima[t:self._template_positions[j][0]+100+self._template_sizes[j][0],
                                l:self._template_positions[j][1]+250+self._template_sizes[j][0]])
                res = cv2.matchTemplate(search_range, self._templates[j], cv2.TM_CCOEFF)
                min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

                # max_loc is (x, y) inside the search window
                top_left = max_loc
                if warn and max_val < 3e7:
                    bottom_right = (top_left[0] + self._templates[j].shape[1], top_left[1] + self._templates[j].shape[0])
                    print(j, top_left, max_val)
                    search_range = search_range.copy()
                    cv2.rectangle(search_range, top_left, bottom_right, 0, 2)
                    plt.imshow(search_range)
                    plt.show()
                # Window offset + match position + point offset inside the template, as (row, column)
                markers[j] = np.array((t + top_left[1] + self._template_points[j][0], l + top_left[0] + self._template_points[j][1]))

        # Guess the ends of the first three lines (can be outside the bounding box of the image)
        # by continuing the step from the third to the fourth marker of the line
        for i in range(3):
            m = markers[5 * i + 3] * 2 - markers[5 * i + 2]
            markers[5 * i + 4] = m

        # Guess the end of the fourth line (can be outside the bounding box of the image)
        # by extrapolating from the ends of the second and third line
        # (284 = line distance, 260 = distance from the third to the fourth line in type 1 images)
        markers[16] = ((markers[14] * (284 + 260) - markers[9] * 260) / 284).astype(int)

        if plot:
            ima = ima.copy()
            for m in markers:
                if m is not None:
                    cv2.rectangle(ima, (m[1]-40, m[0]-40), (m[1]+40, m[0]+40), (255, 0, 0), 2)
            # plt.figure(figsize=(12, 8))
            plt.imshow(ima)
            plt.title(title)
            plt.show()

        return markers

    # def baseline(self, i):
    #     """y coordinate of ith baseline in type 1 images"""
    #     if i not in [0, 1, 2, 3]:
    #         raise ValueError("i must be in [0, 1, 2, 3]")
    #     return self._absolute_points[5 * i][0]

    @staticmethod
    def lead_info(lead):
        """Specify which markers mark the begin and the end of a lead.

        Parameters:
        lead: one of the 12 lead labels or 'II-subset'

        Return values:
        line: index of the line (0 to 3) which contains the lead
        begin: index of the marker at the begin of the lead
        end: index of the marker at the end of the lead

        Raises:
        KeyError: if the lead label is unknown
        """
        begin, end = {
            'I': (0, 1),
            'II-subset': (5, 6),
            'III': (10, 11),
            'aVR': (1, 2),
            'aVL': (6, 7),
            'aVF': (11, 12),
            'V1': (2, 3),
            'V2': (7, 8),
            'V3': (12, 13),
            'V4': (3, 4),
            'V5': (8, 9),
            'V6': (13, 14),
            'II': (15, 16),
        }[lead]
        # Five markers per line, hence begin // 5 is the line index
        return begin // 5, begin, end

    def demo(self, ima, warn=False, title=''):
        """Plot the image with red markers (the markers are not returned)"""
        markers = self.find_markers(ima, warn, plot=True, title=title)

# Global MarkerFinder instance; get_lead_from_top_bottom() and the validation code refer to it as `mf`
mf = MarkerFinder(show_templates=False)

# Demonstrate the marker detection on one scanned image of type 11
ima = cv2.imread('/kaggle/input/physionet-ecg-image-digitization/train/1026034238/1026034238-0011.png') # correct
mf.demo(ima, warn=False, title='Scanned ECG with 17 line endpoints')

# Converting the images

We define the function `convert_scanned_color()`, which converts an image to twelve time series. This function fulfills the main task of the competition, but it works only for images of types 3 and 11. It does not yet generalize to images with a black grid in the background or to mobile photos.

The algorithm sweeps the image from top to bottom. The first black pixels detected during the sweep define the first line. The sweep continues over the white pixels below the line, and the next black pixels define the second line. And so on for the third and fourth lines.

After we have the four lines, we use the markers found by `MarkerFinder` to select the segments which form the 12 leads.

In [None]:
def find_line_by_topdown_sweep(ima):
    """Locate the topmost black line of an image and erase it from the image.

    Parameters:
    ima: 2d boolean image array (False = black, True = white), updated in place

    Return values:
    top: row of the topmost black pixel in every column (0 if the column has no black pixel)
    bottom: row of the topmost white pixel below `top` in every column
        (0 if the column has no white pixel there)
    """
    # Column vector of row numbers; broadcasts against the per-column arrays
    row_numbers = np.arange(ima.shape[0])[:, np.newaxis]

    # Topmost black pixel per column, then blacken everything above it
    top = ima.argmin(axis=0)
    ima &= row_numbers >= top

    # The first white pixel per column now marks the lower edge of the line
    bottom = ima.argmax(axis=0)

    # Whiten everything down to the lower edge, but at least down to
    # 100 rows below the median top (this overpaints the letters)
    erase_limit = np.maximum(bottom, np.median(top) + 100)
    erase_mask = row_numbers < erase_limit
    ima |= erase_mask
    # Extend the erased area by one column to the left and to the right
    ima[:, :-1] |= erase_mask[:, 1:]
    ima[:, 1:] |= erase_mask[:, :-1]

    return top, bottom

def get_lead_from_top_bottom(tops, bottoms, lead, number_of_rows, markers):
    """Cut one lead out of an ECG line, convert it to mV and resample it.

    Parameters:
    tops: list of 4 arrays of shape (image_width, )
    bottoms: list of 4 arrays of shape (image_width, )
    lead: one of the 12 lead labels or 'II-subset' (string)
    number_of_rows: number of samples required (int)
    markers: 17 markers as list of size-2 integer arrays (row, column)

    Return value:
    array of `number_of_rows` values; values with magnitude above 0.9 are set to 0
    """
    # The two markers of the lead delimit the column range
    line, begin_index, end_index = mf.lead_info(lead)
    start, stop = markers[begin_index], markers[end_index]
    columns = slice(start[1], stop[1])

    # Center of the drawn line = mean of its upper and lower edge
    centerline = (tops[line][columns] + bottoms[line][columns]) / 2

    # The baseline runs straight from the begin marker to the end marker
    baseline = np.linspace(start[0], stop[0], stop[1] - start[1])
    if len(centerline) < len(baseline):
        print(f"smaller: {len(centerline)} < {len(baseline)}")
    baseline = baseline[:len(centerline)]  # in case the end marker is outside the image

    # Rows grow downwards, hence baseline minus centerline; 80 pixels = 1 mV
    pred = (baseline - centerline) / 80

    # Repair the samples which can be obscured by the markers
    if lead in ('aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6'):
        # the first four values can be obscured by the marker
        head = pred[:4]
        head[head > 0.2] = pred[4]
    if lead in ('I', 'II-subset', 'III', 'aVR', 'aVL', 'aVF', 'V1', 'V2', 'V3'):
        # the last five values can be obscured by the marker
        tail = pred[-5:]
        tail[tail > 0.2] = pred[-6]
    if lead in ('I', 'II-subset', 'III', 'II'):
        # the first value can be obscured by the marker
        if 0.9 < pred[0] < 1.1 and pred[1] < 0.5:
            pred[0] = pred[1]

    # Resample to the required number of samples
    source_grid = np.linspace(0, 1, len(pred))
    target_grid = np.linspace(0, 1, number_of_rows)
    pred = np.interp(target_grid, source_grid, pred)

    # Replace implausible predictions by zero
    return np.where(np.abs(pred) <= 0.9, pred, 0)

def convert_scanned_color(ima, markers, n_timesteps, verbose=False):
    """Convert a scanned color image (type 3 or 11) to 12 leads.

    The function first extracts the four lines from the image. As
    the four lines have nonnegligible width, we construct two lists:
    - tops = y coordinates of the topmost black pixels in the lines
    - bottoms = y coordinates of the topmost white pixels below the lines
    Either list is a list of 4 arrays of shape (image_width, )

    Parameters:
    ima: 3-channel BGR image with height 1652 and width ≈2200.
    markers: 17 markers as list of size-2 integer arrays (row, column)
    n_timesteps: number of samples required per lead (dict); not modified
    verbose: if True, plot the denoised black-and-white image

    Returns:
    preds: dict with 12 time series
    """
    # Crop the image and convert to black and white
    # We use only the red channel (channel 2) so that the red gridlines disappear
    # False = black, True = white
    # The text at the top of the image is discarded.
    crop_top = 400
    ima = ima[crop_top:, :, 2] > 160

    # Denoise single and double black pixels: a pixel is white if at least
    # 7 of the 9 pixels in its 3x3 neighborhood are white
    # NOTE(review): the result is two pixels smaller than the input in either
    # direction, i.e. shifted by one pixel; this shift is not compensated below.
    iima = ima.astype(np.uint8)
    ima = (iima[:-2, :-2] + iima[:-2, 1:-1] + iima[:-2, 2:]
           + iima[1:-1, :-2] + iima[1:-1, 1:-1] + iima[1:-1, 2:]
           + iima[2:, :-2] + iima[2:, 1:-1] + iima[2:, 2:]) >= 7

    # Plot the denoised black-and-white image
    if verbose:
        plt.figure(figsize=(6, 4))
        plt.imshow(ima)
        plt.title('Denoised black-and-white')
        plt.show()

    # Find the four lines (every sweep erases the line it has found)
    tops, bottoms = [], []
    for _ in range(4):
        top, bottom = find_line_by_topdown_sweep(ima)
        tops.append(top)
        bottoms.append(bottom)

    # Transform to global coordinates
    tops = [t + crop_top for t in tops]
    bottoms = [b + crop_top for b in bottoms]

    # Extract the twelve leads from the four lines
    # (as the first part of II is duplicated, we extract it twice
    # and take the average)
    # The sample counts are copied so that the caller's dict does not
    # receive the auxiliary 'II-subset' entry.
    n_samples = dict(n_timesteps)
    n_samples['II-subset'] = n_samples['I']
    preds = {
        lead: get_lead_from_top_bottom(tops, bottoms, lead, n_samples[lead], markers)
        for lead in LEADS + ['II-subset']
    }

    subset = preds.pop('II-subset')
    preds['II'][:len(subset)] = (preds['II'][:len(subset)] + subset) / 2

    # Reduce edge artifacts: overwrite the first and the last
    # n_samples / 125 samples of every lead with the neighboring value
    for lead, sig in preds.items():
        replace_len = int(n_samples[lead] / 125)
        if 0 < replace_len and replace_len * 2 < len(sig):
            # replace the leading samples
            sig[:replace_len] = sig[replace_len]
            # replace the trailing samples
            sig[-replace_len:] = sig[-replace_len - 1]

    # Apply Einthoven's law
    apply_einthoven(preds)

    return preds

def apply_einthoven(preds):
    """Apply Einthoven's law to improve the predictions.

    Two sums are forced to zero by spreading the residual evenly over the
    three leads involved: I + III - II (on the first len(III) samples of II)
    and aVR + aVL + aVF.

    Parameters:
    preds: dict of time series (float arrays), updated in place
    """
    # Limb leads: I + III should equal the beginning of II
    n_short = len(preds['III'])
    limb_fix = (preds['I'] + preds['III'] - preds['II'][:n_short]) / 3
    for name in ('I', 'III'):
        preds[name] -= limb_fix
    preds['II'][:n_short] += limb_fix

    # Augmented leads: aVR + aVL + aVF should vanish
    augmented_fix = (preds['aVR'] + preds['aVL'] + preds['aVF']) / 3
    for name in ('aVR', 'aVL', 'aVF'):
        preds[name] -= augmented_fix


# Validation

We convert a few training images, plot the output and compute the signal-to-noise ratio. If you look at the diagrams closely, you'll easily get ideas for improvements.

Validation is important: If you only get feedback from five submissions per day, your progress will be much too slow.

In [None]:
def validate_algorithm(train, image_types, convert):
    """Score `convert` on the given training rows and report the result.

    For every row of `train`, the label CSV and all PNG files of that ECG
    are loaded. An image whose type is in `image_types` is converted and
    compared with the labels; every other image is recorded with an SNR
    of 1. Plots are shown only while the first row is being processed.
    Per-image results are printed and saved to '~snr.csv'; the average SNR
    and its value in decibels (bounded below by -PERFECT_SCORE) are printed
    at the end.

    Parameters:
    train: dataframe with one row per ECG; the columns `id` and `fs` are read
    image_types: container of integer image types which are to be converted
    convert: function called as convert(ima, markers, n_timesteps, verbose=...)
             which returns the predicted time series indexed by lead
    """
    snr_list = []
    index_list = []
    is_first_ecg = True  # switches the diagnostic plots on for the first row
    for idx, row in train.iterrows():
        labels = pd.read_csv(f'/kaggle/input/physionet-ecg-image-digitization/train/{row.id}/{row.id}.csv')
        png_paths = sorted(glob(f'/kaggle/input/physionet-ecg-image-digitization/train/{row.id}/{row.id}-*.png'))
        for path in png_paths:
            # The four digits in front of '.png' are the image type
            img_type = int(path[-8:-4])
            # Image shapes which were once asserted here (checks are disabled):
            #   type 1: (1700, 2200, 3), i.e. 200 pixels per inch on Letter paper
            #   types 3, 4, 11, 12: 1652 rows
            #   type 5: three channels
            #   type 6: (4000, 3000, 3) or (3000, 4000, 3)
            #   types 9, 10: (3024, 4032, 3) or (4032, 3024, 3)
            if img_type not in image_types:
                # Images which are not converted enter the average with an SNR of 1
                snr_list.append(1)
                index_list.append([idx, img_type])
                continue

            ima = cv2.imread(path)

            # Find the 17 line endpoints
            markers = mf.find_markers(ima, plot=is_first_ecg, title='Image with 17 markers')

            # Convert the image to 12 leads; the dict is rebuilt for every
            # image so that each call of `convert` receives a fresh one
            n_timesteps = {lead: labels[lead].notna().sum() for lead in LEADS}
            preds = convert(ima, markers, n_timesteps, verbose=is_first_ecg)

            # Accumulate signal and noise power over the leads,
            # plot y_true vs. y_pred for the first ECG
            if is_first_ecg:
                _, axs = plt.subplots(6, 2, figsize=(12, 18))
            sum_signal = 0
            sum_noise = 0
            for i, lead in enumerate(LEADS):
                label = labels[lead].dropna()
                pred = preds[lead]

                aligned_pred = align_signals(label, pred, int(row.fs * MAX_TIME_SHIFT))
                p_signal, p_noise = compute_power(label, aligned_pred)
                sum_signal += p_signal
                sum_noise += p_noise

                if not is_first_ecg:
                    continue
                # Transposing fills the 6x2 grid column by column
                ax = axs.T.ravel()[i]
                ax.set_title(lead)
                ax.plot(label.values, label='y_true')
                ax.plot(pred, label='y_pred')
                ax.set_xlabel('timestep')
                ax.set_ylabel('mV')
                ax.legend()
            if is_first_ecg:
                plt.tight_layout()
                plt.suptitle('y_true vs. y_pred', y=1.01)
                plt.show()

            snr = compute_snr(sum_signal, sum_noise)
            print(f"{idx=:4d} id={row.id:10d} {img_type=:2d} SNR: {snr:5.2f}")
            snr_list.append(snr)
            index_list.append([idx, img_type])

            if is_first_ecg:
                print('\n')
        # All images of the first row have been handled: no more plots
        is_first_ecg = False

    # Average in linear scale first, then convert to decibels
    snr = np.array(snr_list).mean()
    val_score = max(float(10 * np.log10(snr)), -PERFECT_SCORE)
    print(f"# Average SNR: {snr:.2f} {val_score=:.2f}")

    # Keep the per-image results for later inspection
    snr_df = pd.DataFrame(index_list, columns=['idx', 'type'])
    snr_df['snr'] = snr_list
    snr_df.to_csv('~snr.csv', index=False)

# Validate on training rows 100..109, converting only the color scans
# (image types 3 and 11)
validate_algorithm(
    train.iloc[100:110],
    image_types=[3, 11],
    convert=convert_scanned_color,
)


# Convert the test images

We convert all test images of img_types 3 and 11 (the images which were scanned in color). For all other images (i.e., grayscale images and mobile photos), we submit the average training label.

In [None]:
def is_color_image(ima):
    """Test if a 3-channel image has colors.

    The standard deviation across the channel axis (axis 2) is zero for a
    pixel whose channel values are all equal. The image counts as colored
    as soon as the mean of these per-pixel deviations differs from zero.
    """
    channel_spread = np.std(ima, axis=2)
    return np.mean(channel_spread) != 0

In [None]:
# Build the submission with one row per (id, timestep, lead).
# Images scanned in color are converted to time series; for every other
# image, and for files which cannot be read, the mean of mean_dict[lead]
# is predicted after resampling it to the required length.
submission_data = []
old_id = None
leads = None
for idx, row in test.iterrows():
    if row.id != old_id:
        # First row of a new id: load and convert the image once; `preds`
        # is then reused for the following rows with the same id
        path = f"/kaggle/input/physionet-ecg-image-digitization/test/{row.id}.png"
        # path = '/kaggle/input/physionet-ecg-image-digitization/train/1006427285/1006427285-0004.png'
        # path = '/kaggle/input/physionet-ecg-image-digitization/train/1006427285/1006427285-0011.png'
        ima = cv2.imread(path)
        preds = None # default: we cannot interpret the image -> predict the mean

        if ima is None:
            # cv2.imread does not raise for a missing or undecodable file,
            # it returns None; fall back to the mean rather than crash
            print(f"Warning: cannot read {path}, predicting the mean")
        else:
            shape = ima.shape
            good_shape = shape[0] == 1652 # scanned images have 1652 rows

            if good_shape and is_color_image(ima): # the image has red gridlines -> interpret the image
                # Find the 17 line endpoints
                markers = mf.find_markers(ima)

                # Convert the image to 12 time series
                # (lead II gets fs * 10 samples, every other lead a quarter of that)
                n_timesteps = {lead: row.fs * 10 if lead == 'II' else row.fs * 10 // 4 for lead in LEADS}
                preds = convert_scanned_color(ima, markers, n_timesteps, verbose=False)

        old_id = row.id

    # Sanity check: the requested length must match the lengths used above
    if row.lead == 'II':
        assert row.number_of_rows == row.fs * 10
    else:
        assert row.number_of_rows == row.fs * 10 // 4

    if preds is not None:
        pred = preds[row.lead]
    else:
        # Resample the mean signal of this lead to the requested number of rows
        pred = mean_dict[row.lead].mean(axis=0)
        pred = np.interp(np.linspace(0, 1, row.number_of_rows),
                         np.linspace(0, 1, len(pred)),
                         pred)
    assert len(pred) == row.number_of_rows

    # One submission row per timestep, identified by '<id>_<timestep>_<lead>'
    submission_data.extend(
        {
            'id': f"{row.id}_{timestep}_{row.lead}",
            'value': pred[timestep],
        }
        for timestep in range(row.number_of_rows)
    )

submission_df = pd.DataFrame(submission_data)
print(f"Length: {len(submission_df)}")
submission_df.to_csv('submission.csv', index=False)
!head submission.csv