In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import math
import random
import matplotlib.pyplot as plt
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


import os
# Peek at the Kaggle input tree: print the files of the first five
# directories that os.walk visits, then stop.
for dir_count, (dirname, _, filenames) in enumerate(os.walk('/kaggle/input'), start=1):
    if dir_count > 5:
        break
    for filename in filenames:
        print(os.path.join(dirname, filename))


Table of contents:

1. Read and check train.csv
2. Read and display example signal
3. Display example images 
4. Preprocess images (Remove background, image rotation ... TBC)

In [None]:
# Paths to data
# Folder of the training set; later cells list its sub-folders and join it
# with a record id to locate that record's files.
TRAIN_DIR = "/kaggle/input/physionet-ecg-image-digitization/train/"
# CSV with the training metadata (read with pandas; its 'id' column is used below).
TRAIN_META = "/kaggle/input/physionet-ecg-image-digitization/train.csv"

# Read and check train.csv

In [None]:
# Read train.csv
# Load the metadata table, print its (rows, columns) shape and show the
# first rows as the cell output.
train_meta = pd.read_csv(TRAIN_META)
print("Train metadata shape:", train_meta.shape)
train_meta.head()


In [None]:
# Summary of the metadata table: column dtypes and non-null counts.
train_meta.info() # author's note on the output: non-null values, integer columns

# Read and display example signal

In [None]:
# Take the first record id from the metadata and load its signal CSV,
# which is stored as <TRAIN_DIR>/<id>/<id>.csv.
sample_id = train_meta['id'].iloc[0]
signal_path = os.path.join(TRAIN_DIR, str(sample_id), f"{sample_id}.csv")
signal = pd.read_csv(signal_path)
print("Signal shape:", signal.shape)
signal.head()


In [None]:
# Overlay leads I, II and III of the example record on a single axes.
fig, ax = plt.subplots(figsize=(12, 6))
for lead_name in ('I', 'II', 'III'):
    ax.plot(signal[lead_name].values, label=lead_name)
ax.set_title(f"ECG Leads for {sample_id}")
ax.set_xlabel("Sample index")
ax.set_ylabel("Amplitude (mV)")
ax.legend()
plt.show()


# Display example images

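`folder_files_list` - a list of image file paths.
For each entry the display function uses:
file name (shown as the subplot title)
file path (used to load the image)
image (loaded from the path and displayed)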

In [None]:
# Function for displaying images from a patient's folder

def display_images(folder_files_list):
    """Show the images from a list of file paths in a 3-column grid.

    Parameters
    ----------
    folder_files_list : iterable of str
        File paths. Entries whose extension is not .png/.jpg/.jpeg
        (compared case-insensitively) are ignored.

    Returns
    -------
    None
        The function only draws a matplotlib figure. When the list holds
        no image paths it prints a notice and draws nothing. A file that
        OpenCV cannot decode gets an empty, labelled panel instead of
        aborting the whole figure.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_files = sorted(
        f for f in folder_files_list if f.lower().endswith(image_extensions)
    )

    if not image_files:
        # Zero images would mean zero rows and a zero-height figure.
        print("No images to display.")
        return

    cols = 3
    rows = math.ceil(len(image_files) / cols)
    plt.figure(figsize=(15, 5 * rows))

    for i, filename in enumerate(image_files):
        f_name = os.path.basename(filename)
        plt.subplot(rows, cols, i + 1)
        plt.axis('off')

        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports a missing/undecodable file by returning None.
            plt.title(f"{f_name} (unreadable)")
            continue

        # OpenCV loads BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(f_name)

    plt.tight_layout()
    plt.show()


In [None]:
# Random folder display -> helps detect image problems.
folder_list = os.listdir(TRAIN_DIR)
rand_folder = random.choice(folder_list)
folder_path = os.path.join(TRAIN_DIR, rand_folder)
# List of files currently being processed to test image processing function performance.
list_of_files = [
    os.path.join(folder_path, entry)
    for entry in os.listdir(folder_path)
    if entry.endswith(('.png', '.jpg', '.jpeg'))
]
display_images(list_of_files)


**problems**: 
* extra borders/background
* rotated images
* stains
* noise
* images taken from an angle

# Preprocess images

In [None]:
# Helper function for side-by-side comparison of two images
def compare(org_img, changed_img, change_name):
    """Draw two images side by side: the original and a processed version.

    Parameters
    ----------
    org_img : array
        Image for the left panel, titled "Original".
    changed_img : array
        Image for the right panel, titled with ``change_name``.
    change_name : str
        Title of the right panel.

    Images with two dimensions, or with a single channel, are drawn with a
    gray colormap; anything else is converted from BGR to RGB first.
    """
    plt.figure(figsize=(10, 5))

    panels = ((org_img, "Original"), (changed_img, change_name))
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)

        is_gray = len(picture.shape) == 2 or picture.shape[2] == 1
        if is_gray:
            plt.imshow(picture, cmap='gray')
        else:
            plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))

        plt.title(caption)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

### Remove background - cropping function

In [None]:
def cropping(image_path):
    """Crop an image to the bounding box of its largest edge contour.

    The image is tried as-is and in several brightness/contrast variants,
    because the background sometimes only separates after brightening or
    darkening. For every variant: grayscale -> Gaussian blur -> Canny edges
    -> dilate and close -> external contours -> bounding box of the contour
    with the largest area. The first variant whose box is smaller than the
    full frame wins, and the crop is always cut from the unmodified image.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with OpenCV.

    Returns
    -------
    numpy.ndarray
        The cropped region of the original image, or the unmodified image
        when no variant produced a box smaller than the frame.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img_con = cv2.convertScaleAbs(img, alpha=1.5, beta=20)
    # Sometimes the image needs to be brightened or darkened to remove the background.
    possible_images = [
        img,
        img_con,
        cv2.convertScaleAbs(img, alpha=1.0, beta=-70),
        cv2.convertScaleAbs(img, alpha=1.5, beta=50),
        cv2.convertScaleAbs(img_con, alpha=1.0, beta=-70),
        cv2.convertScaleAbs(img_con, alpha=1.5, beta=50),
    ]

    # The structuring element is the same for every variant; build it once.
    kernel = np.ones((5, 5), np.uint8)

    for image in possible_images:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blur, 50, 150)

        # Thicken and close the edges so the outline forms one contour.
        edges = cv2.dilate(edges, kernel, iterations=2)
        edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if not contours:
            # This variant yielded nothing; the next one may still work.
            print(" X Nie znaleziono konturów. X")
            continue

        largest = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest)

        # A box covering the whole frame means nothing was cropped away.
        if (h, w) != image.shape[:2]:
            return img[y:y + h, x:x + w]

    print("Cropping image - failed!")
    return img

In [None]:
# Get a random image from the folder listed above; the loaded copy is kept
# as the reference "original" for the comparisons below.
rand_image_path = random.choice(list_of_files)
original = cv2.imread(rand_image_path)
print(rand_image_path)

Checking how the preprocessing function works on pictures with a background

In [None]:
# Crop the randomly chosen image to the bounding box found by cropping().
cropperd_image = cropping(rand_image_path)

In [None]:
# Show the untouched image next to the cropped result.
compare(original,cropperd_image, 'Cropped')

### Fix skewed images

In [None]:
def order_points(pts):
    """Arrange four 2-D points as top-left, top-right, bottom-right, bottom-left.

    Parameters
    ----------
    pts : numpy.ndarray
        Array holding exactly four (x, y) points; it is reshaped to (4, 2).

    Returns
    -------
    numpy.ndarray
        float32 array of shape (4, 2). Row 0 is the point with the smallest
        x + y, row 2 the one with the largest x + y, row 1 the one with the
        smallest y - x and row 3 the one with the largest y - x.
    """
    corners = pts.reshape(4, 2)
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)  # y - x for every point

    picked = [
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_sum),   # bottom-right
        np.argmax(coord_diff),  # bottom-left
    ]
    return corners[picked].astype("float32")

In [None]:
def stright(image_path):
    """Straighten an image: perspective correction followed by deskewing.

    1. Canny edges -> external contours, largest area first. The first
       contour that ``approxPolyDP`` reduces to four vertices is treated as
       the sheet outline and warped onto an axis-aligned rectangle.
       When no such contour exists (or it is degenerate) this step is
       skipped and the full image is used.
    2. Hough line segments are detected on the result; the median angle of
       the segments lying within +/-60 degrees is taken as the remaining
       skew and the image is rotated by that angle around its centre.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with OpenCV.

    Returns
    -------
    numpy.ndarray
        The straightened image.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blur, 50, 150)

    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # Largest contour that simplifies to a quadrilateral, if any.
    doc_contour = None
    for c in contours:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            doc_contour = approx
            break

    # Without a usable quadrilateral, fall back to the unwarped image.
    warped = image
    if doc_contour is not None:
        rect = order_points(doc_contour)
        (tl, tr, br, bl) = rect

        maxWidth = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
        maxHeight = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

        # A collapsed quad would request a zero-size output image.
        if maxWidth > 1 and maxHeight > 1:
            dst = np.array([
                [0, 0],
                [maxWidth - 1, 0],
                [maxWidth - 1, maxHeight - 1],
                [0, maxHeight - 1]], dtype="float32")

            M = cv2.getPerspectiveTransform(rect, dst)
            warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    gray_warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray_warped, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=40, maxLineGap=10)

    # Keep only segments within +/-60 degrees of horizontal.
    angles = []
    if lines is not None:
        for x1, y1, x2, y2 in lines[:, 0]:
            angle = math.degrees(math.atan2(y2 - y1, x2 - x1))
            if -60 < angle < 60:
                angles.append(angle)

    # The median is used so a few stray segments do not shift the estimate.
    skew_angle = float(np.median(angles)) if angles else 0.0

    (h, w) = warped.shape[:2]
    center = (w // 2, h // 2)
    M_rot = cv2.getRotationMatrix2D(center, skew_angle, 1.0)
    rotated = cv2.warpAffine(warped, M_rot, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)

    return rotated


In [None]:
# Straighten the randomly chosen image (perspective warp + rotation).
striht_image = stright(rand_image_path)

In [None]:
# Show the untouched image next to the straightened result.
compare(original,striht_image, 'Strightened')

### Remove grid

In [None]:
def remove_grid(image_path, red_ratio_threshold=0.94):
    """Binarise an image after whitening its red-dominant (grid) pixels.

    A pixel counts as grid when its red share ``R / (R + G + B)`` exceeds
    ``red_ratio_threshold``. Those pixels are painted white in the
    grayscale image so that, after Otsu thresholding, they end up as
    background rather than being mistaken for dark trace ink. A 2x2
    morphological opening is applied to the binary image at the end.

    ``cv2.imread`` with its default flag always yields a 3-channel BGR
    array, so grayscale files go through the same path; with R = G = B
    their red share is about 1/3 and nothing is masked.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with OpenCV.
    red_ratio_threshold : float, optional
        Minimum red share for a pixel to be treated as grid. Defaults to
        the previously hard-coded 0.94.
        NOTE(review): 0.94 only matches almost pure red; a pink grid has a
        much lower share - confirm the intended value against real scans.

    Returns
    -------
    numpy.ndarray
        Single-channel uint8 image with values 0 and 255.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img_float = img.astype(np.float32) / 255.0
    B, G, R = cv2.split(img_float)
    # The epsilon keeps pure-black pixels from dividing by zero.
    r_norm = R / (R + G + B + 1e-8)
    grid_mask = r_norm > red_ratio_threshold

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Grid pixels become background (white), not black like the trace.
    cleaned = gray.copy()
    cleaned[grid_mask] = 255
    _, binary = cv2.threshold(cleaned, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    binary_clean = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)

    return binary_clean

In [None]:
# Run grid removal on the randomly chosen image.
no_grid = remove_grid(rand_image_path)

In [None]:
# Show the untouched image next to the binarised, grid-removed result.
compare(original,no_grid, 'Grid removed')

In [None]:
# Run grid removal on one fixed training image (record 10140238, file 0001)
# instead of the random pick; the variable name labels it as good quality.
no_greed_good_quality_image = remove_grid('/kaggle/input/physionet-ecg-image-digitization/train/10140238/10140238-0001.png')

In [None]:
# Compare against the image that was actually processed: `original` holds
# the randomly chosen image, not this fixed file, so load the matching
# source here for the left panel.
good_quality_original = cv2.imread('/kaggle/input/physionet-ecg-image-digitization/train/10140238/10140238-0001.png')
compare(good_quality_original, no_greed_good_quality_image, 'Grid removed')