In [1]:
# Mount Google Drive into the Colab VM so that the dataset and output folders
# referenced below under /content/drive/MyDrive are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import re
from skimage.filters import threshold_otsu, gaussian
from skimage import measure
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from skimage.io import imread
from skimage import color
from skimage.transform import resize
import matplotlib.pyplot as plt

# Root folder (on the mounted Drive) under which all pre-processed output is
# written; the driver loop at the bottom of this cell joins one sub-folder per
# ECG class onto this path.
base_output_dir = '/content/drive/MyDrive/MAJOR_PROJECT/preprocessed_data/'

# Function to extract only the signal trace from each cropped lead image
def extract_signal_leads(Leads, folder_name, parent, output_dir):
    """Binarise each lead image, trace its longest contour and export it.

    For every lead image this function:

    1. converts it to grayscale, smooths it and applies an Otsu threshold,
    2. saves the binary image as ``Lead_<n>_preprocessed_Signal.png``,
    3. finds contours in the binary image, keeps the longest one(s),
       resamples them to 255 points and saves the plot as
       ``Lead_<n>_Contour_Signal.png``,
    4. passes the resampled contour to ``scale_csv_1D`` for CSV export.

    Parameters
    ----------
    Leads : sequence of ndarray
        Cropped lead images (passed to ``rgb2gray``, so assumed to be RGB).
    folder_name : str
        Name of the per-image sub-folder under ``<output_dir>/images``.
    parent : str
        Unused; kept so existing callers keep working.
    output_dir : str
        Output root for the current ECG class.

    Raises
    ------
    ValueError
        If no contour is found for a lead. Previously this case either
        raised ``NameError`` (first lead) or silently exported the
        *previous* lead's signal under the current lead number.
    """
    # Non-interactive mode: figures are only written to disk.
    plt.ioff()

    image_dir = os.path.join(output_dir, 'images', folder_name)
    os.makedirs(image_dir, exist_ok=True)

    for lead_no, lead_image in enumerate(Leads):
        # converting to gray scale
        grayscale = color.rgb2gray(lead_image)
        # smoothing image
        blurred_image = gaussian(grayscale, sigma=0.7)
        # Otsu picks the threshold separating foreground from background
        global_thresh = threshold_otsu(blurred_image)
        # pixels darker than the threshold become True
        binary_global = blurred_image < global_thresh

        # The 13th crop (index 12) is the long strip and keeps its own size;
        # every other lead is brought to a common 300x450 shape.
        if lead_no != 12:
            binary_global = resize(binary_global, (300, 450))

        fig1, ax1 = plt.subplots()
        ax1.imshow(binary_global, cmap="gray")
        ax1.axis('off')
        ax1.set_title("pre-processed Leads {} image".format(lead_no + 1))
        # save first, then release the figure
        fig1.savefig(os.path.join(image_dir, 'Lead_{x}_preprocessed_Signal.png'.format(x=lead_no + 1)))
        plt.close(fig1)

        # find contours and keep only the longest one(s): the signal trace
        contours = measure.find_contours(binary_global, 0.8)
        if not contours:
            raise ValueError(
                "No contour found for lead {} of '{}'".format(lead_no + 1, folder_name)
            )
        longest = max(len(contour) for contour in contours)

        fig7, ax7 = plt.subplots()
        ax7.invert_yaxis()
        signal = None
        for contour in contours:
            if len(contour) == longest:
                # resample the (row, col) point list to a fixed 255 points
                signal = resize(contour, (255, 2))
                ax7.plot(signal[:, 1], signal[:, 0], linewidth=1, color='black')
        ax7.axis('image')
        ax7.set_title("Contour {} image".format(lead_no + 1))
        fig7.savefig(os.path.join(image_dir, 'Lead_{x}_Contour_Signal.png'.format(x=lead_no + 1)))
        plt.close(fig7)

        scale_csv_1D(signal, lead_no, folder_name, output_dir)
        # Release any figure the export helper may have left open so that
        # figures do not pile up over thousands of leads.
        plt.close('all')


def Convert_Image_Lead(image_file, parent_folder, output_dir):
    """Split one 12-lead ECG sheet into 13 lead crops and process them.

    The sheet is cropped at fixed pixel coordinates into 13 regions. Each
    crop is saved as ``Lead_<n>_Signal.png`` under
    ``<output_dir>/images/<folder_name>/``, and the crops are then handed to
    ``extract_signal_leads`` for signal extraction.

    Parameters
    ----------
    image_file : str
        File name of the ECG image inside ``parent_folder``.
    parent_folder : str
        Directory containing ``image_file``.
    output_dir : str
        Output root for the current ECG class.
    """
    image = imread(os.path.join(parent_folder, image_file), plugin='matplotlib')

    # Crop boxes as (row_start, row_stop, col_start, col_stop): three rows of
    # four leads, followed by one wide strip (lead 13) spanning the sheet.
    # NOTE(review): coordinates assume every sheet has the same layout and
    # resolution - confirm against the dataset.
    lead_boxes = [
        (300, 600, 150, 643), (300, 600, 646, 1135), (300, 600, 1140, 1626), (300, 600, 1630, 2125),
        (600, 900, 150, 643), (600, 900, 646, 1135), (600, 900, 1140, 1626), (600, 900, 1630, 2125),
        (900, 1200, 150, 643), (900, 1200, 646, 1135), (900, 1200, 1140, 1626), (900, 1200, 1630, 2125),
        (1250, 1480, 150, 2125),
    ]

    # list of leads
    Leads = [image[r0:r1, c0:c1] for r0, r1, c0, c1 in lead_boxes]

    # folder_name to store lead images: the file name without its ".jpg"
    # suffix. The pattern is escaped and anchored so that only a real
    # trailing extension is removed (the old '.jpg' pattern matched any
    # character followed by "jpg" anywhere in the name).
    folder_name = re.sub(r'\.jpg$', '', image_file)

    # Ensure the output directory for images exists (once, not per lead)
    image_output_dir = os.path.join(output_dir, 'images', folder_name)
    os.makedirs(image_output_dir, exist_ok=True)

    # Non-interactive mode: figures are only written to disk.
    plt.ioff()

    # loop through leads and create separate images
    for lead_index, lead_image in enumerate(Leads):
        fig, ax = plt.subplots()
        ax.imshow(lead_image)
        ax.axis('off')
        ax.set_title("Leads {0}".format(lead_index + 1))

        # Save the image, then release the figure
        fig.savefig(os.path.join(image_output_dir, 'Lead_{x}_Signal.png'.format(x=lead_index + 1)))
        plt.close(fig)

    extract_signal_leads(Leads, folder_name, parent_folder, output_dir)


def convert_csv(test, lead_no, folder_name, output_dir):
    """Write the raw (unscaled) contour points of one lead to its own CSV.

    The file is written to ``<output_dir>/csv/<folder_name>/<lead_no+1>.csv``
    with columns ``X``, ``Y`` and ``Target``.

    Parameters
    ----------
    test : array-like of shape (n, 2)
        Contour points; the two columns are stored as ``X`` and ``Y``.
    lead_no : int
        Zero-based lead index; the file name uses ``lead_no + 1``.
    folder_name : str
        Per-image folder name; its first two characters become the
        ``Target`` label.
    output_dir : str
        Output root for the current ECG class.
    """
    target = folder_name[0:2]
    df = pd.DataFrame(test, columns=['X', 'Y'])
    df['Target'] = target

    # The per-image CSV folder is not created anywhere else, so make sure it
    # exists before writing.
    csv_dir = os.path.join(output_dir, 'csv', folder_name)
    os.makedirs(csv_dir, exist_ok=True)

    # Convert to CSV
    df.to_csv(os.path.join(csv_dir, '{lead_no}.csv'.format(lead_no=lead_no+1)), index=False)


# Min-max scale a lead's contour points and append them to the per-lead CSV
def scale_csv(test, lead_no, folder_name, output_dir):
    """Scale both contour columns with ``MinMaxScaler`` and append them to
    ``<output_dir>/csv/Scaled_<lead_no+1>.csv``.

    The scaled ``X``/``Y`` columns are transposed, so each call contributes
    two rows (one per coordinate), each tagged with a ``Target`` label taken
    from the first two characters of ``folder_name``. The header row is
    written only when the file does not exist yet.
    """
    csv_path = os.path.join(output_dir, 'csv', 'Scaled_{lead_no}.csv'.format(lead_no=lead_no+1))
    label = folder_name[0:2]

    scaled_points = MinMaxScaler().fit_transform(test)
    rows = pd.DataFrame(scaled_points, columns=['X', 'Y']).T
    rows['Target'] = label

    # Existing file -> append without header; otherwise create it with one.
    file_exists = os.path.isfile(csv_path)
    rows.to_csv(
        csv_path,
        mode='a' if file_exists else 'w',
        header=not file_exists,
        index=False,
    )


def scale_csv_1D(test, lead_no, folder_name, output_dir):
    """Export one lead's scaled contour as a 1-D signal (plot + CSV row).

    The contour points are min-max scaled and only the first column is
    kept. That 1-D signal is plotted to
    ``<output_dir>/images/<folder_name>/ID_Lead_<lead_no+1>_Signal.png`` and
    appended as a single row (plus a ``Target`` column) to
    ``<output_dir>/csv/scaled_data_1D_<lead_no+1>.csv``. The header row is
    written only when the CSV does not exist yet.

    Parameters
    ----------
    test : array-like of shape (n, 2)
        Contour points of the lead.
    lead_no : int
        Zero-based lead index; file names use ``lead_no + 1``.
    folder_name : str
        Per-image folder name; its first two characters become the
        ``Target`` label.
    output_dir : str
        Output root for the current ECG class.
    """
    target = folder_name[0:2]
    image_dir = os.path.join(output_dir, 'images', folder_name)
    csv_dir = os.path.join(output_dir, 'csv')
    csv_path = os.path.join(csv_dir, 'scaled_data_1D_{lead_no}.csv'.format(lead_no=lead_no+1))
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(csv_dir, exist_ok=True)

    scaler = MinMaxScaler()
    fit_transform_data = scaler.fit_transform(test)
    Normalized_Scaled = pd.DataFrame(fit_transform_data[:, 0], columns=['X'])

    fig6, ax6 = plt.subplots()
    ax6.plot(Normalized_Scaled, linewidth=1, color='black', linestyle='solid')
    fig6.savefig(os.path.join(image_dir, 'ID_Lead_{lead_no}_Signal.png'.format(lead_no=lead_no+1)))
    # Close the figure here instead of relying on a caller's plt.close('all');
    # otherwise one figure leaks per processed lead.
    plt.close(fig6)

    # One row per image: the signal samples as columns, then the label.
    Normalized_Scaled = Normalized_Scaled.T
    Normalized_Scaled['Target'] = target

    # Append to the per-lead CSV; emit the header only for a brand-new file.
    file_exists = os.path.isfile(csv_path)
    Normalized_Scaled.to_csv(
        csv_path,
        mode='a' if file_exists else 'w',
        header=not file_exists,
        index=False,
    )


# Source folders on the mounted Drive, one per ECG class.
normal_parent_dir = '/content/drive/MyDrive/MAJOR_PROJECT/ECG_IMAGES_DATASET/Normal Person ECG Images (284x12=3408)'
abnormal_parent_dir = '/content/drive/MyDrive/MAJOR_PROJECT/ECG_IMAGES_DATASET/ECG Images of Patient that have abnormal heartbeat (233x12=2796)'
MI_parent_dir = '/content/drive/MyDrive/MAJOR_PROJECT/ECG_IMAGES_DATASET/ECG Images of Myocardial Infarction Patients (240x12=2880)'
MI_history_parent_dir = '/content/drive/MyDrive/MAJOR_PROJECT/ECG_IMAGES_DATASET/ECG Images of Patient that have History of MI (172x12=2064)'

# Types of heart disease: output sub-folder name -> source image folder.
# NOTE: 'Abnormal_hear_beat' (sic) is used as an on-disk folder name; it is
# left unchanged so that already-generated output and downstream readers
# keep working.
Types_ECG = {'Abnormal_hear_beat': abnormal_parent_dir,
             'MI': MI_parent_dir,
             'History_MI': MI_history_parent_dir,
             'Normal': normal_parent_dir}

# Loop through folder/files and create separate images of different leads.
# NOTE: the per-lead CSVs are opened in append mode, so re-running this cell
# without clearing '<class>/csv' first adds duplicate rows.
for types, folder in Types_ECG.items():
    output_dir = os.path.join(base_output_dir, types)
    # Create each sub-folder independently: a class folder left over from an
    # interrupted run may exist without its 'images' or 'csv' sub-folder.
    os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'csv'), exist_ok=True)

    # os.listdir order is arbitrary; sort so CSV row order is reproducible.
    for files in sorted(os.listdir(folder)):
        if files.endswith(".jpg"):
            Convert_Image_Lead(files, folder, output_dir)
