# ECG Image Dataset 
Here I load and read the data, then label each folder with its respective name:


*   **MI**
*   **HMI**
*   **AbnHB**
*   **Normal**

## First Step Segmentation
Next, I create a function to crop the ECG report and keep only the ECG signal.

Then I extract the 12leads from the ECG Signal.

## Second Step GLCM 

* feature extraction with GLCM _Energy_

## Third Step  Hexaxial Mapping 

* Not done yet..


#### Ayoub Berdeddouch

# Loading Libraries

In [1]:
import os
import os.path
import glob
from pathlib import Path

import pandas as pd                                     
import numpy as np                                      
import matplotlib.pyplot as plt
import seaborn as sns

import cv2            
from PIL import Image
import datetime as dt
import tensorflow as tf                                 

# Scikit-learn
from sklearn.model_selection import train_test_split    
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, classification_report, recall_score, precision_score, f1_score, roc_auc_score, roc_curve

#Tensorflow _ Keras
from tensorflow.keras.utils import plot_model
from tensorflow.keras.initializers import RandomUniform
from tensorflow.keras import datasets, layers, models, losses, Model, optimizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization, GlobalAveragePooling2D, SpatialDropout2D
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

## VGG Architectures 
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.mobilenet import MobileNet

# Model Visualizer
#to  install it run 
#!pip install visualkeras
#import visualkeras

# Loading Data 

## First Method

In [None]:
# Shell command (notebook magic): download the zipped ECG image dataset.
!wget "https://md-datasets-cache-zipfiles-prod.s3.eu-west-1.amazonaws.com/gwbz3fsgp8-2.zip"

In [None]:
# Shell command (notebook magic): extract the downloaded archive.
!unzip gwbz3fsgp8-2.zip
# done

## Loading & Reading Images of **MI** (= Myocardial Infarction) Patients

In [4]:
# Images of patients with myocardial infarction (MI)
ECG_MI_dir_ = Path('/content/ECG Images of Myocardial Infarction Patients (240x12=2880)')

# Every .jpg below the dataset folder, searched recursively
ECG_MI_filepaths = list(ECG_MI_dir_.glob(r'**/*.jpg'))
# Name of the folder that directly contains each image
MI_labels = [jpg.parent.name for jpg in ECG_MI_filepaths]
# Wrap the paths (as strings) and the folder names in named Series
ECG_MI_filepaths = pd.Series(ECG_MI_filepaths, name='File').astype(str)
MI_labels = pd.Series(MI_labels, name='Label')

# One row per image; the folder-derived label column is then overwritten
# with the fixed class tag
MI_df = pd.concat((ECG_MI_filepaths, MI_labels), axis="columns")
MI_df['Label'] = "MI"

In [None]:
# Image example of MI: show the first MI image, with its array shape as the x-label
figure = plt.figure(figsize=(20,10))
x = plt.imread(MI_df["File"][0])
plt.imshow(x)
plt.xlabel(x.shape)
# NOTE(review): the title reads row 10 while the image shown is row 0;
# every row of MI_df carries the same "MI" label, but row 10 must exist.
plt.title(MI_df["Label"][10])

## Loading & Reading Images of **HMI** (= History of Myocardial Infarction) Patients

In [6]:
# Images of patients with a history of myocardial infarction (HMI)
ECG_HMI_dir_ = Path('/content/ECG Images of Patient that have History of MI (172x12=2064)')

# Every .jpg below the dataset folder, searched recursively
ECG_HMI_filepaths = list(ECG_HMI_dir_.glob(r'**/*.jpg'))
# Name of the folder that directly contains each image
HMI_labels = [jpg.parent.name for jpg in ECG_HMI_filepaths]
# Wrap the paths (as strings) and the folder names in named Series
ECG_HMI_filepaths = pd.Series(ECG_HMI_filepaths, name='File').astype(str)
HMI_labels = pd.Series(HMI_labels, name='Label')

# One row per image; the folder-derived label column is then overwritten
# with the fixed class tag
HMI_df = pd.concat((ECG_HMI_filepaths, HMI_labels), axis="columns")
HMI_df['Label'] = "HMI"

In [None]:
# Image example of HMI: show the first HMI image, with its array shape as the x-label
figure = plt.figure(figsize=(20,10))
x = plt.imread(HMI_df["File"][0])
plt.imshow(x)
plt.xlabel(x.shape)
# NOTE(review): the title reads row 10 while the image shown is row 0;
# every row of HMI_df carries the same "HMI" label, but row 10 must exist.
plt.title(HMI_df["Label"][10])

## Loading & Reading Images of **AbnHB** (= Abnormal Heartbeat) Patients

In [8]:
# Images of patients with an abnormal heartbeat (AbnHB)
ECG_AbnHB_dir_ = Path('/content/ECG Images of Patient that have abnormal heartbeat (233x12=2796)')

# Every .jpg below the AbnHB folder, searched recursively.
# (Previously this globbed the HMI folder, so the "ABNORMAL" rows were HMI images.)
ECG_AbnHB_filepaths = list(ECG_AbnHB_dir_.glob(r'**/*.jpg'))
# Name of the folder that directly contains each image
AbnHB_labels = [jpg.parent.name for jpg in ECG_AbnHB_filepaths]
# Wrap the paths (as strings) and the folder names in named Series
ECG_AbnHB_filepaths = pd.Series(ECG_AbnHB_filepaths, name = 'File').astype(str)
AbnHB_labels = pd.Series(AbnHB_labels, name='Label')

# One row per image; the folder-derived label column is then overwritten
# with the fixed class tag
AbnHB_df = pd.concat([ECG_AbnHB_filepaths, AbnHB_labels], axis=1)
AbnHB_df['Label'] = "ABNORMAL"

In [None]:
# Image example of AbnHB: show the first image in AbnHB_df, with its array shape as the x-label
figure = plt.figure(figsize=(20,10))
x = plt.imread(AbnHB_df["File"][0])
plt.imshow(x)
plt.xlabel(x.shape)
# NOTE(review): the title reads row 10 while the image shown is row 0;
# every row of AbnHB_df carries the same "ABNORMAL" label, but row 10 must exist.
plt.title(AbnHB_df["Label"][10])

## Loading & Reading Images of **NORMAL** (= Normal Heartbeat) Patients

In [10]:
# Images of persons with a normal ECG
ECG_Normal_dir_ = Path('/content/Normal Person ECG Images (284x12=3408)')

# Every .jpg below the dataset folder, searched recursively
ECG_Normal_filepaths = list(ECG_Normal_dir_.glob(r'**/*.jpg'))
# Name of the folder that directly contains each image
Normal_labels = [jpg.parent.name for jpg in ECG_Normal_filepaths]
# Wrap the paths (as strings) and the folder names in named Series
ECG_Normal_filepaths = pd.Series(ECG_Normal_filepaths, name='File').astype(str)
Normal_labels = pd.Series(Normal_labels, name='Label')

# One row per image; the folder-derived label column is then overwritten
# with the fixed class tag
Normal_df = pd.concat((ECG_Normal_filepaths, Normal_labels), axis="columns")
Normal_df['Label'] = "NORMAL"

In [None]:
# Image example of NORMAL: show the first normal image, with its array shape as the x-label
figure = plt.figure(figsize=(20,10))
x = plt.imread(Normal_df["File"][0])
plt.imshow(x)
plt.xlabel(x.shape)
# NOTE(review): the title reads row 10 while the image shown is row 0;
# every row of Normal_df carries the same "NORMAL" label, but row 10 must exist.
plt.title(Normal_df["Label"][10])

### Then We concatenate our Dataframes.

In [None]:
# Stack the four per-class frames (File, Label) into one frame;
# ignore_index=True renumbers the rows 0..n-1 instead of keeping each frame's own index.
df_ECG = pd.concat([MI_df, HMI_df, AbnHB_df,Normal_df],ignore_index=True)

## Cropping the ECG images
The **im_crop** function is defined below:

In [None]:
#from PIL import Image
#import matplotlib.pyplot as plt

def im_crop(image, left=71.5, top=287.5, right=2102, bottom=1228, show=True):
  """ Crop an ECG report image so that only the ECG signal area remains.
      input:
        image : path (or file object) of the report image (jpg, png, ...),
                for example MI_df["File"][0]
        left: x coordinate of the left edge of the crop box, in pixels
        top: y coordinate of the top edge of the crop box, in pixels
        right: x coordinate of the right edge of the crop box, in pixels
        bottom: y coordinate of the bottom edge of the crop box, in pixels
        show: when True (default) plot the original and the crop side by side;
              pass False to crop silently, e.g. when processing many images

        ######
        # choices from paper: left=71.5, top= 287.5, right=2102, bottom= 1228
        ######
      output:
        img_out: the cropped PIL image.
  """
  # The context manager closes the file handle that Image.open keeps open.
  with Image.open(image) as img:
    img_out = img.crop((left, top, right, bottom))
    img_out.load()  # make sure the pixel data is read while the file is open

    if show:
      plt.figure(figsize=(20,10))
      plt.subplot(121)
      plt.imshow(img)
      plt.title("Original")

      plt.subplot(122)
      plt.imshow(img_out)
      # Generic title: the function is not tied to one particular file.
      plt.title("Cropped")
      plt.show()

  return img_out


# Test: crop the first MI image with the default box; the result is kept in img_cc
img_cc = im_crop(MI_df["File"][0])

## Now below to Crop the 12 leads from the ECG Signal Image Cropped

In [13]:
#from PIL import Image
#import matplotlib.pyplot as plt

def im_crop_12leads(image, x, y,title, width= 315, height= 315):
  """ Crop a single lead out of an already cropped ECG signal image.
      input:
        image : PIL image of the cropped ECG signal (e.g. the output of im_crop)
        x: x coordinate of the left edge of the lead tile, in pixels
        y: y coordinate of the top edge of the lead tile, in pixels
        title: title shown above the cropped lead in the plot
        width: width of the lead tile in pixels (default 315)
        height: height of the lead tile in pixels (default 315)
      output:
        img_out: the cropped lead as a grayscale ('L' mode) PIL image.
  """
  # Crop box is (left, top, right, bottom); convert('L') makes it grayscale.
  img_out = image.crop((x, y, width + x , y+ height)).convert('L')

  plt.figure(figsize=(20,10))
  plt.subplot(121)
  plt.imshow(image)
  plt.title("Original")

  plt.subplot(122)
  # A single-channel image would otherwise be drawn with the default colour map.
  plt.imshow(img_out, cmap="gray")
  plt.title(title)
  plt.show()

  return img_out


In [None]:
# Crop every lead individually from img_cc by passing the (x, y) corner of its tile and a plot title.
# Tiles at x = 120.5: leads I, II, III (y = 0.5, 315.5, 630.5).
I_img = im_crop_12leads(img_cc,120.5,0.5,"I Lead")

In [None]:
II_img  = im_crop_12leads(img_cc,120.5,315.5,'II Lead') 

In [None]:
III_img = im_crop_12leads(img_cc,120.5,630.5,'III Lead');

In [None]:
# Tiles at x = 1133.5: leads V1, V2, V3.
V1_img = im_crop_12leads(img_cc,1133.5,0.5,'V1 Lead')

In [None]:
V2_img = im_crop_12leads(img_cc,1133.5,315.5,'V2 Lead');

In [None]:
V3_img  = im_crop_12leads(img_cc,1133.5,630.5,'V3 Lead');

In [None]:
# Tiles at x = 1639.5: leads V4, V5, V6.
V4_img = im_crop_12leads(img_cc,1639.5,0.5,'V4 Lead')

In [None]:
V5_img = im_crop_12leads(img_cc,1639.5,315.5,'V5 Lead');

In [None]:
V6_img  = im_crop_12leads(img_cc,1639.5,630.5,'V6 Lead');

In [None]:
# Tiles at x = 672.5: leads aVR (y = 0.5), aVL (y = 315.5), aVF (y = 630.5).
aVL_img = im_crop_12leads(img_cc,672.5,315.5,'aVL Lead')

In [None]:
aVR_img = im_crop_12leads(img_cc,672.5,0.5,'aVR Lead')

In [None]:
aVF_img = im_crop_12leads(img_cc,672.5,630.5,'aVF Lead');

## Below the Function to get All the 12 Leads at once.

In [None]:
#from PIL import Image
#import matplotlib.pyplot as plt

def im_crop_12leads(image, width= 315, height= 315):
  """ Crop all 12 leads out of an already cropped ECG signal image at once.

      NOTE: this redefines im_crop_12leads; the single-lead version defined
      earlier in the file (image, x, y, title, ...) is shadowed once this runs.

      input:
        image : PIL image of the cropped ECG signal (e.g. the output of im_crop)
        width : width of every lead tile in pixels (default 315)
        height : height of every lead tile in pixels (default 315)
      output:
        tuple of 12 grayscale ('L' mode) PIL images, in this order:
        I, II, III, V1, V2, V3, V4, V5, V6, aVR, aVL, aVF
  """
  # (lead name, x, y) of the top-left corner of every tile, listed row by row
  # as the tiles are laid out in the image: 3 rows of 4 leads.
  lead_origins = (
      ("I",   120.5,   0.5), ("aVR", 672.5,   0.5), ("V1", 1133.5,   0.5), ("V4", 1639.5,   0.5),
      ("II",  120.5, 315.5), ("aVL", 672.5, 315.5), ("V2", 1133.5, 315.5), ("V5", 1639.5, 315.5),
      ("III", 120.5, 630.5), ("aVF", 672.5, 630.5), ("V3", 1133.5, 630.5), ("V6", 1639.5, 630.5),
  )

  # Crop box is (left, top, right, bottom); convert('L') makes it grayscale.
  leads = {
      name: image.crop((x, y, x + width, y + height)).convert('L')
      for name, x, y in lead_origins
  }

  plt.figure(figsize=(20,10))
  # Top quarter of the figure: the full signal image.
  plt.subplot(4, 1, 1)
  plt.imshow(image)
  plt.title("Original")

  # Remaining three rows: a 4-column grid (cells 5..16 of a 4x4 grid), so the
  # lead tiles never overlap the original or each other.
  for cell, (name, _, _) in enumerate(lead_origins, start=5):
    plt.subplot(4, 4, cell)
    plt.imshow(leads[name], cmap="gray")
    plt.title(f"{name} Lead")

  plt.show()

  return_order = ("I", "II", "III", "V1", "V2", "V3", "V4", "V5", "V6", "aVR", "aVL", "aVF")
  return tuple(leads[name] for name in return_order)


# GLCM 

## Calculate properties of **G**ray-**l**evel **C**o-occurrence **M**atrix

* Extracting Energy Features.

In [30]:
def fast_glcm(img, vmin=0, vmax=255, nbit=8, kernel_size=5):
    """Build a per-pixel gray-level co-occurrence matrix (GLCM).

    Parameters
    ----------
    img : 2-D array of gray values.
    vmin, vmax : gray-value range that is split into ``nbit`` equal bins.
    nbit : number of gray levels (bins) after quantisation.
    kernel_size : side length of the square window the counts are summed over.

    Returns
    -------
    float32 array of shape ``(nbit, nbit, h, w)``. Entry ``[i, j, y, x]`` is
    the windowed count of pixels with level ``i`` whose right-hand neighbour
    has level ``j``. Borders use OpenCV's default border handling.

    Counts are accumulated as uint8, so windows holding more than 255 pixels
    (kernel_size > 15) cannot be represented exactly.
    """
    rows, cols = img.shape

    # Quantise the gray values into levels 0 .. nbit-1.
    edges = np.linspace(vmin, vmax + 1, nbit + 1)
    level = np.digitize(img, edges) - 1
    # Level of the pixel one column to the right; the last column is repeated.
    neighbour = np.concatenate((level[:, 1:], level[:, -1:]), axis=1)

    window = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    counts = np.zeros((nbit, nbit, rows, cols), dtype=np.uint8)
    for i in range(nbit):
        for j in range(nbit):
            # 1 where the (i, j) level pair occurs, then summed over the window.
            hit = ((level == i) & (neighbour == j)).astype(np.uint8)
            counts[i, j] = cv2.filter2D(hit, -1, window)

    return counts.astype(np.float32)


def fast_glcm_mean(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM mean map (float32, same shape as ``img``).

    Each GLCM plane ``[i, j]`` is weighted by its row level ``i`` and divided
    by ``nbit**2`` before being summed.
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    acc = np.zeros((rows, cols), dtype=np.float32)
    for level in range(nbit):
        # counts[level] yields the planes [level, 0], [level, 1], ...
        for plane in counts[level]:
            acc += plane * level / (nbit)**2

    return acc


def fast_glcm_std(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM standard-deviation map (float32).

    The mean map is computed first (level-weighted planes divided by
    ``nbit**2``). The squared deviations of every level-weighted plane from
    that mean are then summed and square-rooted.
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)

    # Pass 1: mean map.
    mu = np.zeros((rows, cols), dtype=np.float32)
    for level in range(nbit):
        for plane in counts[level]:
            mu += plane * level / (nbit)**2

    # Pass 2: accumulated squared deviation from the mean.
    sq_dev = np.zeros((rows, cols), dtype=np.float32)
    for level in range(nbit):
        for plane in counts[level]:
            sq_dev += (plane * level - mu)**2

    return np.sqrt(sq_dev)


def fast_glcm_contrast(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM contrast map (float32).

    Every GLCM plane ``[i, j]`` is weighted by the squared level difference
    ``(i - j)**2`` and the weighted planes are summed.
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    total = np.zeros((rows, cols), dtype=np.float32)
    for i in range(nbit):
        for j, plane in enumerate(counts[i]):
            total += plane * (i-j)**2

    return total


def fast_glcm_dissimilarity(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM dissimilarity map (float32).

    Every GLCM plane ``[i, j]`` is weighted by the absolute level difference
    ``|i - j|`` and the weighted planes are summed.
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    total = np.zeros((rows, cols), dtype=np.float32)
    for i in range(nbit):
        for j, plane in enumerate(counts[i]):
            total += plane * np.abs(i-j)

    return total


def fast_glcm_homogeneity(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM homogeneity map (float32).

    Every GLCM plane ``[i, j]`` is divided by ``1 + (i - j)**2`` and the
    results are summed, so pairs of similar levels contribute the most.
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    total = np.zeros((rows, cols), dtype=np.float32)
    for i in range(nbit):
        for j, plane in enumerate(counts[i]):
            total += plane / (1.+(i-j)**2)

    return total


def fast_glcm_ASM(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM angular second moment and energy maps.

    Returns a tuple ``(asm, energy)``: ``asm`` is the sum of the squared GLCM
    planes and ``energy`` is its square root (both float32).
    """
    rows, cols = img.shape
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    second_moment = np.zeros((rows, cols), dtype=np.float32)
    # Flatten the two level axes; planes are visited in the same (i, j) order.
    for plane in counts.reshape(-1, rows, cols):
        second_moment += plane**2

    energy = np.sqrt(second_moment)
    return second_moment, energy


def fast_glcm_max(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return, for every pixel, the largest entry of its GLCM (over all level pairs)."""
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    # Reduce over both level axes, leaving an (h, w) map.
    return counts.max(axis=(0, 1))


def fast_glcm_entropy(img, vmin=0, vmax=255, nbit=8, ks=5):
    """Return the per-pixel GLCM entropy map.

    The GLCM is normalised by its per-pixel total, and ``1 / ks**2`` is added
    to every entry (presumably so the logarithm never receives zero). The
    result is the sum of ``-p * log(p)`` over all level pairs.
    """
    counts = fast_glcm(img, vmin, vmax, nbit, ks)
    per_pixel_total = counts.sum(axis=(0, 1))
    prob = counts / per_pixel_total + 1./ks**2
    return (-prob * np.log(prob)).sum(axis=(0, 1))


In [None]:
# Converting the lead I crop (I_img) to a NumPy array, the input type of the fast_glcm_* functions.
# reference : https://app.pluralsight.com/guides/importing-image-data-into-numpy-arrays
img = np.asarray(I_img)
print(type(img))
plt.imshow(img)
# Last expression of the cell: the notebook echoes the array shape.
img.shape

In [None]:
# Compute every GLCM feature map of the lead I array
mean = fast_glcm_mean(img)
std = fast_glcm_std(img)
cont = fast_glcm_contrast(img)
diss = fast_glcm_dissimilarity(img)
homo = fast_glcm_homogeneity(img)
asm, ene = fast_glcm_ASM(img)
ma = fast_glcm_max(img)
ent = fast_glcm_entropy(img)

plt.figure(figsize=(10,4.5))
fs = 15

# (title, data) for each cell of the 2 x 5 grid, in reading order
panels = [
    ('I_lead original', img),
    ('I_lead mean', mean),
    ('I_lead std', std),
    ('I_lead contrast', cont),
    ('I_lead dissimilarity', diss),
    ('I_lead homogeneity', homo),
    ('I_lead ASM', asm),
    ('I_lead energy', ene),
    ('I_lead max', ma),
    ('I_lead entropy', ent),
]
for slot, (caption, data) in enumerate(panels, start=1):
    plt.subplot(2, 5, slot)
    # Hide the tick labels; only the picture and its title are of interest
    plt.tick_params(labelbottom=False, labelleft=False)
    plt.imshow(data)
    plt.title(caption, fontsize=fs)

# Save the overview next to the notebook, then display it
plt.tight_layout(pad=0.8)
plt.savefig('I_lead_output.jpg')
plt.show()

# Hexaxial Mapping 