Heart Images: Categorizing the Images

In [1]:
import os
import cv2  
import pandas as pd

In [2]:
# Define the paths to your image folders
healthy_folder_training = "Resources/heart_images/train/train_healthy"
healthy_folder_testing = "Resources/heart_images/test/test_healthy"
unhealthy_folder_training = "Resources/heart_images/train/train_unhealthy"
unhealthy_folder_testing = "Resources/heart_images/test/test_unhealthy"

# Function to read images from a folder and categorize them
def categorize_images(folder_path, category):
    image_data = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".png"):  # Assuming images are jpg or png
            image_path = os.path.join(folder_path, filename)
            # Read image using OpenCV 
            image = cv2.imread(image_path)  # For OpenCV
            # Append image data along with category to the list
            image_data.append({"Image_Path": image_path, "Category": category})
    return image_data

# Categorize healthy images
healthy_images_train = categorize_images(healthy_folder_training, "Healthy")
healthy_images_test = categorize_images(healthy_folder_testing, "Healthy")

# Categorize unhealthy images
unhealthy_images_train = categorize_images(unhealthy_folder_training, "Unhealthy")
unhealthy_images_test = categorize_images(unhealthy_folder_testing, "Unhealthy")

In [3]:
# Combine the test images
all_images_test = healthy_images_test + unhealthy_images_test

# Combine the training images
all_images_train = healthy_images_test + unhealthy_images_train

In [4]:
# Create a DataFrame from the test images combined list
df_test = pd.DataFrame(all_images_test)

In [5]:
# Create a DataFrame from the training images combined list
df_train = pd.DataFrame(all_images_train)

In [6]:
# Save test DataFrame to Excel file
excel_file_path_1 = "test_data.xlsx"
df_test.to_excel(excel_file_path_1, index=False)

print("Excel file saved successfully.")

Excel file saved successfully.


In [7]:
# Save training DataFrame to Excel file
excel_file_path_2 = "training_data.xlsx"
df_train.to_excel(excel_file_path_2, index=False)

print("Excel file saved successfully.")

Excel file saved successfully.


Heart Images: Machine Learning Section 

In [8]:
# Dependencies
import numpy as np 
import pandas as pd
from glob import glob 
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [9]:
# Load the VGG19 model
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [10]:
# Path for train and test datasets
train_path = 'Resources/heart_images/train'
test_path = 'Resources/heart_images/test'
IMAGE_SIZE = [224, 224] #Default image size for VGG16
folders = glob('Resources/heart_images/train/*') #Get number of classes

# ImageDataGenerator can help perform augumentation on existing images. This way, we get more diverse train set.
train_datagen = ImageDataGenerator(rescale = 1./255, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)

#Through flow_from_directory - we create an array of images that can be used for training. 
training_set = train_datagen.flow_from_directory(
'Resources/heart_images/train',
                                                 target_size = (224, 224),
                                                 batch_size =464,
                                                 class_mode = 'categorical')

test_set = test_datagen.flow_from_directory('Resources/heart_images/test',
                                            target_size = (224, 224),
                                            batch_size = 464,
                                            class_mode = 'categorical')

Found 464 images belonging to 2 classes.
Found 464 images belonging to 2 classes.


In [11]:
# Use this line for VGG19 network. Create a VGG19 model, and removing the last layer that is classifying 1000 images. This will be replaced with images classes we have. 
vgg = VGG19(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)
# This sets the base that the layers are not trainable. If we'd want to train the layers with custom data, these two lines can be ommitted. 
for layer in vgg.layers:
  layer.trainable = False
x = Flatten()(vgg.output) #Output obtained on vgg16 is now flattened. 
prediction = Dense(len(folders), activation='softmax')(x) # We have 5 classes, and so, the prediction is being done on len(folders) - 5 classes
#Creating model object 
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

In [12]:
# Define an ImageDataGenerator for data augmentation and preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [13]:
# Flow training images in batches using the generator 
training_generator = train_datagen.flow_from_directory(
    'Resources/heart_images/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

Found 464 images belonging to 2 classes.


In [15]:
#resize images
import cv2
# Define the path to the folder containing images
folder_path = 'Resources/heart_images/train'

# Resize input images to match model's expected input size
resized_images = []
for filename in os.listdir(folder_path):
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    if image is not None:
        resized_image = cv2.resize(image, (224, 224))
        resized_images.append(resized_image)

In [16]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [17]:
# Train the model using fit
history = model.fit(
    training_generator,
    epochs=10,
    steps_per_epoch=len(training_generator),
    verbose=1
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 7s/step - accuracy: 0.5359 - loss: 1.7952
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(value)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 6s/step - accuracy: 0.6633 - loss: 1.0142
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 901us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 6s/step - accuracy: 0.6991 - loss: 0.5731
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 6s/step - accuracy: 0.7626 - loss: 0.4795
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 6s/step - accuracy: 0.7908 - loss: 0.4711
Epoch 10/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 906us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00


In [None]:
# Evaluate the model on test data
loss, accuracy = model.evaluate(test_set)

# Print the loss and accuracy
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

In [None]:
#Save the model
model.save('HeartImageClassification.h5')