# IMAGE PREPROCESSING (IMAGE TRANSFORMATIONS AND FEATURE SELECTION)
Transformations:
1. Resize images to 500x500
2. Convert image to grayscale (1 channel)
3. Normalize the image to the 0-255 range using normalization type: NORM_MINMAX
4. Apply morphological opening (1x1 kernel)
5. Apply CLAHE to enhance contrast
6. Apply Gaussian blur to remove noise

Feature Selection:
1. Use Canny Edge Detection to preserve edges (use median of image as threshold)


In [1]:
# IMPORT LIBRARIES
import os
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## APPLY IMAGE TRANSFORMATIONS AND FEATURE SELECTION TO THE IMAGES

In [2]:
# INPUT FOLDERS (original images, split by dataset and class)
path_to_train_frac = "./CF_copy/train/fracture/"
path_to_train_normal = "./CF_copy/train/normal/"
path_to_val_frac = "./CF_copy/val/fracture/"
path_to_val_normal = "./CF_copy/val/normal/"

# OUTPUT FOLDERS (preprocessed images, same layout as the input)
path_to_train_frac_preprocessed = "./CF_processed/train/fracture/"
path_to_train_normal_preprocessed = "./CF_processed/train/normal/"
path_to_val_frac_preprocessed = "./CF_processed/val/fracture/"
path_to_val_normal_preprocessed = "./CF_processed/val/normal/"

# MAKE SURE THE OUTPUT FOLDERS EXIST (no error if they are already there)
os.makedirs(path_to_train_frac_preprocessed, exist_ok=True)
os.makedirs(path_to_train_normal_preprocessed, exist_ok=True)
os.makedirs(path_to_val_frac_preprocessed, exist_ok=True)
os.makedirs(path_to_val_normal_preprocessed, exist_ok=True)


In [3]:
def img_preprocessing(img_path, size):
    """Load an image and reduce it to a Canny edge map.

    Pipeline: resize -> grayscale -> min-max normalization -> morphological
    opening -> CLAHE -> Gaussian blur -> Canny edge detection.

    Args:
        img_path: Path of the image file to read.
        size: Target size handed to ``cv.resize`` as its ``dsize`` argument.

    Returns:
        The edge image produced by ``cv.Canny``.

    Raises:
        ValueError: If ``cv.imread`` could not read ``img_path``.
    """
    # Read the image; cv.imread signals failure by returning None instead
    # of raising, so check explicitly to get a clear error message
    img = cv.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")

    # Resize the image
    res_img = cv.resize(img, size)

    # Convert the image to grayscale
    gray = cv.cvtColor(res_img, cv.COLOR_BGR2GRAY)

    # Normalize the image (stretch intensities to the 0-255 range)
    normalizedImg = cv.normalize(gray,  None, 0, 255, norm_type=cv.NORM_MINMAX, dtype=cv.CV_8UC1)

    # Opening
    # NOTE: with a 1x1 structuring element erosion and dilation are both
    # identity operations; the step is kept to preserve the pipeline as is
    kernel = np.ones((1,1),np.uint8)
    opening = cv.morphologyEx(normalizedImg, cv.MORPH_OPEN, kernel)

    # Apply CLAHE
    clahe = cv.createCLAHE(clipLimit=5.0, tileGridSize=(10,10))
    cl1 = clahe.apply(opening)

    # Apply Gaussian Blur (9x9 kernel)
    blur = cv.GaussianBlur(cl1,(9,9),0)

    # Apply Canny Edge Detection, using the median intensity of the blurred
    # image as both the lower and the upper threshold (computed once)
    median = np.percentile(blur, 50)
    cannyImg = cv.Canny(blur, median, median)

    return cannyImg

### Process the Training Images

In [4]:
# Start from an empty frame; one row (image array + class label) is added
# per preprocessed training image
df_train = pd.DataFrame(columns=["image_data", "label"])

In [5]:
# Preprocess images (Train - Fractured)
# Rows are collected in a list and concatenated once after the loop:
# calling pd.concat per image re-copies the whole dataframe every iteration.
new_rows = []
for filename in os.listdir(path_to_train_frac):
    img_path = os.path.join(path_to_train_frac, filename)
    img = img_preprocessing(img_path, (500, 500))
    # Save the image under the same file name in the processed folder
    cv.imwrite(os.path.join(path_to_train_frac_preprocessed, filename), img)
    # Remember the image together with its label (1 = fracture)
    new_rows.append({'image_data': img, 'label': 1})

# Append all rows at once; skip when the folder was empty so that
# df_train is left untouched
if new_rows:
    df_train = pd.concat([df_train, pd.DataFrame(new_rows)], ignore_index=True)

In [6]:
# Display the training dataframe after the fractured images (label 1) were added
df_train

Unnamed: 0,image_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
1895,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1896,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1897,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1898,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1


In [7]:
# Preprocess images (Train - Normal)
# Rows are collected in a list and concatenated once after the loop:
# calling pd.concat per image re-copies the whole dataframe every iteration.
new_rows = []
for filename in os.listdir(path_to_train_normal):
    img_path = os.path.join(path_to_train_normal, filename)
    img = img_preprocessing(img_path, (500, 500))
    # Save the image under the same file name in the processed folder
    cv.imwrite(os.path.join(path_to_train_normal_preprocessed, filename), img)
    # Remember the image together with its label (0 = normal)
    new_rows.append({'image_data': img, 'label': 0})

# Append all rows at once; skip when the folder was empty so that
# df_train is left untouched
if new_rows:
    df_train = pd.concat([df_train, pd.DataFrame(new_rows)], ignore_index=True)

In [8]:
# Display the training dataframe after the normal images (label 0) were added
df_train

Unnamed: 0,image_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
3795,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3796,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3797,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3798,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [9]:
# Create the directory that will hold the pickled dataframes
# (exist_ok=True: no error if the directory already exists)
os.makedirs("./pickled_data", exist_ok=True)

In [10]:
# Save the training dataframe (image arrays + labels) as a pickle file
df_train.to_pickle("./pickled_data/df_train.pkl")

### Process the Validation Images

In [11]:
# Start from an empty frame; one row (image array + class label) is added
# per preprocessed validation image
df_val = pd.DataFrame(columns=["image_data", "label"])

In [12]:
# Preprocess images (Validation - Fractured)
# Rows are collected in a list and concatenated once after the loop:
# calling pd.concat per image re-copies the whole dataframe every iteration.
new_rows = []
for filename in os.listdir(path_to_val_frac):
    img_path = os.path.join(path_to_val_frac, filename)
    img = img_preprocessing(img_path, (500, 500))
    # Save the image under the same file name in the processed folder
    cv.imwrite(os.path.join(path_to_val_frac_preprocessed, filename), img)
    # Remember the image together with its label (1 = fracture)
    new_rows.append({'image_data': img, 'label': 1})

# Append all rows at once; skip when the folder was empty so that
# df_val is left untouched
if new_rows:
    df_val = pd.concat([df_val, pd.DataFrame(new_rows)], ignore_index=True)

In [13]:
# Display the validation dataframe after the fractured images (label 1) were added
df_val

Unnamed: 0,image_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
195,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
196,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
197,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
198,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1


In [14]:
# Preprocess images (Validation - Normal)
# Rows are collected in a list and concatenated once after the loop:
# calling pd.concat per image re-copies the whole dataframe every iteration.
new_rows = []
for filename in os.listdir(path_to_val_normal):
    img_path = os.path.join(path_to_val_normal, filename)
    img = img_preprocessing(img_path, (500, 500))
    # Save the image under the same file name in the processed folder
    cv.imwrite(os.path.join(path_to_val_normal_preprocessed, filename), img)
    # Remember the image together with its label (0 = normal)
    new_rows.append({'image_data': img, 'label': 0})

# Append all rows at once; skip when the folder was empty so that
# df_val is left untouched
if new_rows:
    df_val = pd.concat([df_val, pd.DataFrame(new_rows)], ignore_index=True)

In [15]:
# Display the validation dataframe after the normal images (label 0) were added
df_val

Unnamed: 0,image_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
395,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
396,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
397,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
398,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [16]:
# Save the validation dataframe (image arrays + labels) as a pickle file
df_val.to_pickle("./pickled_data/df_val.pkl")