In [2]:
import os

import pandas as pd
import torchvision.io
from sklearn.model_selection import train_test_split

# Show the entries of the current working directory (sanity check of where the kernel started)
os.listdir()

['Mexico_data_preprocessing.ipynb',
 'preprocessing.ipynb',
 'RSNA_data_preprocessing.ipynb',
 'test_set_rsna.ipynb']

In [3]:
# Move one level up so that the relative "data/..." paths used below resolve.
# The guard keeps the cell idempotent: without it, every re-run in the same
# kernel would climb one directory higher and silently break all later paths.
if 'main_dir' not in globals():
    os.chdir("../")
main_dir = os.getcwd()
main_dir

'D:\\David Mexico\\Bone age estimation\\bone_age_estimation'

In [7]:
# Load the preprocessed Mexico dataset table and preview its first rows
mexico_preprocessed_csv = "data/Mexico_private_dataset/mexico_preprocessed_dataset.csv"
age_df = pd.read_csv(mexico_preprocessed_csv)
age_df.head()

Unnamed: 0,id,age,gender,boneage,radiologist,year_entry,path,exists
0,1,192,0,204,JSA,2019,data\Mexico_private_dataset\preprocessed\1.png,True
1,2,173,0,192,JSA,2019,data\Mexico_private_dataset\preprocessed\2.png,True
2,3,48,0,36,JSA,2019,data\Mexico_private_dataset\preprocessed\3.png,True
3,4,48,1,60,JSA,2019,data\Mexico_private_dataset\preprocessed\4.png,True
4,5,163,0,162,JSA,2019,data\Mexico_private_dataset\preprocessed\5.png,True


In [None]:
# Build the absolute image path "<main_dir>/data/rsna-bone-age/training/preprocessed/<id>.png" for every id.
# NOTE(review): the folder is 'rsna-bone-age' although the table loaded above is the
# Mexico dataset — confirm this cell is still meant to run in this notebook.
preprocessed_dir = os.path.join(main_dir, 'data', 'rsna-bone-age', 'training', 'preprocessed')
age_df['path'] = age_df['id'].map(
    lambda image_id: os.path.join(preprocessed_dir, '{}.png'.format(image_id))
)

In [8]:
# Flag which image files are actually present on disk
age_df['exists'] = age_df['path'].map(os.path.exists)
print(age_df['exists'].sum(), 'images found of', age_df.shape[0], 'total')
# Keep only the rows whose image exists. The explicit .copy() makes the result an
# independent frame, so adding columns to it later does not raise SettingWithCopyWarning.
age_df = age_df[age_df['exists']].copy()

341 images found of 341 total


In [None]:
# Encode gender from the boolean 'male' column: truthy -> 0 (male), falsy -> 1 (female)
age_df['gender'] = age_df['male'].map(lambda is_male: int(not is_male))

In [4]:
# Load the complete Mexico dataset table. The folder is spelled 'Mexico_private_dataset'
# like in the other cells of this notebook; the previous lower-case spelling only
# resolves on case-insensitive file systems.
age_df = pd.read_csv("data/Mexico_private_dataset/mexico_complete_dataset.csv")

In [10]:
# Drop incomplete rows first, so any statistics computed below describe exactly the
# rows that are kept (.copy() keeps later column assignments free of SettingWithCopyWarning).
age_df = age_df.dropna().copy()

# Normalisation is switched off for now: with mean 0 and divisor 1.0 the z-score column
# is simply the bone age as a float. Flip the flag to scale by (x - mean) / (2 * std).
NORMALIZE_BONEAGE = False
if NORMALIZE_BONEAGE:
    boneage_mean = age_df['boneage'].mean()
    boneage_div = 2 * age_df['boneage'].std()
else:
    boneage_mean = 0
    boneage_div = 1.0
# Vectorised arithmetic instead of a per-element lambda; the values are the same
age_df['boneage_zscore'] = (age_df['boneage'] - boneage_mean) / boneage_div

# Creating bins for the boneage (10 bins over the observed range)
age_df['boneage_category'] = pd.cut(age_df['boneage'], 10)
len(age_df)

448

In [6]:
# Stratified 90/10 train/validation split: bone age is cut into 10 bins and the split
# is stratified on those bins; the seed is fixed so the split is reproducible.
age_df['boneage_category'] = pd.cut(age_df['boneage'], bins=10)
train_df, valid_df = train_test_split(
    age_df,
    test_size=0.10,
    random_state=2018,
    stratify=age_df['boneage_category'],
)
# Only id, boneage, path and gender are kept in the frames that get saved
kept_columns = ['id', 'boneage', 'path', 'gender']
train_df = train_df[kept_columns]
validation_df = valid_df[kept_columns]

In [7]:
# Write both splits to CSV, leaving the DataFrame index out of the files
for split_frame, csv_path in (
    (train_df, 'data/Mexico_private_dataset/train_preprocessed.csv'),
    (validation_df, 'data/Mexico_private_dataset/valid_preprocessed.csv'),
):
    split_frame.to_csv(csv_path, index=False)

In [8]:
# Number of rows in the train and validation splits
len(train_df), len(validation_df)

(403, 45)

In [9]:
# Combined size of both splits, to compare against the row count of the full table
len(train_df) + len(validation_df)

448

In [None]:
import glob
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [None]:
# Load the training table stored under data/rsna-bone-age/training
train_df = pd.read_csv('data/rsna-bone-age/training/train_df.csv')

In [None]:
# Small leading slices of the training table (first 3, 10 and 50 rows) used by the plots below
train_3, train_10, train_50 = (train_df.head(n_rows) for n_rows in (3, 10, 50))

In [None]:
# Plot the images listed in train_50['path'] in a 10 x 5 grid (5 images per row),
# each titled with its bone age.
row, columns = 10, 5
# The grid size comes from the variables above instead of being hard-coded a second time
fig, ax = plt.subplots(row, columns, figsize=(20, 30))
# Blank every cell up front so unused cells stay empty if fewer images are available
for cell in ax.ravel():
    cell.axis('off')
# ravel() walks the grid row by row, i.e. position i lands on (i // columns, i % columns)
for cell, path, boneage in zip(ax.ravel(), train_50['path'], train_50['boneage']):
    cell.imshow(plt.imread(path), cmap='gray')
    cell.set_title('Age:{}'.format(boneage))
plt.show()


In [None]:
import torchvision.transforms as transforms
import albumentations as A
import cv2
from albumentations.pytorch import ToTensorV2

# albumentation transformation

transform = A.Compose([
    # Resize every image to 1024 x 1024
    A.Resize(1024, 1024),
    # Contrast Limited Adaptive Histogram Equalization with the library's default settings.
    # NOTE(review): albumentations transforms take an application probability `p`; with the
    # default, CLAHE may only be applied to some of the images — confirm this is intended.
    A.CLAHE(),
    # Convert the result to a tensor (the plotting cell below calls .permute(1, 2, 0) on it)
    ToTensorV2()
])

In [None]:
# Show every image of train_50 next to its transformed version, one figure per image
for path in train_50['path']:
    # Read the file and run it through the albumentations pipeline
    img = cv2.imread(path)
    transformed = transform(image=img)['image']

    fig, (original_ax, transformed_ax) = plt.subplots(1, 2, figsize=(20, 20))

    original_ax.imshow(img, cmap='gray')
    original_ax.set_title('Original Image')
    original_ax.axis('off')

    # permute(1, 2, 0) reorders the tensor axes before handing it to imshow
    transformed_ax.imshow(transformed.permute(1, 2, 0), cmap='gray')
    transformed_ax.set_title('Transformed Image')
    transformed_ax.axis('off')

    plt.show()


In [None]:
# clip the scan based on the first and last non-zero pixels
def clip_image(img, plot=False):
    """Crop an image to the bounding box of its rows and columns with a positive sum.

    Args:
        img: numpy image array, either 2-D (H, W) or with trailing channel
            axes such as (H, W, C).
        plot: when True, display the cropped image with matplotlib.

    Returns:
        The slice img[row_first:row_last, col_first:col_last]. An image whose
        row and column sums are all zero is returned uncropped.
    """
    # Collapse any channel axes so the row/column profiles below are always 1-D.
    # On (H, W, C) input the per-axis sums used to be 2-D, so np.argmax returned
    # an index into the flattened array and the crop came out wrong or empty.
    if np.ndim(img) == 2:
        intensity = img
    else:
        intensity = np.reshape(img, (img.shape[0], img.shape[1], -1)).sum(axis=2)

    # True where a row / column contains signal
    row_has_signal = np.sum(intensity, axis=1) > 0
    col_has_signal = np.sum(intensity, axis=0) > 0

    # argmax on a boolean vector gives the position of the first True; running it on
    # the reversed vector and subtracting from the length gives one past the last True.
    row_first = np.argmax(row_has_signal)
    col_first = np.argmax(col_has_signal)
    row_last = len(row_has_signal) - np.argmax(row_has_signal[::-1])
    col_last = len(col_has_signal) - np.argmax(col_has_signal[::-1])

    # clip the image
    img = img[row_first:row_last, col_first:col_last]
    if plot:
        plt.imshow(img, cmap='gray')
        plt.axis('off')
        plt.show()
    return img

In [None]:
# Plot every image of train_10 before and after clipping, one figure per image
for image_id, path in zip(train_10['id'], train_10['path']):
    # flag 0 makes OpenCV load the file as a single-channel grayscale image
    img = cv2.imread(path, 0)
    clipped = clip_image(img, plot=False)
    # plot both the original and the clipped image side by side
    fig, ax = plt.subplots(1, 2, figsize=(20, 20))
    ax[0].imshow(img, cmap='gray')
    ax[0].axis('off')
    ax[0].set_title('Original Image')
    ax[1].imshow(clipped, cmap='gray')
    ax[1].axis('off')
    ax[1].set_title('Clipped Image')
    # Figure-level title: plt.title() acts on the current axes (the right-hand panel)
    # and therefore replaced the 'Clipped Image' caption with the id.
    fig.suptitle('ID:{}'.format(image_id))
    plt.show()

In [None]:
# histogram equalization
def equalize_image(img, plot=False):
    """Return the histogram-equalized version of `img`.

    Args:
        img: image array handed straight to cv2.equalizeHist.
            NOTE(review): presumably must be 8-bit single-channel — confirm.
        plot: when True, also display the equalized image with matplotlib.

    Returns:
        The output of cv2.equalizeHist(img).
    """
    equalized = cv2.equalizeHist(img)
    if not plot:
        return equalized
    plt.imshow(equalized, cmap='gray')
    plt.axis('off')
    plt.show()
    return equalized

# plot the intensity histogram and image side by side
def plot_intensity_hist(img):
    """Show a 256-bin histogram of the pixel values of `img` next to the image itself.

    Args:
        img: image array; it is flattened with .ravel() for the histogram and
            drawn with a gray colormap.
    """
    fig, (hist_ax, image_ax) = plt.subplots(1, 2, figsize=(20, 10))

    # Left panel: distribution of pixel values
    hist_ax.hist(img.ravel(), bins=256)
    hist_ax.set(title='Intensity Histogram', xlabel='Intensity', ylabel='Count')

    # Right panel: the image, without axis ticks
    image_ax.imshow(img, cmap='gray')
    image_ax.set_title('Image')
    image_ax.axis('off')

    plt.show()

In [None]:
# Draw the intensity histogram for each image of the 3-row sample
train = train_3
for path in train['path']:
    # second argument 0: read the file as grayscale
    img = cv2.imread(path, 0)
    plot_intensity_hist(img)

# Preprocessed

In [None]:
import glob
import numpy as np
import pandas as pd

In [None]:
# Load the training table from data/rsna-bone-age/training and preview its first rows
age_df = pd.read_csv("data/rsna-bone-age/training/train.csv")
age_df.head()

In [None]:
# Collect the paths of all PNG files in the preprocessed folder
# (the next cell converts these paths into integer ids)
preprocessed_id = glob.glob("data/rsna-bone-age/training/preprocessed/*.png")

In [None]:
# Turn every file path into an integer id: the part of the file name before its first dot
preprocessed_id = [
    int(os.path.basename(png_path).partition(".")[0])
    for png_path in preprocessed_id
]

In [None]:
# Number of preprocessed image files found
len(preprocessed_id)

In [None]:
# Keep only the table rows whose id has a preprocessed image on disk
has_preprocessed_image = age_df['id'].isin(preprocessed_id)
preprocessed_df = age_df.loc[has_preprocessed_image]

In [None]:
# Number of table rows that have a preprocessed image
len(preprocessed_df)

# Brightness of the image

In [None]:
import glob
from PIL import Image
import numpy as np
from skimage import exposure

In [None]:
# Paths in the preprocessed folder whose file name is "2014." followed by any extension
images = glob.glob("data/rsna-bone-age/training/preprocessed/2014.*")

In [None]:
# For up to 100 of the selected images, write the original and its CLAHE-enhanced
# version ("__" prefix) next to each other into the preprocessed_clahe folder.
clahe_dir = "data/rsna-bone-age/training/preprocessed_clahe/"
max_images = 100

# Image.save() does not create missing folders, so make sure the target exists
os.makedirs(clahe_dir, exist_ok=True)

# Slicing replaces the manual counter + break of the earlier version
for image_path in images[:max_images]:
    file_name = os.path.basename(image_path)
    # The context manager closes the underlying file once the image has been processed
    with Image.open(image_path) as image:
        # equalize_adapthist works on the 8-bit grayscale ("L") version of the image
        clahe = exposure.equalize_adapthist(np.array(image.convert("L")))
        image.save(os.path.join(clahe_dir, file_name))

    # Scale the result by 255 and store it as an 8-bit image
    clahe_image = Image.fromarray((clahe * 255).astype(np.uint8))
    clahe_image.save(os.path.join(clahe_dir, "__" + file_name))