### Download MURA Data
***

In [None]:
# URL of the dataset zip archive; the download cell passes it to wget.download.
# Replace the placeholder with a real link before running that cell.
mura_link = 'Enter Your Link Here'

#### Upgrade libraries if they are not already up to date in Colab
***

In [None]:
# !pip install --upgrade scikit-learn
# !pip install --upgrade wget

### Import libraries required for download
***

In [None]:
import wget
import os
import zipfile
import pandas as pd
import cv2
from cv2 import imread, imshow
from skimage.transform import rescale, resize

In [None]:
# Fetch and unpack the archive only when no extracted copy is present yet.
if not os.path.exists('./mura_data'):
    wget.download(mura_link, './mura_zip')
    with zipfile.ZipFile('mura_zip', 'r') as archive:
        archive.extractall('./mura_data')
else:
    print("It exists no need to download")

### Validate paths and create the augmented CSV files with pandas
***

In [None]:
# Dataset folder inside the extraction directory ('./mura_data'); the
# `{mode}_image_paths.csv` files are read from, and the augmented CSVs written to, this folder.
ROOT_FOLDER = './mura_data/MURA-v1.1'

In [None]:
# Integer code for each body-part folder name; codes follow the order listed here.
BODY_PARTS = {
    part_name: part_code
    for part_code, part_name in enumerate((
        'XR_ELBOW',
        'XR_FINGER',
        'XR_FOREARM',
        'XR_HAND',
        'XR_HUMERUS',
        'XR_SHOULDER',
        'XR_WRIST',
    ))
}

In [None]:
def transform_pandas_file(mode, root_folder=None, body_parts=None):
    """Augment ``{mode}_image_paths.csv`` with label, existence and body-part columns.

    Reads the headerless, single-column CSV of image paths for the given split,
    derives three extra columns from every path and writes the result next to
    the input as ``{mode}_image_paths_aug.csv``.

    Added columns:
        label -- text after the last underscore in the name of the folder that
                 directly contains the image.
        valid -- 'Exist' if the image file is found on disk, otherwise 'Miss'.
        part  -- integer code looked up from the third path component.

    Parameters:
        mode        -- split name used in the CSV file names (e.g. 'train', 'valid').
        root_folder -- dataset folder holding the CSV files; defaults to the
                       notebook-level ROOT_FOLDER.
        body_parts  -- mapping from body-part folder name to integer code;
                       defaults to the notebook-level BODY_PARTS.

    Returns:
        None. The augmented CSV is written to disk and its location printed.

    Raises:
        KeyError   -- if a path's third component is not a key of `body_parts`.
        IndexError -- if a path has fewer than three '/'-separated components.
    """
    if root_folder is None:
        root_folder = ROOT_FOLDER
    if body_parts is None:
        body_parts = BODY_PARTS

    # The CSV paths are resolved against the parent of the dataset folder
    # (they are expected to start with the dataset folder's own name).
    # Deriving the parent with os.path avoids slicing a fixed number of
    # characters off the folder string.
    data_parent = os.path.dirname(os.path.normpath(root_folder))

    def extract_label(path):
        # Use only the containing folder's name so that an underscore in the
        # image file name cannot leak into the label.
        containing_folder = os.path.basename(os.path.dirname(path))
        return containing_folder.split('_')[-1]

    def check_existence(path):
        return 'Exist' if os.path.exists(os.path.join(data_parent, path)) else 'Miss'

    def extract_part(path):
        return body_parts[path.split('/')[2]]

    data = pd.read_csv(root_folder + f'/{mode}_image_paths.csv', header=None)
    data.columns = ['img_path']

    data['label'] = data['img_path'].apply(extract_label)
    data['valid'] = data['img_path'].apply(check_existence)
    data['part'] = data['img_path'].apply(extract_part)

    output_path = root_folder + f'/{mode}_image_paths_aug.csv'
    data.to_csv(output_path, index=False)
    print("Saved file at: " + output_path)
    return


# Build the augmented CSV for each dataset split in turn.
for split in ('train', 'valid'):
    transform_pandas_file(split)

In [None]:
## Every listed image must be flagged 'Exist', so later cells can skip re-validating paths
new_train_df = pd.read_csv(ROOT_FOLDER + '/train_image_paths_aug.csv')
assert new_train_df['valid'].eq('Exist').all()

new_valid_df = pd.read_csv(ROOT_FOLDER + '/valid_image_paths_aug.csv')
assert new_valid_df['valid'].eq('Exist').all()

### Convert all images to Grayscale
***

In [None]:
# GRAY_ROOT_FOLDER = './mura_data/MURA-v1.1-Gray'

# def makedir(path):
#     try:
#         os.mkdir(path)
#     except:
#         return

# def open_and_convert(src, name, dest):
#     _x = imread(src + name)
#     _x = cv2.cvtColor(_x, cv2.COLOR_BGR2GRAY)
#     _f = dest+name
#     makedir('/'.join(_f.split('/')[:-2]))
#     makedir('/'.join(_f.split('/')[:-1]))
#     cv2.imwrite(_f,  _x)
#     return
    
# train_df = pd.read_csv(ROOT_FOLDER + '/train_image_paths_aug.csv')
# val_df   = pd.read_csv(ROOT_FOLDER + '/valid_image_paths_aug.csv')

# makedir(GRAY_ROOT_FOLDER)
# makedir(GRAY_ROOT_FOLDER+'/train')
# GRAY_ROOT_PREFIX = GRAY_ROOT_FOLDER+'/train'
# makedir(GRAY_ROOT_PREFIX+'/XR_ELBOW')
# makedir(GRAY_ROOT_PREFIX+'/XR_FINGER')
# makedir(GRAY_ROOT_PREFIX+'/XR_FOREARM')
# makedir(GRAY_ROOT_PREFIX+'/XR_HAND')
# makedir(GRAY_ROOT_PREFIX+'/XR_HUMERUS')
# makedir(GRAY_ROOT_PREFIX+'/XR_SHOULDER')
# makedir(GRAY_ROOT_PREFIX+'/XR_WRIST')

# makedir(GRAY_ROOT_FOLDER+'/valid')
# GRAY_ROOT_PREFIX = GRAY_ROOT_FOLDER+'/valid'
# makedir(GRAY_ROOT_PREFIX+'/XR_ELBOW')
# makedir(GRAY_ROOT_PREFIX+'/XR_FINGER')
# makedir(GRAY_ROOT_PREFIX+'/XR_FOREARM')
# makedir(GRAY_ROOT_PREFIX+'/XR_HAND')
# makedir(GRAY_ROOT_PREFIX+'/XR_HUMERUS')
# makedir(GRAY_ROOT_PREFIX+'/XR_SHOULDER')
# makedir(GRAY_ROOT_PREFIX+'/XR_WRIST')

# train_image_paths = train_df['img_path'].values
# train_labels = train_df['label'].values

# val_image_paths = val_df['img_path'].values
# val_labels = val_df['label'].values

# src = ROOT_FOLDER + '/'
# dest = GRAY_ROOT_FOLDER + '/'
# for index, img_path in enumerate(train_image_paths):
#     name = img_path[10:]
#     open_and_convert(src,name,dest)
#     if index % 1000 == 0 and index != 0:
#         print(f"{index} train images converted to grayscale...")

# for index, img_path in enumerate(val_image_paths):
#     name = img_path[10:]
#     open_and_convert(src,name,dest)
#     if index % 1000 == 0 and index != 0:
#         print(f"{index} val images converted to grayscale...")

### Delete the old images and rename the gray folder to the old one
***

In [None]:
# ### Move the files to the Gray Folder
# !mv './mura_data/MURA-v1.1/*.csv' './mura_data/MURA-v1.1-Gray/'

In [None]:
# !rm -rf './mura_data/MURA-v1.1'
# !mv './mura_data/MURA-v1.1-Gray' './mura_data/MURA-v1.1'

### Resize to 300 by 300, then center-crop to 224 by 224 to discard edge information
***

In [None]:
def resize_and_center_crop(name, dim=(224,224), resize_dim=(300,300)):
    """Resize the image at `name`, keep its central region and overwrite the file.

    The image is first resized to `resize_dim` and then cropped around its
    centre to `dim`; the result replaces the original file on disk.

    Parameters:
        name       -- path of the image file; it is read and then overwritten.
        dim        -- (width, height) of the central crop. Values larger than
                      the resized image are clipped to the resized size.
        resize_dim -- (width, height) the image is resized to before cropping.

    Returns:
        None.

    Raises:
        OSError -- if the image cannot be read or the result cannot be written.

    Note:
        Because the file is overwritten in place, an image whose size already
        equals the crop size is left untouched. This keeps re-runs of the
        notebook from zooming further into already processed images; the
        trade-off is that an original that already has exactly that size is
        also skipped.
    """
    image = imread(name)
    if image is None:
        # OpenCV's imread reports a missing or undecodable file by returning None.
        raise OSError(f"Could not read image: {name}")

    resize_dim = tuple(resize_dim)
    # The crop can never be larger than the resized image.
    crop_width = min(dim[0], resize_dim[0])
    crop_height = min(dim[1], resize_dim[1])

    # Already processed on a previous run: do not resize and crop again.
    if image.shape[0] == crop_height and image.shape[1] == crop_width:
        return

    resized = cv2.resize(src=image, dsize=resize_dim, interpolation=cv2.INTER_LINEAR)
    height, width = resized.shape[0], resized.shape[1]

    # Offsets from the top-left corner; adding the full crop size (rather than
    # two rounded halves) keeps odd crop sizes exact.
    left = (width - crop_width) // 2
    top = (height - crop_height) // 2
    cropped = resized[top:top + crop_height, left:left + crop_width]

    # imwrite returns False instead of raising when the file cannot be written.
    if not cv2.imwrite(name, cropped):
        raise OSError(f"Could not write image: {name}")
    return

train_df = pd.read_csv(ROOT_FOLDER + '/train_image_paths_aug.csv')
val_df   = pd.read_csv(ROOT_FOLDER + '/valid_image_paths_aug.csv')

train_image_paths = train_df['img_path'].values
val_image_paths = val_df['img_path'].values

# The CSV paths are expected to start with the dataset folder's own name
# (the last component of ROOT_FOLDER) followed by '/'. Derive that prefix
# instead of slicing off a fixed number of characters.
CSV_PATH_PREFIX = os.path.basename(os.path.normpath(ROOT_FOLDER)) + '/'

# Same processing for both splits; only the label in the progress message differs.
for split_name, image_paths in (('train', train_image_paths), ('val', val_image_paths)):
    for index, img_path in enumerate(image_paths):
        if not img_path.startswith(CSV_PATH_PREFIX):
            # Fail loudly rather than build a wrong file name from a bad slice.
            raise ValueError(
                f"Image path {img_path!r} does not start with {CSV_PATH_PREFIX!r}"
            )
        name = ROOT_FOLDER + '/' + img_path[len(CSV_PATH_PREFIX):]
        resize_and_center_crop(name)
        # Progress message every 1000 images (not for the very first one).
        if index % 1000 == 0 and index != 0:
            print(f"{index} {split_name} images cropped...")