In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from my_dataset import *
from ultralytics import YOLO
import cv2
%load_ext autoreload
%autoreload 2

### Preprocessing for the MURA Dataset

In [2]:
# Load the MURA image-path lists and the per-study label tables, then attach
# a study label to every image row.
train_mura = pd.read_csv('../MURA-v1.1/train_image_paths.csv', header=None, names=['paths'])
test_mura = pd.read_csv('../MURA-v1.1/valid_image_paths.csv', header=None, names=['paths'])
# NOTE(review): only one column name is given; presumably the CSVs have two
# columns (study path, label) so pandas uses the study path as the index,
# which is what the `right_index=True` merges below rely on -- confirm.
train_mura_labels = pd.read_csv('../MURA-v1.1/train_labeled_studies.csv', header=None, names=['labels'])
test_mura_labels = pd.read_csv('../MURA-v1.1/valid_labeled_studies.csv', header=None, names=['labels'])


def _study_dir(image_paths):
    """Return each image path with its file name removed (trailing '/' kept).

    A fixed ``str[:-10]`` slice is only correct for file names that are
    exactly 10 characters long (``image1.png``); a name such as
    ``image10.png`` would yield a key that matches no study and the row would
    be silently dropped by the inner merge. Stripping the last path component
    gives the same key for 10-character names and the right key otherwise.
    """
    return image_paths.str.replace(r'[^/]+$', '', regex=True)


train_mura['indices'] = _study_dir(train_mura.paths)
test_mura['indices'] = _study_dir(test_mura.paths)
# full MURA with all parts
# can select parts by filtering indices
train_data = pd.merge(train_mura, train_mura_labels, left_on='indices', right_index=True)
test_data = pd.merge(test_mura, test_mura_labels, left_on='indices', right_index=True)

In [7]:
# Base dataset: keep only the shoulder and elbow studies.
# (Use 'HAND' / 'FINGER' in place of the two keywords below to build the
# hand/finger variant instead.)
train_keys = train_data['indices'].str
test_keys = test_data['indices'].str

is_se_train = train_keys.contains('SHOULDER') | train_keys.contains('ELBOW')
is_se_test = test_keys.contains('SHOULDER') | test_keys.contains('ELBOW')

# reset_index() keeps the previous index as an 'index' column.
se = train_data[is_se_train].reset_index()
se_test = test_data[is_se_test].reset_index()


In [8]:
resize = 256

# Preprocessing applied to every image, in order: resize to a square,
# force RGB mode, convert to a tensor.
preprocess_steps = [
    transforms.Resize((resize, resize)),
    transforms.Lambda(lambda img: img.convert('RGB')),
    transforms.ToTensor(),
]
trans = transforms.Compose(preprocess_steps)

# Class-name -> class-id mapping handed to ImageSet.
# (For the hand/finger variant use {'hand': 0, 'finger': 1} and the
# hand_finger frames instead of se / se_test.)
classes = dict(shoulder=0, elbow=1)

# Paths in the MURA CSVs are relative to the parent directory.
dataset_opts = dict(root_adj='../')
base_dataset = ImageSet(se.paths, se.labels, trans, classes, **dataset_opts)
base_testset = ImageSet(se_test.paths, se_test.labels, trans, classes, **dataset_opts)

In [9]:
# One sample per batch, drawn in random order, for both splits.
batch_size = 1
loader_opts = {'batch_size': batch_size, 'shuffle': True}
base_trainloader = DataLoader(base_dataset, **loader_opts)
base_testloader = DataLoader(base_testset, **loader_opts)

In [11]:
# Export the shoulder/elbow training split to disk: one JPEG per sample under
# images/train and, for samples that pass the check below, a matching text
# file with the space-separated label values under labels/train.
image_dir = '../shoulder_elbow_datasets/images/train'
label_dir = '../shoulder_elbow_datasets/labels/train'

# cv2.imwrite does not create directories and reports failure only through
# its return value, so make sure both targets exist up front.
os.makedirs(image_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

for idx, (image, label) in enumerate(base_trainloader):
    stem = f'image{idx+1}'

    # Take the first (only) sample of the batch and move channels last.
    image = image.detach().numpy()[0].transpose(1, 2, 0)
    image = (image * 255).astype(np.uint8)

    # NOTE(review): only samples whose second label value equals 0.5 get a
    # label file; presumably that value marks a positive sample -- confirm
    # against ImageSet.
    if label[0][1] == 0.5:
        fields = label.detach().numpy().astype(str).tolist()[0]
        with open(f'{label_dir}/{stem}.txt', 'w') as f:
            f.write(' '.join(fields))

    # The transform pipeline yields RGB, but OpenCV writes arrays as BGR;
    # reverse the channel axis so colours are not swapped on disk.
    bgr = np.ascontiguousarray(image[..., ::-1])
    out_path = f'{image_dir}/{stem}.jpg'
    if not cv2.imwrite(out_path, bgr):
        raise IOError(f'cv2.imwrite failed for {out_path}')




### Preprocessing for Bone_Fracture

In [None]:
def get_paths(root_f, root_nf):
    """List the files of two directories, skipping duplicated "copy" files.

    Args:
        root_f: directory holding the fractured images (trailing slash
            optional).
        root_nf: directory holding the not-fractured images (trailing slash
            optional).

    Returns:
        A list of paths: every entry of ``root_f`` followed by every entry of
        ``root_nf``, each directory listed in sorted order, excluding entries
        whose file name contains "copy" (case-insensitive).

    Raises:
        OSError: if either directory cannot be listed.
    """
    paths = []
    for root in (root_f, root_nf):
        # os.listdir order is arbitrary; sort so repeated runs agree.
        for name in sorted(os.listdir(root)):
            # Test only the file name: matching on the full path would drop
            # every file whenever a parent directory contains "copy".
            if 'copy' in name.lower():
                continue
            paths.append(os.path.join(root, name))
    return paths

In [2]:
root = '../Bone_Fracture_Binary_Classification/'


def _split_paths_and_labels(split):
    """Return (paths, labels) for one split of the Bone_Fracture dataset.

    Labels are 0 for files inside a ``not fractured`` directory and 1
    otherwise. The label is read from the name of the file's parent
    directory: a plain ``'not' in path`` substring test would also fire on
    any file name or ancestor directory that happens to contain "not".
    """
    paths = get_paths(f'{root}{split}/fractured/', f'{root}{split}/not fractured/')
    labels = [
        0 if os.path.basename(os.path.dirname(p)) == 'not fractured' else 1
        for p in paths
    ]
    return paths, labels


train_paths, train_labels = _split_paths_and_labels('train')
test_paths, test_labels = _split_paths_and_labels('test')
val_paths, val_labels = _split_paths_and_labels('val')

NameError: name 'os' is not defined

In [56]:
# Image preprocessing for the Bone_Fracture dataset: square resize, RGB mode,
# then tensor conversion.
resize = 256
target_size = (resize, resize)
trans = transforms.Compose(
    [
        transforms.Resize(target_size),
        transforms.Lambda(lambda pil_img: pil_img.convert('RGB')),
        transforms.ToTensor(),
    ]
)
# Single class name -> id mapping passed to ImageSet.
classes = {'bone': 0}

In [57]:
# Wrap each split's (paths, labels) pair in an ImageSet sharing the same
# transform and class mapping.
train_ds, test_ds, val_ds = [
    ImageSet(split_paths, split_labels, trans, classes)
    for split_paths, split_labels in (
        (train_paths, train_labels),
        (test_paths, test_labels),
        (val_paths, val_labels),
    )
]

In [58]:
# One shuffled, single-sample loader per split.
train_dl, val_dl, test_dl = [
    DataLoader(split_ds, batch_size=1, shuffle=True)
    for split_ds in (train_ds, val_ds, test_ds)
]

In [61]:
# Export the Bone_Fracture training split to disk: one JPEG per sample under
# images/train and, for samples that pass the check below, a matching text
# file with the space-separated label values under labels/train.
image_dir = '../bone_fracture/images/train'
label_dir = '../bone_fracture/labels/train'

# cv2.imwrite does not create directories and reports failure only through
# its return value, so make sure both targets exist up front.
os.makedirs(image_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

for idx, (image, label) in enumerate(train_dl):
    stem = f'image{idx+1}'

    # Take the first (only) sample of the batch and move channels last.
    image = image.detach().numpy()[0].transpose(1, 2, 0)
    image = (image * 255).astype(np.uint8)

    # NOTE(review): only samples whose second label value equals 0.5 get a
    # label file; presumably that value marks a positive sample -- confirm
    # against ImageSet.
    if label[0][1] == 0.5:
        fields = label.detach().numpy().astype(str).tolist()[0]
        with open(f'{label_dir}/{stem}.txt', 'w') as f:
            f.write(' '.join(fields))

    # The transform pipeline yields RGB, but OpenCV writes arrays as BGR;
    # reverse the channel axis so colours are not swapped on disk.
    bgr = np.ascontiguousarray(image[..., ::-1])
    out_path = f'{image_dir}/{stem}.jpg'
    if not cv2.imwrite(out_path, bgr):
        raise IOError(f'cv2.imwrite failed for {out_path}')
    