In [1]:
import torch
from torch.utils.data import Dataset
from skimage import io
import numpy as np
import cv2
import os
import pandas as pd
import torch.nn as nn
import albumentations as A
import time

  from .autonotebook import tqdm as notebook_tqdm


In [3]:

class debug_Configuration:
    def __init__(self):
        # CWDE: self.init MUST BE LISTED FIRST
        self.init = {
            'PROJECT_NAME': 'Segmentation Trial',
            'MODEL_NAME': 'MyModel',
            'RUN_NAME': time.strftime('%Y-%m-%d-%H-%M-%S'),
            'WANDB_RUN_GROUP': 'miller-lab',
            'FAST_DEV_RUN': False,  # Runs inputted batches (True->1) and disables logging and some callbacks
            'MAX_EPOCHS': 25,
            'MAX_STEPS': -1,    # -1 means it will do all steps and be limited by epochs
            'STRATEGY': None    # This is the training strategy. Should be 'ddp' for multi-GPU (like HPG)
        }
        self.etl = {
            'RAW_DATA_FILE': -1,    # -1 means it will create a full data csv from the image directory, using all images in the image directory
            #'RAW_DATA_FILE': 'my_data.csv',
            'DATA_DIR': "data",
            'VAL_SIZE':  0.1,       # looks sus
            'TEST_SIZE': 0.1,      # I'm not sure these two mean what we think
            #'random_state': np.random.randint(1,50)
            # HHG2TG lol; deterministic to aid reproducibility
            'RANDOM_STATE': 42,

            'CUSTOM_TEST_SET': False,
            'TEST_SET_NAME': '/my/test/set.csv'
        }

        self.dataset = {
            'DATA_NAME': 'Ten_Dogs_64KP',
            'IMAGE_HEIGHT': 1024,
            'IMAGE_WIDTH': 1024,
            'MODEL_TYPE': 'tib',        # specifies that it's a femur model. how should we do this? not clear this is still best...
            'CLASS_LABELS': {0: 'bone', 1: 'background'},
            'IMG_CHANNELS': 1,      # Is this differnt from self.module['NUM_IMAGE_CHANNELS']
            'IMAGE_THRESHOLD': 0,
            'SUBSET_PIXELS': True,
            'USE_ALBUMENTATIONS': True,
            'NUM_KEY_POINTS' : 64,
        }

        self.datamodule = {
            # *** CHANGE THE IMAGE DIRECTORY TO YOUR OWN ***
            'IMAGE_DIRECTORY': '/media/sasank/LinuxStorage/Dropbox (UFL)/Canine Kinematics Data/TPLO_Ten_Dogs_grids',
            
            # Z. Curran:  '/home/curran.z/blue_zhe.jiang/curran.z/BM/images'
            # CWDE: "C:/Users/cwell/Documents/jtml_data/TPLO_Ten_Dogs_grids"
            # CWDE: '/home/driggersellis.cw/jtml_data/TPLO_Ten_Dogs_grids/' 
            
            #'IMAGE_DIRECTORY': 'C:/Users/cwell/Documents/jtml_data/TPLO_Ten_Dogs_grids',
            # *** CHANGE THE CHECKPOINT PATH TO YOUR OWN FOR TESTING ***
            #'CKPT_FILE': 'path/to/ckpt/file.ckpt',  # used when loading model from a checkpoint
            # used when loading model from a checkpoint, such as in testing
            
            # Z. Curran : '/home/curran.z/blue_zhe.jiang/curran.z/BM/Bone-Meal/checkpoints/'
            # CWDE: "C:/Users/cwell/Documents/jtml_data/Checkpoints/"
            # CWDE: '/home/driggersellis.cw/jtml_data/Bone-Meal/checkpoints/' 
            
            'CKPT_FILE': 'C:/Users/cwell/Documents/jtml_data/Checkpoints/' + self.init['WANDB_RUN_GROUP'] + self.init['MODEL_NAME'] + '.ckpt', 
            'BATCH_SIZE': 4,
            'SHUFFLE': True,        # Only for training, for test and val this is set in the datamodule script to False
            'NUM_WORKERS': 4,   # This number seems fine for local but on HPG, we have so many cores that a number like 4 seems better.
            'PIN_MEMORY': False,
            #'SUBSET_PIXELS': True,
            'USE_NAIVE_TEST_SET': False
        }

        # hyperparameters for training
        self.hparams = {
            'LOAD_FROM_CHECKPOINT': False,
            'learning_rate': 1e-3
        }
        
        
        # network params
        self.net = {
            # 'hrt_small', 'hrnet'
            'BACKBONE': 'hrt_small', # the name of the backbone identified in backbone_selector. Currently have planned support for hrt and hrnet
            # 'seg_hrt', 'seg_hrnet'
            'ARCHITECTURE' :'seg_hrt', # name of the architecture_builder class file
            'DATA_MODULE' : 'segmentation_data_module'
        }
        
        # PARAMS FOR BACKBONES (Format: self.[name of backbone in self.net] = { params dict })
        
        # these are essentially params for the hrnet backbone's SegmentationNetModule class
        # they are an exception to the format established in the comment above.
        self.segmentation_net_module = {
                'NUM_IMG_CHANNELS': self.dataset['IMG_CHANNELS'],
                'LOSS' : 'torch_nn_bce_with_logits_loss'
        }
        
        # Params for HRT's segmentation_net_module. Defaults used from HRT's Base config
        self.hrt_segmentation_net = {
                'MODEL_CONFIG' : 'hrt_small',
                'LOSS' : 'torch_nn_bce_loss'
        }
        
        # PARAMS FOR LOSS FUNCTIONS (Format: self.[name of loss in self.backbone] = { params dict })
        
        # Params dict for BCEWithLogitsLoss, which takes no params in the origin model from Lightning Segmentation.
        self.torch_nn_bce_with_logits_loss = {
            # NO PARAMS
        }

        self.torch_nn_bce_loss = {
            # NO PARAMS
        }
        
        self.ohem_ce_loss = {
            'IGNORE_LABEL' : -1,
            'THRES' : 0.7,
            'MIN_KEPT' : 100000,
            'WEIGHT' : None
        }
        
        # Params for FSCELoss: TODO: insert actual params
        self.fsce_loss = {
            'ce_weight' : -1,
            'ce_reduction' : -1,
            'ce_ignore_index': -1
        }
        
        #TODO: add other params dicts for each loss function we have. Code will be extensible 

        # Commented out transforms do not support keypoints
        self.transform = \
        A.Compose([
        A.RandomGamma(always_apply=False, p = 0.5,gamma_limit=(10,300)),
        A.ShiftScaleRotate(always_apply = False, p = 0.5,shift_limit=(-0.06, 0.06), scale_limit=(-0.1, 0.1), rotate_limit=(-180,180), interpolation=0, border_mode=0, value=(0, 0, 0)),
        A.Blur(always_apply=False, blur_limit=(3, 10), p=0.2),
        A.Flip(always_apply=False, p=0.5),
        # A.ElasticTransform(always_apply=False, p=0.85, alpha=0.5, sigma=150, alpha_affine=50.0, interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None, approximate=False),
        A.InvertImg(always_apply=False, p=0.5),
        A.CoarseDropout(always_apply = False, p = 0.25, min_holes = 1, max_holes = 100, min_height = 25, max_height=25),
        A.MultiplicativeNoise(always_apply=False, p=0.25, multiplier=(0.1, 2), per_channel=True, elementwise=True)
    ],
    keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
    p=0.85)

In [41]:
class debug_LitJTMLDataset(Dataset):
    
    def __init__(self, config, evaluation_type, transform=None):
        """
        Args:
            config (config): Dictionary of vital constants about data.
            store_data_ram (boolean): Taken from config.
            evaluation_type (string): Dataset evaluation type (must be 'training', 'validation', or 'test')
            num_points (int): Taken from config.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        # Create local copies of the arguments
        self.config = config
        self.num_points = self.config.dataset['NUM_KEY_POINTS']
        self.transform = self.config.transform
        
        # Check that evaluation_type is valid and then store
        if evaluation_type in ['train', 'val', 'test', 'naive']:
            self.evaluation_type = evaluation_type
        else:
            raise Exception('Incorrect evaluation type! Must be either \'train\', \'val\', \'test\', or \'naive\'.')

        # Load the data from the big_data CSV file into a pandas dataframe
        self.data = pd.read_csv(os.path.join(self.config.etl['DATA_DIR'], self.config.dataset['DATA_NAME'], self.evaluation_type + '_' + self.config.dataset['DATA_NAME'] + '.csv'))
        
    
    
    #def __init__(self, config, dataset, img_dir):

        """
        # image check
        #print('Image directory: ' + self.config.data_constants["IMAGE_DIRECTORY"])
        for idx in range(0,len(self.images)):
            if os.path.isfile(self.img_dir + '/' + self.images[idx]) ==False:
                raise Exception('Error, cannot find file: ' + self.images[idx])
        
        #print(self.config.data_constants['MODEL_TYPE'])
        for i,j in enumerate(self.dataset[0,:]):
            #if j == self.config.data_constants['MODEL_TYPE']:
            if j == 'fem':
                self.labels = self.dataset[1:,i]
        
        # label check
        for idx in range(0,len(self.labels)):
            if os.path.isfile(self.img_dir + '/' + self.labels[idx]) ==False:
                raise Exception('Error, cannot find file: ' + self.labels[idx])
        """
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        # Get the row of the dataframe
        row = self.data.iloc[idx]

        # Get the image name
        image_name = row['Image address']

        # Get the image
        image = io.imread(os.path.join(self.config.datamodule['IMAGE_DIRECTORY'], image_name))

        # Get the keypoint labels and segmentation labels
        if self.config.dataset['MODEL_TYPE'] == 'fem':
            kp_label = row['Femur 2D KP points']
            seg_label = io.imread(os.path.join(self.config.datamodule['IMAGE_DIRECTORY'], row['Fem label address']))
        elif self.config.dataset['MODEL_TYPE'] == 'tib':
            kp_label = row['Tibia 2D KP points']
            seg_label = io.imread(os.path.join(self.config.datamodule['IMAGE_DIRECTORY'], row['Tib label address']))
        else:
            raise Exception('Incorrect model type! Must be either \'fem\' or \'tib\'.')

        kp_label = kp_label[2:-2]
        kp_label = kp_label.split(']' + os.linesep + ' [')
        kp_label = [np.array([float(x) for x in list(filter(None, kp.split(' ')))]) for kp in kp_label]
        kp_label = np.array(kp_label)
        kp_label[:, 1] = 1 - kp_label[:, 1]         # ! New kp_label preprocessing
        kp_label = kp_label * 1024
        

        # * Transformations
        # Albumenations
        image_no_transform = image
        if self.transform and self.config.dataset['USE_ALBUMENTATIONS'] == True:
            transformed = self.transform(image=image, mask=seg_label, keypoints=kp_label)
            image, seg_label, kp_label = transformed['image'], transformed['mask'], transformed['keypoints']

        # * Subset Pixels
        full_image = image             # Save full image (no subset_pixels) for visualization
        if self.config.dataset['SUBSET_PIXELS'] == True:
            label_dst = np.zeros_like(seg_label)
            label_normed = cv2.normalize(seg_label, label_dst, alpha = 0, beta = 1, norm_type = cv2.NORM_MINMAX)
            seg_label = label_normed

            kernel = np.ones((30,30), np.uint8)
            label_dilated = cv2.dilate(seg_label, kernel, iterations = 5)
            image_subsetted = cv2.multiply(label_dilated, image)
            image = image_subsetted

        # * Convert to tensors
        image = torch.FloatTensor(image[None, :, :]) # Store as byte (to save space) then convert when called in __getitem__. - What. What does this mean?
        full_image = torch.FloatTensor(full_image[None, :, :]) # Store as byte (to save space) then convert when called in __getitem__
        seg_label = torch.FloatTensor(seg_label[None, :, :])
        #kp_label = torch.FloatTensor(kp_label.reshape(-1))      # Reshape to 1D array so that it's 2*num_keypoints long
        kp_label = torch.FloatTensor(kp_label)          # kp_label is of shape (num_keypoints, 2)

        # CWDE: Removed so that segmentation can occur without implementing the KP data fully
        #assert (kp_label.shape[0], kp_label.shape[1]) == (self.num_points, 2), "Keypoint label shape is incorrect!"
        #assert (kp_label.shape[0], kp_label.shape[1]) == (self.num_points, 2), "index " + str(idx) + " kp_label.shape: " + str(kp_label.shape) + " self.num_points: " + str(self.num_points)
        assert len(kp_label) == self.num_points, "index " + str(idx) + " kp_label.shape: " + str(kp_label.shape) + " self.num_points: " + str(self.num_points)

        #print("kp_label.shape:")
        #print(kp_label.shape)

        # * Create a dictionary of the sample
        sample = {'image': image,
                    'img_name': image_name,
                    'kp_label': kp_label,
                    'seg_label': seg_label,
                    'full_image': full_image,
                    'image_no_transform': image_no_transform}
        
        # CWDE: hotfix for compatibility with segmentation code that just uses 'label'
        sample['label'] = seg_label

        # * Return the sample
        return sample

In [46]:
# Set the working directory to the root of the git repo since that's what the config file expects
os.chdir('/home/sasank/Documents/GitRepos/Bone-Meal/')
config = debug_Configuration()
dataset = debug_LitJTMLDataset(config, evaluation_type='test')

In [49]:
# For each dataset element, print the number of keypoints and the shape of the keypoints
for i in range(len(dataset)):
    #print(dataset[i]['kp_label'].shape)
    if dataset[i]['kp_label'].shape != (dataset.num_points, 2):
        print(i)
        print(dataset[i]['kp_label'].shape)
        print((dataset.num_points, 2))


AssertionError: index 6 kp_label.shape: torch.Size([63, 2]) self.num_points: 64

In [40]:
# Load the dataset CSV
dataset_csv = pd.read_csv("/home/sasank/Documents/GitRepos/Bone-Meal/data/Ten_Dogs_64KP/train_Ten_Dogs_64KP.csv")

# Print the number of keypoints and the shape of the keypoints for each row
for i in range(len(dataset_csv)):
    kp_label = dataset_csv.iloc[i]['Tibia 2D KP points']
    kp_label = kp_label[2:-2]
    kp_label = kp_label.split(']' + os.linesep + ' [')
    kp_label = [np.array([float(x) for x in list(filter(None, kp.split(' ')))]) for kp in kp_label]
    kp_label = np.array(kp_label)
    kp_label[:, 1] = 1 - kp_label[:, 1]         # ! New kp_label preprocessing
    kp_label = kp_label * 1024
    if kp_label.shape != (64, 2):
        print(i)
        print(kp_label.shape)
        print((64, 2))
    #print(kp_label.shape)
    #print((dataset.num_points, 2))