# Idea


Semi supervised data preparation 

1. Build a classifier or use ResNet without image-net weights for training a classifier 
2. Freeze the classifier weights and build a segmentation model 


The data preparation for the classifier. Select 2 classes - Stroma , Tumor

For data preparation for the classifier - using the mask and image name see if the classes are seen on the image and using pixel altering keep the region coresponding to the mask as such for the image and the rest of the region pixelated. 

Create a dataset like for 500 each or 100 each for stroma and tumor in their directories. 

In [19]:
import os
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

In [24]:
def process_and_save_images(data_path, resize_shape, save_folder):
    images_path = os.path.join(data_path, "images")
    masks_path = os.path.join(data_path, "targets")
    file_list = os.listdir(images_path)

    # Create directories for output once
    damage_types = ['no_damage', 'minor_damage', 'major_damage', 'destroyed']
    for damage_type in damage_types:
        os.makedirs(os.path.join(save_folder, damage_type), exist_ok=True)

    for file_name in tqdm(file_list):
        image_path = os.path.join(images_path, file_name)
        mask_path = os.path.join(masks_path, file_name.replace('.png', '_target.png'))

        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, resize_shape, interpolation=cv2.INTER_LINEAR)

        mask = cv2.imread(mask_path, 0)  # Read mask as grayscale
        mask = cv2.resize(mask, resize_shape, interpolation=cv2.INTER_LINEAR)

        # Save class labels
        for i, label in enumerate(damage_types):
            if (mask == i).any():
                blank_image = np.zeros_like(image)
                coords = np.argwhere(mask == i)
                for x, y in coords:
                    blank_image[x, y] = image[x, y]
                Image.fromarray(blank_image).save(os.path.join(save_folder, label, file_name))

def apply_preprocessing(data_path, resize_shape, save_folder):
    process_and_save_images(data_path, resize_shape, save_folder)
    print('Saved Classifier Processed Dataset')


In [25]:
# Build Train Dataset 
DATA_PATH = "/home/aaimscadmin/IRP_DATA/XBD/train_images_labels_targets/train"
SAVE_FOLDER = "/home/aaimscadmin/IRP_DATA/XBD/classifier_dataset/train"

RESIZE_SHAPE=(256,256)
apply_preprocessing(DATA_PATH,RESIZE_SHAPE,SAVE_FOLDER)

100%|██████████| 5598/5598 [1:05:42<00:00,  1.42it/s]

Saved Classifier Processed Dataset





In [26]:
# Build Train Dataset 
DATA_PATH = "/home/aaimscadmin/IRP_DATA/XBD/test_images_labels_targets/test"
SAVE_FOLDER = "/home/aaimscadmin/IRP_DATA/XBD/classifier_dataset/test"

RESIZE_SHAPE=(256,256)
apply_preprocessing(DATA_PATH,RESIZE_SHAPE,SAVE_FOLDER)

100%|██████████| 1866/1866 [22:03<00:00,  1.41it/s]

Saved Classifier Processed Dataset



