# Computer Vision in Secret Sharing
### A Hackathon Submission for **INSIGHT**

- Team Name -  `WatchTower`
- Leader Name - `Abhiroop Sarkar`
- Track - `Safety and Security`


## Image Instance Segmentation using Mask-RCNN

### Importing Libraries

In [3]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

import torch, torchvision
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [4]:
#img = Image.open('tester1.jpg')
#img

###  Importing Data

### Custom Dataset Training

In [7]:
import xml.etree.ElementTree as ET

class CustomDataset(Dataset):
    def __init__(self, img_dir, annot_dir, transforms=None):
        self.img_dir = img_dir
        self.annot_dir = annot_dir
        self.transforms = transforms
        self.image_files = sorted(os.listdir(img_dir))  # Sort the images to maintain consistency
    
    def parse_voc_xml(self, filename):
        # Parse the XML annotation file
        tree = ET.parse(filename)
        root = tree.getroot()
        
        boxes = []
        labels = []
        
        for obj in root.findall('object'):
            label = obj.find('name').text  # Get the object label
            
            bbox = obj.find('bndbox')
            xmin = int(bbox.find('xmin').text)
            ymin = int(bbox.find('ymin').text)
            xmax = int(bbox.find('xmax').text)
            ymax = int(bbox.find('ymax').text)
            
            boxes.append([xmin, ymin, xmax, ymax])  # Append bounding box coordinates
            labels.append(1)  # You can assign labels based on your dataset (e.g., class ID 1 for all)

        print(boxes, labels, "\n")
        
        return boxes, labels

    def __getitem__(self, idx):
        # Load image
        img_filename = self.image_files[idx]
        print(f"Loading image: {img_filename}")
        img_path = os.path.join(self.img_dir, img_filename)
        img = Image.open(img_path).convert("RGB")
        
        # Load and parse annotation
        annot_filename = img_filename.replace('.png', '.xml')  # Assuming XML files match image filenames
        print(f"Loading annotation: {annot_filename}")
        annot_path = os.path.join(self.annot_dir, annot_filename)
        boxes, labels = self.parse_voc_xml(annot_path)
        
        # Convert boxes and labels to tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        # Dummy masks, as we don't have real ones
        masks = torch.ones((len(boxes), img.size[1], img.size[0]), dtype=torch.uint8)

        # Prepare target dictionary
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['masks'] = masks
        
        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.image_files)

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())  # Convert PIL images to tensors
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

"""# Usage
img_dir = 'images'  # Path to your images folder
annot_dir = 'annotations'  # Path to your annotations folder

# Define dataset
dataset = CustomDataset(img_dir=img_dir, annot_dir=annot_dir, transforms=get_transform(train=True))
dataset_test = CustomDataset(img_dir=img_dir, annot_dir=annot_dir, transforms=get_transform(train=False))"""

"# Usage\nimg_dir = 'images'  # Path to your images folder\nannot_dir = 'annotations'  # Path to your annotations folder\n\n# Define dataset\ndataset = CustomDataset(img_dir=img_dir, annot_dir=annot_dir, transforms=get_transform(train=True))\ndataset_test = CustomDataset(img_dir=img_dir, annot_dir=annot_dir, transforms=get_transform(train=False))"

In [19]:
def collate_fn(batch):
    return tuple(zip(*batch))

def get_model(num_classes):
    # Load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one (change number of classes for your dataset)
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

    return model

def train_model():
    # Load the dataset
    dataset = CustomDataset(img_dir=r'Test\images', annot_dir=r'Test\annotations', transforms=get_transform(train=True))
    dataset_test = CustomDataset(img_dir=r'Test\images', annot_dir=r'Test\annotations', transforms=get_transform(train=False))

    # Split dataset into train and validation
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-10])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-10:])

    # Create the data loaders
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0, collate_fn=collate_fn)
    data_loader_test = DataLoader(dataset_test, batch_size=2, shuffle=False, num_workers=0, collate_fn=collate_fn)

    # Define the number of classes
    num_classes = 2  # Modify according to your dataset (including the background class)

    # Load the model (with your get_model function)
    model = get_model(num_classes)

    # Move model to the device (CPU or GPU)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # Define the optimizer and scheduler
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        # Train for one epoch
        model.train()
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)

            # Total loss
            losses = sum(loss for loss in loss_dict.values())
            
            # Backpropagation
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        # Update the learning rate
        lr_scheduler.step()

        # Evaluate after each epoch (optional)
        evaluate(model, data_loader_test, device=device)

# Utility function for evaluation
def evaluate(model, data_loader, device):
    model.eval()
    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            outputs = model(images)
            # Optionally, log outputs or compute metrics here

# Now you can call train_model to train your model
train_model()

Loading image: Cars37.png
Loading annotation: Cars37.xml
[[146, 114, 246, 163]] [1] 

Loading image: Cars15.png
Loading annotation: Cars15.xml
[[310, 146, 372, 171]] [1] 

Loading image: Cars20.png
Loading annotation: Cars20.xml
[[18, 135, 48, 152]] [1] 

Loading image: Cars25.png
Loading annotation: Cars25.xml
[[177, 139, 216, 159]] [1] 

Loading image: Cars16.png
Loading annotation: Cars16.xml
[[36, 175, 62, 186]] [1] 

Loading image: Cars23.png
Loading annotation: Cars23.xml
[[143, 172, 213, 211]] [1] 

Loading image: Cars0.png
Loading annotation: Cars0.xml
[[226, 125, 419, 173]] [1] 

Loading image: Cars21.png
Loading annotation: Cars21.xml
[[191, 165, 255, 192]] [1] 

Loading image: Cars31.png
Loading annotation: Cars31.xml
[[286, 173, 400, 211]] [1] 

Loading image: Cars5.png
Loading annotation: Cars5.xml
[[226, 218, 242, 240]] [1] 

Loading image: Cars18.png
Loading annotation: Cars18.xml
[[313, 286, 405, 311]] [1] 

Loading image: Cars6.png
Loading annotation: Cars6.xml
[[112, 

KeyboardInterrupt: 

In [13]:
dataset = CustomDataset(img_dir=r'Test\annotations', annot_dir=r'Test\annotations', transforms=get_transform(train=True))
dataset_test = CustomDataset(img_dir=r'Test\images', annot_dir=r'Test\annotations', transforms=get_transform(train=False))
print(f"Number of samples in the dataset: {len(dataset)}")
print(f"Number of samples in the dataset: {len(dataset_test)}")

Number of samples in the dataset: 41
Number of samples in the dataset: 41


In [3]:
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT)

### Defining Labels and Threshold

In [6]:
Labels = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

In [7]:
threshold = 0.8

### Prediction

In [8]:
def prediction_function(img, threshold):
  model.eval()
  transform = transforms.ToTensor()
  prediction = model([transform(img)])
  prediction
  p_score = list(prediction[0]['scores'].detach().numpy())
  pred_t = [p_score.index(x) for x in p_score if x>threshold][-1]
  masks = (prediction[0]['masks']>0.5).squeeze().detach().cpu().numpy()
  pred_class = [Labels[i] for i in list(prediction[0]['labels'].numpy())]
  pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(prediction[0]['boxes'].detach().numpy())]
  masks = masks[:pred_t+1]
  pred_boxes = pred_boxes[:pred_t+1]
  pred_class = pred_class[:pred_t+1]
  return masks, pred_boxes, pred_class

In [10]:
def random_colour_masks(image):
  cmap = plt.get_cmap('Spectral')
  r = np.zeros_like(image).astype(np.uint8)
  g = np.zeros_like(image).astype(np.uint8)
  b = np.zeros_like(image).astype(np.uint8)
  r[image == 1], g[image == 1], b[image == 1] = cmap(np.random.rand(3))[:3]
  coloured_mask = np.stack([r, g, b], axis=2)
  return coloured_mask

In [11]:
# Assuming the masks are binary (0 and 1), we sum them up to overlay
combined_mask = np.zeros_like(prediction[0]["masks"][0].cpu().detach().numpy())

for i in range(10):
    combined_mask += prediction[0]["masks"][i].cpu().detach().numpy()

# Normalize combined mask for better visibility (optional, if required)
#combined_mask = np.clip(combined_mask, 0, 1)

# Plot the combined mask
plt.imshow((combined_mask * 255).astype("uint8").squeeze(), cmap="bwr")
plt.show()

NameError: name 'prediction' is not defined

In [12]:
# Create a base combined mask with zeros
combined_mask = np.zeros_like(prediction[0]["masks"][0].cpu().detach().numpy())

# Loop through the first 10 masks to combine them
for i in range(10):
    mask = prediction[0]["masks"][i].cpu().detach().numpy()
    combined_mask += mask  # Add the mask to the combined one

# Normalize the combined mask to keep values between 0 and 1
combined_mask = np.clip(combined_mask, 0, 1)

# Plot the combined mask
plt.imshow((combined_mask * 255).astype("uint8").squeeze(), cmap="Spectral")

# Add labels at different positions for each mask
for i in range(10):
    mask = prediction[0]["masks"][i].cpu().detach().numpy()
    # Find the coordinates of the center of the mask
    y, x = np.where(mask.squeeze() > 0)

    if len(x) > 0 and len(y) > 0:
        x_center = int(np.mean(x))  # Mean x position
        y_center = int(np.mean(y))  # Mean y position
        plt.text(x_center, y_center, f'Mask {i+1}', color='red', fontsize=5,
                 bbox=dict(facecolor='white', alpha=0.5))  # Label the mask

# Turn off the axis and show the plot
plt.axis('off')
plt.show()


NameError: name 'prediction' is not defined

In [None]:
img.size

(1280, 853)

## Shamir's Secret Sharing on the Masked Segment

### Choose Instances as Special Region

### Architecture

## References
- Scene Parsing through ADE20K Dataset. B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso and A. Torralba. Computer Vision and Pattern Recognition (CVPR), 2017.
- He, K., Gkioxari, G., Dollár, P., & Girshick, R. (2017). Mask r-cnn. In Proceedings of the IEEE international conference on computer vision (pp. 2961-2969).
- Sardar, M. K., Pramanik, J., & Adhikari, A. (2023). (t,k,n) regional secret image sharing over finite fields. Signal Processing, (109082), 109082. doi:10.1016/j.sigpro.2023.109082

In [None]:
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
x

[tensor([[[0.7592, 0.6074, 0.5873,  ..., 0.5942, 0.9218, 0.6867],
          [0.8953, 0.6311, 0.5552,  ..., 0.0531, 0.4837, 0.8399],
          [0.7195, 0.9931, 0.9466,  ..., 0.3357, 0.7302, 0.0012],
          ...,
          [0.7118, 0.0025, 0.2239,  ..., 0.4436, 0.7800, 0.9346],
          [0.3350, 0.2276, 0.1702,  ..., 0.2011, 0.4758, 0.5872],
          [0.2102, 0.5470, 0.0388,  ..., 0.4487, 0.6133, 0.1579]],
 
         [[0.3922, 0.4709, 0.1454,  ..., 0.4448, 0.3925, 0.8387],
          [0.1777, 0.7338, 0.1363,  ..., 0.2063, 0.6893, 0.5158],
          [0.1371, 0.6803, 0.5220,  ..., 0.7026, 0.1099, 0.2729],
          ...,
          [0.9348, 0.2913, 0.2119,  ..., 0.6380, 0.3203, 0.3638],
          [0.5864, 0.7739, 0.3494,  ..., 0.1212, 0.4795, 0.4629],
          [0.0375, 0.0347, 0.6303,  ..., 0.1071, 0.3921, 0.2998]],
 
         [[0.0141, 0.4796, 0.1916,  ..., 0.5394, 0.7272, 0.4238],
          [0.9239, 0.6992, 0.4255,  ..., 0.1901, 0.8797, 0.6107],
          [0.4843, 0.9058, 0.0138,  ...,