# Notebook for SDD Hackathon 2024 with Vortex.io

## Library imports and downloads

In [10]:
!git clone "https://github.com/facebookresearch/segment-anything.git"
!pip install -q supervision --upgrade supervision

Cloning into 'segment-anything'...
remote: Enumerating objects: 295, done.[K
remote: Total 295 (delta 0), reused 0 (delta 0), pack-reused 295[K
Receiving objects: 100% (295/295), 18.30 MiB | 17.63 MiB/s, done.
Resolving deltas: 100% (155/155), done.


In [3]:
import cv2
import supervision as sv
import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import pandas as pd

## SAM initialization

In [4]:
# Work from inside the cloned repository: the checkpoint is downloaded into the
# current directory and later loaded through a relative path, and the
# `segment_anything` package is presumably imported from this checkout since no
# visible cell pip-installs it.
# NOTE(review): this changes the kernel's working directory for all later cells.
%cd segment-anything

/kaggle/working/segment-anything


In [5]:
!wget -q https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

In [6]:
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Which SAM backbone to build and where its weights live (relative to the cwd).
MODEL_TYPE = "vit_h"
CHECKPOINT_PATH = "./sam_vit_h_4b8939.pth"

# Build the model from the checkpoint, then move it to the selected device.
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH)
sam = sam.to(device=DEVICE)

# Generator used by the rest of the notebook to segment whole images.
mask_generator = SamAutomaticMaskGenerator(sam)

## Training

### Fragmentation

In [7]:
def get_fragments(image_path, fragment_size=(320, 240)):
    """Split an image into binary fragments with SAM's automatic mask generator.

    Args:
        image_path: Path of the image to segment.
        fragment_size: ``(width, height)`` handed to ``cv2.resize`` for every
            returned fragment. Defaults to ``(320, 240)``.

    Returns:
        A list of integer arrays that all share the same shape
        ``(fragment_size[1], fragment_size[0])``. The first entry marks the
        pixels not covered by any SAM mask; the following entries are the SAM
        masks in the order the generator returned them.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    sam_result = mask_generator.generate(image_rgb)
    masks = [o["segmentation"] for o in sam_result]

    # Union of every SAM mask; its complement is the "left-over" fragment.
    covered = np.zeros(image_rgb.shape[:2], dtype=bool)
    for mask in masks:
        covered = np.logical_or(covered, mask)
    remaining = np.logical_not(covered)

    def _shrink(mask):
        # Same conversion for every fragment so they all end up with one
        # shape; interpolated values below 1 are truncated to 0 by astype(int).
        return cv2.resize(mask.astype("float32"), fragment_size).astype(int)

    # Previously the left-over fragment kept the full image resolution while
    # the SAM masks were resized, giving fragments of inconsistent shapes.
    fragments = [_shrink(remaining)]
    fragments.extend(_shrink(mask) for mask in masks)
    return fragments

In [None]:
# Dataset locations. Every directory ends with "/" because other cells build
# file paths by plain string concatenation (directory + filename).
train_images_path = "/kaggle/input/water-segmentation-vtx/dataset/trainset/images/"
train_masks_path = "/kaggle/input/water-segmentation-vtx/dataset/trainset/masks/"
test_images_path = "/kaggle/input/water-segmentation-vtx/dataset/testset/images/"

# Sorted so the processing order (and "first file" lookups) is reproducible.
files = sorted(os.listdir(train_images_path))

# Maps each training image filename to its list of SAM fragments.
all_fragments = {}

for file in tqdm(files):
    all_fragments[file] = get_fragments(train_images_path + file)

  8%|▊         | 71/891 [06:36<1:18:26,  5.74s/it]

In [None]:
# print("Saving fragments...")
# with open('training_fragments.pkl', 'wb') as f:
#     pickle.dump(all_fragments, f)

### Fragment labeling

In [None]:
def get_label(frag, file, show=False, threshold=.7):
    """Decide whether a fragment is mostly covered by the ground-truth mask.

    Args:
        frag: 2-D 0/1 array marking the fragment's pixels. If its shape differs
            from the ground-truth mask it is resized (nearest neighbour) to
            match before comparison.
        file: Image filename inside ``train_images_path``; the mask is looked
            up in ``train_masks_path`` under the same stem with a ``.png``
            extension.
        show: When True, plot the image, the ground-truth mask and the fragment
            side by side.
        threshold: Minimum fraction of the fragment that must overlap the mask
            for a positive label. Defaults to 0.7.

    Returns:
        True when more than ``threshold`` of the fragment's area overlaps the
        mask, False otherwise (including for an empty fragment).

    Raises:
        FileNotFoundError: If the mask file cannot be read.
    """
    # Swap only the extension; str.replace("jpg", "png") would also rewrite a
    # "jpg" occurring elsewhere in the filename.
    mask_file = os.path.join(train_masks_path, os.path.splitext(file)[0] + ".png")
    mask = cv2.imread(mask_file)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_file}")
    # Integer division maps pixel value 255 to 1 and everything below to 0.
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)//255

    frag = np.asarray(frag)
    if frag.shape != mask.shape:
        # cv2.resize takes (width, height); nearest neighbour keeps values 0/1.
        frag = cv2.resize(
            frag.astype("uint8"),
            (mask.shape[1], mask.shape[0]),
            interpolation=cv2.INTER_NEAREST,
        )

    if show:
        img = cv2.imread(train_images_path + file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.subplot(1,3,1)
        plt.imshow(img)
        plt.subplot(1,3,2)
        plt.imshow(mask, vmin=0, vmax=1)
        plt.subplot(1,3,3)
        plt.imshow(frag, vmin=0, vmax=1)
        plt.show()

    frag_area = np.sum(frag)
    if frag_area == 0:
        # An empty fragment has no overlap ratio; avoid dividing by zero.
        return False
    return bool(np.sum(mask * frag) / frag_area > threshold)


# Sanity check: label (and display) the third fragment of the first image.
file = list(all_fragments)[0]
frag = all_fragments[file][2]
is_water = get_label(frag, file, show=True)
print(is_water)

### Fragment encoding

In [None]:
# Load a pretrained ResNet model
model = models.resnet50(weights="ResNet50_Weights.DEFAULT")
# Remove the classification layer so the network outputs pooled features
model = torch.nn.Sequential(*list(model.children())[:-1])
# Run the encoder on the same device as SAM instead of always on the CPU
model = model.to(DEVICE)
# Set the model to evaluation mode
model.eval()

# Define preprocessing transformations
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def get_features(frag):
    """Encode an image fragment into a flat feature vector with the ResNet encoder.

    Args:
        frag: Image array accepted by ``PIL.Image.fromarray`` (callers in this
            notebook pass an RGB uint8 array).

    Returns:
        A 1-D NumPy array holding the flattened output of ``model`` for this
        fragment.
    """
    input_tensor = preprocess(Image.fromarray(frag))
    # Follow whatever device the encoder lives on, so this function keeps
    # working whether the model is on the GPU or the CPU.
    model_device = next(model.parameters()).device
    input_batch = input_tensor.unsqueeze(0).to(model_device)
    with torch.no_grad():
        features = model(input_batch)
    # .cpu() is required before .numpy() when the tensor is on the GPU.
    return features.view(-1).cpu().numpy()

### Fragment aggregation

In [None]:
def add_file_fragments(directory, file, fragments, training_set):
    """Encode and label every fragment of one training image.

    For each fragment mask the image is blanked outside the mask, encoded with
    ``get_features`` and labelled with ``get_label``; the resulting record is
    appended to ``training_set`` in place.

    Args:
        directory: Directory containing ``file``.
        file: Image filename.
        fragments: Iterable of 2-D 0/1 masks for this image (any resolution).
        training_set: List that receives one dict per fragment with the keys
            ``"filename"``, ``"label"`` and ``"features"``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image_path = os.path.join(directory, file)
    origin = cv2.imread(image_path)
    if origin is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    origin = cv2.cvtColor(origin, cv2.COLOR_BGR2RGB)
    height, width = origin.shape[:2]
    for frag in fragments:
        # cv2.resize wants dsize as (width, height) -- not the (H, W, C) shape
        # tuple -- and the mask is converted to uint8, a dtype it supports.
        # Nearest-neighbour interpolation keeps the mask strictly 0/1.
        resized_frag = cv2.resize(
            np.asarray(frag).astype("uint8"),
            (width, height),
            interpolation=cv2.INTER_NEAREST,
        )
        # Broadcast the mask over the colour channels to blank outside pixels.
        fragment = origin * resized_frag[:, :, np.newaxis]
        training_set.append(
            {
                "filename": file,
                "label": get_label(resized_frag, file),
                "features": get_features(fragment.astype("uint8"))
            }
        )
    

# with open('training_fragments.pkl', 'rb') as f:
#     all_fragments = pickle.load(f)

# One record (filename, label, features) per fragment of every training image.
training_set = []
for file, fragments in tqdm(all_fragments.items()):
    add_file_fragments(train_images_path, file, fragments, training_set)

print(f"{len(training_set)} fragments in training set.")
print()
# print("Saving fragments...")
# with open('training_set.pkl', 'wb') as f:
#     pickle.dump(training_set, f)

## Testing

### Fragmentation

In [None]:
# List the *test* directory: the global `files` holds the training filenames,
# which do not exist under test_images_path.
test_files = sorted(os.listdir(test_images_path))

# Maps each test image filename to its list of SAM fragments.
test_fragments = {}
print("Fragmenting pictures...")
for file in tqdm(test_files):
    # os.path.join works whether or not test_images_path ends with a slash.
    test_fragments[file] = get_fragments(os.path.join(test_images_path, file))
print("Done!")

### Encoding

In [None]:
print("Getting fragment's features...")
# One record (filename, mask, features) per fragment of every test image.
testing_set = []
for file in tqdm(test_fragments):
    fragments = test_fragments[file]
    # Test images live in test_images_path (`directory` is not defined here).
    image_path = os.path.join(test_images_path, file)
    origin = cv2.imread(image_path)
    if origin is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    origin = cv2.cvtColor(origin, cv2.COLOR_BGR2RGB)
    height, width = origin.shape[:2]
    for frag in fragments:
        # Bring the fragment to the image resolution before masking;
        # cv2.resize takes (width, height) and nearest neighbour keeps 0/1.
        full_size_frag = cv2.resize(
            frag.astype("uint8"), (width, height), interpolation=cv2.INTER_NEAREST
        )
        # Broadcast the mask over the colour channels to blank outside pixels.
        fragment = origin * full_size_frag[:, :, np.newaxis]
        testing_set.append(
            {
                "filename": file,
                "mask": frag.astype(int),
                "features": get_features(fragment.astype("uint8"))
            }
        )
print("Done!")
print()
# print("Saving fragments...")
# with open('testing_set.pkl', 'wb') as f:
#     pickle.dump(testing_set, f)

### Labeling

In [None]:
def is_frag_water(frag, show=False):
    """Label a test fragment with the label of its nearest training fragment.

    Similarity is the cosine similarity between feature vectors; the label of
    the most similar entry of ``training_set`` is returned.

    Args:
        frag: Dict with a ``"features"`` array.
        show: When True, display the best match: its ``"fragment"`` image if
            the training entry carries one, otherwise the training image named
            by its ``"filename"``.

    Returns:
        The ``"label"`` value of the best-matching training entry.

    Raises:
        ValueError: If ``training_set`` is empty.
    """
    if not training_set:
        raise ValueError("training_set is empty; build it before labeling fragments.")

    # A single batched call instead of one cosine_similarity call per entry.
    train_features = np.stack([o["features"].reshape(-1) for o in training_set])
    scores = cosine_similarity(frag["features"].reshape(1, -1), train_features)[0]
    best = int(np.argmax(scores))
    match = training_set[best]

    if show:
        # Training entries built in this notebook have no "fragment" key, so
        # fall back to showing the source image of the match.
        preview = match.get("fragment")
        if preview is None:
            source = cv2.imread(os.path.join(train_images_path, match["filename"]))
            preview = cv2.cvtColor(source, cv2.COLOR_BGR2RGB)
        plt.imshow(preview)
        plt.show()
    return match["label"]


print("Labeling test fragments...")
# with open('testing_set.pkl', 'rb') as f:
#     testing_set = pickle.load(f)

# Attach a "label" entry to every test record, in place.
for test_fragment in testing_set:
    predicted_label = is_frag_water(test_fragment)
    test_fragment["label"] = predicted_label
print("Done!")

### Mask creation 

In [None]:
print("Merging water masks...")
# Maps each test filename to {"shape": (height, width), "mask": uint8 0/1 array}.
testing_masks = {}
for frag in testing_set:
    file = frag["filename"]
    if file not in testing_masks:
        image_path = os.path.join(test_images_path, file)
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        height, width = image.shape[:2]
        # Start from an empty mask so images without any water fragment still
        # get an entry (and therefore a row in the submission).
        testing_masks[file] = {
            "shape": (height, width),
            "mask": np.zeros((height, width), dtype="uint8"),
        }
    if not frag["label"]:
        # Only fragments predicted as water contribute to the final mask.
        continue
    height, width = testing_masks[file]["shape"]
    # Resize the fragment's mask (not the record dict) to the image size;
    # cv2.resize takes (width, height) and nearest neighbour keeps 0/1.
    frag_mask = cv2.resize(
        np.asarray(frag["mask"]).astype("uint8"),
        (width, height),
        interpolation=cv2.INTER_NEAREST,
    )
    # Union of the water fragments; overlapping fragments stay at 1.
    testing_masks[file]["mask"] = np.maximum(testing_masks[file]["mask"], frag_mask)
print("Done!")

### Submission file generation

In [None]:
def boolean_array_to_rle(boolean_array):
    """Run-length encode the positive entries of an array.

    The array is flattened in row-major order and every maximal run of values
    ``>= 1`` is reported, including a run that extends to the last element.

    Args:
        boolean_array: Array-like of numbers or booleans, any shape.

    Returns:
        A list of ``(start, length)`` tuples of Python ints, where ``start`` is
        the 1-based index of the run's first element in the flattened array.
    """
    flat = (np.asarray(boolean_array).reshape(-1) >= 1).astype(np.int8)
    # Pad with a zero on both sides so runs touching either end of the array
    # still produce a rising and a falling edge.
    edges = np.diff(np.concatenate(([0], flat, [0])))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)
    return [(int(start) + 1, int(end - start)) for start, end in zip(starts, ends)]

def rle_to_str(rle):
    """Serialise run-length pairs as one space-separated string.

    The first two items of every element of ``rle`` are written in order,
    e.g. ``[(1, 2), (5, 3)]`` becomes ``"1 2 5 3"``.
    """
    tokens = []
    for run in rle:
        tokens.append(f"{run[0]} {run[1]}")
    return " ".join(tokens)

print("Creating submission file...")
# Collect rows under their own name: reusing the global `files` here would
# overwrite the training file listing and break re-running earlier cells.
submission_rows = []
for file, entry in testing_masks.items():
    submission_rows.append(
        {
            # Swap only the extension; str.replace("jpg", "png") would also
            # rewrite a "jpg" occurring elsewhere in the filename.
            "img_key": os.path.splitext(file)[0] + ".png",
            "rle_mask": rle_to_str(boolean_array_to_rle(entry["mask"])),
        }
    )
# Explicit columns keep the CSV header even when there are no rows.
df = pd.DataFrame(submission_rows, columns=["img_key", "rle_mask"])
df.to_csv("SAM_submission.csv", index=False)
print("Done!")