# Create a SAM-ready 3LC Table using TableWriter

This notebook is a prerequisite for running the `fine-tune-sam.ipynb` notebook, and is a modified version of the official Colab tutorial from Encord, which can be found [here](https://colab.research.google.com/drive/1F6uRommb3GswcRlPZWpkAQRMVNdVH7Ww).

It demonstrates how you can manually create a 3LC Table by iteratively adding its rows. By using a 3LC Table, as opposed to some other data format, you will be able to view and edit the samples of the dataset in the 3LC Dashboard. The Table created here can be used to fine-tune the Segment Anything Model (SAM), and is built from the Stamp Verification (StaVer) dataset on Kaggle.

## Imports

In [None]:
import os
from pathlib import Path
from random import random

import cv2
import numpy as np
import tlc
from PIL import Image

## Project setup

In [None]:
# Project and dataset names passed to tlc.TableWriter in create_table().
PROJECT_NAME = "3LC Tutorials"
DATASET_NAME = "staver-dataset"
# Directory the StaVer archive is extracted into; relative to the current working directory.
STAVER_DATASET_PATH = "../../../transient_data/stamp-verification-staver-dataset"  # Path to the dataset

## Downloading StaVer dataset

In [None]:
# Don't forget to set up your kaggle API key, see https://github.com/Kaggle/kaggle-api#api-credentials
# You can also manually download the dataset from https://www.kaggle.com/rtatman/stamp-verification-staver-dataset
!mkdir $STAVER_DATASET_PATH
!kaggle datasets download rtatman/stamp-verification-staver-dataset 
!unzip -o stamp-verification-staver-dataset.zip -d $STAVER_DATASET_PATH 

In [None]:
# The archive has been extracted by the previous cell, so delete the downloaded zip file.
Path("stamp-verification-staver-dataset.zip").unlink()

## Create Table

In [None]:
# Absolute form of the dataset directory; the image and mask paths written to the table are built from it.
STAVER_DATASET = Path(STAVER_DATASET_PATH).absolute()

In [None]:
def create_bboxes():
    """Collect one prompt bounding box per usable ground-truth map.

    Considers the first 100 entries (in sorted order) of the dataset's
    ``ground-truth-maps/ground-truth-maps`` directory, skips the excluded
    stamps, and for every map in which more than one contour is found records
    the bounding rectangle of the first contour.

    Returns:
        dict: Maps each stamp key (the file stem without its last three
        characters) to a dict with ``image_height``, ``image_width`` and
        ``bb_list``. ``bb_list`` holds a single box with float ``x0``, ``x1``,
        ``y0``, ``y1`` corner coordinates and ``label`` 0.
    """
    # Scans with zero or multiple bboxes (of the first 100)
    zero_or_multiple_bboxes = {
        "stampDS-00008",
        "stampDS-00010",
        "stampDS-00015",
        "stampDS-00021",
        "stampDS-00027",
        "stampDS-00031",
        "stampDS-00039",
        "stampDS-00041",
        "stampDS-00049",
        "stampDS-00053",
        "stampDS-00059",
        "stampDS-00069",
        "stampDS-00073",
        "stampDS-00080",
        "stampDS-00090",
        "stampDS-00098",
        "stampDS-00100",
    }
    # 3 scans that aren't the type of scan we want to be fine tuning for
    unwanted_scan_type = {
        "stampDS-00012",
        "stampDS-00013",
        "stampDS-00014",
    }
    stamps_to_exclude = zero_or_multiple_bboxes | unwanted_scan_type

    maps_dir = STAVER_DATASET / "ground-truth-maps" / "ground-truth-maps"
    bbox_coords = {}

    for map_path in sorted(maps_dir.iterdir())[:100]:
        # Drop the three-character suffix of the stem to get the stamp key.
        stamp_key = map_path.stem[:-3]
        if stamp_key in stamps_to_exclude:
            continue

        image = cv2.imread(map_path.as_posix())
        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Taking the last two returned items keeps the unpacking valid whether
        # findContours returns two or three values.
        found = cv2.findContours(grayscale, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours, _ = found[-2:]
        if len(contours) <= 1:
            continue

        left, top, box_width, box_height = cv2.boundingRect(contours[0])
        height, width, _ = image.shape
        prompt_box = {
            "x0": float(left),
            "x1": float(left + box_width),
            "y0": float(top),
            "y1": float(top + box_height),
            "label": 0,
        }
        bbox_coords[stamp_key] = {
            "image_height": height,
            "image_width": width,
            "bb_list": [prompt_box],
        }

    return bbox_coords


def create_schema():
    """Build the schema describing the columns of the table.

    The schema has an ``image`` path column, a ``mask`` column whose values
    255.0 and 0.0 are mapped to "background" and "stamp", two float columns
    ("My parameter A" and "My parameter B"), and a ``prompt box`` bounding-box
    list column with the single label "Stamp" and no segmentation.

    Returns:
        The schema object produced by ``tlc.SampleType.from_structure`` with
        the ``prompt box`` sub-schema added.
    """
    # Pixel value -> class name for the segmentation mask.
    mask_value_map = {
        255.0: tlc.MapElement("background"),
        0.0: tlc.MapElement("stamp"),
    }
    mask_schema = tlc.Schema(value=tlc.SegmentationMaskUrlStringValue(map=mask_value_map))

    structure = {
        "image": tlc.ImagePath,
        "mask": mask_schema,
        "My parameter A": tlc.Float,
        "My parameter B": tlc.Float,
    }
    schema = tlc.SampleType.from_structure(structure).schema

    # The bounding-box column is attached afterwards as a sub-schema.
    prompt_box_schema = tlc.BoundingBoxListSchema({0.0: tlc.MapElement("Stamp")}, include_segmentation=False)
    schema.add_sub_schema("prompt box", prompt_box_schema)

    return schema


def create_table():
    """Write the StaVer samples to a 3LC Table, one row per bounding box entry.

    Each row holds the scan image path, the pixel-level mask path, the prompt
    box from ``create_bboxes()`` and two values drawn from ``random()``.

    Returns:
        The table returned by ``TableWriter.finalize()``.
    """
    scans_dir = STAVER_DATASET / "scans" / "scans"
    masks_dir = STAVER_DATASET / "ground-truth-pixel" / "ground-truth-pixel"

    writer = tlc.TableWriter(
        project_name=PROJECT_NAME,
        dataset_name=DATASET_NAME,
        column_schemas=create_schema().values,
    )

    for stamp_key, prompt_box in create_bboxes().items():
        row = {
            "image": (scans_dir / f"{stamp_key}.png").as_posix(),
            "mask": (masks_dir / f"{stamp_key}-px.png").as_posix(),
            "prompt box": prompt_box,
            "My parameter A": random(),
            "My parameter B": random(),
        }
        writer.add_row(row)

    return writer.finalize()

In [None]:
# Build the table; the result is kept so the following cells can inspect it.
table = create_table()

In [None]:
# Open the mask image referenced by the "mask" column of the first table row.
img = Image.open(table[0]["mask"])

In [None]:
# Distinct pixel values found in the mask; compare them with the value map defined in create_schema().
np.unique(np.array(img))