# Create a SAM-ready 3LC Table using TableWriter

<div style="display: inline-flex; align-items: center; gap: 10px;">
        <a href="https://colab.research.google.com/github/3lc-ai/notebook-examples/blob/main/create_sam_dataset.ipynb"
        target="_blank"
            style="background-color: transparent; text-decoration: none; display: inline-flex; align-items: center;
            padding: 5px 10px; font-family: Arial, sans-serif;"> <img
            src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="height: 30px;
            vertical-align: middle;box-shadow: none;"/>
        </a> <a href="https://github.com/3lc-ai/notebook-examples/blob/main/create_sam_dataset.ipynb"
            style="text-decoration: none; display: inline-flex; align-items: center; background-color: #ffffff; border:
            1px solid #d1d5da; border-radius: 8px; padding: 2px 10px; color: #333; font-family: Arial, sans-serif;">
            <svg aria-hidden="true" focusable="false" role="img" class="octicon octicon-mark-github" viewBox="0 0 16 16"
            width="20" height="20" fill="#333"
            style="display:inline-block;user-select:none;vertical-align:text-bottom;overflow:visible; margin-right:
            8px;">
                <path d="M8 0c4.42 0 8 3.58 8 8a8.013 8.013 0 0 1-5.45 7.59c-.4.08-.55-.17-.55-.38 0-.27.01-1.13.01-2.2
                0-.75-.25-1.23-.54-1.48 1.78-.2 3.65-.88 3.65-3.95 0-.88-.31-1.59-.82-2.15.08-.2.36-1.02-.08-2.12 0
                0-.67-.22-2.2.82-.64-.18-1.32-.27-2-.27-.68 0-1.36.09-2 .27-1.53-1.03-2.2-.82-2.2-.82-.44 1.1-.16
                1.92-.08 2.12-.51.56-.82 1.28-.82 2.15 0 3.06 1.86 3.75 3.64 3.95-.23.2-.44.55-.51
                1.07-.46.21-1.61.55-2.33-.66-.15-.24-.6-.83-1.23-.82-.67.01-.27.38.01.53.34.19.73.9.82 1.13.16.45.68
                1.31 2.69.94 0 .67.01 1.3.01 1.49 0 .21-.15.45-.55.38A7.995 7.995 0 0 1 0 8c0-4.42 3.58-8 8-8Z"></path>
            </svg> <span style="vertical-align: middle; color: #333;">Open in GitHub</span>
        </a>
</div>

This notebook is a prerequisite for running the `fine-tune-sam.ipynb` notebook. It is a modified version of the official Colab tutorial from Encord, which can be found [here](https://colab.research.google.com/drive/1F6uRommb3GswcRlPZWpkAQRMVNdVH7Ww).

It demonstrates how you can manually create a 3LC Table by iteratively adding its rows. By using a 3LC Table, as opposed to some other data format, you will be able to view and edit the samples of the dataset in the 3LC Dashboard. The resulting Table can be used to fine-tune the Segment Anything Model (SAM), and is created from the Stamp Verification (StaVer) dataset on Kaggle.

In [None]:
# Parameters
PROJECT_NAME = "SAM Example"  # Passed as `project_name` when the table is written
DATASET_NAME = "staver-dataset"  # Passed as `dataset_name` when the table is written
STAVER_DATASET_PATH = "stamp-verification-staver-dataset"  # Path to the dataset
INSTALL_DEPENDENCIES = False  # Set to True to pip-install the required packages

In [None]:
%%capture
# Install the notebook's dependencies only when INSTALL_DEPENDENCIES is True.
if INSTALL_DEPENDENCIES:
    # NOTE(review): the 3LC package appears to be published on PyPI as `3lc` — confirm
    # that `tlc` is the intended distribution name.
    %pip install tlc
    # opencv-python provides the `cv2` module imported further down.
    %pip install opencv-python
    # kaggle provides the `kaggle` command used to download the dataset.
    %pip install kaggle

In [None]:
### HIDDEN CELL ###

# Reloads all modules every time before executing the Python code.
%load_ext autoreload
%autoreload 2

# Add the parent directory to sys.path so that `notebook_tests` can be imported
import os
import sys

sys.path.append('..')
import notebook_tests

# Optionally override the default test data path via the
# TLC_PUBLIC_EXAMPLES_TEST_DATA_PATH environment variable.
# NOTE(review): TEST_DATA_PATH is only assigned when the variable is set and is not read
# anywhere else in the visible cells — presumably consumed by test tooling; confirm.
if path := os.getenv("TLC_PUBLIC_EXAMPLES_TEST_DATA_PATH"):
    print(f"Using test data path: {path}")
    TEST_DATA_PATH = path

# Prints the current 3lc configuration
!3lc config --list

## Downloading StaVer Dataset

In [None]:
# Don't forget to set up your kaggle API key, see https://github.com/Kaggle/kaggle-api#api-credentials
# You can also manually download the dataset from https://www.kaggle.com/rtatman/stamp-verification-staver-dataset
# Create the target directory named by STAVER_DATASET_PATH.
!mkdir $STAVER_DATASET_PATH
# Download the dataset archive into the current working directory.
!kaggle datasets download rtatman/stamp-verification-staver-dataset 
# Extract the archive into the target directory, overwriting existing files (-o).
!unzip -o stamp-verification-staver-dataset.zip -d $STAVER_DATASET_PATH 
# Remove the archive once it has been extracted.
!rm stamp-verification-staver-dataset.zip

## Table creation

In [None]:
from __future__ import annotations
from pathlib import Path
from typing import Any
from random import random
import cv2
import tlc

# Absolute path to the dataset root, so the image paths written to the table are absolute.
STAVER_DATASET = Path(STAVER_DATASET_PATH).absolute()

In [None]:
def create_bboxes() -> dict[str, Any]:
    """Compute one prompt bounding box per usable scan from the ground-truth maps.

    Only the first 100 entries (in sorted order) of the
    ``ground-truth-maps/ground-truth-maps`` directory under ``STAVER_DATASET``
    are considered, and scans listed in the exclusion sets below are skipped.

    Returns:
        A dict keyed by scan key (the map file's stem without its last three
        characters). Each value holds ``image_height``, ``image_width`` and a
        single-element ``bb_list`` with ``x0``/``x1``/``y0``/``y1`` pixel
        coordinates and ``label`` 0.
    """
    # Scans with zero or multiple bboxes (of the first 100)
    wrong_bbox_count = {
        "stampDS-00008",
        "stampDS-00010",
        "stampDS-00015",
        "stampDS-00021",
        "stampDS-00027",
        "stampDS-00031",
        "stampDS-00039",
        "stampDS-00041",
        "stampDS-00049",
        "stampDS-00053",
        "stampDS-00059",
        "stampDS-00069",
        "stampDS-00073",
        "stampDS-00080",
        "stampDS-00090",
        "stampDS-00098",
        "stampDS-00100",
    }
    # 3 scans that aren't the type of scan we want to be fine-tuning for
    wrong_scan_type = {
        "stampDS-00012",
        "stampDS-00013",
        "stampDS-00014",
    }
    skipped_keys = wrong_bbox_count | wrong_scan_type

    maps_dir = STAVER_DATASET / "ground-truth-maps" / "ground-truth-maps"
    candidate_maps = sorted(maps_dir.iterdir())[:100]

    boxes_by_key = {}
    for map_file in candidate_maps:
        # Drop the last three characters of the stem to obtain the scan key.
        scan_key = map_file.stem[:-3]
        if scan_key in skipped_keys:
            continue

        image = cv2.imread(map_file.as_posix())
        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Slicing the last two return values yields (contours, hierarchy)
        # however many values findContours returns.
        found = cv2.findContours(grayscale, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours, _hierarchy = found[-2:]

        # Only maps with more than one contour produce a box.
        if len(contours) <= 1:
            continue

        # The box is the bounding rectangle of the first contour returned.
        left, top, box_width, box_height = cv2.boundingRect(contours[0])
        image_height, image_width, _channels = image.shape
        prompt_box = {
            "x0": left,
            "x1": left + box_width,
            "y0": top,
            "y1": top + box_height,
            "label": 0,
        }
        boxes_by_key[scan_key] = {
            "image_height": image_height,
            "image_width": image_width,
            "bb_list": [prompt_box],
        }

    return boxes_by_key


def create_schema() -> tlc.Schema:
    """Build the schema describing the columns of the table.

    The schema has two image-path columns (``image`` and ``mask``), two float
    columns (``My parameter A`` and ``My parameter B``) and a ``prompt box``
    bounding-box-list column whose only class, keyed 0.0, is named "Stamp".

    Returns:
        The assembled schema.
    """
    column_types = {
        "image": tlc.ImagePath,
        "mask": tlc.ImagePath,
        "My parameter A": tlc.Float,
        "My parameter B": tlc.Float,
    }
    table_schema = tlc.SampleType.from_structure(column_types).schema

    # The bounding-box column is added separately as a sub-schema.
    stamp_classes = {0.0: tlc.MapElement("Stamp")}
    prompt_box_schema = tlc.BoundingBoxListSchema(stamp_classes, include_segmentation=False)
    table_schema.add_sub_schema("prompt box", prompt_box_schema)

    return table_schema


def create_table() -> tlc.Table:
    """Write one table row per scan that has a prompt bounding box.

    Each row holds the scan image path, the matching pixel-mask path, the
    prompt box from ``create_bboxes`` and two random float parameters.

    Returns:
        The table returned by finalizing the writer.
    """
    writer = tlc.TableWriter(
        project_name=PROJECT_NAME,
        dataset_name=DATASET_NAME,
        column_schemas=create_schema().values,
    )

    scans_dir = STAVER_DATASET / "scans" / "scans"
    masks_dir = STAVER_DATASET / "ground-truth-pixel" / "ground-truth-pixel"

    for scan_key, prompt_box in create_bboxes().items():
        row = {
            "image": (scans_dir / f"{scan_key}.png").as_posix(),
            "mask": (masks_dir / f"{scan_key}-px.png").as_posix(),
            "prompt box": prompt_box,
            "My parameter A": random(),
            "My parameter B": random(),
        }
        writer.add_row(row)

    return writer.finalize()

In [None]:
# Build the table: writes one row per usable scan and finalizes the writer.
table = create_table()