# Download and Install SAM

In [1]:
!pip install git+https://github.com/facebookresearch/segment-anything.git
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-i4dvfhi6
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-i4dvfhi6
  Resolved https://github.com/facebookresearch/segment-anything.git to commit 6fdee8f2727f4506cfbbe553e23b895e27956588
  Preparing metadata (setup.py) ... [?25ldone
[?25h--2024-08-07 10:07:26--  https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 18.164.78.72, 18.164.78.121, 18.164.78.128, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|18.164.78.72|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2564550879 (2.4G) [binary/octet-stream]
Saving to: 'sam_vit_h_4b8939.pth'


2024-08-07 10:07:35 (271 MB/s) - 'sam_vit_h_4b8939.pth' saved [2564550879/2564550879]



# Imports 

In [2]:
import torch
import torchvision
import sys
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np 
import pandas as pd 
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
import zipfile

# Create New Dataset Directories 

In [7]:
# Root folder of the generated dataset; the split/class folders are created
# beneath it in the next cell.
working_root = "/kaggle/working/"
dataset_name = "Segmented Dental Radiography"
directory = os.path.join(working_root, dataset_name)
os.makedirs(directory, exist_ok=True)




In [8]:
# The class folders mirror the labels found in the training annotations, plus
# an extra "Normal" folder that receives the crops saved under that name.
df = pd.read_csv("/kaggle/input/dental-radiography/train/_annotations.csv")
classes = [*df['class'].unique(), "Normal"]

# One sub-folder per class inside each of the three dataset splits.
for class_name in classes:
    for split_root in (
        "/kaggle/working/Segmented Dental Radiography/train",
        "/kaggle/working/Segmented Dental Radiography/test",
        "/kaggle/working/Segmented Dental Radiography/valid",
    ):
        os.makedirs(os.path.join(split_root, class_name), exist_ok=True)




# Function for Getting Mask Bounding Box

In [9]:
def get_bounding_box(mask):
    """Return the tight bounding box of the truthy entries of a 2-D mask.

    Parameters
    ----------
    mask : array-like
        2-D mask; any non-zero / True entry counts as part of the object.

    Returns
    -------
    tuple
        ``(x_min, y_min, x_max, y_max)`` with inclusive column/row indices,
        or ``(0, 0, 0, 0)`` when the mask has no truthy entry.
    """
    hit_indices = np.nonzero(mask)

    # Nothing set in the mask: fall back to the all-zero sentinel box.
    if hit_indices[0].size == 0:
        return (0, 0, 0, 0)

    row_idx, col_idx = hit_indices
    return (col_idx.min(), row_idx.min(), col_idx.max(), row_idx.max())




# Function for Zipping Folder 

In [10]:
def zip_folder(folder_path):
    """Compress a folder into ``<folder_path>.zip`` next to the folder.

    Every file below ``folder_path`` is stored with a name relative to the
    folder's parent directory, so the archive unpacks into a single top-level
    directory named after the folder. Empty directories are not stored.

    Parameters
    ----------
    folder_path : str
        Directory to archive. A trailing path separator is tolerated.
    """
    # Strip any trailing separator. Without this, "data/" would produce the
    # archive "data/.zip" *inside* the tree being walked and the entries
    # would lose their top-level folder name.
    folder_path = os.path.normpath(folder_path)
    parent_dir = os.path.dirname(folder_path)
    zip_file_path = f"{folder_path}.zip"

    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, start=parent_dir)
                zipf.write(file_path, arcname)




# Function for Creating New Dataset Using SAM
- Segment each annotated unhealthy tooth using the SAM predictor, prompted with its annotation bounding box
- Segment healthy teeth using the SAM automatic mask generator after blacking out the annotated unhealthy areas

In [11]:
def _clamp_box(box, image_shape):
    """Clamp an inclusive (x_min, y_min, x_max, y_max) box to the image and return plain ints."""
    height, width = image_shape[:2]
    x_min, y_min, x_max, y_max = (int(v) for v in box)
    return max(x_min, 0), max(y_min, 0), min(x_max, width - 1), min(y_max, height - 1)


def _save_crop(crop, out_dir, out_name, size):
    """Resize an image crop to `size` and save it as out_dir/out_name, creating out_dir if needed."""
    os.makedirs(out_dir, exist_ok=True)
    Image.fromarray(crop).resize(size).save(os.path.join(out_dir, out_name))


def create_data(folder_path_1, folder_path_2, annotation,
                crop_size=(64, 64), max_normal_side=100, max_zero_ratio=0.6):
    """Build the cropped classification dataset for one split.

    For every ``.jpg`` in ``folder_path_1``:

    1. Each annotated box is passed to the module-level ``predictor`` as a box
       prompt; the bounding box of the returned mask is cropped from the image
       and saved under ``folder_path_2/<class>/<stem>_<i>.jpg``.
    2. All annotated boxes are then blacked out in the image, the module-level
       ``mask_generator`` is run on the result, and every sufficiently small,
       not-mostly-black mask crop is saved under
       ``folder_path_2/Normal/<stem>_<i>.jpg``.

    Parameters
    ----------
    folder_path_1 : str
        Directory containing the source images.
    folder_path_2 : str
        Output root for this split; class sub-folders are created on demand.
    annotation : str
        CSV file with columns ``filename``, ``class``, ``xmin``, ``ymin``,
        ``xmax``, ``ymax``.
    crop_size : tuple of int, optional
        Size every saved crop is resized to.
    max_normal_side : int, optional
        "Normal" crops taller or wider than this many pixels are discarded.
    max_zero_ratio : float, optional
        A "Normal" crop is discarded when its count of zero-valued entries is
        not below ``max_zero_ratio * height * width``.

    Notes
    -----
    Relies on the globals ``predictor``, ``mask_generator`` and on
    ``get_bounding_box`` being defined before the call.
    """
    df = pd.read_csv(annotation)

    for filename in os.listdir(folder_path_1):
        if not filename.endswith('.jpg'):
            continue

        image = cv2.imread(os.path.join(folder_path_1, filename))
        if image is None:
            # cv2.imread signals an unreadable/corrupt file with None; skip it
            # instead of crashing the whole run inside cvtColor.
            print(f"Skipping unreadable image: {filename}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        stem = os.path.splitext(filename)[0]
        annotations = df[df['filename'] == filename]
        boxes = annotations[["xmin", "ymin", "xmax", "ymax"]].to_numpy()
        labels = annotations["class"].tolist()

        # The image embedding does not depend on the prompt box, so compute it
        # once per image rather than once per annotation.
        if len(boxes):
            predictor.set_image(image)

        # --- 1. annotated (unhealthy) teeth -------------------------------
        for i, (input_box, label) in enumerate(zip(boxes, labels)):
            masks, _, _ = predictor.predict(
                point_coords=None,
                point_labels=None,
                box=input_box[None, :],
                multimask_output=False,
            )

            if masks[0].any():
                box = get_bounding_box(masks[0])
            else:
                # An empty mask would otherwise collapse to the single pixel
                # at (0, 0); fall back to the annotated box instead.
                box = input_box

            x_min, y_min, x_max, y_max = _clamp_box(box, image.shape)
            _save_crop(
                image[y_min:y_max + 1, x_min:x_max + 1],
                os.path.join(folder_path_2, label),
                f'{stem}_{i}.jpg',
                crop_size,
            )

        # --- 2. black out the annotated regions ----------------------------
        for input_box in boxes:
            x_min, y_min, x_max, y_max = _clamp_box(input_box, image.shape)
            image[y_min:y_max + 1, x_min:x_max + 1] = 0

        # --- 3. remaining (healthy) teeth ----------------------------------
        for i, mask in enumerate(mask_generator.generate(image)):
            segmentation = mask["segmentation"]
            if not segmentation.any():
                continue  # nothing segmented, no meaningful crop

            x_min, y_min, x_max, y_max = _clamp_box(get_bounding_box(segmentation), image.shape)
            crop = image[y_min:y_max + 1, x_min:x_max + 1]
            crop_h, crop_w = crop.shape[:2]

            if crop_h > max_normal_side or crop_w > max_normal_side:
                continue

            # NOTE(review): np.sum counts zero entries across all colour
            # channels while the threshold is scaled by height * width only,
            # so on a 3-channel image a black pixel counts three times.
            # Kept as in the original to leave the produced dataset unchanged
            # - confirm whether a per-pixel fraction was intended.
            if np.sum(crop == 0) >= max_zero_ratio * crop_h * crop_w:
                continue

            _save_crop(crop, os.path.join(folder_path_2, "Normal"), f'{stem}_{i}.jpg', crop_size)




# Initializing SAM Predictor and Automatic Mask Generator 

In [12]:
# Build SAM from the checkpoint downloaded in the first cell.
sam = sam_model_registry["default"](checkpoint="/kaggle/working/sam_vit_h_4b8939.pth")

# Prefer the GPU, but fall back to the CPU so the cell still runs (slowly)
# on a session without CUDA instead of raising on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
sam.to(device=device)

# Box-prompted predictor (annotated teeth) and automatic mask generator
# (remaining teeth); both share the same model instance.
predictor = SamPredictor(sam)
mask_generator = SamAutomaticMaskGenerator(sam)




# Create and Zip the Dataset Using the Above Functions

In [13]:
# Source dataset and output root; every split uses the same layout:
#   <DATASET_ROOT>/<split>/*.jpg + _annotations.csv  ->  <OUTPUT_ROOT>/<split>/<class>/
DATASET_ROOT = "/kaggle/input/dental-radiography"
OUTPUT_ROOT = "/kaggle/working/Segmented Dental Radiography"

# Building the paths with os.path.join fixes the original test output path,
# which was missing the "/" before "test" ("...Radiographytest") and therefore
# pointed at a directory that was never created.
for split in ("test", "valid", "train"):
    create_data(
        os.path.join(DATASET_ROOT, split),
        os.path.join(OUTPUT_ROOT, split),
        os.path.join(DATASET_ROOT, split, "_annotations.csv"),
    )

zip_folder(OUTPUT_ROOT)


