In [1]:
from IPython.display import display, HTML
# Inject a CSS rule that forces `.container` elements to 100% width.
display(
    HTML(
        "<style>.container { width:100% !important; }</style>"
    )
)

In [1]:
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
import os
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image

def create_fiftyone_dataset_with_labels(image_directories):
    """
    Creates the "cups" FiftyOne dataset, labelling each image with its folder name.

    Any existing dataset named "cups" is deleted first, so every call starts
    from an empty dataset.

    Args:
        image_directories: A list of paths to the directories containing the
            images. The last path component of each directory is stored on
            its samples in the ``temp_label`` field.

    Returns:
        The newly created dataset, holding one sample per file whose name ends
        in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).

    Raises:
        OSError: If one of the directories cannot be listed (for example
            because it does not exist). Nothing is added to the dataset in
            that case.
    """
    if "cups" in fo.list_datasets():
        fo.delete_dataset("cups")
    dataset = fo.Dataset("cups")

    samples = []
    for image_dir in image_directories:
        # Use the folder name as a temporary label
        label = os.path.basename(os.path.normpath(image_dir))
        # os.listdir() returns entries in arbitrary order. Sort them so the
        # samples are built in the same order on every run; embeddings that
        # generate_embeddings_with_zoo() caches on disk are only meaningful
        # against a reproducible sample order.
        for file_name in sorted(os.listdir(image_dir)):
            if not file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            image_path = os.path.join(image_dir, file_name)
            # Option 1: Add a simple field
            samples.append(fo.Sample(filepath=image_path, temp_label=label))
            # Option 2 (not used): store the label as a FiftyOne
            # Classification instead, i.e.
            # sample["ground_truth"] = fo.Classification(label=label)
    dataset.add_samples(samples)
    return dataset

def create_fiftyone_dataset(image_directories):
    """
    Creates a FiftyOne dataset from images in multiple directories.

    Any existing dataset named "cups" is deleted first, so every call starts
    from an empty dataset. Directories that cannot be processed are reported
    with a printed warning and skipped rather than aborting the whole run.

    Args:
        image_directories: A list of paths to the directories containing the images.

    Returns:
        A fiftyone.core.dataset.Dataset object with one unlabelled sample per
        file whose name ends in ``.jpg``, ``.jpeg`` or ``.png``
        (case-insensitive).
    """
    if "cups" in fo.list_datasets():
        fo.delete_dataset("cups")
    dataset = fo.Dataset("cups")

    samples = []
    for image_dir in image_directories:  # Iterate through the list of directories
        try:
            # os.listdir() returns entries in arbitrary order; sort them so
            # the sample order is reproducible from run to run.
            for file_name in sorted(os.listdir(image_dir)):
                if file_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    image_path = os.path.join(image_dir, file_name)
                    samples.append(fo.Sample(filepath=image_path))
        except FileNotFoundError:
            print(f"Warning: Directory not found: {image_dir}")
        except NotADirectoryError:
            print(f"Warning: Not a directory: {image_dir}")
        except Exception as e:
            # Deliberately broad: this loader is best-effort, so one bad
            # directory must not prevent the remaining ones from loading.
            print(f"Warning: Error processing directory {image_dir}: {e}")

    dataset.add_samples(samples)
    return dataset

def generate_embeddings_with_zoo(
    dataset,
    # Alternative zoo models noted by the author:
    #   "resnet50-imagenet-torch"      (cache: "resnet50_embeddings.pth")
    #   "mobilenet-v2-imagenet-torch"  (cache: "mobilenet_embeddings.pth")
    model_name="clip-vit-base32-torch",
    batch_size=24,
    device="cuda" if torch.cuda.is_available() else "cpu",
    save_path="clip_embeddings_victor.pth",
):
    """
    Returns embeddings for ``dataset``, reusing an on-disk cache when possible.

    If ``save_path`` exists, its contents are loaded and returned provided
    they contain as many entries as the dataset has samples. A cache of a
    different size belongs to another version of the dataset, so it is
    discarded and the embeddings are recomputed and written back to
    ``save_path``.

    The cache is keyed by file name only. When changing ``model_name``, also
    pass a different ``save_path`` (or delete the old file); otherwise
    embeddings from the previous model are returned.

    Args:
        dataset: The FiftyOne dataset to embed.
        model_name: Name of the FiftyOne Zoo model to load.
        batch_size: Batch size forwarded to ``dataset.compute_embeddings``.
        device: Device string forwarded to the model loader and to
            ``dataset.compute_embeddings``. The default is chosen once, when
            the function is defined: "cuda" if available, else "cpu".
        save_path: File the embeddings are cached in.

    Returns:
        The embeddings, either loaded from ``save_path`` or as returned by
        ``dataset.compute_embeddings``.
    """
    # If embeddings have already been computed, load and return them
    if os.path.exists(save_path):
        print(f"Loading saved embeddings from {save_path}")
        # NOTE: weights_only=False unpickles arbitrary Python objects, so only
        # point save_path at files written by this function / trusted sources.
        embeddings = torch.load(save_path, weights_only=False)
        try:
            cache_matches = len(embeddings) == len(dataset)
        except TypeError:
            # One side has no length, so the sizes cannot be compared; fall
            # back to trusting the cache.
            cache_matches = True
        if cache_matches:
            return embeddings
        print(
            f"Warning: {save_path} holds {len(embeddings)} embeddings but the "
            f"dataset has {len(dataset)} samples; recomputing."
        )

    # Load pre-trained model from FiftyOne Zoo
    model = foz.load_zoo_model(model_name, device=device)

    # Compute embeddings for every sample in the dataset
    embeddings = dataset.compute_embeddings(
        model,
        batch_size=batch_size,
        device=device
    )

    # Save embeddings for future reuse
    torch.save(embeddings, save_path)
    print(f"Embeddings saved to {save_path}")
    return embeddings


# Script entry point: build the labelled "cups" dataset from the image folders
# below, then obtain embeddings for it.
if __name__ == "__main__":
    # Replace with your directory
    # Earlier single-directory configurations, kept for reference:
    # image_directory = "/Users/yujieming/syde770_cups/all_in_one_v1"
    # image_directory = "/Users/yujieming/syde770_cups/jieming_images_v2 （crop）/all_cropped"
    # The last component of each path ("tims", "not_tims") becomes the
    # temp_label of the images found in that folder.
    image_directories = [
        "C:/Users/vsung/OneDrive - University of Waterloo/SYDE750/projectDVCStorage/tims",  # Replace with your directory
        "C:/Users/vsung/OneDrive - University of Waterloo/SYDE750/projectDVCStorage/not_tims", # Add the second image directory here
        # Add more directories as needed
    ]

    # Recreates the "cups" dataset from scratch (an existing one is deleted).
    dataset = create_fiftyone_dataset_with_labels(image_directories)
    print(f"Dataset created with {len(dataset)} samples.")

    # Generate embeddings using the FiftyOne Zoo model. With the default
    # arguments the embeddings may be loaded from the on-disk cache file
    # instead of being recomputed.
    embeddings = generate_embeddings_with_zoo(dataset)
    print("Embeddings generated.")

   

 100% |███████████████| 9401/9401 [1.2s elapsed, 0s remaining, 7.8K samples/s]         
Dataset created with 9401 samples.


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
