## NB

Only install fiftyone **after** the devcontainer has been created.

This ensures that the required MongoDB gets created.

In [None]:
# Erase the folder ~/fiftyone/open-images-v7 (the place the dataset images
# are written to).
import os
import shutil

# expanduser() resolves "~" without needing a shell; ignore_errors=True mirrors
# `rm -rf`, which does not complain when the folder does not exist.
shutil.rmtree(os.path.expanduser('~/fiftyone/open-images-v7'), ignore_errors=True)




In [None]:
import os
import fiftyone as fo
import fiftyone.zoo as foz

# Class to download and the maximum number of samples to fetch for it.
TargetLabel = "Coffee cup"
TargetImages = 50

try:
    # Load the validation split of Open Images v7, restricted to detections
    # of TargetLabel and capped at TargetImages samples.
    foz.load_zoo_dataset(
        "open-images-v7",
        split="validation",
        label_types=["detections"],
        classes=[TargetLabel],
        max_samples=TargetImages,
    )
except Exception as e:
    # If the exception is related to MongoDB it is OK to proceed; anything
    # else is re-raised unchanged (bare `raise` keeps the original traceback).
    if 'MongoDB' not in str(e):
        raise
    # Report the tolerated error instead of dropping it silently.
    print(f"Ignoring MongoDB-related error: {e}")

In [None]:
import os
import shutil

# Delete all content of the ./data folder (a missing folder is fine, as with `rm -rf`).
shutil.rmtree('./data', ignore_errors=True)
# Images are written into ~/fiftyone/open-images-v7/validation; copy them to ./data.
# Unlike os.system('cp -r ...'), copytree raises when the source is missing, so a
# failed download is noticed here instead of in a later cell.
shutil.copytree(os.path.expanduser('~/fiftyone/open-images-v7/validation'), './data')
# List what ended up in ./data.
print('\n'.join(sorted(os.listdir('./data'))))


In [None]:
# Collect every entry found directly under data/data with glob

import glob
files = glob.glob('./data/data/*')


def _stem(path):
    """Return the file name of *path* without its directory and extension."""
    base_name = os.path.basename(path)
    stem, _extension = os.path.splitext(base_name)
    return stem


# file_names holds the bare names of the files: no path, no extension
file_names = list(map(_stem, files))
file_names

In [None]:
import pandas as pd

# Load data/metadata/classes.csv into the dataframe `classes`. The file has no header row,
# so the columns are named explicitly: LabelName first, LabelDisplayName second.
class_columns = ['LabelName', 'LabelDisplayName']
classes = pd.read_csv('./data/metadata/classes.csv', names=class_columns, header=None)

# Keep only the row(s) whose LabelDisplayName equals TargetLabel
is_target = classes['LabelDisplayName'] == TargetLabel
classes = classes[is_target]

classes.head()

In [None]:
# Load ./data/labels/detections.csv into a pandas dataframe
import pandas as pd

detect = pd.read_csv('./data/labels/detections.csv')

# A row is kept only when its LabelName appears in classes.LabelName
# and its ImageID appears in file_names
has_target_label = detect['LabelName'].isin(classes['LabelName'])
has_local_image = detect['ImageID'].isin(file_names)
detect = detect[has_target_label & has_local_image]
detect


In [None]:
# Attach the class name and the full file path to every detection

detect = (
    # join detect with classes on LabelName, which brings in LabelDisplayName
    detect.merge(classes, on='LabelName')
    .assign(
        # Class carries the value of LabelDisplayName
        Class=lambda frame: frame['LabelDisplayName'],
        # Path is './data/data/' + ImageID + '.jpg'
        Path=lambda frame: './data/data/' + frame['ImageID'] + '.jpg',
    )
)

detect

In [None]:
# Reduce detect to the columns Path, Class, XMin, XMax, YMin, YMax
kept_columns = ['Path', 'Class', 'XMin', 'XMax', 'YMin', 'YMax']
detect = detect[kept_columns]
detect

In [None]:
from collections import defaultdict

# Maps each image path to the list of bounding boxes recorded for that image
image_bboxes = defaultdict(list)

# Walk the detect dataframe row by row and file every box under its image path
bbox_columns = ('XMin', 'XMax', 'YMin', 'YMax')
for _, detection in detect.iterrows():
    box = tuple(detection[column] for column in bbox_columns)
    image_bboxes[detection['Path']].append(box)

# Flatten the mapping into a list of (image_path, bboxes) tuples
merged_bboxes = list(image_bboxes.items())
merged_bboxes

In [None]:
# Turn merged_bboxes into a two-column DataFrame (Path, BBoxes)
output_columns = ['Path', 'BBoxes']
merged_bboxes_df = pd.DataFrame(data=merged_bboxes, columns=output_columns)

# Write the DataFrame to detections.csv, leaving out the index
detections_csv = './data/detections.csv'
merged_bboxes_df.to_csv(detections_csv, index=False)

In [None]:
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Function to plot image with bounding boxes
def plot_image_with_bboxes(image_path, bboxes):
    """Show the image at *image_path* with each box in *bboxes* outlined in red.

    Args:
        image_path: Path of the image file to open.
        bboxes: Iterable of (xmin, xmax, ymin, ymax) tuples. The values are
            treated as fractions of the image width/height and are scaled
            to pixel coordinates before drawing.
    """
    # The context manager closes the underlying file once the image has been
    # handed to matplotlib, instead of leaving the handle open.
    with Image.open(image_path) as img:
        width, height = img.size
        _, ax = plt.subplots(1)
        ax.imshow(img)

    for xmin, xmax, ymin, ymax in bboxes:
        # Scale the fractional coordinates to pixels
        left, right = xmin * width, xmax * width
        top, bottom = ymin * height, ymax * height
        # Rectangle takes the top-left corner plus a width and a height
        rect = patches.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=1,
            edgecolor='r',
            facecolor='none',
        )
        ax.add_patch(rect)

    plt.show()

# Plot the first five images together with their bounding boxes
preview = merged_bboxes[:5]
for sample in preview:
    plot_image_with_bboxes(*sample)

In [None]:
# Install the packages imported by the Azure cells of this notebook: the Custom Vision SDK,
# python-dotenv (provides `dotenv.load_dotenv`) and msrest (provides `ApiKeyCredentials`).
%pip install azure-cognitiveservices-vision-customvision
%pip install python-dotenv
%pip install msrest

In [None]:
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Retrieve keys and endpoint from environment variables
training_key = os.getenv('TRAINING_KEY')
prediction_key = os.getenv('PREDICTION_KEY')
endpoint = os.getenv('ENDPOINT')
project_id = os.getenv('PROJECT_ID')
publish_iteration_name = os.getenv('PUBLISH_ITERATION_NAME')

# Fail early, naming the missing variables, when a setting the training client
# relies on is absent. PREDICTION_KEY and PUBLISH_ITERATION_NAME are only read
# here, so they are not enforced.
required_settings = {
    'TRAINING_KEY': training_key,
    'ENDPOINT': endpoint,
    'PROJECT_ID': project_id,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(missing_settings)
        + ". Define them in the environment or in the .env file."
    )

# Authenticate the training client (no prediction client is created here)
credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
trainer = CustomVisionTrainingClient(endpoint, credentials)


In [None]:
# tag_ids maps each class display name to the ID of its tag in the project
tag_ids = {}

# Tags the project already has, keyed by tag name
existing_tags = {known.name: known.id for known in trainer.get_tags(project_id)}

# Reuse the existing tag for each class when there is one, otherwise create it
for tag_name in classes['LabelDisplayName']:
    if tag_name not in existing_tags:
        created = trainer.create_tag(project_id, tag_name)
        tag_ids[tag_name] = created.id
        continue
    tag_ids[tag_name] = existing_tags[tag_name]

tag_ids

In [None]:
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, ImageFileCreateBatch, Region

# Add images to the project
tagged_images_with_regions = []

for image_path, bboxes in merged_bboxes:
    target_tag_id = tag_ids[TargetLabel]
    # Region takes left/top/width/height, whereas each bbox is (xmin, xmax, ymin, ymax)
    regions = [
        Region(tag_id=target_tag_id, left=xmin, top=ymin, width=xmax - xmin, height=ymax - ymin)
        for xmin, xmax, ymin, ymax in bboxes
    ]

    with open(image_path, "rb") as image_contents:
        tagged_images_with_regions.append(
            ImageFileCreateEntry(
                name=os.path.basename(image_path),
                contents=image_contents.read(),
                regions=regions,
            )
        )

# The Custom Vision service limits create_images_from_files to 64 images per
# call, so the entries are uploaded in chunks of at most that size.
MAX_IMAGES_PER_BATCH = 64

if not tagged_images_with_regions:
    print("No images to upload.")

for batch_start in range(0, len(tagged_images_with_regions), MAX_IMAGES_PER_BATCH):
    batch = tagged_images_with_regions[batch_start:batch_start + MAX_IMAGES_PER_BATCH]
    upload_result = trainer.create_images_from_files(project_id, ImageFileCreateBatch(images=batch))
    if not upload_result.is_batch_successful:
        print("Image batch upload failed.")
        for image in upload_result.images or []:
            print("Image status: ", image.status)
        # Raise instead of calling exit(): the failure stops the cell without
        # relying on the interactive-only `exit` helper.
        raise RuntimeError(
            f"Image batch upload failed for images {batch_start}-{batch_start + len(batch) - 1}."
        )