# Tutorial


This tutorial walks you through running inferences for deployments in Costa Rica.

In [None]:
import os
# NOTE(review): presumably set so that a duplicate OpenMP runtime (e.g. loaded by torch
# and by another library) does not abort the kernel — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:
# Work from the repository checkout: every relative path used below
# (./credentials.json, ./models/..., ./examples/...) is resolved from here.
repo_root = os.path.expanduser('~/amber-inferences')
os.chdir(repo_root)

In [None]:
# Install the package in editable mode from the working directory set above
# (only needed if it is not already installed in this kernel's environment)
%pip install -e .

In [None]:
# Project helpers. The star imports are deliberately kept ahead of the explicit imports
# below so that a name they happen to export cannot replace an explicitly imported one.
from amber_inferences.utils.config import load_credentials
from amber_inferences.utils.api_utils import deployments_summary, get_deployments
from amber_inferences.utils.custom_models import *
from amber_inferences.utils.inference_scripts import *
from amber_inferences.utils.plotting import *
from amber_inferences.utils.tracking import *

# boto3 and pandas are used directly in this notebook (boto3.Session, pd.DataFrame),
# so import them explicitly instead of relying on the star imports above.
import boto3
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
from IPython.display import Markdown as md

import torch
import requests
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

# Explore the Data on the Object Store

In [None]:
# Build an S3 client for the object store from the values in credentials.json
aws_credentials = load_credentials('./credentials.json')

session = boto3.Session(
    aws_access_key_id=aws_credentials["AWS_ACCESS_KEY_ID"],
    aws_secret_access_key=aws_credentials["AWS_SECRET_ACCESS_KEY"],
    region_name=aws_credentials["AWS_REGION"],
)

# The object store is reached through a custom endpoint URL from the credentials file
endpoint_url = aws_credentials["AWS_URL_ENDPOINT"]
s3_client = session.client("s3", endpoint_url=endpoint_url)

Look at the deployments available on the object store:

In [None]:
# Fetch the deployment records and tabulate them
all_deployments = pd.DataFrame(
    get_deployments(aws_credentials['UKCEH_username'], aws_credentials['UKCEH_password'])
)

# Show only the deployments whose status is 'active'
is_active = all_deployments['status'] == 'active'
all_deployments[is_active]

In [None]:
# List every distinct country that has at least one deployment
print("All countries:")
for country in all_deployments['country'].unique():
    print(f"- {country}")

Let's pick one, `cri` (Costa Rica), and check out the data attached.

In [None]:
# Restrict the summary to two Costa Rica deployments; image counts are not requested
countries = ["Costa Rica"]
deployment_ids = ["dep000035", "dep000036"]

cr_deployments = deployments_summary(
    aws_credentials,
    subset_countries=countries,
    subset_deployments=deployment_ids,
    include_image_count=False,
)

To get the files for a given deployment(s):

In [None]:
# Display the summary returned by deployments_summary for the selected deployments
cr_deployments

# Log the image keys

In [None]:
from amber_inferences.utils.key_utils import save_keys

In [None]:
# takes some time, so it is commented out to save time
# save_keys(
#     s3_client,
#     bucket="cri",
#     deployment_id="dep000035",
#     output_file="./examples/example_keys/dep000035_keys.json",
#     subdir="snapshot_images"
# )

In [None]:
# Look at the first lines of the example keys file. Note this is
# interesting_timelapse.json, not the dep000035_keys.json named in the save_keys call above.
!head ./examples/example_keys/interesting_timelapse.json

# Download and View the Images

In [None]:
import json
from amber_inferences.utils.inference_scripts import download_image_from_key

In [None]:
# Load every image key listed in the example keys file
keys_file = './examples/example_keys/interesting_timelapse.json'
with open(keys_file) as keys_fh:
    keys = json.load(keys_fh)

In [None]:
# Download each listed key from the 'cri' bucket into the local raw-image folder
raw_dir = './examples/images/dep000035/interesting_timelapse/raw/'
os.makedirs(raw_dir, exist_ok=True)

for key in keys:
    download_image_from_key(s3_client, key, 'cri', raw_dir)

In [None]:
# Collect the downloaded .jpg files.
# os.listdir returns names in arbitrary order, so sort them: the GIF frames and the
# per-image processing below then follow file-name order on every run and platform
# (assumes the file names sort chronologically — TODO confirm).
raw_dir = './examples/images/dep000035/interesting_timelapse/raw/'
image_paths = sorted(
    os.path.join(raw_dir, name)
    for name in os.listdir(raw_dir)
    if name.endswith('.jpg')
)

In [None]:
# Make sure the GIF folder exists before writing into it
gif_dir = './examples/images/dep000035/interesting_timelapse/gifs'
os.makedirs(gif_dir, exist_ok=True)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/raw_images.gif"

# Open every raw image as one animation frame (500 ms each, looping forever)
frames = [Image.open(path) for path in image_paths]
frames[0].save(
    gif_path,
    save_all=True,
    append_images=frames[1:],
    duration=500,
    loop=0,
)

# Drop the references to the opened images once the GIF is written
del frames

In [None]:
# Embed the GIF through a Markdown image link that points at its absolute path
md("![mothGif](" + os.path.abspath(gif_path) + " 'moth')")

# Perform Object Detection on the Images

In [None]:
import os
# Expose only GPU 0 to this process.
# NOTE(review): torch is already imported earlier in the notebook; presumably this still
# takes effect because CUDA has not been initialised at this point — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Pick the first CUDA device when one is available, otherwise run on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

if use_cuda:
    # Allocate a tiny tensor on the GPU so device problems surface here
    torch.tensor([1.0], device=device)

print(f"Using device: {device}")

In [None]:
# Weight and label files for each stage of the pipeline
model_files = {
    'localisation_model_path': './models/v1_localizmodel_2021-08-17-12-06.pt',
    'binary_model_path': './models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
    'order_model_path': './models/dhc_best_128.pth',
    'order_threshold_path': './models/thresholdsTestTrain.csv',
    'species_model_path': './models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
    'species_labels': './models/03_costarica_data_category_map.json',
}

# Load all models onto the selected device in one call
models_load = load_models(device=device, **model_files)

In [None]:
# Start from an empty output folder so files from an earlier run are not reused
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes'
os.makedirs(output_dir, exist_ok=True)

files = os.listdir(output_dir)
for stale_name in files:
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
# One table of detections per image; concatenated in the next cell
all_crops_loc = []

os.makedirs('./examples/images/crops/interesting_timelapse/', exist_ok=True)

for img_path in tqdm(image_paths):
    # Run the localisation model on one image; the returned table is iterated
    # row by row below (one row per box).
    crops = crop_image_only(
        image_path=img_path,
        bucket_name="cri",
        localisation_model=models_load['localisation_model'],
        proc_device=device,
        csv_file="./examples/interesting_timelapse_crops.csv",
        save_crops=True,
        box_threshold=0.95,
        crop_dir="./examples/images/crops/interesting_timelapse",
        job_name=None,
    )
    all_crops_loc.append(crops)

    # Only the box coordinates and status are needed for annotation. The previous
    # version also decoded each image and cropped every box into a variable that was
    # never used; that dead work has been removed.
    boxes = [
        {
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': row['crop_status'],
            'ann_col': 'grey'
        }
        for _, row in crops.iterrows()
    ]

    img = image_annotation(img_path, boxes=boxes)

    # save the annotated image under the same file name as the raw image
    img.save(f'{output_dir}/{os.path.basename(img_path)}')

In [None]:
# Stack the per-image tables into one frame with a fresh 0..n-1 index
all_crops_loc = pd.concat(all_crops_loc, ignore_index=True)

Save to gif

In [None]:
import os
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/object_detection_images.gif"

# presumably assembles the annotated images in output_dir into a GIF at gif_path —
# see gif_creater in the project utilities
gif_creater(output_dir, gif_path)

# show the gif
md("![mothGif](" + os.path.abspath(gif_path) + " 'moth')")

# Flatbug

⚠️ This section is only advised if CUDA is available.

In [None]:
# Same files as models_load, except that the flatbug weights are used as the localisation model.
# NOTE(review): this rebinds `models`, which was imported from torchvision at the top of
# the notebook; a distinct name (e.g. models_flatbug) would avoid shadowing that import.
models = load_models(
    device=device,
    localisation_model_path='./models/flat_bug_M.pt',
    binary_model_path='./models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
    order_model_path='./models/dhc_best_128.pth',
    order_threshold_path='./models/thresholdsTestTrain.csv',
    species_model_path='./models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
    species_labels='./models/03_costarica_data_category_map.json'
)

In [None]:
# Check whether CUDA is available; the flatbug cells below only do work when it is
torch.cuda.is_available()

In [None]:
if torch.cuda.is_available():
    # Start from an empty flatbug output folder so files from an earlier run are not reused
    output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/'
    os.makedirs(output_dir, exist_ok=True)

    files = os.listdir(output_dir)
    for stale_name in files:
        os.remove(os.path.join(output_dir, stale_name))

In [None]:
if torch.cuda.is_available():
    os.makedirs('./examples/images/crops/interesting_timelapse_flatbug/', exist_ok=True)

    # One table of flatbug detections per image; concatenated in a later cell
    all_crops_flatbug = []

    for img_path in tqdm(image_paths):
        detections = crop_image_only(
            image_path=img_path,
            bucket_name="cri",
            localisation_model=models['localisation_model'],
            proc_device=device,
            csv_file="./examples/interesting_timelapse_flatbug.csv",
            save_crops=True,
            box_threshold=0,
            crop_dir="./examples/images/crops/interesting_timelapse_flatbug",
            job_name=None,
        )
        # Drop the placeholder rows recorded for images without detections
        detections = detections[detections['crop_status'] != 'NO DETECTIONS FOR IMAGE']
        all_crops_flatbug.append(detections)

        # Images without any detection are neither annotated nor saved
        if detections.shape[0] == 0:
            continue

        boxes = [
            {
                'x_min': row['x_min'],
                'y_min': row['y_min'],
                'x_max': row['x_max'],
                'y_max': row['y_max'],
                'label': '',
                'ann_col': 'grey'
            }
            for _, row in detections.iterrows()
        ]
        img = image_annotation(img_path, boxes=boxes, scale=False)

        # save the annotated image under the same file name as the raw image
        img.save(f'{output_dir}/{os.path.basename(img_path)}')

In [None]:
if torch.cuda.is_available():
    # Collect the annotated flatbug frames in file-name order. os.listdir gives no
    # ordering guarantee, and a timelapse GIF needs a stable frame order.
    # A dedicated name is used so the raw-image list in `image_paths` is not overwritten.
    flatbug_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/'
    flatbug_frame_paths = sorted(
        os.path.join(flatbug_dir, name) for name in os.listdir(flatbug_dir)
    )
    images = [Image.open(path) for path in flatbug_frame_paths]

    # Save as GIF (500 ms per frame, looping forever)
    gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/flatbug_detection_images.gif"
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

    del images

    # Stack the per-image detection tables into one frame with a fresh index
    all_crops_flatbug = pd.concat(all_crops_flatbug)
    all_crops_flatbug = all_crops_flatbug.reset_index(drop=True)

In [None]:
if torch.cuda.is_available():
    # An expression inside an `if` body is not the cell's result, so it must be
    # passed to display() explicitly for the GIF to be rendered.
    display(md("![mothGif](" + os.path.abspath(gif_path) + " 'moth')"))

# Objects for Inferences

In [None]:
# Choose which crops to use. The flatbug crops only exist when the CUDA-only Flatbug
# section above actually ran, so fall back to the localisation-model crops otherwise
# (previously this raised NameError on machines without CUDA).
all_crops = all_crops_flatbug if torch.cuda.is_available() else all_crops_loc

# Pre-processing applied to every crop before it is passed to a classifier
transform_species = transforms.Compose(
        [
            transforms.Resize((300, 300)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ]
    )

# Absolute paths of the raw .jpg images, in file-name order. The order matters because
# the tracking section pairs each image with the next one in this list.
# NOTE(review): the classifier cells filter all_crops on `image_path == <absolute path>`;
# confirm crop_image_only records paths in this same form.
raw_dir = './examples/images/dep000035/interesting_timelapse/raw/'
image_paths_raw = sorted(
    os.path.abspath(os.path.join(raw_dir, name))
    for name in os.listdir(raw_dir)
    if name.endswith('.jpg')
)

# Binary Classifier

In [None]:
# Start from an empty output folder so files from an earlier run are not reused
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_binary'
os.makedirs(output_dir, exist_ok=True)

files = os.listdir(output_dir)
for stale_name in files:
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
for image_path in tqdm(image_paths_raw):
    source_image = Image.open(image_path).convert("RGB")

    # Detections that belong to this image, without the "no detections" placeholder rows
    belongs_to_image = all_crops['image_path'] == image_path
    is_detection = all_crops['crop_status'] != 'NO DETECTIONS FOR IMAGE'
    crops_df = all_crops.loc[belongs_to_image & is_detection]

    # Nothing to classify or annotate for this image
    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        box = (row['x_min'], row['y_min'], row['x_max'], row['y_max'])
        cropped_tensor = transform_species(source_image.crop(box)).unsqueeze(0).to(device)
        binary_prediction = classify_box(cropped_tensor, models_load['classification_model'])

        # Moths are drawn in green, everything else in red
        binary_label = binary_prediction[0]
        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': binary_label,
            'ann_col': 'green' if binary_label == 'moth' else 'red'
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')

In [None]:
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/binary_images.gif"

# presumably assembles the annotated images in output_dir into a GIF at gif_path —
# see gif_creater in the project utilities
gif_creater(output_dir, gif_path)

# show the gif
md("![mothGif](" + os.path.abspath(gif_path) + " 'moth')")

# Order Classifier

In [None]:
# Start from an empty output folder so files from an earlier run are not reused
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_order/'
os.makedirs(output_dir, exist_ok=True)

files = os.listdir(output_dir)
for stale_name in files:
    os.remove(os.path.join(output_dir, stale_name))

imgs = []
for image_path in tqdm(image_paths_raw):
    source_image = Image.open(image_path).convert("RGB")

    # Detections that belong to this image, without the "no detections" placeholder rows
    belongs_to_image = all_crops['image_path'] == image_path
    is_detection = all_crops['crop_status'] != 'NO DETECTIONS FOR IMAGE'
    crops_df = all_crops.loc[belongs_to_image & is_detection]

    # Nothing to classify or annotate for this image
    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        box = (row['x_min'], row['y_min'], row['x_max'], row['y_max'])
        cropped_tensor = transform_species(source_image.crop(box)).unsqueeze(0).to(device)
        order_prediction = classify_order(
            cropped_tensor,
            models_load['order_model'],
            models_load['order_model_labels'],
            models_load['order_model_thresholds']
        )

        # Lepidoptera are drawn in green, every other order in red
        order_label = order_prediction[0]
        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': order_label,
            'ann_col': 'green' if 'Lepidoptera' in order_label else 'red'
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')

    imgs.append(im)


In [None]:
# Open the annotated frames in file-name order. os.listdir gives no ordering guarantee,
# and a timelapse GIF needs a stable frame order. A dedicated name is used so that
# `image_paths` is not overwritten with annotated-frame paths.
order_frame_paths = sorted(
    os.path.join(output_dir, name) for name in os.listdir(output_dir)
)
images = [Image.open(path) for path in order_frame_paths]

# Save as GIF (500 ms per frame, looping forever)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/order_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

del images

In [None]:
# Embed the order-classifier GIF through a Markdown image link to its absolute path
md("![orderGif](" + os.path.abspath(gif_path) + " 'order')")

# Species Classifier

In [None]:
# Start from an empty output folder so files from an earlier run are not reused
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_species/'
os.makedirs(output_dir, exist_ok=True)
files = os.listdir(output_dir)
for stale_name in files:
    os.remove(os.path.join(output_dir, stale_name))

# all_embeddings[image_path][key] holds the embedding, file name, crop id and box of
# every crop that was passed to the species classifier; it feeds the tracking section.
all_embeddings = {}
for image_path in tqdm(image_paths_raw):
    original_image = Image.open(image_path).convert("RGB")

    crops_df = all_crops.loc[all_crops['image_path'] == image_path, ]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'NO DETECTIONS FOR IMAGE',]

    # Every image gets an entry, even when none of its crops is classified to species
    all_embeddings[image_path] = {}

    if crops_df.shape[0] > 0:
        boxes = []
        for j, row in crops_df.iterrows():
            cropped_image = original_image.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))
            cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(device)
            order_prediction = classify_order(
                cropped_tensor,
                models_load['order_model'],
                models_load['order_model_labels'],
                models_load['order_model_thresholds']
            )
            label = ""
            ann_col = 'red'

            # Only crops classified as Lepidoptera go through the species model
            if 'Lepidoptera' in order_prediction[0]:
                species_names, species_confidences, embeddings = classify_species(
                    cropped_tensor,
                    models_load['species_model'],
                    models_load['species_model_labels'],
                    5
                )
                label = f"{species_names[0]}, {'{:.2f}'.format(species_confidences[0]*100)}%"
                ann_col='green'
                # `j` is the row index of the concatenated all_crops table, so a crop id
                # derived from it (crop_{j+1}) does not correspond to the per-image
                # crop_status that the tracking results are later merged on. Record the
                # crop's own crop_status instead.
                # NOTE(review): assumes the tracking helpers report the 'crop' field as
                # crop_id and that crop_status is unique within an image — confirm.
                all_embeddings[image_path][f'crop_{j}'] = {
                    'embedding': embeddings,
                    'file': os.path.basename(image_path),
                    'crop': row['crop_status'],
                    'box': {'xmin':row['x_min'], 'ymin':row['y_min'], 'xmax':row['x_max'], 'ymax':row['y_max']}
                }

            boxes.append({
                'x_min': row['x_min'],
                'y_min': row['y_min'],
                'x_max': row['x_max'],
                'y_max': row['y_max'],
                'label': label,
                'ann_col': ann_col
            })

        im = image_annotation(image_path, boxes=boxes, scale=False)
        im.save(f'{output_dir}/{os.path.basename(image_path)}')

In [None]:
# Open the annotated frames in file-name order. os.listdir gives no ordering guarantee,
# and a timelapse GIF needs a stable frame order. A dedicated name is used so that
# `image_paths` is not overwritten with annotated-frame paths.
species_frame_paths = sorted(
    os.path.join(output_dir, name) for name in os.listdir(output_dir)
)
images = [Image.open(path) for path in species_frame_paths]

# Save as GIF (500 ms per frame, looping forever)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/species_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

del images

In [None]:
# Embed the species-classifier GIF through a Markdown image link to its absolute path
md("![speciesGif](" + os.path.abspath(gif_path) + " 'species')")

# Tracking

Next we want to be able to track individual insects across frames. This is done by using the tracking model. The tracking model takes in a list of detections and returns a list of tracks. Each track is a list of detections that belong to the same insect.

A track is defined by the IoU, distance between crops, similarity in features, and area. So we start by taking the embeddings from the species classifier. 

In [None]:
# Image paths with an embeddings entry (one per raw image processed by the species cell)
all_embeddings.keys()

In [None]:
# # create a list of embeddings
# embeddings_list = {}

# for i in all_crops['image_path'].unique():
#     embeddings_list[i] = {}

# for i, crops in tqdm(all_crops.iterrows()):
#     img_path = crops['image_path']
#     image = Image.open(img_path).convert("RGB")
#     cropped_image = image.crop((crops['x_min'], crops['y_min'], crops['x_max'], crops['y_max']))

#     embedding = extract_embedding(cropped_image, models_load['species_model'], device=device)
#     embeddings_list[img_path][crops['crop_status']] = {
#         'embedding': embedding,
#         'file': os.path.basename(crops['image_path']),
#         'crop': crops['crop_status'],
#         'crop_image': cropped_image,
#         'box': {'xmin':crops['x_min'], 'ymin':crops['y_min'], 'xmax':crops['x_max'], 'ymax':crops['y_max']}
#     }

In [None]:
from amber_inferences.utils.tracking import *
from itertools import product

In [None]:
# Pair every crop of an image with every crop of the image that follows it
image_paths = list(all_embeddings.keys())

all_crop_pairs = [
    (img1, c1, img2, c2)
    for img1, img2 in zip(image_paths, image_paths[1:])
    for c1, c2 in product(all_embeddings[img1], all_embeddings[img2])
]


In [None]:
# Inspect the (image, crop, next image, crop) tuples built in the previous cell.
# NOTE(review): this prints the whole list; consider all_crop_pairs[:5] for long timelapses.
all_crop_pairs

In [None]:
# Score every candidate pair of crops from consecutive images
results = []

for image_a, crop_a, image_b, crop_b in tqdm(all_crop_pairs):
    first = all_embeddings[image_a][crop_a]
    second = all_embeddings[image_b][crop_b]

    # The image path is written into each stored crop record before it is scored
    first['image_path'] = image_a
    second['image_path'] = image_b

    results.append(calculate_cost(first, second))

In [None]:
# Column names, in the order the values appear in each calculate_cost result
columns = [
    "image_path1", "crop1_id",
    "image_path2", "crop2_id",
    "cnn_cost", "iou_cost", "box_ratio_cost", "dist_ratio_cost",
    "total_cost",
]

# Tabulate the pairwise costs and label the columns by position
results_df = (
    pd.DataFrame(results)
    .reset_index(drop=True)
    .set_axis(columns, axis=1)
)
results_df.head()

In [None]:
# Distribution of the total cost over all candidate crop pairs
fig, ax = plt.subplots(figsize=(5, 3))
ax.hist(results_df['total_cost'], bins=50, color='blue', alpha=0.7)
ax.set_xlabel('Total Cost')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Total Costs')

# Reading guide: low cost means similar crops, high cost means dissimilar crops
ax.annotate('<- increasing similarity', xy=(0, 0.9), xycoords='axes fraction',
            fontsize=8, color='black', ha='left')
ax.annotate('decreasing similarity ->', xy=(1, 0.9), xycoords='axes fraction',
            fontsize=8, color='black', ha='right')

ax.grid()
plt.show()

Next we define the tracks. A track is the series of crops we consider to belong to one individual. This is based on a cost threshold. 

In [None]:
# Find the best match for each crop among the crops of the neighbouring image
# (presumably the pair with the lowest total_cost — confirm in find_best_matches)
best_matches = find_best_matches(results_df)
best_matches

In [None]:
# Assign a track id to each crop from the best matches.
# NOTE(review): cost_threshold=1 is presumably the largest total_cost at which two crops
# are still linked into one track — confirm, and tune it against the histogram above.
tracks_df = track_id_calc(best_matches, cost_threshold=1)
tracks_df

In [None]:
import matplotlib.colors as mcolors

# One colour per distinct track
num_tracks = tracks_df['track_id'].nunique()

# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9; plt.get_cmap is
# the supported way to get a resampled colormap. One extra entry is requested because
# 'hsv' starts and ends on red, which would otherwise give the first and the last track
# practically the same colour.
cmap = plt.get_cmap('hsv', num_tracks + 1)

# Map track_id to hex colors (ids are sorted so the mapping is stable between runs)
track_id_to_color = {
    track_id: mcolors.to_hex(cmap(i))
    for i, track_id in enumerate(sorted(tracks_df['track_id'].unique()))
}

# Add color column to DataFrame
tracks_df['colour'] = tracks_df['track_id'].map(track_id_to_color)

In [None]:
# Attach track id and colour to every crop; crops without a track keep missing values
all_crops_merge = (
    all_crops
    .merge(
        tracks_df,
        how='left',
        left_on=['image_path', 'crop_status'],
        right_on=['image_path', 'crop_id'],
    )
    .reset_index(drop=True)
)
all_crops_merge.sort_values('track_id')

In [None]:
# Start from an empty output folder so files from an earlier run are not reused
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_tracking/'
os.makedirs(output_dir, exist_ok=True)

files = os.listdir(output_dir)
for stale_name in files:
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
# Keep only tracks that contain more than one crop; single-crop tracks are not drawn
all_crops_merge_subset = all_crops_merge.copy()
track_counts = all_crops_merge_subset["track_id"].value_counts()
valid_tracks = track_counts[track_counts > 1].index
all_crops_merge_subset = all_crops_merge_subset[all_crops_merge_subset["track_id"].isin(valid_tracks)].reset_index(drop=True)

for image_path in image_paths_raw:
    # Tracked crops of this image. image_annotation reads the image from its path, so
    # the frame is no longer decoded and copied here (that result was never used).
    crops_df = all_crops_merge_subset.loc[all_crops_merge_subset['image_path'] == image_path, ]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'NO DETECTIONS FOR IMAGE',]
    crops_df = crops_df.loc[crops_df['track_id'].notna(),]

    # Each box is labelled with its track id and drawn in that track's colour
    boxes = [
        {
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': row['track_id'],
            'ann_col': row['colour']
        }
        for _, row in crops_df.iterrows()
    ]

    # Every frame is saved, including frames without tracked crops, so the GIF keeps all frames
    im = image_annotation(image_path, boxes=boxes, scale=False)
    out_path = f'{output_dir}/{os.path.basename(image_path)}'
    im.save(out_path)

In [None]:
# Open the annotated frames in file-name order. os.listdir gives no ordering guarantee,
# and a timelapse GIF needs a stable frame order. A dedicated name is used so that
# `image_paths` is not overwritten with annotated-frame paths.
tracking_frame_paths = sorted(
    os.path.join(output_dir, name) for name in os.listdir(output_dir)
)
images = [Image.open(path) for path in tracking_frame_paths]

# Save as GIF (500 ms per frame, looping forever)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/tracking_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

del images

In [None]:
# Embed the tracking GIF through a Markdown image link to its absolute path
md("![trackingGif](" + os.path.abspath(gif_path) + " 'tracking')")