# Tutorial


This tutorial walks you through the process of running inferences for deployments in Costa Rica.

In [None]:
import os
# Tolerate more than one OpenMP runtime being loaded in the same process
# (presumably needed by the torch stack on some machines — confirm).
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"


In [None]:
# Set the working directory to the repository checkout: every later cell uses
# paths relative to it (./credentials.json, ./examples/..., ./models/...).
os.chdir(os.path.expanduser('~/amber-inferences'))

In [None]:
# Install the package if required: an editable install from the current
# working directory. Skip this cell if the package is already installed.
%pip install -e .

In [None]:
from amber_inferences.utils.config import load_credentials
from amber_inferences.utils.api_utils import get_deployments
from amber_inferences.utils.deployment_summary import deployment_data
from amber_inferences.utils.custom_models import *
from amber_inferences.utils.inference_scripts import *
from amber_inferences.utils.plotting import *
from amber_inferences.utils.tracking import *

# Explicit imports stay AFTER the star imports on purpose, so these names
# always win over anything the star imports happen to re-export.
import json
from pathlib import Path

# import matplotlib.pyplot as plt
from IPython.display import display
from IPython.display import Markdown as md

import boto3
import pandas as pd
import torch
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

# Explore the Data on the Object Store

In [None]:
# Build a boto3 session from the stored credentials, then an S3 client
# pointed at the object store endpoint.
aws_credentials = load_credentials('./credentials.json')
session_kwargs = {
    "aws_access_key_id": aws_credentials["AWS_ACCESS_KEY_ID"],
    "aws_secret_access_key": aws_credentials["AWS_SECRET_ACCESS_KEY"],
    "region_name": aws_credentials["AWS_REGION"],
}
session = boto3.Session(**session_kwargs)
s3_client = session.client("s3", endpoint_url=aws_credentials["AWS_URL_ENDPOINT"])

Look at the deployments available on the object store:

In [None]:
# Fetch every deployment, tabulate them, and display the active ones
all_deployments = pd.DataFrame(
    get_deployments(aws_credentials['UKCEH_username'], aws_credentials['UKCEH_password'])
)
is_active = all_deployments['status'] == 'active'
all_deployments[is_active]

In [None]:
def get_deployment_names(username, password, bucket):
    """Return the deployment IDs registered under one country bucket.

    Args:
        username: Username passed through to `get_deployments`.
        password: Password passed through to `get_deployments`.
        bucket: Country code / bucket name to keep (e.g. 'cri'). Compared
            case-insensitively against each deployment's `country_code`.

    Returns:
        list: The `deployment_id` of every deployment whose `country_code`
        matches `bucket`, in the order `get_deployments` returned them.
    """
    # Normalise once so 'CRI' and 'cri' select the same deployments
    wanted = bucket.lower()
    return [
        dep['deployment_id']
        for dep in get_deployments(username, password)
        if dep["country_code"].lower() == wanted
    ]

# List the deployment IDs whose country code is 'cri'
get_deployment_names(aws_credentials['UKCEH_username'], aws_credentials['UKCEH_password'], 'cri')

In [None]:
# Bullet list of the distinct values in the 'country' column
print("All countries:")
for country in all_deployments['country'].unique():
    print("-", country)

Let's pick one, `cri` (Costa Rica), and check out the data attached to it.

In [None]:
# Restrict the deployment summary to two Costa Rican deployments and skip
# the file count.
cr_subset = {
    "subset_countries": ["cri"],
    "subset_deployments": ["dep000035", "dep000036"],
    "include_file_count": False,
}
cr_deployments = deployment_data(aws_credentials, **cr_subset)

To get the files for the given deployment(s):

In [None]:
# Display the result of the `deployment_data` call
cr_deployments

# Log the image keys

In [None]:
from amber_inferences.utils.key_utils import save_keys

In [None]:
# This call takes some time; comment it out to save time once the keys file
# (`output_file`) has been written.
save_keys(
    s3_client,
    bucket="cri",
    deployment_id="dep000035",
    output_file="./examples/example_keys/dep000035_keys.json",
    subdir="snapshot_images"
)

Let's create a subset which just looks at 10 minutes from one night:

In [None]:
# Load every image key logged for the deployment
with open("./examples/example_keys/dep000035_keys.json", "r") as f:
    dep000035_keys = json.load(f)

# Keep only the first key group, restricted to keys containing the
# "20240430004" substring and capped at the first ten matches.
first_group = next(iter(dep000035_keys))
dep000035_keys = {
    first_group: [key for key in dep000035_keys[first_group] if "20240430004" in key][:10]
}

# save to file
with open("./examples/example_keys/interesting_timelapse.json", "w") as f:
    json.dump(dep000035_keys, f, indent=4)

In [None]:
# Look at the keys: print the first lines of the subset file written above
!head ./examples/example_keys/interesting_timelapse.json

# Download and View the Images

In [None]:
import json
from amber_inferences.utils.inference_scripts import download_image_from_key

In [None]:
# Read the subset keys file and keep the list stored under its first entry
with open('./examples/example_keys/interesting_timelapse.json') as keys_file:
    keys_by_group = json.load(keys_file)

keys = list(keys_by_group.values())[0]

In [None]:
# Download every selected key from the 'cri' bucket into the raw image folder
os.makedirs('./examples/images/dep000035/interesting_timelapse/raw/', exist_ok=True)

for key in keys:
    download_image_from_key(s3_client, key, 'cri', './examples/images/dep000035/interesting_timelapse/raw/')

In [None]:
raw_dir = './examples/images/dep000035/interesting_timelapse/raw/'

# os.listdir returns entries in arbitrary order; sort so the frames are always
# processed (and written into the GIFs) in the same name-based order.
# NOTE(review): assumes the file names sort chronologically — confirm.
image_paths = sorted(
    os.path.join(raw_dir, name)
    for name in os.listdir(raw_dir)
    if name.endswith('.jpg')
)

In [None]:
# Open images and convert to a sequence
images = [Image.open(img) for img in image_paths]

# Save as GIF
os.makedirs('./examples/images/dep000035/interesting_timelapse/gifs', exist_ok=True)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/raw_images.gif"
try:
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)
finally:
    # Image.open leaves the underlying file open; close every frame explicitly
    # rather than relying on garbage collection.
    for frame in images:
        frame.close()
    del images

In [None]:
# Render the GIF inline via a markdown image link to its absolute path
md(f"![mothGif]({os.path.abspath(gif_path)} 'moth')")

# Perform Object Detection on the Images

In [None]:
# Pick the first CUDA device when one is available, otherwise the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
if use_cuda:
    # Allocate a throwaway tensor on the GPU (the result is discarded)
    torch.tensor([1.0], device=device)
print(f"Using device: {device}")

In [None]:
# Fetch the summary for dep000035 only and keep that single entry
deployments_by_id = deployment_data(
    aws_credentials,
    subset_countries=["cri"],
    subset_deployments=["dep000035"],
    include_file_count=False,
)
dep_data = deployments_by_id["dep000035"]

In [None]:
# Paths to the weights / label files for each stage of the pipeline
model_paths = {
    "localisation_model_path": './models/v1_localizmodel_2021-08-17-12-06.pt',
    "binary_model_path": './models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
    "order_model_path": './models/dhc_best_128.pth',
    "order_threshold_path": './models/thresholdsTestTrain.csv',
    "species_model_path": './models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
    "species_labels": './models/03_costarica_data_category_map.json',
}
models_load = load_models(device=device, **model_paths)

In [None]:
# Start from an empty output folder: create it if needed, then delete
# whatever a previous run left behind.
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes'
os.makedirs(output_dir, exist_ok=True)
for stale_name in os.listdir(output_dir):
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
# Look up the summary entry for the deployment being processed
deployment_id = "dep000035"
deployments_by_id = deployment_data(
    aws_credentials,
    subset_countries=["cri"],
    subset_deployments=[deployment_id],
    include_file_count=False,
)
dep_data = deployments_by_id[deployment_id]

In [None]:
all_crops_loc = []

for img_path in tqdm(image_paths):
    # Run the localisation model on one image. The result is iterated row by
    # row below and provides the box corners and a `crop_status` per row.
    crops = crop_image_only(
        image_path=img_path,
        dep_data=dep_data,
        localisation_model=models_load['localisation_model'],
        proc_device=device,
        csv_file="./examples/interesting_timelapse_crops.csv",
        save_crops=True,
        box_threshold=0.95,
        crop_dir="./examples/images/crops/interesting_timelapse",
        job_name=None,
    )
    all_crops_loc.append(crops)

    # One grey box per row, labelled with the row's crop status
    boxes = [
        {
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': row['crop_status'],
            'ann_col': 'grey'
        }
        for _, row in crops.iterrows()
    ]

    # Draw the boxes on the image and save the annotated copy
    img = image_annotation(img_path, boxes=boxes)
    img.save(f'{output_dir}/{os.path.basename(img_path)}')

In [None]:
# Stack the per-image results into one table with a fresh 0..n-1 index
all_crops_loc = pd.concat(all_crops_loc).reset_index(drop=True)

Save the annotated frames as a GIF:

In [None]:
import os

# Combine the annotated frames in `output_dir` into one GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/object_detection_images.gif"
gif_creater(output_dir, gif_path)

# show the gif
md(f"![mothGif]({os.path.abspath(gif_path)} 'moth')")

# Flatbug

⚠️ This section is only recommended if CUDA is available.

In [None]:
# Check whether CUDA is available; the flatbug cells below are guarded by this check
torch.cuda.is_available()

In [None]:
if torch.cuda.is_available():
    # Expose only GPU 0 and reload the models with the flatbug weights as
    # the localisation model; the other model paths are unchanged.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device("cuda:0")
    flatbug_model_paths = {
        "localisation_model_path": './models/flat_bug_M.pt',
        "binary_model_path": './models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
        "order_model_path": './models/dhc_best_128.pth',
        "order_threshold_path": './models/thresholdsTestTrain.csv',
        "species_model_path": './models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
        "species_labels": './models/03_costarica_data_category_map.json',
    }
    models_load = load_models(device=device, **flatbug_model_paths)

In [None]:
if torch.cuda.is_available():
    # Start from an empty flatbug output folder
    output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/'
    os.makedirs(output_dir, exist_ok=True)
    for stale_name in os.listdir(output_dir):
        os.remove(os.path.join(output_dir, stale_name))

In [None]:
# Fetch the summary for dep000035 and keep that single entry
deployments_by_id = deployment_data(
    aws_credentials,
    subset_countries=["cri"],
    subset_deployments=["dep000035"],
    include_file_count=False,
)
dep_data = deployments_by_id["dep000035"]

In [None]:
# Inputs for running the localisation model on a single image (the first
# one) in the next cells: its path, the model, the decoded image and its size.
image_path = Path(image_paths[0])
localisation_model = models_load['localisation_model']

image = Image.open(image_path).convert("RGB")
original_width, original_height = image.size


In [None]:
def flatbug(image_path, flatbug_model, save_annotation=False):
    """Run a flatbug model on one image and return its detections.

    Args:
        image_path: Path to the image; passed to the model as a string.
        flatbug_model: Callable taking the image path string and returning an
            object with a `json_data` dict (containing at least "boxes",
            "confs" and "classes") and a `plot(outpath=...)` method.
        save_annotation: When True and at least one box was found, save the
            model's visualisation to `<image dir>/flatbug/flatbug_<image name>`.

    Returns:
        dict: The model's `json_data` with "confs" renamed to "scores" and
        "classes" renamed to "labels". The rename happens in place on the
        model output's dict.
    """
    output = flatbug_model(str(image_path))

    # Save a visualisation of the predictions
    if len(output.json_data["boxes"]) > 0 and save_annotation:
        # A bare filename has an empty dirname; fall back to "." so the
        # annotation lands next to the image rather than under "/flatbug".
        annotation_dir = f"{os.path.dirname(image_path) or '.'}/flatbug"
        # The plot target folder is not created anywhere else
        os.makedirs(annotation_dir, exist_ok=True)
        print(f"Saving annotated image: {image_path}")
        output.plot(
            outpath=f"{annotation_dir}/flatbug_{os.path.basename(image_path)}"
        )

    # Rename the keys to the names used by the rest of the pipeline
    crop_info = output.json_data
    crop_info["scores"] = crop_info.pop("confs")
    crop_info["labels"] = crop_info.pop("classes")

    return crop_info

In [None]:
# Same CUDA guard as the other flatbug cells: the flatbug weights are only
# loaded into `models_load` when CUDA is available.
if torch.cuda.is_available():
    localisation_outputs = flatbug(image_path, localisation_model)

In [None]:
# Guarded like the other flatbug cells. Under this guard `device` is the
# cuda:0 device selected earlier, so it replaces the hard-coded one.
if torch.cuda.is_available():
    localisation_outputs, box_coords = get_boxes(
        localisation_model,
        image,
        image_path,
        original_width,
        original_height,
        device,
    )

In [None]:
if torch.cuda.is_available():
    os.makedirs('./examples/images/crops/interesting_timelapse_flatbug/', exist_ok=True)

    all_crops_flatbug = []

    for img_path in tqdm(image_paths):
        # Detect with the flatbug localisation model, keeping every box
        # (threshold 0), then drop the "no detections" placeholder rows.
        crops = crop_image_only(
            image_path=img_path,
            dep_data=dep_data,
            localisation_model=models_load['localisation_model'],
            proc_device=device,
            csv_file="./examples/interesting_timelapse_flatbug.csv",
            save_crops=True,
            box_threshold=0,
            crop_dir="./examples/images/crops/interesting_timelapse_flatbug",
            job_name=None,
        )
        crops = crops.loc[crops['crop_status'] != 'No detections for image.',]
        all_crops_flatbug.append(crops)

        # Images without any detection are not annotated or saved
        if crops.shape[0] == 0:
            continue

        boxes = [
            {
                'x_min': row['x_min'],
                'y_min': row['y_min'],
                'x_max': row['x_max'],
                'y_max': row['y_max'],
                'label': '',
                'ann_col': 'grey'
            }
            for _, row in crops.iterrows()
        ]
        del crops

        # save the image
        img = image_annotation(img_path, boxes=boxes, scale=False)
        img.save(f'{output_dir}/{os.path.basename(img_path)}')

In [None]:
# `all_crops_flatbug` only exists when the flatbug loop ran, i.e. with CUDA
if torch.cuda.is_available():
    all_crops_flatbug = pd.concat(all_crops_flatbug).reset_index(drop=True)

In [None]:
if torch.cuda.is_available():
    import os

    # Combine the flatbug-annotated frames in `output_dir` into one GIF
    gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/flatbug_detection_images.gif"
    gif_creater(output_dir, gif_path)


In [None]:
if torch.cuda.is_available():
    # A value inside an `if` block is not auto-displayed by the notebook
    # (only a trailing top-level expression is), so display it explicitly.
    display(md("![mothGif](" + os.path.abspath(gif_path) + " 'moth')"))

# Objects for Inferences

In [None]:
# Choose which crops to use: the flatbug crops when that section ran (CUDA
# only), otherwise the localisation model's crops.
# NOTE(review): the cells below draw boxes with scale=False, as the flatbug
# loop does; confirm the localisation boxes are in the same coordinate space.
all_crops = all_crops_flatbug if torch.cuda.is_available() else all_crops_loc

# Preprocessing applied to every crop before it is passed to a classifier
transform_species = transforms.Compose(
        [
            transforms.Resize((300, 300)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ]
    )

all_crops = all_crops.reset_index(drop=True)

# Absolute paths make the per-image lookups below exact; the base name is
# used as a merge key in the tracking section.
all_crops['image_path'] = all_crops['image_path'].apply(os.path.abspath)
all_crops['base_image_path'] = all_crops['image_path'].apply(os.path.basename)

# Absolute paths of the raw .jpg images, in sorted order. `image_paths` is
# deliberately left untouched (it holds the relative paths used earlier).
raw_dir = './examples/images/dep000035/interesting_timelapse/raw/'
image_paths_raw = sorted(
    os.path.abspath(os.path.join(raw_dir, name))
    for name in os.listdir(raw_dir)
    if name.endswith('.jpg')
)

# Binary Classifier

In [None]:
# Start from an empty output folder for the binary-classifier annotations
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_binary'
os.makedirs(output_dir, exist_ok=True)
for stale_name in os.listdir(output_dir):
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
# Inspect the current value of `image_paths`
image_paths

In [None]:
# Inspect the crops table that the classifier cells below iterate over
all_crops

In [None]:
for image_path in tqdm(image_paths_raw):
    original_image = Image.open(image_path).convert("RGB")

    # Exact path match, as in the order/species/tracking cells (a
    # `str.contains` lookup would interpret the path as a regular expression).
    crops_df = all_crops.loc[all_crops['image_path'] == image_path]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'No detections for image.']

    # Nothing to annotate for this image
    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        # Cut the detection out of the full image and classify it
        cropped_image = original_image.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))
        cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(device)
        binary_prediction = classify_box(cropped_tensor, models_load['classification_model'])

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': binary_prediction[0],
            # green for predicted moths, red for everything else
            'ann_col': 'green' if binary_prediction[0] == 'moth' else 'red'
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')

In [None]:
# Combine the binary-classifier frames in `output_dir` into one GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/binary_images.gif"
gif_creater(output_dir, gif_path)

# show the gif
md(f"![mothGif]({os.path.abspath(gif_path)} 'moth')")

# Order Classifier

In [None]:
# Start from an empty output folder for the order-classifier annotations
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_order/'
os.makedirs(output_dir, exist_ok=True)
for stale_name in os.listdir(output_dir):
    os.remove(os.path.join(output_dir, stale_name))

for image_path in tqdm(image_paths_raw):
    original_image = Image.open(image_path).convert("RGB")

    crops_df = all_crops.loc[all_crops['image_path'] == image_path]
    # Same placeholder string as the flatbug, binary and tracking cells
    # filter on. NOTE(review): confirm against what crop_image_only writes.
    crops_df = crops_df.loc[crops_df['crop_status'] != 'No detections for image.']

    # Nothing to annotate for this image
    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        # Cut the detection out of the full image and predict its order
        cropped_image = original_image.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))
        cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(device)
        order_prediction = classify_order(
            cropped_tensor,
            models_load['order_model'],
            models_load['order_model_labels'],
            models_load['order_model_thresholds']
        )

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': order_prediction[0],
            # green when the predicted order mentions Lepidoptera, red otherwise
            'ann_col': 'green' if 'Lepidoptera' in order_prediction[0] else 'red'
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')


In [None]:
# Combine the order-classifier frames in `output_dir` into one GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/order_images.gif"
gif_creater(output_dir, gif_path)

# show the gif
md(f"![mothGif]({os.path.abspath(gif_path)} 'moth')")

# Species Classifier

In [None]:
# Start from an empty output folder for the species-classifier annotations
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_species/'
os.makedirs(output_dir, exist_ok=True)
for stale_name in os.listdir(output_dir):
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
# image path -> {crop_status -> embedding record}; read by the tracking section
all_embeddings = {}

for image_path in tqdm(image_paths_raw):
    original_image = Image.open(image_path).convert("RGB")
    original_width, original_height = original_image.size

    crops_df = all_crops.loc[all_crops['image_path'] == image_path]
    # Same placeholder string as the flatbug, binary and tracking cells
    # filter on. NOTE(review): confirm against what crop_image_only writes.
    crops_df = crops_df.loc[crops_df['crop_status'] != 'No detections for image.']

    # Every image gets an entry, even when it has no crops
    all_embeddings[image_path] = {}

    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        cropped_image = original_image.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))
        cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(device)
        order_prediction = classify_order(
            cropped_tensor,
            models_load['order_model'],
            models_load['order_model_labels'],
            models_load['order_model_thresholds']
        )
        label = ""
        ann_col = 'red'

        # Only crops whose predicted order mentions Lepidoptera go through
        # the species classifier and get an embedding recorded.
        if 'Lepidoptera' in order_prediction[0]:
            species_names, species_confidences, embeddings = classify_species(
                cropped_tensor,
                models_load['species_model'],
                models_load['species_model_labels'],
                5
            )
            # First returned species and its confidence as a percentage
            label = f"{species_names[0]}, {species_confidences[0] * 100:.2f}%"
            ann_col = 'green'
            all_embeddings[image_path][row['crop_status']] = {
                'embedding': embeddings,
                'image_path': os.path.basename(image_path),
                'crop': row['crop_status'],
                'box': {'xmin': row['x_min'], 'ymin': row['y_min'], 'xmax': row['x_max'], 'ymax': row['y_max']},
                'image_size': [original_width, original_height]
            }

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': label,
            'ann_col': ann_col
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')

In [None]:
# Combine the species-classifier frames in `output_dir` into one GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/species_images.gif"
gif_creater(output_dir, gif_path)

# show the gif
md(f"![mothGif]({os.path.abspath(gif_path)} 'moth')")

# Tracking

Next we want to be able to track individual insects across frames. This is done by using the tracking model. The tracking model takes in a list of detections and returns a list of tracks. Each track is a list of detections that belong to the same insect.

A track is defined by the IoU, distance between crops, similarity in features, and area. So we start by taking the embeddings from the species classifier. 

In [None]:
from amber_inferences.utils.tracking import calculate_cost, find_best_matches
from itertools import product

In [None]:
# For every pair of consecutive frames (in sorted path order), score each
# crop of the later frame against every crop of the earlier frame and keep
# the best match.
track_results = {}
image_paths_sorted = sorted(all_embeddings.keys())

for prev_img, curr_img in zip(image_paths_sorted, image_paths_sorted[1:]):
    prev_embeds = all_embeddings[prev_img]

    for crop_status, curr_data in all_embeddings[curr_img].items():
        # One cost table per crop of the previous frame
        similarities = [
            calculate_cost(prev_data, curr_data)
            for prev_data in prev_embeds.values()
        ]

        if not similarities:
            # No previous crops to match: record a placeholder row instead
            track_results[(curr_img, crop_status)] = pd.DataFrame({
                'previous_image': [None],
                'best_match_crop': ['No crops from previous image. Tracking not possible.'],
                'cnn_cost': [''],
                'iou_cost': [''],
                'box_ratio_cost': [''],
                'dist_ratio_cost': [''],
                'total_cost': ['']
            })
            continue

        # Pool the cost tables and let find_best_matches pick from them
        all_costs = pd.concat(similarities, ignore_index=True)
        track_results[(curr_img, crop_status)] = find_best_matches(all_costs)


In [None]:
# Concatenating the dict puts its (image, crop) keys into the first two index
# levels; turn them into named columns and drop the leftover inner level.
track_results_df = (
    pd.concat(track_results)
    .reset_index()
    .rename(columns={'level_0': 'current_image', 'level_1': 'current_crop'})
    .drop(columns=['level_2'])
)

# Integer after the last underscore of the crop name, used as a sort key
track_results_df['crop_no'] = [
    int(crop_name.split('_')[-1]) for crop_name in track_results_df['current_crop']
]

track_results_df = (
    track_results_df
    .sort_values(by=['current_image', 'crop_no'])
    .reset_index(drop=True)
)

track_results_df.head()

In [None]:
# Left-join the tracking results onto the crops table so every crop is kept,
# whether or not it has a tracking row.
merged_tracking = all_crops.merge(
    track_results_df,
    how='left',
    left_on=['base_image_path', 'crop_status'],
    right_on=['image_path', 'current_crop'],
    suffixes=('', '_tracking'),
)

# Discard the right-hand duplicates that the merge suffixed with '_tracking'
is_duplicate_col = merged_tracking.columns.str.endswith('_tracking')
all_crops_tracking = merged_tracking.loc[:, ~is_duplicate_col]

In [None]:
# Assign a track id to each crop from the pairwise best matches
track_df = track_id_calc(all_crops_tracking, cost_threshold=1)
print(
    f"Number of tracks: {track_df['track_id'].nunique()}"
)

# Attach the track ids to the crops table, keeping every crop
all_crops_tracked = all_crops_tracking.merge(
    track_df,
    how="left",
    left_on=["base_image_path", "crop_status"],
    right_on=["image_path", "crop_id"],
    suffixes=('', '_y')
).reset_index(drop=True)

# Drop only the right-hand duplicates the merge suffixed with "_y". Matching
# on the suffix (not on any "_y" substring) keeps unrelated columns whose
# name merely contains "_y".
all_crops_tracked = all_crops_tracked.loc[:, ~all_crops_tracked.columns.str.endswith("_y")]

# Rows of the image in the first row of the table have no previous frame
first_image = all_crops_tracked['image_path'].values[0]
all_crops_tracked.loc[
    all_crops_tracked['image_path'] == first_image,
    ['previous_image', 'best_match_crop']
] = 'first frame'


all_crops_tracked.head(10)

Where `track_id` is NaN, either the crop is not a moth or the image does not have any crops.

In [None]:
# Each (image, crop) pair should appear once; show every offending row if not
dup_keys = ['base_image_path', 'crop_status']
is_duplicated = all_crops_tracked.duplicated(subset=dup_keys, keep=False)
duplicates = (
    all_crops_tracked[is_duplicated]
    .sort_values(by=dup_keys)
    .reset_index(drop=True)
)

if not duplicates.empty:
    print(f"Found {len(duplicates)} duplicates in the crops:")
    display(duplicates)

In [None]:
# Start from an empty output folder for the tracking annotations
output_dir = './examples/images/dep000035/interesting_timelapse/annotated_boxes_tracking/'
os.makedirs(output_dir, exist_ok=True)
for stale_name in os.listdir(output_dir):
    os.remove(os.path.join(output_dir, stale_name))

In [None]:
for image_path in tqdm(image_paths_raw):
    # The boxes are drawn by image_annotation from the path, so the image
    # itself does not need to be opened here.
    crops_df = all_crops_tracked.loc[all_crops_tracked['image_path'] == image_path]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'No detections for image.']

    # Nothing to annotate for this image
    if crops_df.shape[0] == 0:
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        # Crops with a track id are drawn green and labelled with it; crops
        # without one (empty or NaN) stay grey and unlabelled.
        has_track = row['track_id'] != '' and pd.notna(row['track_id'])

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': row['track_id'] if has_track else '',
            'ann_col': 'green' if has_track else 'grey'
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'{output_dir}/{os.path.basename(image_path)}')

In [None]:
# Combine the tracking frames in `output_dir` into one GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/tracking_images.gif"
gif_creater(output_dir, gif_path)

# show the gif
md(f"![trackingGif]({os.path.abspath(gif_path)} 'tracking')")

In [None]:
# Display the deployment entry that was passed to `crop_image_only`
dep_data