# Tutorial


This tutorial walks you through the process of running inferences for a deployment in Costa Rica.

In [None]:
import os
# Set in the very first cell, before torch and the other heavy libraries are imported below.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [None]:
# Install the package if required
%pip install -e .

In [None]:
from PIL import ImageFont, ImageDraw, Image

import boto3
import pandas as pd

import amber_inferences
from amber_inferences.utils.config import load_credentials
from amber_inferences.utils.api_utils import get_buckets, deployments_summary, get_deployments
from amber_inferences.utils.custom_models import *
from amber_inferences.utils.inference_scripts import *

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import display

import torch

In [None]:
# Move into the repository root so that the relative paths used throughout the
# notebook (./credentials.json, ./models, ./examples) resolve.
# Guarded so that re-running this cell, or starting the kernel from inside the
# repository, does not fail with FileNotFoundError.
if os.path.basename(os.getcwd()) != 'amber-inferences':
    os.chdir('amber-inferences')

# Explore the Data on the Object Store

In [None]:
# Build a boto3 session from the stored credentials, then an S3 client that
# talks to the endpoint named in the credentials file.
aws_credentials = load_credentials('./credentials.json')

session = boto3.Session(
    region_name=aws_credentials["AWS_REGION"],
    aws_access_key_id=aws_credentials["AWS_ACCESS_KEY_ID"],
    aws_secret_access_key=aws_credentials["AWS_SECRET_ACCESS_KEY"],
)

s3_client = session.client(
    "s3",
    endpoint_url=aws_credentials["AWS_URL_ENDPOINT"],
)

**🚨 Note: this feature has been taken down by Posit so the app is not currently available 🚨**

Look at the deployments available on the object store:

In [None]:
# Fetch the deployment list using the UKCEH username/password from the credentials file.
# NOTE(review): the markdown note above says this service has been taken down, so this
# call may fail on a clean Run All — confirm before relying on it.
all_deployments = get_deployments(aws_credentials['UKCEH_username'], aws_credentials['UKCEH_password'])

List the buckets/countries:

In [None]:
# all_deployments = pd.DataFrame(all_deployments)
# all_deployments[all_deployments['status'] == 'active']

Let's pick one, cri (Costa Rica) and check out the data attached. 

In [None]:
# cr_deployments = deployments_summary(
#     aws_credentials,
#     subset_countries=["Costa Rica"],
#     subset_deployments=["dep000035", "dep000036"],
#     include_image_count=False
# )

To get the files for a given deployment(s):

In [None]:
# cr_deployments

# Log the image keys

In [None]:
from amber_inferences.utils.key_utils import save_keys

In [None]:
# This takes some time, so it is commented out to save time
# save_keys(
#     s3_client,
#     bucket="cri",
#     deployment_id="dep000035",
#     output_file="./examples/example_keys/dep000035_keys.json",
#     subdir="snapshot_images"
# )

In [None]:
# Look at the keys
!head ./examples/example_keys/interesting_timelapse.json

# Download the images

In [None]:
import json
from amber_inferences.utils.inference_scripts import download_image_from_key

In [None]:
# Load the image keys listed in the example keys file
with open('./examples/example_keys/interesting_timelapse.json') as keys_file:
    keys = json.load(keys_file)

In [None]:
# Download every listed key from the "cri" bucket into the local raw-image folder
os.makedirs('./examples/images/dep000035/interesting_timelapse/raw/', exist_ok=True)

for key in keys:
    download_image_from_key(
        s3_client,
        key,
        'cri',
        './examples/images/dep000035/interesting_timelapse/raw/',
    )

In [None]:
# List the downloaded .jpg files in the raw-image folder.
# os.listdir returns entries in arbitrary order, so the list is sorted to give
# every later step (detection, GIF frames) a stable, reproducible order.
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/raw/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/raw/')
    if x.endswith('.jpg')
)

len(image_paths)

In [None]:
# # Open the images in a 2x5 grid
# fig, axs = plt.subplots(5, 6, figsize=(20, 20))
# axs = axs.ravel()

# for i, img_path in enumerate(image_paths):
#     if os.path.exists( img_path):  # Ensure the file exists
#         img = mpimg.imread(img_path)
#         axs[i].imshow(img)
#         axs[i].axis("off")  # Hide axes for better visualization
#         axs[i].set_title(f"Image {i+1}")
#     else:
#         axs[i].axis("off")
#         axs[i].set_title("Missing Image")

# plt.tight_layout()
# plt.show()

In [None]:
# Open the raw images as a frame sequence.
# Sorted here as well, because image_paths is built from os.listdir, which
# gives no ordering guarantee.
images = [Image.open(img) for img in sorted(image_paths)]

# Save as GIF (500 ms per frame, looping forever)
os.makedirs('./examples/images/dep000035/interesting_timelapse/gifs', exist_ok=True)
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/raw_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images
# display(Image.open(gif_path))

# Perform Object Detection on the Images

In [None]:
# Load the full model stack onto the first CUDA device.
# This run uses the v1 localisation model; the classifiers and label/threshold
# files are passed alongside it.
models = load_models(
    localisation_model_path='./models/v1_localizmodel_2021-08-17-12-06.pt',
    binary_model_path='./models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
    order_model_path='./models/dhc_best_128.pth',
    order_threshold_path='./models/thresholdsTestTrain.csv',
    species_model_path='./models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
    species_labels='./models/03_costarica_data_category_map.json',
    device=torch.device("cuda:0"),
)

In [None]:
def image_annotation(image_path, img=None, boxes=None, scale=False, default_colour='grey'):
    """Draw bounding boxes (and optional text labels) onto an image.

    Args:
        image_path: Path of the image to open when `img` is not supplied.
        img: Optional already-opened PIL image; it is drawn on in place.
        boxes: Iterable of dicts with 'x_min', 'y_min', 'x_max', 'y_max' and,
            optionally, 'label' and 'ann_col'. `None` (the default) or an
            empty collection draws nothing. The dicts are not modified.
        scale: When True, coordinates are treated as lying on a 300x300 grid
            and are rescaled to the size of `img`.
        default_colour: Outline/text colour for boxes without 'ann_col'.

    Returns:
        The annotated PIL image (the same object as `img` when one was given).
    """
    if img is None:
        img = Image.open(image_path)
    if boxes is None:
        # Avoid a shared mutable default argument
        boxes = []

    draw = ImageDraw.Draw(img)
    og_width, og_height = img.size

    # The font is loaded lazily and only once, not once per box.
    font = None

    for box in boxes:
        x0 = float(box['x_min'])
        y0 = float(box['y_min'])
        x1 = float(box['x_max'])
        y1 = float(box['y_max'])
        if scale:
            x0 = x0/300*og_width
            y0 = y0/300*og_height
            x1 = x1/300*og_width
            y1 = y1/300*og_height

        # Read optional keys without writing defaults back into the caller's dict
        colour = box.get('ann_col', default_colour)
        label = box.get('label', '')

        if font is None:
            try:
                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", size=50)
            except OSError:
                # Font file not present on this machine: fall back to Pillow's
                # built-in font (smaller) instead of aborting the annotation.
                font = ImageFont.load_default()

        draw.rectangle([x0, y0, x1, y1], outline=colour, width=3)
        draw.text((x0, y0), label, fill=colour, font=font)

    return img

In [None]:
# For each image: call crop_image_only with the localisation model (crops are
# saved to crop_dir), then draw the returned boxes in grey and save the result.
imgs = []
os.makedirs('./examples/images/dep000035/interesting_timelapse/annotated_boxes/', exist_ok=True)
os.makedirs('./examples/images/crops/interesting_timelapse/', exist_ok=True)

for img_path in image_paths:
    crops = crop_image_only(
        image_path=img_path,
        bucket_name="cri",
        localisation_model=models['localisation_model'],
        proc_device=torch.device("cuda:0"),
        csv_file="./examples/interesting_timelapse_crops.csv",
        save_crops=True,
        box_threshold=0.95,
        crop_dir="./examples/images/crops/interesting_timelapse",
        job_name=None,
    )

    # One unlabelled grey box per returned row
    boxes = [
        {
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': '',
            'ann_col': 'grey',
        }
        for _, row in crops.iterrows()
    ]
    del crops

    img = image_annotation(img_path, boxes=boxes)

    # Save under the same file name as the source image
    img.save(f'./examples/images/dep000035/interesting_timelapse/annotated_boxes/{os.path.basename(img_path)}')



In [None]:
# fig, axs = plt.subplots(8, 4, figsize=(20, 30))
# axs = axs.ravel()

# for i, img in enumerate(imgs):
#     axs[i].imshow(img)
#     axs[i].axis('off')
#     axs[i].set_title(f"Image {i+1}")

# plt.tight_layout()
# plt.show()

Save to gif

In [None]:
# Collect the annotated frames and open them as a sequence.
# Sorted because os.listdir returns entries in arbitrary order, which would
# otherwise shuffle the frames of the GIF.
# Note: this rebinds `image_paths` to the annotated frames (not the raw images).
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes/')
)
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/object_detection_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# # display(Image.open(gif_path))
# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images

# Flatbug

In [None]:
# Reload the model stack, this time with the flatbug localisation model.
# The classifier paths are the same as in the earlier load_models call.
models = load_models(
    localisation_model_path='./models/flat_bug_M.pt',
    binary_model_path='./models/moth-nonmoth-effv2b3_20220506_061527_30.pth',
    order_model_path='./models/dhc_best_128.pth',
    order_threshold_path='./models/thresholdsTestTrain.csv',
    species_model_path='./models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt',
    species_labels='./models/03_costarica_data_category_map.json',
    device=torch.device("cuda:0"),
)

In [None]:

os.makedirs('./examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/', exist_ok=True)
os.makedirs('./examples/images/crops/interesting_timelapse_flatbug/', exist_ok=True)

# Always run detection on the RAW images. The GIF cell above rebinds
# `image_paths` to the already-annotated frames, so relying on that variable
# would feed images with boxes drawn on them to the detector.
# Absolute paths are used because the classifier cells below look crops up by
# the absolute raw path.
# NOTE(review): assumes crop_image_only records the path it is given in the
# 'image_path' column — confirm.
image_paths = sorted(
    os.path.abspath(os.path.join('./examples/images/dep000035/interesting_timelapse/raw/', x))
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/raw/')
    if x.endswith('.jpg')
)

# One DataFrame of detections per image; concatenated in a later cell
all_crops = []

for img_path in image_paths:
    crops = crop_image_only(
        image_path=img_path,
        bucket_name="cri",
        localisation_model=models['localisation_model'],
        proc_device=torch.device("cuda:0"),
        csv_file="./examples/interesting_timelapse_flatbug.csv",
        save_crops=True,
        box_threshold=0,
        crop_dir="./examples/images/crops/interesting_timelapse_flatbug",
        job_name=None,
    )
    # Drop the placeholder rows emitted for images without detections
    crops = crops.loc[crops['crop_status'] != 'NO DETECTIONS FOR IMAGE',]

    all_crops.append(crops)
    if crops.shape[0] > 0:
        boxes = [
            {
                'x_min': row['x_min'],
                'y_min': row['y_min'],
                'x_max': row['x_max'],
                'y_max': row['y_max'],
                'label': '',
                'ann_col': 'grey',
            }
            for _, row in crops.iterrows()
        ]
        del crops
        img = image_annotation(img_path, boxes=boxes, scale=False)

        # save the image
        img.save(f'./examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/{os.path.basename(img_path)}')



In [None]:
# fig, axs = plt.subplots(8, 4, figsize=(20, 30))
# axs = axs.ravel()

# for i, img in enumerate(imgs):
#     axs[i].imshow(img)
#     axs[i].axis('off')
#     axs[i].set_title(f"Image {i+1}")

# plt.tight_layout()
# plt.show()

In [None]:
# Collect the flatbug-annotated frames and open them as a sequence.
# Sorted because os.listdir returns entries in arbitrary order, which would
# otherwise shuffle the frames of the GIF.
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes_flatbug/')
)
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/flatbug_detection_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# display(Image.open(gif_path))
# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images

In [None]:
# Combine the per-image detection tables into one DataFrame with a fresh 0..n-1 index.
# Guarded so the cell can be re-run: once `all_crops` is already a DataFrame,
# passing it to pd.concat again would raise a TypeError.
if not isinstance(all_crops, pd.DataFrame):
    all_crops = pd.concat(all_crops, ignore_index=True)

# Binary Classifier

In [None]:
os.makedirs('./examples/images/dep000035/interesting_timelapse/annotated_boxes_binary/', exist_ok=True)

# Preprocessing applied to each crop before it is passed to a classifier:
# resize to 300x300, convert to a tensor, then normalise every channel with
# mean 0.5 and std 0.5.
transform_species = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [None]:
# Rebuild the list of raw images as absolute paths (the form compared against
# all_crops['image_path'] below).
# Sorted because os.listdir order is arbitrary; restricted to .jpg, like the
# first raw listing, so stray files in the folder are not opened as images.
image_paths = sorted(
    os.path.abspath(os.path.join('./examples/images/dep000035/interesting_timelapse/raw/', x))
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/raw/')
    if x.endswith('.jpg')
)

In [None]:
# For every raw image, classify each detected crop with the binary model and
# draw its box in green when the prediction is 'moth', red otherwise.
for image_path in image_paths:
    original_image = Image.open(image_path).convert("RGB")
    original_width, original_height = original_image.size

    # Detections recorded for this image, without the "no detections" placeholder rows
    crops_df = all_crops.loc[all_crops['image_path'] == image_path]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'NO DETECTIONS FOR IMAGE']

    if crops_df.shape[0] == 0:
        # Nothing detected: no annotated copy is written for this image
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        bounds = (row['x_min'], row['y_min'], row['x_max'], row['y_max'])
        cropped_image = original_image.crop(bounds)
        cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(torch.device('cuda:0'))
        binary_prediction = classify_box(cropped_tensor, models['classification_model'])

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': binary_prediction[0],
            'ann_col': 'green' if binary_prediction[0] == 'moth' else 'red',
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'./examples/images/dep000035/interesting_timelapse/annotated_boxes_binary/{os.path.basename(image_path)}')

In [None]:
# Collect the binary-classifier frames and open them as a sequence.
# Sorted because os.listdir returns entries in arbitrary order, which would
# otherwise shuffle the frames of the GIF.
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes_binary/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes_binary/')
)
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/binary_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# display(Image.open(gif_path))
# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images

# Order Classifier

In [None]:
# Peek at the path format stored with the crops, for comparison with image_paths[0] in the next cell
all_crops['image_path'][0]

In [None]:
# Rebuild the list of raw images as absolute paths (the form compared against
# all_crops['image_path'] below).
# Sorted because os.listdir order is arbitrary; restricted to .jpg, like the
# first raw listing, so stray files in the folder are not opened as images.
image_paths = sorted(
    os.path.abspath(os.path.join('./examples/images/dep000035/interesting_timelapse/raw/', x))
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/raw/')
    if x.endswith('.jpg')
)
image_paths[0]

In [None]:
os.makedirs('./examples/images/dep000035/interesting_timelapse/annotated_boxes_order/', exist_ok=True)

# For every raw image, run the order classifier on each detected crop and draw
# its box in green when the predicted order contains 'Lepidoptera', red otherwise.
imgs = []
for image_path in image_paths:
    original_image = Image.open(image_path).convert("RGB")
    original_width, original_height = original_image.size

    # Detections recorded for this image, without the "no detections" placeholder rows
    crops_df = all_crops.loc[all_crops['image_path'] == image_path]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'NO DETECTIONS FOR IMAGE']

    if crops_df.shape[0] == 0:
        # Nothing detected: no annotated copy is written for this image
        continue

    boxes = []
    for _, row in crops_df.iterrows():
        bounds = (row['x_min'], row['y_min'], row['x_max'], row['y_max'])
        cropped_image = original_image.crop(bounds)
        cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(torch.device('cuda:0'))
        order_prediction = classify_order(
            cropped_tensor,
            models['order_model'],
            models['order_model_labels'],
            models['order_model_thresholds'],
        )

        boxes.append({
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': order_prediction[0],
            'ann_col': 'green' if 'Lepidoptera' in order_prediction[0] else 'red',
        })

    im = image_annotation(image_path, boxes=boxes, scale=False)
    im.save(f'./examples/images/dep000035/interesting_timelapse/annotated_boxes_order/{os.path.basename(image_path)}')

    imgs.append(im)


In [None]:
# Collect the order-classifier frames and open them as a sequence.
# Sorted because os.listdir returns entries in arbitrary order, which would
# otherwise shuffle the frames of the GIF.
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes_order/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes_order/')
)
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/order_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# display(Image.open(gif_path))
# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images

# Species Classifier

In [None]:
# Rebuild the list of raw images as absolute paths (the form compared against
# all_crops['image_path'] below).
# Sorted because os.listdir order is arbitrary; restricted to .jpg, like the
# first raw listing, so stray files in the folder are not opened as images.
image_paths = sorted(
    os.path.abspath(os.path.join('./examples/images/dep000035/interesting_timelapse/raw/', x))
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/raw/')
    if x.endswith('.jpg')
)

# all_crops['image_path'] = all_crops['image_path'].replace('annotated_boxes', 'raw')

# Print one path from each side so that a mismatch in format is easy to spot
print(image_paths[0])
print(all_crops['image_path'][0])

In [None]:
os.makedirs('./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/', exist_ok=True)

# For every raw image, run the order classifier on each detected crop; crops
# whose predicted order contains 'Lepidoptera' are passed to the species
# classifier and labelled "<top species>, <confidence>%" in green. All other
# crops get an unlabelled red box.
imgs = []
for i, image_path in enumerate(image_paths):
    imge = Image.open(image_path).convert("RGB")
    original_image = imge.copy()
    original_width, original_height = imge.size

    crops_df = all_crops.loc[all_crops['image_path'] == image_path, ]
    crops_df = crops_df.loc[crops_df['crop_status'] != 'NO DETECTIONS FOR IMAGE',]

    if crops_df.shape[0] > 0:
        boxes = []
        for j, row in crops_df.iterrows():
            cropped_image = original_image.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))
            cropped_tensor = transform_species(cropped_image).unsqueeze(0).to(torch.device('cuda:0'))
            order_prediction = classify_order(
                cropped_tensor,
                models['order_model'],
                models['order_model_labels'],
                models['order_model_thresholds']
            )
            label = ""
            ann_col = 'red'

            if 'Lepidoptera' in order_prediction[0]:
                species_names, species_confidences = classify_species(
                    cropped_tensor,
                    models['species_model'],
                    models['species_model_labels'],
                    5
                )
                # Fixed: the previous '%.2f'.format(...) has no {} placeholder, so it
                # always produced the literal text "%.2f" instead of the number.
                label = f"{species_names[0]}, {species_confidences[0]*100:.2f}%"
                ann_col='green'

            boxes.append({
                'x_min': row['x_min'],
                'y_min': row['y_min'],
                'x_max': row['x_max'],
                'y_max': row['y_max'],
                'label': label,
                'ann_col': ann_col
            })

        im = image_annotation(image_path, boxes=boxes, scale=False)
        im.save(f'./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/{os.path.basename(image_path)}')

In [None]:
# Count the annotated species frames that were written
image_paths = [
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/')
]
print(len(image_paths))

In [None]:
# Collect the species-classifier frames and open them as a sequence.
# Sorted because os.listdir returns entries in arbitrary order, which would
# otherwise shuffle the frames of the GIF.
image_paths = sorted(
    os.path.join('./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/', x)
    for x in os.listdir('./examples/images/dep000035/interesting_timelapse/annotated_boxes_species/')
)
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/species_images.gif"
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)

# display(Image.open(gif_path))
# Close the underlying files explicitly rather than waiting for garbage collection
for frame in images:
    frame.close()
del images

# Running the Pipeline from Command Line

The entire pipeline can be run from the command line. The commands below use `subprocess` for demonstration purposes, but for high-throughput analysis we recommend using Slurm. There are example Slurm scripts in the `./slurm_scripts` directory: each regional bash file (e.g. `costarica_final.sh`) calls the sbatch file `array_processor.sh`.

In [None]:
import subprocess

# --- Settings for a single command-line pipeline run ---
chunk_id = 1
batch_size = 20  # runs for 20 images at a time

country = 'costarica'
region = "cri"

credentials_file = "./credentials.json"

deployment_id = "dep000035"
output_base_dir = f"./data/{deployment_id}/{country}_test"
json_file = "./examples/example_keys/interesting_timelapse.json"

# Create the output folder and its per-deployment sub-folder
os.makedirs(output_base_dir, exist_ok=True)
os.makedirs(f"{output_base_dir}/{deployment_id}", exist_ok=True)

species_model = "./models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt"
species_labels = "./models/03_costarica_data_category_map.json"

In [None]:
# Results file name: <deployment_id>_<chunk id zero-padded to 4 digits>.csv in the output folder
batch_number_padded = format(chunk_id, "04d")
csv_file = f"{output_base_dir}/{deployment_id}_{batch_number_padded}.csv"
print(f"Results will save to {csv_file}")

In [None]:
import sys

# Run the inference CLI as a child process.
# sys.executable is used instead of a bare "python3" so the command runs with
# the same interpreter (and therefore the same installed packages) as this
# kernel, rather than whichever python3 happens to be first on PATH.
command = [
    sys.executable, "-m",
    "amber_inferences.cli.perform_inferences",
    "--chunk_id", str(chunk_id),
    "--batch_size", str(batch_size),
    "--json_file", json_file,
    "--output_dir", output_base_dir,
    "--bucket_name", region,
    "--credentials_file", credentials_file,
    "--csv_file", csv_file,
    "--species_model_path", species_model,
    "--species_labels", species_labels,
    "--perform_inference",
    "--remove_image",
    "--box_threshold", "0",
    "--binary_model_path", "./models/moth-nonmoth-effv2b3_20220506_061527_30.pth",
    "--localisation_model_path", "./models/flat_bug_M.pt",
    "--order_model_path", "./models/dhc_best_128.pth",
    "--order_thresholds_path", "./models/thresholdsTestTrain.csv",
    "--skip_processed",
    "--verbose"
]

# Capture stdout/stderr as text so they can be shown in the notebook
result = subprocess.run(command, capture_output=True, text=True)

print(result.stdout)

# Only show stderr when the command failed
if result.returncode != 0:
    print("STDERR:\n", result.stderr)

In [None]:
from io import StringIO
import sys
import unittest
from unittest.mock import patch, MagicMock
from amber_inferences.utils.config import load_credentials
from amber_inferences.utils.deployment_summary import count_files, print_deployments

In [None]:
from contextlib import redirect_stdout


def test_print_deployments_active_only():
    """Call `print_deployments` for active deployments only and return its printed output.

    This replaces top-level scratch code that referenced an undefined `self`
    and redirected `sys.stdout` without restoring it, which silenced all later
    notebook output.

    Returns:
        str: Everything `print_deployments` wrote to stdout.
    """
    aws_credentials = load_credentials("./credentials.json")

    # NOTE(review): the mocks below are constructed but NOT patched into
    # amber_inferences, exactly as in the original scratch code, so
    # print_deployments runs against the real services. Wire them in with
    # unittest.mock.patch once the correct patch targets are confirmed.
    mock_get_deployments = MagicMock()
    mock_get_deployments.return_value = [
        {"deployment_id": "dep000020", "location_name": "trap_4", "status": "active", "country": "Panama", "country_code": "pan"},
        {"deployment_id": "dep000022", "location_name": "trap_6", "status": "inactive", "country": "Panama", "country_code": "pan"},
    ]

    # Mock count_files to return a specific number
    mock_count_files = MagicMock()
    mock_count_files.return_value = 0

    # Mock boto3 session and client
    mock_s3_client = MagicMock()
    mock_boto_session = MagicMock()
    mock_boto_session.return_value.client.return_value = mock_s3_client

    # Capture print output; redirect_stdout restores sys.stdout afterwards,
    # even if print_deployments raises.
    captured_output = StringIO()
    with redirect_stdout(captured_output):
        print_deployments(aws_credentials, include_inactive=False, print_file_count=True)

    output = captured_output.getvalue()

    # Expected strings, kept disabled because they only hold for the mocked data above:
    # assert "Deployment ID: dep000020 - Location: trap_4" in output
    # assert " - This deployment has 0 images." in output
    # assert "Deployment ID: fake_deployment - Location: fake_deployment" not in output

    return output

In [None]:
test_print_deployments_active_only()

In [None]:
# NOTE(review): the only `result` assigned in this notebook is the return value of
# subprocess.run in the pipeline cell above, which does not support item access,
# so this line raises TypeError on a clean run. It also rebinds `keys`, which was
# loaded from the example JSON earlier. Presumably left over from a different
# `result` — confirm the intended source.
keys = result['keys']

In [None]:


# Tally the keys by file extension (the match is case-sensitive)
image_count = sum(1 for x in keys if x.endswith(".jpg"))
audio_count = sum(1 for x in keys if x.endswith(".wav"))

In [None]:
image_count