# Running the Pipeline from Command Line

The entire pipeline can be run from the command line. The commands below are run through `subprocess` for demonstration purposes; for high-throughput analysis we recommend submitting the jobs with Slurm instead. Example Slurm scripts are in the `./slurm_scripts` directory: each regional bash file (e.g. `costarica_final.sh`) calls the sbatch script `array_processor.sh`.

In [None]:
# Set the working directory to the repository checkout in the user's home
# directory, so the relative paths used in later cells (./models, ./data,
# ./examples, ./credentials.json) resolve from there.
import os
os.chdir(os.path.expanduser('~/amber-inferences'))

In [None]:
# IPython magic: pip-install the project in the current directory in editable
# mode (-e).
%pip install -e .

In [None]:
# from amber_inferences.utils.config import load_credentials
# from amber_inferences.utils.api_utils import deployments_summary, get_deployments
# from amber_inferences.utils.custom_models import *
# from amber_inferences.utils.inference_scripts import *
# from amber_inferences.utils.plotting import *
# from amber_inferences.utils.tracking import *

import matplotlib.pyplot as plt
from IPython.display import display
from IPython.display import Markdown as md

import torch
import requests

import torch
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

In [None]:
from sys import path as syspath
from os import path as ospath

# Make the repository importable without hard-coding one user's home directory;
# this is the same location the notebook chdir()s into in its first cell.
syspath.append(ospath.expanduser('~/amber-inferences'))

## Inferences

In [None]:
import subprocess

# --- Which slice of the key list to process --------------------------------
chunk_id = 1
batch_size = 20  # runs for 20 images at a time

# --- Deployment being processed ---------------------------------------------
country = "costarica"
region = "cri"
deployment_id = "dep000035"

# --- Inputs ------------------------------------------------------------------
credentials_file = "./credentials.json"
json_file = "./examples/example_keys/test.json"  # alternative: interesting_timelapse.json
species_model = "./models/turing-costarica_v03_resnet50_2024-06-04-16-17_state.pt"
species_labels = "./models/03_costarica_data_category_map.json"

# --- Outputs -----------------------------------------------------------------
output_base_dir = f"./data/{deployment_id}/{country}_test"

# Create the output directory and a per-deployment sub-directory inside it.
os.makedirs(output_base_dir, exist_ok=True)
os.makedirs(f"{output_base_dir}/{deployment_id}", exist_ok=True)

In [None]:
# Zero-pad the chunk number to four digits (1 -> "0001") and use it to name
# this chunk's results file inside the output directory.
batch_number_padded = format(chunk_id, "04d")
csv_file = f"{output_base_dir}/{deployment_id}_{batch_number_padded}.csv"
# json_file = csv_file.replace('csv', 'json')

print(f"Results will save to {csv_file}")
# print(f"Embeddings will save to {json_file}")

In [None]:
# Delete any results CSV left over from a previous run of this chunk.
# Attempt the removal directly (EAFP) instead of checking for existence first,
# which avoids the check-then-delete race; a missing file is the expected case
# on a first run, so it is deliberately ignored.
try:
    os.remove(csv_file)
except FileNotFoundError:
    pass

In [None]:
import sys

# Argument vector for the inference CLI (passed to subprocess as a list, so no
# shell quoting is involved).
#
# The module is launched with the interpreter running this notebook's kernel:
# a bare "python3" is resolved through PATH and may belong to a different
# environment from the one `%pip install -e .` installed the package into.
command = [
    sys.executable, "-m",
    "amber_inferences.cli.perform_inferences",
    # Values taken from the configuration cell above.
    "--chunk_id", str(chunk_id),
    "--batch_size", str(batch_size),
    "--json_file", json_file,
    "--output_dir", output_base_dir,
    "--bucket_name", region,
    "--credentials_file", credentials_file,
    "--csv_file", csv_file,
    "--species_model_path", species_model,
    "--species_labels", species_labels,
    "--perform_inference",
    "--remove_image",
    "--box_threshold", "0",
    # Model weights / thresholds fixed for this demo.
    "--binary_model_path", "./models/moth-nonmoth-effv2b3_20220506_061527_30.pth",
    "--localisation_model_path", "./models/v1_localizmodel_2021-08-17-12-06.pt",
    "--order_model_path", "./models/dhc_best_128.pth",
    "--order_thresholds_path", "./models/thresholdsTestTrain.csv",
    "--skip_processed",
    "--verbose"
]

In [None]:
# Run the CLI, capturing stdout and stderr as text. `check` is not set, so a
# non-zero exit status does not raise here; the return code is inspected in the
# next cell instead.
result = subprocess.run(command, capture_output=True, text=True)

In [None]:
# Always show what the CLI wrote to stdout.
print(result.stdout)

# A non-zero (truthy) exit status means the run failed: show stderr as well.
if result.returncode:
    print("STDERR:\n", result.stderr)

In [None]:
import shlex

# Print a copy-pasteable shell equivalent of the subprocess call.
# The line is derived from `command` itself so the printed flags cannot drift
# from what was actually run (the previous hand-written copy named a different
# --localisation_model_path from the one in `command`).
# The launcher is always shown as plain `python3`, whatever command[0] holds,
# and every argument is shell-quoted where necessary.
print(" ".join(shlex.quote(arg) for arg in ["python3", *command[1:]]))

## Tracking

In [None]:
import json
# NOTE(review): this star import is presumably what supplies crop_costs,
# find_best_matches and track_id_calc used in the cells below - and possibly
# `pd` and image_annotation as well. Confirm, and prefer explicit imports so
# the origin of each name is visible.
from amber_inferences.utils.tracking import *
import os
# Same working directory as the notebook's first cell, so the relative paths in
# this section resolve from the repository checkout.
os.chdir(os.path.expanduser('~/amber-inferences'))

In [None]:
# Import pandas explicitly rather than relying on `pd` happening to be exported
# by the star import from amber_inferences.utils.tracking.
import pandas as pd

# Results CSV for deployment dep000035, chunk 0001 - the path the Inferences
# section writes to with its default settings.
results_df = pd.read_csv('./data/dep000035/costarica_test/dep000035_0001.csv')

In [None]:
# Read the JSON file that sits next to the results CSV (same stem, .json
# extension) and parse it into `embedding_list`.
batch_json = './data/dep000035/costarica_test/dep000035_0001.json'
with open(batch_json, encoding="utf-8") as fh:
    embedding_list = json.loads(fh.read())

In [None]:
# Peek at the 'image_size' recorded for 'crop_1' of the first entry.
embedding_list[list(embedding_list)[0]]['crop_1']['image_size']

In [None]:
# crop_costs is not defined in this notebook - presumably it comes from the
# star import of amber_inferences.utils.tracking.
# NOTE(review): judging by the names, this scores pairs of crops from the
# loaded embeddings; confirm against the helper's documentation.
crop_similarities = crop_costs(embedding_list)

In [None]:
crop_similarities

In [None]:
# find_best_matches is not defined in this notebook - presumably it comes from
# the star import of amber_inferences.utils.tracking.
# NOTE(review): presumably picks, per crop, the best-scoring counterpart from
# crop_similarities; confirm against the helper's documentation.
best_matches = find_best_matches(crop_similarities)

In [None]:
best_matches

In [None]:
# Later cells use the result as a DataFrame with 'track_id', 'image_path' and
# 'crop_id' columns.
# NOTE(review): the meaning of the positional `1` is not visible from here -
# presumably a cost threshold; confirm against track_id_calc's signature and
# pass it by keyword if possible.
tracks_df = track_id_calc(best_matches, 1)

In [None]:
tracks_df

In [None]:
import matplotlib.colors as mcolors

# One colour per distinct track. NaN ids are dropped so they neither count as a
# track nor end up in the sort below.
track_ids = sorted(tracks_df['track_id'].dropna().unique())
num_tracks = len(track_ids)

# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` accepts the same (name, lut) arguments.
# 'hsv' is cyclic - its first and last entries are both red - so sample one
# extra colour and leave the final one unused, otherwise the first and last
# track would be indistinguishable.
cmap = plt.get_cmap('hsv', num_tracks + 1)  # tab20

# Map track_id to hex colours
track_id_to_color = {
    track_id: mcolors.to_hex(cmap(i)) for i, track_id in enumerate(track_ids)
}

# Add colour column to DataFrame (rows without a track_id get NaN)
tracks_df['colour'] = tracks_df['track_id'].map(track_id_to_color)

In [None]:
# Left-join the tracks onto the inference results so every result row is kept;
# rows without a matching track get NaN in the track columns. The results'
# `crop_status` column is paired with the tracks' `crop_id` column.
all_crops_merge = results_df.merge(
    tracks_df,
    how='left',
    left_on=['image_path', 'crop_status'],
    right_on=['image_path', 'crop_id'],
).reset_index(drop=True)

# Display the joined table ordered by track (the sorted copy is not stored).
all_crops_merge.sort_values('track_id')

In [None]:
# Absolute paths of the raw timelapse frames, in sorted filename order.
# os.listdir returns entries in arbitrary order, so sort explicitly to get a
# reproducible sequence.
raw_image_dir = './examples/images/dep000035/interesting_timelapse/raw/'
image_paths_raw = [
    os.path.abspath(os.path.join(raw_image_dir, name))
    for name in sorted(os.listdir(raw_image_dir))
]

In [None]:
# Directory that receives the annotated frames. Create it up front: later cells
# save images into it and list its contents, both of which fail if it is
# missing.
output_dir = './test'
os.makedirs(output_dir, exist_ok=True)

In [None]:
# NOTE(review): all_crops_merge_subset is first assigned in a later cell (the
# annotation loop), so on a fresh top-to-bottom run this cell raises NameError.
# Run that cell first, or move this inspection below it.
all_crops_merge_subset

In [None]:
image_paths_raw

In [None]:
# Keep only tracks that occur on more than one row; tracks seen once are
# dropped. Rows with no track_id never match and are dropped as well.
track_counts = all_crops_merge["track_id"].value_counts()
valid_tracks = track_counts[track_counts > 1].index
all_crops_merge_subset = all_crops_merge[
    all_crops_merge["track_id"].isin(valid_tracks)
].reset_index(drop=True)

# The results' `image_path` values are matched on this prefix plus the raw
# frame's file name.
results_image_dir = './data/dep000035/costarica_test'

for image_path in image_paths_raw:
    # Rows for this frame that are real detections and carry a track id.
    frame_key = f'{results_image_dir}/{os.path.basename(image_path)}'
    is_this_frame = all_crops_merge_subset['image_path'] == frame_key
    is_detection = all_crops_merge_subset['crop_status'] != 'NO DETECTIONS FOR IMAGE'
    has_track = all_crops_merge_subset['track_id'].notna()
    crops_df = all_crops_merge_subset.loc[is_this_frame & is_detection & has_track]

    # One box per remaining crop, labelled and coloured by its track.
    # An empty crops_df simply yields no boxes.
    boxes = [
        {
            'x_min': row['x_min'],
            'y_min': row['y_min'],
            'x_max': row['x_max'],
            'y_max': row['y_max'],
            'label': row['track_id'],
            'ann_col': row['colour'],
        }
        for _, row in crops_df.iterrows()
    ]

    # The frame is not opened here: image_annotation is given the path and
    # returns the object that gets saved under the same file name.
    im = image_annotation(image_path, boxes=boxes, scale=False)
    out_path = f'{output_dir}/{os.path.basename(image_path)}'
    im.save(out_path)

In [None]:
# Collect the annotated frames in sorted filename order. os.listdir gives no
# ordering guarantee, and the GIF plays frames in list order (presumably the
# file names sort chronologically - confirm for other datasets).
image_paths = [os.path.join(output_dir, x) for x in sorted(os.listdir(output_dir))]
if not image_paths:
    raise FileNotFoundError(f"No annotated frames found in {output_dir}")
images = [Image.open(img) for img in image_paths]

# Save as GIF
gif_path = "./examples/images/dep000035/interesting_timelapse/gifs/test_tracking_images.gif"
os.makedirs(os.path.dirname(gif_path), exist_ok=True)  # the gifs/ folder may not exist yet
try:
    # First frame is the base image; the rest are appended. 500 ms per frame,
    # loop=0 repeats forever.
    images[0].save(gif_path, save_all=True, append_images=images[1:], duration=500, loop=0)
finally:
    # Close the underlying files whether or not the save succeeded.
    for frame in images:
        frame.close()
    del images

In [None]:
# Show the GIF inline through a Markdown image tag that points at its absolute
# path (alt text 'trackingGif', title 'tracking').
md(f"![trackingGif]({os.path.abspath(gif_path)} 'tracking')")