In [1]:
# Normalise the working directory: when the kernel was started inside a
# `notebooks` folder, step up one level (presumably so the relative "data/..."
# and "runs/..." paths used further down resolve from the project root).
import os
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
# Change into the current directory, i.e. a no-op move that makes IPython
# echo the working directory into the cell output.
%cd .
# Reload imported modules automatically before executing each cell so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Data Exploration & Wrangling


In [None]:
# One-off environment setup (execution count is empty: this cell is not part of
# a normal run). NOTE(review): none of these installs pin a version, so a fresh
# environment may resolve to different releases than the ones used here.
%pip install fiftyone imagehash
# Swap the `fiftyone-db` package for the Ubuntu 22.04 specific build.
# Presumably required by the host OS this notebook was run on; confirm before
# running elsewhere.
%pip uninstall fiftyone-db -y
%pip install fiftyone-db-ubuntu2204 --force-reinstall

In [2]:
from pathlib import Path

import fiftyone as fo
import fiftyone.utils.yolo as fouy
from fiftyone import ViewField as F

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import imagehash
from tqdm import tqdm

from til_23_cv import ReIDEncoder, cos_sim, thres_strategy_A

### Load Dataset


In [3]:
# Name under which the dataset is registered in FiftyOne, the directory it is
# imported from, and the splits to import.
name = "til23plush"
dataset_dir = "data/til23plush"
splits = ("train", "val", "test")

# NOTE: Uncomment the call below to drop the cached dataset so that the next
# cell re-imports it from `dataset_dir`.
# fo.delete_dataset(name)

In [4]:
# Load the cached FiftyOne dataset when it exists; otherwise import it from
# disk once and cache an image hash on every sample.
if name in fo.list_datasets():
    ds = fo.load_dataset(name)

    # Delete any predictions still attached from a previous session.
    # error_level=2 is the same value the original passed positionally; it is
    # meant to make a missing "predictions" field a silent no-op.
    ds.delete_sample_field("predictions", error_level=2)
    ds.delete_evaluations()
else:
    ds = fo.Dataset(name=name, persistent=True)
    for split in splits:
        # Each split is imported from the same YOLOv5 directory and tagged with
        # its split name so it can be selected by tag later.
        ds.add_dir(
            dataset_dir=dataset_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            include_all_data=True,
            split=split,
            tags=split,
        )

    # Add image hashes for dupe detection later.
    # The field is called `phash`, but the value stored is a difference hash
    # (`imagehash.dhash`): other perceptual hashes gave multiple false
    # positives, so a hash that behaves closer to a cryptographic one was chosen.
    for sample in ds.iter_samples(progress=True, autosave=True):
        # Use a context manager so each image file is closed right after it is
        # hashed instead of leaving thousands of handles to the garbage collector.
        with Image.open(sample.filepath) as img:
            sample["phash"] = str(imagehash.dhash(img))

print(ds)

Name:        til23plush
Media type:  image
Num samples: 8064
Persistent:  True
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    phash:        fiftyone.core.fields.StringField


In [5]:
# Relabel all detections to plushie
view = ds.set_field(
    "ground_truth.detections", F("detections").map(F().set_field("label", "plushie"))
)

### Dupe Detection

There were no dupes; the object detection task this time is just that easy.

In [6]:
# Filter view to only show images where `phash` has more than 1 instance
counts = filter(lambda i: i[1] > 1, view.count_values("phash").items())
counts = [k for k, v in counts]
dupes = view.filter_field("phash", F().is_in(counts)).filter_field(
    "tags", F().contains(["train", "val"])
)

### Export YOLO Dataset for Ultralytics

Converts `til23plush` to `til23plushonly`.

In [15]:
# Exporter options are documented at:
# https://docs.voxel51.com/api/fiftyone.utils.yolo.html#fiftyone.utils.yolo.YOLOv5DatasetExporter
# Only the train and val splits are exported here; note this rebinds `splits`.
splits = ("train", "val")
config = {
    "export_dir": "data/til23plushonly",
    "dataset_type": fo.types.YOLOv5Dataset,
    "label_field": "ground_truth",
    "export_media": "symlink",
    "include_path": False,
}

In [16]:
# Export each split of the relabelled view into the shared export directory,
# selecting the samples by their split tag.
for split in splits:
    view.filter_field("tags", F().contains([split])).export(split=split, **config)

Directory 'data/til23plushonly' already exists; export will be merged with existing files
 100% |███████████████| 5664/5664 [5.1s elapsed, 0s remaining, 1.1K samples/s]       
Directory 'data/til23plushonly' already exists; export will be merged with existing files
 100% |█████████████████| 800/800 [691.1ms elapsed, 0s remaining, 1.2K samples/s]       


### Eval Object Detection

In [13]:
# Split to evaluate, and the directory holding the detector's YOLO-format
# prediction label files for that split.
split = "test"
label_dir = f"runs/detect/predict/{split}/labels"

In [14]:
# Select the samples tagged with the split under evaluation.
sview = view.filter_field("tags", F().contains([split]))
# Attach the label files found under `label_dir` to those samples as a
# "predictions" field, using the single "plushie" class.
fouy.add_yolo_labels(
    sample_collection=sview,
    label_field="predictions",
    labels_path=label_dir,
    classes=["plushie"],
)
# Compare predictions against the ground truth; results are stored under the
# "eval" key.
# NOTE(review): the output recorded for this cell shows 0.00 precision/recall
# with a support of 0.0, i.e. no ground-truth objects were counted. Verify that
# the test split actually carries ground-truth boxes and that `label_dir` holds
# label files matching these samples before trusting this report.
results = sview.evaluate_detections(
    "predictions",
    gt_field="ground_truth",
    eval_key="eval",
)
results.print_report()

Sample field 'eval_tp' does not exist
Sample field 'eval_fp' does not exist
Sample field 'eval_fn' does not exist
Evaluating detections...
 100% |███████████████| 1600/1600 [3.7s elapsed, 0s remaining, 461.7 samples/s]      
              precision    recall  f1-score   support

     plushie       0.00      0.00      0.00       0.0

   micro avg       0.00      0.00      0.00       0.0
   macro avg       0.00      0.00      0.00       0.0
weighted avg       0.00      0.00      0.00       0.0



### Export Image Classification Dataset for Suspect Recognition

Converts `til23plush` to `til23reid`.

In [3]:
# Per-split patch padding, paired positionally with `splits` by the export
# loop: 0.5 for train, 0.0 for val.
padding = (0.5, 0.0)
label_field = "ground_truth"
export_dir = "data/til23reid"
splits = ("train", "val")

In [4]:
# Export one classification directory tree per split. This reads from `ds`
# rather than the relabelled `view`, and each split uses its own padding value
# (passed to the exporter as `alpha`).
for split, alpha in zip(splits, padding):
    split_samples = ds.filter_field("tags", F().contains([split]))
    split_samples.export(
        export_dir=f"{export_dir}/{split}",
        dataset_type=fo.types.ImageClassificationDirectoryTree,
        label_field=label_field,
        alpha=alpha,
        image_format=".png",
    )

Detected an image classification exporter and a label field 'ground_truth' of type <class 'fiftyone.core.labels.Detections'>. Exporting image patches...
 100% |█████████████| 12321/12321 [2.1m elapsed, 0s remaining, 95.2 samples/s]       
Detected an image classification exporter and a label field 'ground_truth' of type <class 'fiftyone.core.labels.Detections'>. Exporting image patches...
 100% |███████████████| 1699/1699 [14.6s elapsed, 0s remaining, 117.3 samples/s]      


### Eval Suspect Recognition

Used to find optimal threshold parameters.

NOTE: Suspect folder used below was handpicked from the `til23reid` dataset.

NOTE: We use the suspect images as the detection pool. However, that pool holds 10 candidates, whereas a test image has only ~3 detections. This mismatch might affect parameter tuning accuracy.

In [4]:
# Inputs for the suspect-recognition evaluation:
# - val_dir: validation split of the exported classification dataset
# - sus_dir: folder of suspect reference images (globbed as `*.png` below)
# - reid_path: path handed to `ReIDEncoder` to load the re-identification model
val_dir = "data/til23reid/val"
sus_dir = "data/val_suspects"
reid_path = "reid.torchscript"

In [5]:
# Load the validation patches as an image-classification dataset; with this
# directory-tree format each sample gets a `ground_truth` classification label,
# which later cells read via `sample["ground_truth"].label`.
# No name or `persistent` flag is given here, unlike the detection dataset.
imds = fo.Dataset.from_dir(
    dataset_dir=val_dir,
    dataset_type=fo.types.ImageClassificationDirectoryTree,
)

 100% |███████████████| 1699/1699 [565.6ms elapsed, 0s remaining, 3.0K samples/s]      


In [6]:
# Load encoder and encode suspects.
encoder = ReIDEncoder(reid_path)
embeds = {}
for pth in Path(sus_dir).glob("*.png"):
    lbl = pth.stem
    im = np.array(Image.open(pth))
    embeds[lbl] = encoder([im])[0]
embeds = sorted(embeds.items())
embeds = list(zip(*embeds))
sus_cls = list(embeds[0]) # type: ignore
sus_embeds = list(embeds[1]) # type: ignore

In [7]:
# Encode all images.
for sample in imds.iter_samples(progress=True, autosave=True):
    im = np.array(Image.open(sample.filepath))
    embeds = encoder([im])[0]
    sample["logits"] = [cos_sim(v, embeds) for v in sus_embeds]

 100% |███████████████| 1699/1699 [1.2m elapsed, 0s remaining, 23.2 samples/s]      


In [8]:
# Drop fields left over from a previous run of this cell so it can be re-run;
# the trailing `2` is the error level passed for fields that do not exist.
imds.delete_sample_fields(["predictions", "bin_ground_truth", "bin_predictions"], 2)
for sample in imds.iter_samples(progress=True, autosave=True):
    logits = sample["logits"]
    # NOTE: `gt` is only consumed by the commented-out binary experiment below.
    gt = sample["ground_truth"].label
    
    # Multiclass for confusion matrix.
    # The predicted label is the suspect with the highest similarity, and that
    # similarity doubles as the confidence.
    sample["predictions"] = fo.Classification(
        label=sus_cls[np.argmax(logits)],
        confidence=np.max(logits),
        logits=logits,
    )

    # Binary for PR curve (disabled experiment, kept for reference).
    # Goal is to find minimum confidence where all false matches disappear.
    # NOTE: Due to issue with fiftyone, we select a random label.
    # NOTE(review): `thres` is not defined anywhere in the visible notebook, so
    # this block needs a threshold function bound to that name before it can be
    # re-enabled.
    # placeholder = np.random.rand() < 0.5
    # idx = thres(logits)
    # correct = sus_cls[idx] == gt if idx != -1 else False
    # conf = np.max(logits)
    # conf = 1.0 - conf if idx == -1 else conf
    # sample["bin_ground_truth"] = fo.Classification(
    #     label="N" if placeholder else "P"
    # )
    # placeholder = placeholder if correct else not placeholder
    # sample["bin_predictions"] = fo.Classification(
    #     label="N" if placeholder else "P",
    #     confidence=conf,
    # )

 100% |███████████████| 1699/1699 [1.5s elapsed, 0s remaining, 1.3K samples/s]         


In [104]:
def evaluate_threshold_function(func, x_axis, suspect_dropout, ds=imds):
    """Score a suspect-matching function over a sweep of threshold values.

    For every threshold in ``x_axis`` each sample's cached ``logits`` are passed
    to ``func``. To simulate images that contain no suspect, a sample is chosen
    at random with probability ``suspect_dropout`` and its ground-truth score is
    replaced by the mean of its other scores.

    Args:
        func: Callable ``func(logits, thres)`` returning the index of the
            matched suspect, or ``-1`` for "no suspect".
        x_axis: Iterable of threshold values to evaluate.
        suspect_dropout: Probability in ``[0, 1]`` that a sample is treated as
            containing no suspect.
        ds: Iterable of samples exposing ``s["logits"]`` and
            ``s["ground_truth"].label``. The default is bound to ``imds`` when
            this cell is executed, so re-run the cell after re-creating ``imds``.

    Returns:
        Tuple ``(acc_axis, p_axis, r_axis, f_axis)`` of lists holding accuracy,
        precision, recall and F1 for each threshold, in ``x_axis`` order.

    Raises:
        ValueError: If a sample's ground-truth label is not in ``sus_cls``.
    """
    # Private generator seeded like the previous `np.random.seed(42)` call, so
    # the draws are identical but the global NumPy RNG state is left untouched.
    rng = np.random.RandomState(42)

    # Read the dataset once; `np.array` already yields a fresh array per sample.
    all_logits, all_gts = [], []
    for s in ds:
        all_logits.append(np.array(s["logits"]))
        all_gts.append(sus_cls.index(s["ground_truth"].label))

    acc_axis, p_axis, r_axis, f_axis = [], [], [], []

    # NOTE: Seems there is a variance of +- 0.01 for the scores. Presumably this
    # is because the no-suspect draw is repeated for every threshold, so each
    # threshold is scored on a different random subset.
    for thres in tqdm(x_axis):
        tp, fp, tn, fn = 0, 0, 0, 0
        for sample_logits, gt in zip(all_logits, all_gts):
            # Work on a copy so the cached scores survive across thresholds.
            logits = sample_logits.copy()
            no_suspect = rng.rand() < suspect_dropout

            if no_suspect:
                # Hide the true suspect by flattening its score to the mean of
                # the remaining scores.
                logits[gt] = np.delete(logits, gt).mean()

            pred = func(logits, thres)
            if no_suspect:
                if pred == -1:
                    tn += 1
                else:
                    fp += 1
            elif pred == gt:
                tp += 1
            elif pred == -1:
                fn += 1
            # Remaining case: a suspect is present but the wrong identity was
            # matched. We don't count false predictions for now.

        # `max(..., 1)` / `max(..., 1e-6)` guard against division by zero.
        acc = (tp + tn) / max(tp + tn + fp + fn, 1)
        p = tp / max(tp + fp, 1)
        r = tp / max(tp + fn, 1)
        f = 2 * p * r / max(p + r, 1e-6)

        acc_axis.append(acc)
        p_axis.append(p)
        r_axis.append(r)
        f_axis.append(f)

    return acc_axis, p_axis, r_axis, f_axis

In [152]:
# Compare PR curves for different threshold functions and tune other threshold params.
suspect_dropout = 0.5 # Adversarial case where many images have no suspect.
x_axis = np.arange(0.0, 1.0, 0.01)

cos_func = lambda logits, thres: np.argmax(logits) if np.max(logits) > thres else -1
# Can also vary `vote_thres` and 10*`sd_thres` instead of `accept_thres`.
# NOTE: Below are optimal (based on val set) after tuning.
a_func = lambda logits, thres: thres_strategy_A(logits, thres, 0.37, 4.4)

cos_res = evaluate_threshold_function(cos_func, x_axis, suspect_dropout)
a_res = evaluate_threshold_function(a_func, x_axis, suspect_dropout)

100%|██████████| 100/100 [00:01<00:00, 59.29it/s]
100%|██████████| 100/100 [00:04<00:00, 22.74it/s]


In [None]:
# %matplotlib widget
# Plot comparison.
# Each result tuple is (accuracy, precision, recall, F1). Only F1 (index 3) is
# drawn; to overlay another metric add e.g. ("Cos Precision", cos_res[1]),
# ("Cos Recall", cos_res[2]), ("Cos Accuracy", cos_res[0]) or the matching
# `a_res` entries ("A Precision", "A Recall", "A Accuracy") to `curves`.
curves = [
    ("Cos F1", cos_res[3]),
    ("A F1", a_res[3]),
]

fig, ax = plt.subplots()
for curve_label, scores in curves:
    ax.plot(x_axis, scores, label=curve_label)
ax.set(xlabel="Threshold", ylabel="Score", title="Threshold vs Score")
ax.legend()
plt.show()

# Report the best F1 of each strategy together with the threshold reaching it.
print("Max Cos F1 & Threshold:", max(cos_res[3]), x_axis[np.argmax(cos_res[3])])
print("Max A F1 & Threshold:", max(a_res[3]), x_axis[np.argmax(a_res[3])])

In [12]:
# NOTE(review): presumably level 1 downgrades missing-requirement errors to
# warnings so the evaluation can run without optional packages; confirm.
fo.config.requirement_error_level = 1
# Clear earlier evaluation runs so the "eval" key can be reused on re-run.
imds.delete_evaluations()
# Multiclass evaluation of the arg-max predictions against the folder labels.
results = imds.evaluate_classifications(
    "predictions",
    gt_field="ground_truth",
    eval_key="eval",
)
results.print_report()
# Optional extras, disabled: confusion-matrix plot, and the binary PR-curve
# evaluation that needs the `bin_*` fields from the commented-out experiment in
# the prediction cell.
# display(results.plot_confusion_matrix())
# results = imds.evaluate_classifications(
#     "bin_predictions",
#     gt_field="bin_ground_truth",
#     eval_key="eval_bin",
#     method="binary",
#     classes=["N", "P"],
# )
# results.print_report()
# display(results.plot_pr_curve())

              precision    recall  f1-score   support

          #0       1.00      1.00      1.00       179
          #1       1.00      1.00      1.00       177
          #2       1.00      1.00      1.00       169
          #3       1.00      0.99      1.00       163
          #4       0.98      1.00      0.99       163
          #5       0.99      0.98      0.98       163
          #6       0.95      0.92      0.93       166
          #7       1.00      0.98      0.99       169
          #8       0.97      0.99      0.98       173
          #9       0.90      0.92      0.91       177

    accuracy                           0.98      1699
   macro avg       0.98      0.98      0.98      1699
weighted avg       0.98      0.98      0.98      1699



### Preview


In [None]:
# Put whatever view or dataset below: uncomment exactly one assignment to pick
# what the FiftyOne app shows.
#   ds    - full detection dataset
#   view  - detection dataset with every label set to "plushie"
#   dupes - samples sharing a hash value (train/val tag filter applied)
#   sview - split selected for detection evaluation
#   imds  - validation patches with suspect predictions
# v = ds
# v = view
# v = dupes
# v = sview
v = imds
fo.launch_app(dataset=v)