In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Root folder of the cars dataset; the images and the annotation CSV live beneath it.
data_path = Path("data/cars")
images_path = data_path.joinpath("training_images")
annotations_file_path = data_path.joinpath("annotations.csv")

In [5]:
from example.train_cars import load_cars_df

INFO:pytorch_accelerated:Setting random seeds
INFO:matplotlib.font_manager:generated new fontManager


In [6]:
train_df, valid_df, lookups = load_cars_df(annotations_file_path, images_path)

# Anchor section

TODO: Now that we understand anchor boxes, let's look at how we can evaluate whether our chosen anchors are suitable for our problem and, if not, find some sensible choices for our dataset.

The approach here is largely adapted from the autoanchor approach used in Yolov5, which was also used with Yolov7.

## Evaluating current anchor boxes

The simplest approach would be to simply use the same anchors as used for COCO, which are already bundled with the defined architectures.

In [7]:
from yolov7 import create_yolov7_model

In [8]:
model = create_yolov7_model('yolov7', pretrained=False)

In [9]:
model.detection_head.anchor_grid

tensor([[[[[[ 12.,  16.]]],


          [[[ 19.,  36.]]],


          [[[ 40.,  28.]]]]],




        [[[[[ 36.,  75.]]],


          [[[ 76.,  55.]]],


          [[[ 72., 146.]]]]],




        [[[[[142., 110.]]],


          [[[192., 243.]]],


          [[[459., 401.]]]]]])

By default, these are the anchors that were used for COCO. Here we can see that we have 3 groups, one for each layer of the feature pyramid network. The numbers correspond to the width and height of the anchors that will be generated.

The FPN (Feature Pyramid Network) has three outputs and each output's role is to detect objects according to their scale. For example:

- P3/8 is for detecting smaller objects.
- P4/16 is for detecting medium objects.
- P5/32 is for detecting bigger objects.
So, when you want to detect smaller objects you need to use smaller anchor boxes, for medium objects you should use medium-scale anchor boxes, and so on.

TODO: Should have already explained what anchors are in a different section.

In [10]:
current_anchors = model.detection_head.anchor_grid.clone().cpu().view(-1, 2); current_anchors

tensor([[ 12.,  16.],
        [ 19.,  36.],
        [ 40.,  28.],
        [ 36.,  75.],
        [ 76.,  55.],
        [ 72., 146.],
        [142., 110.],
        [192., 243.],
        [459., 401.]])

To evaluate our current anchor boxes, we can calculate the best possible recall, which would occur if the model was able to successfully match an appropriate anchor box with a ground truth. 

### Find and Resize ground truth bounding boxes

To evaluate our anchor boxes, we first need some knowledge of the shapes and sizes of the objects in our dataset. However, before we can evaluate, we need to resize the width and height of our ground truth boxes based on the size of the images that we will train on - for this architecture, this is recommended to be 640.

Let's start by finding the width and height of all ground truth boxes in the training set. We can calculate these as demonstrated below:

In [11]:
train_annotations_df = train_df.query('has_annotation == True').copy()

In [12]:
# Derive each box's height and width from its corner coordinates.
train_annotations_df = train_annotations_df.assign(
    h=lambda boxes: boxes['ymax'] - boxes['ymin'],
    w=lambda boxes: boxes['xmax'] - boxes['xmin'],
)

In [13]:
train_annotations_df

Unnamed: 0,image,xmin,ymin,xmax,ymax,class_name,has_annotation,image_id,class_id,h,w
0,vid_4_1000.jpg,281.259045,187.035071,327.727931,223.225547,car,True,0,0.0,36.190476,46.468886
1,vid_4_10000.jpg,15.163531,187.035071,120.329957,236.430180,car,True,1,0.0,49.395109,105.166425
2,vid_4_10040.jpg,239.192475,176.764801,361.968162,236.430180,car,True,3,0.0,59.665380,122.775687
4,vid_4_10060.jpg,16.630970,186.546010,132.558611,238.386422,car,True,4,0.0,51.840412,115.927641
5,vid_4_10100.jpg,447.568741,160.625804,582.083936,232.517696,car,True,6,0.0,71.891892,134.515195
...,...,...,...,...,...,...,...,...,...,...,...
554,vid_4_9860.jpg,0.000000,198.321729,49.235251,236.223284,car,True,994,0.0,37.901554,49.235251
555,vid_4_9880.jpg,329.876184,156.482351,536.664239,250.497895,car,True,995,0.0,94.015544,206.788055
556,vid_4_9900.jpg,0.000000,168.295823,141.797524,239.176652,car,True,996,0.0,70.880829,141.797524
557,vid_4_9960.jpg,487.428988,172.233646,616.917699,228.839864,car,True,999,0.0,56.606218,129.488711


In [38]:
# Ground truth (width, height) pairs, one row per annotation, in original image pixels.
raw_gt_wh = train_annotations_df[['w', 'h']].to_numpy()

Next, we will need the height and width of our images. Sometimes, we have this information ahead of time, in which case we can use this knowledge directly. Otherwise, we can find the size of each image as follows:

In [15]:
from PIL import Image
from tqdm.contrib.concurrent import process_map

In [16]:
def find_image_size(image_path):
    """
    Read the pixel dimensions of a single image file.

    :param image_path: a `pathlib.Path` pointing to an image that PIL is able to open
    :return: a tuple of (file name, (width, height))
    """
    # Use a context manager so the underlying file handle is closed straight away;
    # this function is mapped over every image, so unclosed handles would accumulate.
    with Image.open(image_path) as image:
        width, height = image.size
    return (image_path.name, (width, height))

# Build the full path of every distinct training image, then look up each size via process_map.
training_image_paths = [images_path / file_name for file_name in train_df.image.unique()]
image_sizes = process_map(find_image_size, training_image_paths)


100%|███████████████████████████████████████████████████| 324/324 [00:00<00:00, 5619.21it/s]


In [17]:
# Turn the (file name, (w, h)) pairs into a frame with one row per image.
image_sizes_df = (
    pd.DataFrame(dict(image_sizes))  # one column per image, rows 0 (width) and 1 (height)
    .T                               # flip so that each image becomes a row
    .reset_index()
    .rename(columns={"index": "image", 0: "image_w", 1: "image_h"})
)

In [18]:
image_sizes_df

Unnamed: 0,image,image_w,image_h
0,vid_4_1000.jpg,676,380
1,vid_4_10000.jpg,676,380
2,vid_4_10040.jpg,676,380
3,vid_4_10060.jpg,676,380
4,vid_4_10100.jpg,676,380
...,...,...,...
319,vid_4_13060.jpg,676,380
320,vid_4_13100.jpg,676,380
321,vid_4_13240.jpg,676,380
322,vid_4_13280.jpg,676,380


We can now merge this with our existing dataframe

In [19]:
# Attach the source image dimensions to every annotation row.
# Any size columns left over from a previous run are dropped first so that the cell
# can be re-run safely; otherwise a second merge would create suffixed
# `image_w_x`/`image_w_y` columns and the `[['image_w', 'image_h']]` lookup would fail.
train_annotations_df = pd.merge(
    train_annotations_df.drop(columns=['image_w', 'image_h'], errors='ignore'),
    image_sizes_df,
    on='image',
    # Each image must have exactly one size row; fail loudly rather than duplicate annotations.
    validate='many_to_one',
)
train_annotations_df

Unnamed: 0,image,xmin,ymin,xmax,ymax,class_name,has_annotation,image_id,class_id,h,w,image_w,image_h
0,vid_4_1000.jpg,281.259045,187.035071,327.727931,223.225547,car,True,0,0.0,36.190476,46.468886,676,380
1,vid_4_10000.jpg,15.163531,187.035071,120.329957,236.430180,car,True,1,0.0,49.395109,105.166425,676,380
2,vid_4_10040.jpg,239.192475,176.764801,361.968162,236.430180,car,True,3,0.0,59.665380,122.775687,676,380
3,vid_4_10060.jpg,16.630970,186.546010,132.558611,238.386422,car,True,4,0.0,51.840412,115.927641,676,380
4,vid_4_10100.jpg,447.568741,160.625804,582.083936,232.517696,car,True,6,0.0,71.891892,134.515195,676,380
...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,vid_4_9860.jpg,0.000000,198.321729,49.235251,236.223284,car,True,994,0.0,37.901554,49.235251,676,380
393,vid_4_9880.jpg,329.876184,156.482351,536.664239,250.497895,car,True,995,0.0,94.015544,206.788055,676,380
394,vid_4_9900.jpg,0.000000,168.295823,141.797524,239.176652,car,True,996,0.0,70.880829,141.797524,676,380
395,vid_4_9960.jpg,487.428988,172.233646,616.917699,228.839864,car,True,999,0.0,56.606218,129.488711,676,380


In [20]:
# Per-annotation (image_w, image_h) array taken from the merged frame.
# Note: this rebinds `image_sizes`, which until now held the list returned by process_map.
image_sizes = train_annotations_df[['image_w', 'image_h']].to_numpy()

Now, we can use this information to get the resized widths and heights of our ground truth targets, with respect to our target image size. To preserve the aspect ratios of the objects in our images, the recommended approach to resizing is to scale the image so that the longest side is equal to our target size. We can do this using the function below:

In [21]:
from yolov7.anchors import calculate_resized_gt_wh

In [22]:
??calculate_resized_gt_wh

[0;31mSignature:[0m [0mcalculate_resized_gt_wh[0m[0;34m([0m[0mimage_sizes[0m[0;34m,[0m [0mgt_wh[0m[0;34m,[0m [0mtarget_image_size[0m[0;34m=[0m[0;36m640[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m   
[0;32mdef[0m [0mcalculate_resized_gt_wh[0m[0;34m([0m[0mimage_sizes[0m[0;34m,[0m [0mgt_wh[0m[0;34m,[0m [0mtarget_image_size[0m[0;34m=[0m[0;36m640[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;31m# image sizes array of [w, h] , either np.array([[w, h]]) or per image[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m    [0mnormalized_gt_wh[0m [0;34m=[0m [0mgt_wh[0m [0;34m/[0m [0mimage_sizes[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m    [0;31m# find target image sizes, assuming resizing so that the longest side is the target size[0m[0;34m[0m
[0;34m[0m    [0mtarget_image_sizes[0m [0;34m=[0m [0;34m([0m[0;34m[0m
[0;34m[0m        [0mtarget_image_size[0m [0;34m*[

In [41]:
raw_gt_wh.shape

(397, 2)

In [39]:
# Pass both arrays by keyword. They are each [N, 2], so a positional mix-up would not
# raise an error; it would silently divide the image sizes by the box sizes instead.
gt_wh = calculate_resized_gt_wh(gt_wh=raw_gt_wh, image_sizes=image_sizes, target_image_size=640)
gt_wh[:5]

array([[ 43.99421122,  34.26317273],
       [ 99.56584662,  46.76460062],
       [116.23733718,  56.48793344],
       [109.75397973,  49.07967981],
       [127.35166419,  68.06332961]])

Alternatively, as all of our images are the same size in this case, we could simply specify a single image size.

In [40]:
# A single [[w, h]] row broadcasts against every ground truth box.
# Keyword arguments keep the call valid regardless of the positional parameter order.
calculate_resized_gt_wh(gt_wh=raw_gt_wh, image_sizes=np.array([[676, 380]]), target_image_size=640)[:5]

array([[ 43.99421122,  34.26317273],
       [ 99.56584662,  46.76460062],
       [116.23733718,  56.48793344],
       [109.75397973,  49.07967981],
       [127.35166419,  68.06332961]])

Note that we have also filtered out any boxes that will be incredibly small (less than 3 pixels in either height or width), with respect to the new image size, as these boxes are usually too small to be considered useful!

### Calculating Best Possible Recall

Now that we have the width and height of all ground truth boxes in our training set, we can evaluate our current anchor boxes as follows:

In [43]:
from yolov7.anchors import calculate_best_possible_recall, LOSS_ANCHOR_MULTIPLE_THRESHOLD

In [44]:
??calculate_best_possible_recall

[0;31mSignature:[0m [0mcalculate_best_possible_recall[0m[0;34m([0m[0manchors[0m[0;34m,[0m [0mgt_wh[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mcalculate_best_possible_recall[0m[0;34m([0m[0manchors[0m[0;34m,[0m [0mgt_wh[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""[0m
[0;34m    Given a tensor of anchors and and an array of widths and heights for each bounding box in the dataset,[0m
[0;34m    calculate the best possible recall that can be obtained if every box was matched to an appropriate anchor.[0m
[0;34m[0m
[0;34m    :param anchors: a tensor of shape [N, 2] representing the width and height of each anchor[0m
[0;34m    :param gt_wh: a tensor of shape [N, 2] representing the width and height of each ground truth bounding box[0m
[0;34m[0m
[0;34m    """[0m[0;34m[0m
[0;34m[0m    [0mbest_anchor_ratio[0m [0;34m=[0m [0mcalculate_best_anchor_ratio[0m[0;34m([0m[0manchors[0m[0;34m=[0m[0ma

In [46]:
calculate_best_possible_recall(current_anchors, gt_wh)

tensor(1.)

From this, we can see that the current anchor boxes are a good fit for this dataset; which makes sense, as the images are quite similar to those in COCO.

### How does this work?

At this point, you may be wondering, how exactly do we calculate the best possible recall. To answer this, let's go through the process manually.


Intuitively, we would like to ensure that at least one anchor can be matched to each ground truth box. Whilst we could do this by framing it as an optimization problem - how do we match each ground truth box with its optimal anchor - this would introduce a lot of complexity for what we are trying to do.

Given an anchor box, we need a simpler way of measuring how well it can be made to fit a ground truth box. Let's examine one approach that can be taken to do this, starting with the width and height of a single ground truth box.


In [48]:
gt_box_wh = gt_wh[0]; gt_box_wh

array([43.99421122, 34.26317273])

For each anchor box, we can inspect the ratios of its height and width when compared to the height and width of our ground truth target, and use this to understand where the biggest differences are.

In [49]:
current_anchors/gt_box_wh

tensor([[ 0.2728,  0.4670],
        [ 0.4319,  1.0507],
        [ 0.9092,  0.8172],
        [ 0.8183,  2.1889],
        [ 1.7275,  1.6052],
        [ 1.6366,  4.2611],
        [ 3.2277,  3.2104],
        [ 4.3642,  7.0922],
        [10.4332, 11.7035]], dtype=torch.float64)

As the scale of these ratios will depend on whether the anchor box sides are greater or smaller than the sides of our ground truth box, we can ensure that our magnitudes are in the range [0, 1] by also calculating the reciprocal and taking the minimum ratios for each anchor. 


In [50]:
symmetric_size_ratios = torch.min(current_anchors/gt_box_wh, gt_box_wh/current_anchors); symmetric_size_ratios

tensor([[0.2728, 0.4670],
        [0.4319, 0.9518],
        [0.9092, 0.8172],
        [0.8183, 0.4568],
        [0.5789, 0.6230],
        [0.6110, 0.2347],
        [0.3098, 0.3115],
        [0.2291, 0.1410],
        [0.0958, 0.0854]], dtype=torch.float64)

From this, we now have an indication of how well, independently, the width and height of each anchor box 'fits' to our ground truth target. 

Now, our challenge is how to evaluate the matching of the width and height together!

One way we can approach this is to take the minimum ratio for each anchor, representing the side that worst matches our ground truth.

In [51]:
worst_side_size_ratio = symmetric_size_ratios.min(-1).values; worst_side_size_ratio

tensor([0.2728, 0.4319, 0.8172, 0.4568, 0.5789, 0.2347, 0.3098, 0.1410, 0.0854],
       dtype=torch.float64)

The reason why we have selected the worst fitting side here, is because we know that the other side matches our target *at least* as well as the one selected; we can think of this as the worst case scenario!

Now, let's select the anchor box which matches the best out of these options, this is simply the largest value.

In [52]:
best_anchor_ratio = worst_side_size_ratio.max(-1).values; best_anchor_ratio

tensor(0.8172, dtype=torch.float64)

Out of the worst fitting options, this is our selected match!


TODO: Ensure this is after the loss function section

Recalling that the loss function only looks to match anchor boxes that are up to 4 times greater or smaller than the size of the ground truth target, we can now verify whether this anchor is within this range and would be considered a successful match.

We can do that as demonstrated below, taking the reciprocal of our loss multiple, to ensure that it is in the same range as our value:

In [53]:
LOSS_ANCHOR_MULTIPLE_THRESHOLD

4

In [54]:
best_anchor_ratio > 1. / LOSS_ANCHOR_MULTIPLE_THRESHOLD

tensor(True)

From this, we can see that at least one of our anchors could be successfully matched to our selected ground truth target!

Now that we understand the sequence of steps, we can now apply the same logic to all of our ground truth boxes to see how many matches we can obtain with our current set of anchors:

In [55]:
# Broadcast so that every anchor is compared against every ground truth box:
# the added axes give a [num_boxes, num_anchors, 2] result.
anchor_over_box = current_anchors[None] / gt_wh[:, None]
box_over_anchor = gt_wh[:, None] / current_anchors[None]
symmetric_size_ratios = torch.min(anchor_over_box, box_over_anchor)

# Worst-fitting side of each anchor, then the best anchor for each box.
worst_side_size_ratio = symmetric_size_ratios.min(-1).values
best_anchor_ratio = worst_side_size_ratio.max(-1).values

In [56]:
best_anchor_ratio > 1. / LOSS_ANCHOR_MULTIPLE_THRESHOLD

tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, Tr

Now that we have calculated, for each ground truth box, whether it has a match, we can take the mean number of matches to find our best possible recall; in our case, this is 1, as we saw earlier!

In [57]:
best_possible_recall = (best_anchor_ratio > 1. / LOSS_ANCHOR_MULTIPLE_THRESHOLD).float().mean(); best_possible_recall

tensor(1.)

## Selecting new anchor boxes

Whilst using the pre-defined anchors may be a good choice for similar datasets, this may not be appropriate for all datasets, for example, those that contain lots of small objects. In these cases, a better approach may be to select entirely new anchors.

Let's explore how we can do this!

First, let's define the number of anchors that we need for our architecture.

In [81]:
num_anchors = current_anchors.shape[0]; num_anchors

9

Now, based on our bounding boxes, we need to define a sensible set of widths and heights for our anchor templates. One way that we can estimate this is by using Kmeans to cluster the widths and heights of our ground truth boxes, based on the number of anchors that we need. We can then use these centroids as our starting estimates. We can do this using the following function:

In [82]:
from yolov7.anchors import estimate_anchors

In [83]:
??estimate_anchors

[0;31mSignature:[0m [0mestimate_anchors[0m[0;34m([0m[0mnum_anchors[0m[0;34m,[0m [0mgt_wh[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mestimate_anchors[0m[0;34m([0m[0mnum_anchors[0m[0;34m,[0m [0mgt_wh[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""[0m
[0;34m    Given a target number of anchors and an array of widths and heights for each bounding box in the dataset,[0m
[0;34m    estimate a set of anchors using the centroids from Kmeans clustering.[0m
[0;34m[0m
[0;34m    :param num_anchors: the number of anchors to return[0m
[0;34m    :param gt_wh: an array of shape [N, 2] representing the width and height of each ground truth bounding box[0m
[0;34m[0m
[0;34m    """[0m[0;34m[0m
[0;34m[0m    [0mprint[0m[0;34m([0m[0;34mf"Running kmeans for {num_anchors} anchors on {len(gt_wh)} points..."[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0mstd_dev[0m [0;34m=[0m [0mgt_wh[0m[0;34m.[0m[0mstd[0m

In [84]:
proposed_anchors = estimate_anchors(num_anchors, gt_wh); proposed_anchors

Running kmeans for 9 anchors on 397 points...


array([[157.29889337,  57.47936534],
       [ 70.37782144,  28.9259909 ],
       [117.67344588,  55.91906451],
       [ 71.90866699,  50.8658278 ],
       [186.28917826,  84.09961313],
       [ 38.3042379 ,  25.61764762],
       [ 56.74676871,  36.96437289],
       [112.83692506,  42.92073289],
       [ 90.20439918,  36.43405811]])

Here, we can see that we now have a set of anchor templates that we can use as a starting point. As before, let's calculate our best possible recall using these anchors:

In [85]:
calculate_best_possible_recall(proposed_anchors, gt_wh)

tensor(1.)

Once again, we see that our best possible recall is 1, which means that these anchors are also a good fit for our problem!

Whilst it is perhaps unnecessary in this case, we may be able to improve these anchors further using a [genetic algorithm](https://www.geeksforgeeks.org/genetic-algorithms/). Following this methodology, we can define a *fitness* (or reward) function to measure how well our anchors match our data and make small, random changes to our anchors to try and maximise this function. 

In this case we can define our fitness function as follows:

In [86]:
from yolov7.anchors import anchor_fitness, evolve_anchors

In [87]:
??anchor_fitness

[0;31mSignature:[0m [0manchor_fitness[0m[0;34m([0m[0manchors[0m[0;34m,[0m [0mwh[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0manchor_fitness[0m[0;34m([0m[0manchors[0m[0;34m,[0m [0mwh[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""[0m
[0;34m    A fitness function that can be used to evolve a set of anchors. This function calculates the mean best anchor ratio[0m
[0;34m    for all matches that are within the multiple range considered during the loss calculation.[0m
[0;34m    """[0m[0;34m[0m
[0;34m[0m    [0mbest_anchor_ratio[0m [0;34m=[0m [0mcalculate_best_anchor_ratio[0m[0;34m([0m[0manchors[0m[0;34m=[0m[0manchors[0m[0;34m,[0m [0mgt_wh[0m[0;34m=[0m[0mwh[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0;32mreturn[0m [0;34m([0m[0;34m[0m
[0;34m[0m        [0mbest_anchor_ratio[0m[0;34m[0m
[0;34m[0m        [0;34m*[0m [0;34m([0m[0mbest_anchor_ratio[0m [0;34m>[0m [0;36m1[0m 

Here, we are taking the best anchor ratio for each match that will be considered during the loss calculation. If an anchor box is more than four times greater or smaller than its matched bounding box, it will not contribute to our score. Let's use this to calculate a fitness score for our proposed anchors:

In [88]:
anchor_fitness(proposed_anchors, gt_wh)

tensor(0.8825, dtype=torch.float64)

 Now, let's use this as the fitness function when optimizing our anchors, as demonstrated below:

In [99]:
evolved_anchors = evolve_anchors(proposed_anchors, gt_wh, anchor_fitness_fn=anchor_fitness, num_iterations=30000); evolved_anchors

Evolving anchors with Genetic Algorithm: fitness = 0.8855: 100%|█| 30000/30000 [00:19<00:00,


array([[156.06907735,  61.09621462],
       [ 66.80622862,  29.0438958 ],
       [136.33063134,  52.56100946],
       [ 80.06700492,  36.42821897],
       [179.52356295,  83.0822995 ],
       [ 37.64336168,  29.35155407],
       [ 52.00907081,  37.50677223],
       [114.96001811,  44.23679448],
       [ 99.66602472,  37.86939469]])

Inspecting the definition of this function, we can see that, for a specified number of iterations, we are simply sampling random noise from a normal distribution and using this to mutate our anchors. If this change leads to an increased score, we keep these as our anchors! 


In [79]:
??evolve_anchors

[0;31mSignature:[0m
[0mevolve_anchors[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mproposed_anchors[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgt_wh[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_iterations[0m[0;34m=[0m[0;36m1000[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmutation_probability[0m[0;34m=[0m[0;36m0.9[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmutation_noise_mean[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmutation_noise_std[0m[0;34m=[0m[0;36m0.1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0manchor_fitness_fn[0m[0;34m=[0m[0;34m<[0m[0mfunction[0m [0manchor_fitness[0m [0mat[0m [0;36m0x7f17b52be5f0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mevolve_anchors[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mproposed_anchors[0m[0;34m,[0m[0;34m[0m


Let's see whether this has improved our score at all:

In [98]:
anchor_fitness(evolved_anchors, gt_wh)

tensor(0.8876, dtype=torch.float64)

We can see that our evolved anchors have a better fitness score than our original proposed anchors, as we would expect!

Now, all that is left to do is to sort the anchors into a rough ascending order, considering the smallest dimension for each anchor.

In [101]:
# Order the anchors by their smallest side, ascending.
# Converting to a tensor first keeps this cell re-runnable: once `evolved_anchors` is a
# tensor, `.min(-1)` returns a (values, indices) pair rather than an array, so the
# original one-liner only worked the first time it was executed.
evolved_anchors = torch.as_tensor(evolved_anchors)
evolved_anchors = evolved_anchors[evolved_anchors.min(dim=-1).values.argsort()]

In [103]:
calculate_best_possible_recall(evolved_anchors, gt_wh)

tensor(1.)

## Putting it all together

Now that we understand the process, we could calculate our anchors for our dataset in a single step using the following function.

In [104]:
from yolov7.anchors import calculate_anchors

In [105]:
# Pass the *raw* box sizes here: `calculate_anchors` is given `image_sizes` and
# `target_image_size`, so it presumably rescales the boxes itself, and handing it the
# already-resized `gt_wh` would scale them twice.
# NOTE(review): confirm against the source of yolov7.anchors.calculate_anchors.
calculate_anchors(current_anchors, image_sizes, raw_gt_wh, target_image_size=640, best_possible_recall_threshold=0.98)

Best Possible Recall (BPR) = 1.0000

tensor([[ 12.,  16.],
        [ 19.,  36.],
        [ 40.,  28.],
        [ 36.,  75.],
        [ 76.,  55.],
        [ 72., 146.],
        [142., 110.],
        [192., 243.],
        [459., 401.]])

In this case, as our best possible recall is already greater than the threshold, we can keep our original anchors!