## **To perform the $\chi^2$ test, TP, FP and FN frequencies are computed from validation data**

**Load best model**

In [220]:
from helper_functions import *
from collections import Counter

def load_model(MODEL_NAME):
    """Build the detection model and restore its best checkpoint.

    Args:
        MODEL_NAME: Name of the run directory under ``runs/``. The weights
            are read from ``runs/{MODEL_NAME}/best.pth``.

    Returns:
        The model with the checkpoint's state dict loaded, moved to CUDA
        when available and to the CPU otherwise.
    """
    # Resolve the target device first so the checkpoint can be mapped onto it.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Build the model with 4 output classes.
    model = get_object_detection_model(num_classes=4)
    # Load best version of model (lowest val loss). map_location lets a
    # checkpoint saved on a GPU be restored on a CPU-only machine; without it
    # torch.load tries to put tensors back on their original device and fails.
    state_dict = torch.load(f"runs/{MODEL_NAME}/best.pth", map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    return model

# Restore the best checkpoint of the selected training run.
model = load_model("202404211431_FasterRCNN_960")
# Same CUDA-if-available choice that load_model makes; passed to ValidationMetric below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

**Load validation data**

In [221]:
# Dataset root and label names; index 0 ('bg') is presumably the background class.
DATA_FOLDER = r'./fashion_v1/yolo'
CLASSES = ['bg', 'Hole', 'Stain', 'TUD']
# Width/height handed to the dataset.
IMG_WIDTH = 1280
IMG_HEIGHT = 720

# Validation split with tensor conversion only; boxes are declared in
# pascal_voc format with their class ids carried in the 'labels' field.
val_dataset = FashionDataset(
    data_folder=DATA_FOLDER,
    split='val',
    width=IMG_WIDTH, height=IMG_HEIGHT,
    transform = A.Compose([
        ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}))

# Singular only for exactly one sample, so an empty split reads "0 samples".
print(f"Validation data has {len(val_dataset)} sample{'s' if len(val_dataset) != 1 else ''}.")

# shuffle=False keeps the sample order fixed between runs.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=4, shuffle=False, num_workers=4,
    collate_fn=utils.utils.collate_fn)

def transform_target(dict_):
    """Return the 'boxes' and 'labels' entries of ``dict_`` as NumPy arrays.

    Each tensor is detached from the autograd graph and moved to the CPU
    before conversion. Any other keys in ``dict_`` are dropped.
    """
    return {
        key: dict_[key].detach().cpu().numpy()
        for key in ('boxes', 'labels')
    }

Validation data has 1000 samples.


**Extract predictions**

In [222]:
# Wrap model, validation loader and device so targets and predictions can be extracted.
met = ValidationMetric(model, val_loader, device)
# Ground truth, reduced to NumPy 'boxes'/'labels' dicts in one step.
gt = list(map(transform_target, met.get_ground_truth()))
# Predictions as returned by the metric helper; they are filtered in the next cell.
preds = met.get_prediction()

**Filter predictions from thresholds**

In [223]:
# copy.copy is shallow: the container is new, but its elements are shared with `preds`.
# NOTE(review): if apply_nms / apply_threshold modify their argument in place,
# `preds` changes as well and re-running this cell would give different
# results — confirm, or switch to copy.deepcopy.
predictions = copy.copy(preds)
# Per-image non-maximum suppression with an IoU threshold of 0.4.
predictions = [apply_nms(pred, iou_thresh=0.4) for pred in predictions]
# Presumably drops detections whose score is below 0.55 — verify in apply_threshold.
predictions = [apply_threshold(pred, threshold=0.55) for pred in predictions]
# Convert to the same NumPy 'boxes'/'labels' layout used for the ground truth.
predictions = [transform_target(p) for p in predictions]

**Find TP, FP, FN for the different classes**

In [224]:
# Tally TP / FP / FN grouped by class label (type="class").
# iou_threshold=0.25 is presumably the minimum overlap for a prediction to count
# as matching a ground-truth box — see calculate_tp_fp_fn.
tp, fp, fn = calculate_tp_fp_fn(predictions, gt, iou_threshold=0.25, type="class")

The classes are encoded as:
- 1 = hole
- 2 = stain
- 3 = TUD

**TP**

In [225]:
# Per-class true-positive counts, then their total as the cell's output.
print(tp)
sum(tp.values())

{1: 6, 2: 31, 3: 54}


91

**FP**

In [226]:
# Per-class false-positive counts, then their total as the cell's output.
print(fp)
sum(fp.values())

{1: 15, 2: 314, 3: 25}


354

**FN**

In [227]:
# Per-class false-negative counts, then their total as the cell's output.
print(fn)
sum(fn.values())

{1: 11, 2: 72, 3: 82}


165

**Check that the number of predictions and gt match, ensuring the extraction worked correctly**

In [228]:
# Check that the number of TP, FP and FN are correct:
# TP + FP must equal the number of predicted labels, and TP + FN must equal
# the number of ground-truth labels.
counted_preds = sum(tp.values()) + sum(fp.values())
expected_preds = sum(len(pred['labels']) for pred in predictions)
counted_gt = sum(tp.values()) + sum(fn.values())
# The loop variable is named `target` so it no longer shadows the `gt` list.
expected_gt = sum(len(target['labels']) for target in gt)
print(f"There are {counted_preds} predictions. There should be {expected_preds} predictions, ok? {counted_preds == expected_preds}")
print(f"There are {counted_gt} ground truth. There should be {expected_gt} ground truth, ok? {counted_gt == expected_gt}")

There are 445 predictions. There should be 445 predictions, ok? True
There are 256 ground truth. There should be 256 ground truth, ok? True


**Find TP, FP, FN for the different sizes**

In [229]:
# Same tally as the class-wise call, but grouped by size (type="size").
# NOTE: this rebinds tp / fp / fn, replacing the class-wise counts.
tp, fp, fn = calculate_tp_fp_fn(predictions, gt, iou_threshold=0.25, type="size")

**TP**

In [230]:
# True-positive counts per size group, then their total as the cell's output.
print(tp)
sum(tp.values())

{'small': 75, 'medium': 13, 'large': 3}


91

**FP**

In [231]:
# False-positive counts per size group, then their total as the cell's output.
print(fp)
sum(fp.values())

{'small': 179, 'medium': 120, 'large': 55}


354

**FN**

In [232]:
# False-negative counts per size group, then their total as the cell's output.
print(fn)
sum(fn.values())

{'small': 128, 'medium': 26, 'large': 11}


165

**Check that the number of predictions and gt match, ensuring the extraction worked correctly**

In [233]:
# Check that the number of TP, FP and FN are correct:
# TP + FP must equal the number of predicted labels, and TP + FN must equal
# the number of ground-truth labels.
counted_preds = sum(tp.values()) + sum(fp.values())
expected_preds = sum(len(pred['labels']) for pred in predictions)
counted_gt = sum(tp.values()) + sum(fn.values())
# The loop variable is named `target` so it no longer shadows the `gt` list.
expected_gt = sum(len(target['labels']) for target in gt)
print(f"There are {counted_preds} predictions. There should be {expected_preds} predictions, ok? {counted_preds == expected_preds}")
print(f"There are {counted_gt} ground truth. There should be {expected_gt} ground truth, ok? {counted_gt == expected_gt}")

There are 445 predictions. There should be 445 predictions, ok? True
There are 256 ground truth. There should be 256 ground truth, ok? True
