In [1]:
import sys
# Extend the module search path with the shared tooling directory
# (presumably where the `helper` module imported below lives - confirm).
# The path is relative, so it is resolved against the kernel's working directory.
sys.path.append('../../30_data_tools/')

In [2]:
import torch
from pathlib import Path
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from ultralytics import YOLO
from helper import load_dotenv
import pandas as pd
import shutil
from tqdm.auto import tqdm

In [3]:
# Load the project configuration through the local `helper` module.
# The result is indexed like a mapping further down (dotenv['MODEL_DIR']).
dotenv = load_dotenv()

In [4]:
# Checkpoint files of the two classifiers, both stored in the configured model directory.
model_dir = dotenv['MODEL_DIR']
resnet_path = model_dir / '24-02-24_01_resNet.pt'
yolo_path = model_dir / 'yolov8m_moires_24-02-24.pt'

In [5]:
# SECURITY NOTE: torch.load unpickles arbitrary Python objects - only load
# checkpoints that come from a trusted source.
# NOTE(review): the checkpoint is used as a ready-made module below (resnet.eval()),
# i.e. it is a fully pickled model. Recent PyTorch releases default to
# weights_only=True, which rejects such files - confirm against the installed version.
#
# map_location='cpu': predict_resnet feeds the model tensors that were created on
# the CPU, so the model has to live on the CPU regardless of the device it was
# saved from.
resnet = torch.load( resnet_path, map_location='cpu' )
yolo = YOLO(yolo_path)

In [12]:
# The notebook only runs inference, so switch the network to evaluation mode.
resnet.eval()

# Preprocessing applied to every image before it is passed to the ResNet.
# NOTE(review): the original remark on the resize step read "must same as here" -
# presumably the size has to match the one used during training; confirm.
input_size = (224, 224)
channel_mean = [0.5, 0.5, 0.5]
channel_std = [0.5, 0.5, 0.5]

transforms_data = transforms.Compose([
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),  # per-channel (x - 0.5) / 0.5
])

In [30]:
# Collect the JPEG files of both validation sets ('val' first, then 'real_val'),
# separately for the two classes.
moire_paths = [
    img_path
    for directory in ('./dataset/val/moire/', './dataset/real_val/moire/')
    for img_path in Path(directory).glob('./*.jpg')
]
non_moire_paths = [
    img_path
    for directory in ('./dataset/val/no_moire/', './dataset/real_val/no_moire/')
    for img_path in Path(directory).glob('./*.jpg')
]

In [10]:
def predict_resnet( img_paths, model, transformation ):
    """
    Classify a batch of images with the ResNet model.

    Parameters
    ----------
    img_paths : iterable of path-like
        Image files to classify.
    model : callable
        Called once with the stacked batch tensor; its result must offer
        ``tolist()`` and contain one score row per image.
    transformation : callable
        Preprocessing applied to each PIL image. It must return tensors of
        identical shape so they can be stacked into one batch.

    Returns
    -------
    list of list of float
        One row per image, in input order, holding the model's scores in
        REVERSED order. An empty input yields an empty list.
        NOTE(review): the reversal presumably turns a (moire, no_moire) model
        output into the (no_moire, moire) order the notebook stores - confirm
        against the class order used for training.
    """
    img_paths = list(img_paths)
    if not img_paths:
        # torch.stack raises on an empty sequence, so answer directly.
        return []

    tensors = []
    for img_path in img_paths:
        # The context manager closes the file handle again; the transformation
        # has to run inside it because PIL loads pixel data lazily.
        with Image.open(img_path) as img:
            # Force three channels so grayscale/CMYK files do not break a
            # 3-channel normalisation; RGB images are unaffected.
            tensors.append(transformation(img.convert('RGB')))
    input_tensor = torch.stack(tensors)

    # Pure inference: no autograd graph needed.
    with torch.no_grad():
        output_tensor = model(input_tensor)

    return [scores[::-1] for scores in output_tensor.tolist()]

In [9]:
def predict_yolo( img_paths, model ):
    """
    Classify a batch of images with the YOLO classification model.

    Parameters
    ----------
    img_paths : iterable of path-like
        Image files to classify.
    model : object
        Must provide ``predict(images, verbose=False)`` returning one result
        per image, each exposing its class scores as ``result.probs.data``.

    Returns
    -------
    list of list of float
        One row per image, in input order, holding the model's scores in
        REVERSED order.
        NOTE(review): the reversal presumably turns a (moire, no_moire) model
        output into the (no_moire, moire) order the notebook stores - confirm
        against the class order used for training.
    """
    images = [Image.open(img_path) for img_path in img_paths]

    try:
        pred = model.predict(images, verbose=False)
    finally:
        # Release the file handles even when the prediction fails.
        for image in images:
            image.close()

    return [p.probs.data.tolist()[::-1] for p in pred]

In [31]:
# One row per validation image: target_label is 0 for 'no_moire' and 1 for 'moire'.
labelled_paths = (
    [(img_path, 0) for img_path in non_moire_paths]
    + [(img_path, 1) for img_path in moire_paths]
)

data = pd.DataFrame(
    # Path.stem removes only the final suffix; replacing the suffix string would
    # also strip it from the middle of a name such as 'a.jpg_copy.jpg'.
    [(img_path.stem, img_path, label) for img_path, label in labelled_paths],
    columns=['filename','filepath','target_label']
)

# The score columns start out as NaN; the inference cell treats a NaN in
# 'resnet_no_moire' as "not predicted yet".
# Assigning column by column does not rely on .loc enlarging the frame with
# several new columns at once.
for score_column in ['resnet_no_moire','resnet_moire','yolo_no_moire','yolo_moire']:
    data[score_column] = np.nan

In [32]:
BATCH_SIZE = 15
score_columns = ['resnet_no_moire','resnet_moire','yolo_no_moire','yolo_moire']

# Rows without a ResNet score still have to be predicted. The list is fixed once
# up front, so the loop always terminates - even if a model returns NaN scores -
# and the batches are deterministic instead of randomly sampled.
pending_index = data.index[data.resnet_no_moire.isna()]

with tqdm(total=data.shape[0]) as pbar:
    for start in range(0, len(pending_index), BATCH_SIZE):
        batch_index = pending_index[start:start + BATCH_SIZE]
        batch_paths = data.loc[batch_index, 'filepath'].tolist()

        yolo_result = predict_yolo( batch_paths, yolo )
        resnet_result = predict_resnet( batch_paths, resnet, transforms_data )

        # Both helpers return one [no_moire, moire] row per image, in input order.
        data.loc[batch_index, score_columns] = [
            (resnet_scores[0], resnet_scores[1], yolo_scores[0], yolo_scores[1])
            for resnet_scores, yolo_scores in zip(resnet_result, yolo_result)
        ]

        pbar.update(len(batch_index))

  0%|          | 0/4490 [00:00<?, ?it/s]

In [33]:
# Predicted label per model: 1 when the moire score is strictly greater than
# the no_moire score, otherwise 0.
for model_name in ('resnet', 'yolo'):
    predicts_moire = data[f'{model_name}_moire'] > data[f'{model_name}_no_moire']
    data.loc[:, f'{model_name}_label'] = predicts_moire.astype('uint8')

In [44]:
def _dataset_name(fpath):
    """Return the name of the directory two levels above the file."""
    return fpath.parent.parent.name


# e.g. 'dataset/val/moire/x.jpg' -> 'val'
data.loc[:, 'dataset'] = data.filepath.apply(_dataset_name)

In [34]:
# Sanity check: (number of rows, number of columns) of the result table.
data.shape

(4490, 9)

In [37]:
def get_metrics( data, target_model ):
    """
    Compute confusion-matrix counts and derived metrics for one model.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``target_label`` and ``<target_model>_label``.
        Label 1 is the positive class, label 0 the negative class; rows with
        any other value are counted in none of the four cells.
    target_model : str
        Prefix of the prediction column, e.g. ``'resnet'`` or ``'yolo'``.

    Returns
    -------
    dict
        Keys ``TP``, ``TN``, ``FP``, ``FN`` (int) and ``accuracy``, ``recall``,
        ``precision`` (float). A ratio whose denominator is zero - e.g. recall
        on a subset without positive samples - is reported as NaN instead of
        raising ZeroDivisionError.
    """
    truth = data.target_label
    predicted = data[f'{ target_model }_label']

    def _count(truth_value, predicted_value):
        # Number of rows with the given (truth, prediction) combination.
        return int(((truth == truth_value) & (predicted == predicted_value)).sum())

    def _ratio(numerator, denominator):
        # Undefined ratios become NaN rather than aborting the notebook.
        return numerator / denominator if denominator else float('nan')

    metrics = {
        'TP': _count(1, 1),
        'TN': _count(0, 0),
        'FP': _count(0, 1),
        'FN': _count(1, 0),
    }

    n_counted = metrics['TP'] + metrics['TN'] + metrics['FP'] + metrics['FN']
    metrics['accuracy'] = _ratio(metrics['TP'] + metrics['TN'], n_counted)
    metrics['recall'] = _ratio(metrics['TP'], metrics['TP'] + metrics['FN'])
    metrics['precision'] = _ratio(metrics['TP'], metrics['TP'] + metrics['FP'])

    return metrics

In [51]:
# YOLO metrics, restricted to the images whose dataset folder is 'val'.
val_rows = data[data.dataset == 'val']
get_metrics(val_rows, 'yolo')

{'TP': 222,
 'TN': 286,
 'FP': 15,
 'FN': 51,
 'accuracy': 0.8850174216027874,
 'recall': 0.8131868131868132,
 'precision': 0.9367088607594937}

In [52]:
# ResNet metrics, restricted to the images whose dataset folder is 'val'.
val_rows = data[data.dataset == 'val']
get_metrics(val_rows, 'resnet')

{'TP': 243,
 'TN': 259,
 'FP': 42,
 'FN': 30,
 'accuracy': 0.8745644599303136,
 'recall': 0.8901098901098901,
 'precision': 0.8526315789473684}

In [27]:
# Split the images by which of the two models predicted the target label.
resnet_correct = data.target_label == data.resnet_label
yolo_correct = data.target_label == data.yolo_label

sets_out = {
    'both_right' : data.loc[resnet_correct & yolo_correct],
    'resnet_right' : data.loc[resnet_correct & ~yolo_correct],
    'yolo_right' : data.loc[~resnet_correct & yolo_correct],
    'both_wrong' : data.loc[~resnet_correct & ~yolo_correct],
}

In [None]:
# Show the first row to inspect the available columns and their values.
data.iloc[0]

In [29]:
# Share of images that at least one of the two models classifies correctly.
# Derived from sets_out instead of hand-copied counts, so the value follows the data.
n_total = sum(len(subset) for subset in sets_out.values())
n_any_right = n_total - len(sets_out['both_wrong'])
n_any_right / n_total

0.945993031358885

In [28]:
# Number of images in each agreement group.
for set_name, subset in sets_out.items():
    print( set_name, len(subset) )

both_right 467
resnet_right 35
yolo_right 41
both_wrong 31


In [None]:
# Copy every image into ./temp/<set_name>/<moire|no_moire>/ for manual inspection.
# NOTE(review): files are stored under their bare name, so images from different
# source folders that share a file name would overwrite each other - confirm
# that the names are unique across 'val' and 'real_val'.
for set_name, subset in tqdm(sets_out.items()):
    set_dir = Path(f'./temp/{ set_name }')

    # parents=True also creates ./temp on a fresh checkout; exist_ok=True makes
    # re-running the cell safe without a separate existence check.
    set_dir.mkdir(parents=True, exist_ok=True)

    for row in subset.itertuples(index=False):
        in_path = row.filepath
        type_name = 'moire' if row.target_label == 1 else 'no_moire'

        out_dir = set_dir / type_name
        out_dir.mkdir(parents=True, exist_ok=True)

        shutil.copy(
            in_path,
            out_dir / in_path.name
        )