In [3]:
from transformers import (
    ViTForImageClassification,
    ViTFeatureExtractor,
    ViTImageProcessor,
)

from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

import torchvision.transforms as trans

import torch

from PIL import Image

In [4]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, jaccard_score, confusion_matrix

In [5]:
import os
import shutil
import math

In [6]:
# Create the image processor
model_name_or_path: str = 'google/vit-base-patch16-224-in21k'
cache_dir: str = None
model_revision: str = 'main'
use_auth_token: bool = False

image_processor = ViTImageProcessor.from_pretrained(
    model_name_or_path,
    cache_dir=cache_dir,
    revision=model_revision,
    use_auth_token=use_auth_token,
)

# Define torchvision transforms to be applied to each image.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

_test_transforms = Compose(
    [
        Resize(size),
        CenterCrop(size),
        ToTensor(),
        normalize,
    ]
)

In [7]:
def get_model(model_path):
    model = ViTForImageClassification.from_pretrained(model_path, local_files_only=True)
    return model

In [15]:
def compute_logits(model, filename, transforms=_test_transforms):
    image = Image.open(filename)
    
    if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
        image = image.convert('RGB')

    processed = image_processor(image)
    processed.pixel_values = transforms(image.convert('RGB'))
    outputs = model(torch.reshape(processed.pixel_values, (1, 3, 224, 224)))

    # obtain the class
    return outputs.logits

In [9]:
def make_prediction(model, filename):
    logits = compute_logits(model, filename)

    prediction = logits.argmax(-1)
    
    return model.config.id2label[prediction.item()]

In [10]:
def compute_model_performance(model_dir, bread_test_dir='test/bread',
                              not_bread_test_dir='test/not_bread'):
    model_results: list[str,str,str] = [
        #path, predicted_class, expected_class
    ]
    model = get_model(model_dir)

    if bread_test_dir:
        for dirpath, _, filenames in os.walk(bread_test_dir):
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                prediction = make_prediction(model, path)
                model_results.append((path, prediction, 'bread'))

    if not_bread_test_dir:
        for dirpath, _, filenames in os.walk(not_bread_test_dir):
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                prediction = make_prediction(model, path)
                model_results.append((path, prediction, 'not_bread'))
            
    true_labels = [res[2] for res in model_results]
    pred_labels = [res[1] for res in model_results]

    # Compute accuracy, precision, recall, and Jaccard score
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, pos_label='bread')
    recall = recall_score(true_labels, pred_labels, pos_label='bread')
    jaccard = jaccard_score(true_labels, pred_labels, pos_label='bread')

    # Print the results
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("Jaccard score:", jaccard)
    
    print(f'Examples of bread:     {len([e for e in true_labels if e == "bread"])}')
    print(f'Examples of not bread: {len([e for e in true_labels if e == "not_bread"])}')
    
    cm = confusion_matrix(true_labels, pred_labels)
    print(cm)
    
    return model_results

In [11]:
def group_results_dir(results):
    false_negatives = [f for f, p, a in results if p != a]
    dirname = 'false_negatives'

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    for file in false_negatives:
        new_path = os.path.join(dirname, f'retrain_{os.path.basename(file)}')
        shutil.copy(file, new_path)
        
        
    dirname = 'positives'

    bread = [f for f, p, a in results if p == a and p == 'bread']

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    for file in bread:
        new_path = os.path.join(dirname, f'retrain_{os.path.basename(file)}')
        shutil.copy(file, new_path)
        
        
    dirname = 'true_negatives'

    true_negatives = [f for f, p, a in results if p == a and a != 'bread']

    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    for file in true_negatives:
        new_path = os.path.join(dirname, f'retrain_{os.path.basename(file)}')
        shutil.copy(file, new_path)

In [12]:
def evaluate_test_data(model_name):
    compute_model_performance(model_name, bread_test_dir='data/test', not_bread_test_dir=None)
    
    abs_diff = lambda t: abs(t.tolist()[0][1] - t.tolist()[0][0])

    for dirpath, _, filenames in os.walk('data/test'):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            tensor = compute_logits(get_model(model_name), path, transforms=_test_transforms)
            print(path, tensor, abs_diff(tensor))

## Access the original model

In [8]:
original_model_results = compute_model_performance('outputs')

ValueError: Unable to infer channel dimension format

## Access Kesley's Model
#### The data was just as impure, but had a few more examples of bread

In [None]:
kesley_model_results = compute_model_performance('kesley_2070_output1')

## Access Ray's Model
#### For this model he cleaned up the data.  A few additional bread examples.

In [10]:
ray_model_results = compute_model_performance('ray_output1')

Accuracy: 0.767487684729064
Precision: 1.0
Recall: 0.7086419753086419
Jaccard score: 0.7086419753086419
Examples of bread:     810
Examples of not bread: 205
[[574 236]
 [  0 205]]


In [38]:
group_results_dir(ray_model_results)

## Access the first 'tuned' model
#### This model was trained against the additional true positives and false negatives from the previous analysis

In [22]:
tuned_model_results = compute_model_performance('tuned_model')

Accuracy: 0.9251231527093596
Precision: 1.0
Recall: 0.9061728395061729
Jaccard score: 0.9061728395061729
Examples of bread:     810
Examples of not bread: 205
[[734  76]
 [  0 205]]


In [None]:
group_results_dir(tuned_model_results)

### Test the first 'tuned' model against hand taken pictures of bread.

In [26]:
compute_model_performance('tuned_model', bread_test_dir='data/test', not_bread_test_dir=None)

Accuracy: 0.0
Precision: 0.0
Recall: 0.0
Jaccard score: 0.0
Examples of bread:     6
Examples of not bread: 0
[[0 6]
 [0 0]]


  _warn_prf(average, modifier, msg_start, len(result))


[('data/test/ben_bread1.jpg', 'not_bread', 'bread'),
 ('data/test/ben_bread4.jpg', 'not_bread', 'bread'),
 ('data/test/ben_bread3.jpg', 'not_bread', 'bread'),
 ('data/test/ben_bread2.jpg', 'not_bread', 'bread'),
 ('data/test/ben_bread6.jpg', 'not_bread', 'bread'),
 ('data/test/ben_bread5.jpg', 'not_bread', 'bread')]

### Investigate how confident the model is in its decisions

In [28]:
evaluate_test_data('tuned_model')

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0
Precision: 0.0
Recall: 0.0
Jaccard score: 0.0
Examples of bread:     6
Examples of not bread: 0
[[0 6]
 [0 0]]
data/test/ben_bread1.jpg tensor([[-3.9181,  3.7298]], grad_fn=<AddmmBackward0>) 7.647916555404663
data/test/ben_bread4.jpg tensor([[-4.6839,  4.6427]], grad_fn=<AddmmBackward0>) 9.326653480529785
data/test/ben_bread3.jpg tensor([[-4.6632,  4.5624]], grad_fn=<AddmmBackward0>) 9.225627422332764
data/test/ben_bread2.jpg tensor([[-5.2045,  5.0825]], grad_fn=<AddmmBackward0>) 10.287071704864502
data/test/ben_bread6.jpg tensor([[-5.1308,  5.0473]], grad_fn=<AddmmBackward0>) 10.17812967300415
data/test/ben_bread5.jpg tensor([[-5.2100,  5.0623]], grad_fn=<AddmmBackward0>) 10.272348880767822


## Investigate performance of model which applied various transformations to the training data

In [15]:
tuned_model_results = compute_model_performance('transform_test1')

Accuracy: 0.9280788177339901
Precision: 1.0
Recall: 0.9098765432098765
Jaccard score: 0.9098765432098765
Examples of bread:     810
Examples of not bread: 205
[[737  73]
 [  0 205]]


In [29]:
evaluate_test_data('transform_test1')

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0
Precision: 0.0
Recall: 0.0
Jaccard score: 0.0
Examples of bread:     6
Examples of not bread: 0
[[0 6]
 [0 0]]
data/test/ben_bread1.jpg tensor([[-4.1048,  4.0504]], grad_fn=<AddmmBackward0>) 8.155181884765625
data/test/ben_bread4.jpg tensor([[-4.6098,  4.4740]], grad_fn=<AddmmBackward0>) 9.083780288696289
data/test/ben_bread3.jpg tensor([[-3.6970,  3.6155]], grad_fn=<AddmmBackward0>) 7.312503099441528
data/test/ben_bread2.jpg tensor([[-4.0864,  3.9890]], grad_fn=<AddmmBackward0>) 8.075334787368774
data/test/ben_bread6.jpg tensor([[-4.2918,  4.2009]], grad_fn=<AddmmBackward0>) 8.492727756500244
data/test/ben_bread5.jpg tensor([[-4.6298,  4.4756]], grad_fn=<AddmmBackward0>) 9.105485439300537


## Evaluate model with more robust transforms

In [11]:
tuned_model_results = compute_model_performance('transform_test2')

Accuracy: 0.9310344827586207
Precision: 1.0
Recall: 0.9135802469135802
Jaccard score: 0.9135802469135802
Examples of bread:     810
Examples of not bread: 205
[[740  70]
 [  0 205]]


In [12]:
evaluate_test_data('transform_test2')

Accuracy: 0.5
Precision: 1.0
Recall: 0.5
Jaccard score: 0.5
Examples of bread:     40
Examples of not bread: 0
[[20 20]
 [ 0  0]]
data/test/DALL·E 2023-04-16 19.25.02.png tensor([[-2.3647,  2.1788]], grad_fn=<AddmmBackward0>) 4.543483734130859
data/test/DALL·E 2023-04-16 19.24.31 - Amateur quality picture of store bought bread .png tensor([[ 3.2896, -2.9382]], grad_fn=<AddmmBackward0>) 6.227869033813477
data/test/DALL·E 2023-04-16 19.24.39 - Amateur quality picture of store bought bread .png tensor([[ 3.5672, -3.3575]], grad_fn=<AddmmBackward0>) 6.924655437469482
data/test/DALL·E 2023-03-27 18.25.43.png tensor([[ 3.5594, -3.3478]], grad_fn=<AddmmBackward0>) 6.907146215438843
data/test/DALL·E 2023-04-16 19.27.31.png tensor([[ 2.0394, -1.8505]], grad_fn=<AddmmBackward0>) 3.8899019956588745
data/test/ben_bread1.jpg tensor([[-4.1550,  3.9975]], grad_fn=<AddmmBackward0>) 8.152436256408691
data/test/DALL·E 2023-04-16 19.24.23 - Amateur quality picture of wonderbread .png tensor([[ 2.7590, -2

In [18]:
evaluate_test_data('transform_gan1')

Accuracy: 0.6153846153846154
Precision: 1.0
Recall: 0.6153846153846154
Jaccard score: 0.6153846153846154
Examples of bread:     52
Examples of not bread: 0
[[32 20]
 [ 0  0]]
data/test/DALL·E 2023-04-16 19.25.02.png tensor([[ 2.8928, -2.5842]], grad_fn=<AddmmBackward0>) 5.476934432983398
data/test/494.png tensor([[-4.0470,  3.7243]], grad_fn=<AddmmBackward0>) 7.771250247955322
data/test/DALL·E 2023-04-16 19.24.31 - Amateur quality picture of store bought bread .png tensor([[ 3.1003, -2.8235]], grad_fn=<AddmmBackward0>) 5.923765659332275
data/test/DALL·E 2023-04-16 19.24.39 - Amateur quality picture of store bought bread .png tensor([[ 3.6781, -3.3804]], grad_fn=<AddmmBackward0>) 7.058478116989136
data/test/DALL·E 2023-03-27 18.25.43.png tensor([[ 3.6981, -3.3784]], grad_fn=<AddmmBackward0>) 7.076551675796509
data/test/DALL·E 2023-04-16 19.27.31.png tensor([[ 3.1161, -2.8003]], grad_fn=<AddmmBackward0>) 5.916431188583374
data/test/ben_bread1.jpg tensor([[-2.3267,  2.0766]], grad_fn=<Add

In [16]:
tuned_model_results = compute_model_performance('transform_gan2')

Accuracy: 0.9399014778325123
Precision: 0.9986684420772304
Recall: 0.9259259259259259
Jaccard score: 0.9247842170160296
Examples of bread:     810
Examples of not bread: 205
[[750  60]
 [  1 204]]


In [17]:
evaluate_test_data('transform_gan2')

Accuracy: 0.7884615384615384
Precision: 1.0
Recall: 0.7884615384615384
Jaccard score: 0.7884615384615384
Examples of bread:     52
Examples of not bread: 0
[[41 11]
 [ 0  0]]
data/test/DALL·E 2023-04-16 19.25.02.png tensor([[ 3.2798, -3.0134]], grad_fn=<AddmmBackward0>) 6.293270826339722
data/test/494.png tensor([[-3.6405,  3.3382]], grad_fn=<AddmmBackward0>) 6.978690147399902
data/test/DALL·E 2023-04-16 19.24.31 - Amateur quality picture of store bought bread .png tensor([[ 3.2421, -2.9930]], grad_fn=<AddmmBackward0>) 6.235102415084839
data/test/DALL·E 2023-04-16 19.24.39 - Amateur quality picture of store bought bread .png tensor([[ 3.4635, -3.1351]], grad_fn=<AddmmBackward0>) 6.598620891571045
data/test/DALL·E 2023-03-27 18.25.43.png tensor([[ 3.5058, -3.1913]], grad_fn=<AddmmBackward0>) 6.697089433670044
data/test/DALL·E 2023-04-16 19.27.31.png tensor([[ 3.2895, -2.9680]], grad_fn=<AddmmBackward0>) 6.257474184036255
data/test/ben_bread1.jpg tensor([[-2.0123,  1.8067]], grad_fn=<Add

In [21]:
tuned_model_results = compute_model_performance('transform_gan2', bread_test_dir='data/test', not_bread_test_dir=None)
group_results_dir(tuned_model_results)

Accuracy: 0.7884615384615384
Precision: 1.0
Recall: 0.7884615384615384
Jaccard score: 0.7884615384615384
Examples of bread:     52
Examples of not bread: 0
[[41 11]
 [ 0  0]]
