In [1]:
import torch
import torchvision
print(torch.__version__)
print(torchvision.__version__)

2.0.1+cu118
0.15.2+cu118


In [2]:
device = 'cuda' if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
pretrained_vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

pretrianed_vit = torchvision.models.vit_b_16(weights=pretrained_vit_weights).to(device)

In [6]:
import torch.nn as nn
# for parameter in pretrianed_vit.parameters():
#     parameter.requires_grad = False

pretrianed_vit.heads = nn.Linear(in_features=768, out_features=2).to(device)

In [7]:
### Preparing data for vit model
from pathlib import Path

# Setting up dataset path
data_path = Path('data')
# Setup train val data
train_dir = data_path / 'train'
val_dir = data_path / 'val'
test_dir = data_path / 'test'

train_dir, val_dir, test_dir

(PosixPath('data/train'), PosixPath('data/val'), PosixPath('data/test'))

In [8]:
# Transform the data
from torch.utils.data import DataLoader
from torchvision import transforms


# Data augmentation we will do
vit_transforms = pretrained_vit_weights.transforms()


train_transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=(0, 90)),
    vit_transforms
])


# Implementing test transforms with basic augmentation
test_transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    vit_transforms
])


In [9]:
# Import dataset
from torchvision import datasets

train_data = datasets.ImageFolder(train_dir, transform=train_transform)
val_data = datasets.ImageFolder(val_dir, transform=test_transform)
test_data = datasets.ImageFolder(test_dir, transform=test_transform)

class_names = train_data.classes

In [10]:
# Turning Dataset into dataloaders
from torch.utils.data import DataLoader

# Setup batch size and number of workers 
BATCH_SIZE = 16
NUM_WORKERS = os.cpu_count()

train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [27]:
pretrianed_vit = torch.nn.DataParallel(pretrianed_vit)
pretrianed_vit.load_state_dict(torch.load('runs/pretrained_vit_b16/20231127-184722/models/best.pt'), strict=False)

_IncompatibleKeys(missing_keys=['module.class_token', 'module.conv_proj.weight', 'module.conv_proj.bias', 'module.encoder.pos_embedding', 'module.encoder.layers.encoder_layer_0.ln_1.weight', 'module.encoder.layers.encoder_layer_0.ln_1.bias', 'module.encoder.layers.encoder_layer_0.self_attention.in_proj_weight', 'module.encoder.layers.encoder_layer_0.self_attention.in_proj_bias', 'module.encoder.layers.encoder_layer_0.self_attention.out_proj.weight', 'module.encoder.layers.encoder_layer_0.self_attention.out_proj.bias', 'module.encoder.layers.encoder_layer_0.ln_2.weight', 'module.encoder.layers.encoder_layer_0.ln_2.bias', 'module.encoder.layers.encoder_layer_0.mlp.0.weight', 'module.encoder.layers.encoder_layer_0.mlp.0.bias', 'module.encoder.layers.encoder_layer_0.mlp.3.weight', 'module.encoder.layers.encoder_layer_0.mlp.3.bias', 'module.encoder.layers.encoder_layer_1.ln_1.weight', 'module.encoder.layers.encoder_layer_1.ln_1.bias', 'module.encoder.layers.encoder_layer_1.self_attention.in

In [28]:
from pytorch_trainer.engine import test
optimizer = torch.optim.Adam(pretrianed_vit.parameters(),
                            lr=1e-3,
                            betas=(0.9,0.999),
                            weight_decay=0.1)
loss_fn = torch.nn.CrossEntropyLoss()

test_hash = test(model=pretrianed_vit, test_dataloader=test_dataloader,
                             loss_fn=loss_fn, device=device)

Making predictions: 100%|██████████| 1/1 [00:01<00:00,  1.23s/it]


In [29]:
from datetime import datetime
meta = {'epochs': 10, 'val_acc': None,
                    'test_acc': f'{test_hash["test_accuracy"]*100:.2f}%',
                    'test_loss': test_hash['test_loss'],
                    'date': datetime.now().isoformat()}

In [48]:
from sklearn.metrics import classification_report
generated_report = classification_report(test_dataloader.dataset.targets,test_hash['prediction_tensors'].numpy(), target_names=class_names, output_dict=True)

In [49]:
print(generated_report)

{'missing': {'precision': 0.7272727272727273, 'recall': 0.8, 'f1-score': 0.761904761904762, 'support': 10.0}, 'present': {'precision': 0.6, 'recall': 0.5, 'f1-score': 0.5454545454545454, 'support': 6.0}, 'accuracy': 0.6875, 'macro avg': {'precision': 0.6636363636363636, 'recall': 0.65, 'f1-score': 0.6536796536796536, 'support': 16.0}, 'weighted avg': {'precision': 0.6795454545454546, 'recall': 0.6875, 'f1-score': 0.6807359307359307, 'support': 16.0}}


In [50]:
generated_report['missing']

{'precision': 0.7272727272727273,
 'recall': 0.8,
 'f1-score': 0.761904761904762,
 'support': 10.0}

In [51]:
generated_report['present']

{'precision': 0.6,
 'recall': 0.5,
 'f1-score': 0.5454545454545454,
 'support': 6.0}

In [52]:
generated_report['macro avg']

{'precision': 0.6636363636363636,
 'recall': 0.65,
 'f1-score': 0.6536796536796536,
 'support': 16.0}

In [53]:
generated_report['weighted avg']

{'precision': 0.6795454545454546,
 'recall': 0.6875,
 'f1-score': 0.6807359307359307,
 'support': 16.0}

In [55]:
generated_report.values()

dict_values([{'precision': 0.7272727272727273, 'recall': 0.8, 'f1-score': 0.761904761904762, 'support': 10.0}, {'precision': 0.6, 'recall': 0.5, 'f1-score': 0.5454545454545454, 'support': 6.0}, 0.6875, {'precision': 0.6636363636363636, 'recall': 0.65, 'f1-score': 0.6536796536796536, 'support': 16.0}, {'precision': 0.6795454545454546, 'recall': 0.6875, 'f1-score': 0.6807359307359307, 'support': 16.0}])

In [77]:
generated_report['accuracy']

0.6875

In [57]:
metrics = generated_report

In [58]:
keys, vals = list(metrics.keys()), list(metrics.values())

In [96]:
header_exist = False
for key in keys:
    if key == 'accuracy':
        continue
    row_heads = list(metrics[key].keys())
    row_values = list(metrics[key].values())
    n=len(row_heads) + 1
    s = '' if header_exist else (('%23s,' * n % tuple(['class'] + row_heads)).rstrip(',') + '\n')  # header
    header_exist = True
    with open('test.csv', 'a') as f:
                f.write(s + ('%23s,' * n % tuple([key] + row_values)).rstrip(',') + '\n')
with open('test.csv', 'a') as f:
    f.write(('%23s,' * n % tuple(['accuracy'] + [str(metrics['accuracy'])] + [' ' for i in range(n-2)])).rstrip(',') + '\n')

'               accuracy,                 0.6875,                       ,                       ,                       ,'

In [75]:
metrics.items()

dict_items([('missing', {'precision': 0.7272727272727273, 'recall': 0.8, 'f1-score': 0.761904761904762, 'support': 10.0}), ('present', {'precision': 0.6, 'recall': 0.5, 'f1-score': 0.5454545454545454, 'support': 6.0}), ('accuracy', 0.6875), ('macro avg', {'precision': 0.6636363636363636, 'recall': 0.65, 'f1-score': 0.6536796536796536, 'support': 16.0}), ('weighted avg', {'precision': 0.6795454545454546, 'recall': 0.6875, 'f1-score': 0.6807359307359307, 'support': 16.0})])

In [76]:
for k, v in metrics.items():
    print(k)

missing
present
accuracy
macro avg
weighted avg
