# GI Classifiers

Done by:

1. Luis David Solano Santamaría
2. Angie Solís Manzano
3. Emilia Víquez Mora

## Environment

In [31]:
import torch
import torchvision

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch import nn
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score #TODO(us): Add other metrics

from vit import VitBase16

## Required functions

In [32]:
def train_vit(model, train_loader, epochs, lr, device):
    """Train ``model`` on ``train_loader`` and report per-epoch training metrics.

    Every epoch performs one full pass over ``train_loader`` with AdamW and
    cross-entropy loss, then prints the mean batch loss together with the
    accuracy and macro-averaged recall of the predictions made during that
    pass (i.e. while the weights were still being updated).

    Args:
        model: Network to train. It is called as ``model(images)`` and its
            output is reduced with ``argmax`` over dimension 1 to get the
            predicted class of each sample.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of passes over ``train_loader``; must be at least 1.
        lr: Learning rate handed to AdamW.
        device: Device each batch is moved to. The model itself is not moved
            by this function.

    Returns:
        dict with the keys ``'accuracy'`` and ``'recall'`` holding the metrics
        of the last epoch.

    Raises:
        ValueError: If ``epochs`` is smaller than 1, or if ``train_loader``
            yields no batches (no metrics could be computed in either case).
    """
    # Without at least one epoch the metrics below would never be bound.
    if epochs < 1:
        raise ValueError(f'epochs must be at least 1, got {epochs}')

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    dictionary = {}

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        all_preds = []
        all_labels = []

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

        # An empty loader would otherwise end in a division by zero below.
        if num_batches == 0:
            raise ValueError('train_loader produced no batches; cannot compute metrics')

        accuracy = accuracy_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds, average='macro') # TODO(us): find which is the best average suited to our problem
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/num_batches:.4f}')
        print(f'\tAccuracy: {accuracy*100:.2f}%')
        print(f'\tRecall: {recall*100:.2f}%')

        # Overwritten every epoch, so the caller receives the last epoch's metrics.
        dictionary['accuracy'] = accuracy
        dictionary['recall'] = recall

    return dictionary

In [33]:
def test_vit(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    dictionary = {}

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds, average='macro')
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test Recall: {recall * 100:.2f}%")
    dictionary['accuracy'] = accuracy
    dictionary['recall'] = recall

    return dictionary

In [34]:
def train_test_vit(path, num_classes, device, transform, batch_size, epochs, lr):
    """Train a fresh ViT on one fold and evaluate it on that fold's test split.

    Images are read with ``ImageFolder`` from ``../cross_splitted/<path>/train``
    and ``../cross_splitted/<path>/test``; the same ``transform`` is applied to
    both splits. Only the training loader is shuffled.

    Args:
        path: Fold directory name under ``../cross_splitted`` (e.g. ``'fold_1'``).
        num_classes: Number of output classes passed to ``VitBase16``.
        device: Device the model is moved to and the batches are sent to.
        transform: Transform applied to every image of both splits.
        batch_size: Batch size of both data loaders.
        epochs: Number of training epochs.
        lr: Learning rate used for training.

    Returns:
        dict with the keys ``'train'`` and ``'test'``, holding the metric
        dictionaries returned by ``train_vit`` and ``test_vit`` respectively.
    """
    # The model is built before any data is touched, as in the original order.
    model = VitBase16(num_classes=num_classes, device=device).to(device)

    train_images = datasets.ImageFolder(f'../cross_splitted/{path}/train', transform=transform)
    test_images = datasets.ImageFolder(f'../cross_splitted/{path}/test', transform=transform)

    train_batches = DataLoader(train_images, batch_size=batch_size, shuffle=True)
    test_batches = DataLoader(test_images, batch_size=batch_size, shuffle=False)

    train_metrics = train_vit(model, train_batches, device=device, epochs=epochs, lr=lr)
    test_metrics = test_vit(model, test_batches, device=device)

    return {'train': train_metrics, 'test': test_metrics}

In [35]:
def cross_validate_vit(k, *, num_classes=None, device=None, transform=None,
                       batch_size=None, epochs=None, lr=None):
    """Run k-fold cross-validation of the ViT and print the averaged metrics.

    One model is trained and evaluated per fold directory ``fold_1`` ...
    ``fold_k`` through ``train_test_vit``; accuracy and recall are then
    averaged over the folds for both the training and the test split.

    Args:
        k: Number of folds to run; must be at least 1.
        num_classes, device, transform, batch_size, epochs, lr: Settings
            forwarded to ``train_test_vit``. Any of them left as ``None`` falls
            back to the notebook-level ``NUM_CLASSES``, ``DEVICE``,
            ``transform``, ``BATCH_SIZE``, ``EPOCHS`` or ``LR`` as bound at the
            moment of the call.

    Returns:
        dict of the form ``{'train': {...}, 'test': {...}}`` mapping each
        metric name (``'accuracy'``, ``'recall'``) to its average over folds.

    Raises:
        ValueError: If ``k`` is smaller than 1.
        NameError: If a setting is omitted and the matching notebook-level
            name is not defined.
    """
    if k < 1:
        raise ValueError(f'k must be at least 1, got {k}')

    def _setting(value, global_name):
        # An explicit argument wins; otherwise use the notebook-level value.
        if value is not None:
            return value
        try:
            return globals()[global_name]
        except KeyError:
            raise NameError(
                f'{global_name} is not defined; pass the setting explicitly'
            ) from None

    settings = {
        'num_classes': _setting(num_classes, 'NUM_CLASSES'),
        'device': _setting(device, 'DEVICE'),
        'transform': _setting(transform, 'transform'),
        'batch_size': _setting(batch_size, 'BATCH_SIZE'),
        'epochs': _setting(epochs, 'EPOCHS'),
        'lr': _setting(lr, 'LR'),
    }

    values = []
    # Folds are numbered from 1, matching the fold_<i> directory names.
    for i in range(1, k + 1):
        print(f'Training and Testing #{i}')
        values.append(train_test_vit(f'fold_{i}', **settings))

    metrics = ['accuracy', 'recall']
    averages = {'train': {}, 'test': {}}

    for metric in metrics:
        # Divide by the number of folds actually run so the mean is always consistent.
        train_mean = sum(fold['train'][metric] for fold in values) / len(values)
        test_mean = sum(fold['test'][metric] for fold in values) / len(values)
        averages['train'][metric] = train_mean
        averages['test'][metric] = test_mean
        print(f'\nAverage {metric} for training: {train_mean}')
        print(f'\nAverage {metric} for testing: {test_mean}')

    return averages

## Train ViT

In [36]:
# Hyperparameters and preprocessing for the ViT experiments.
IMAGE_SIZE = 224  # side length, in pixels, of the square centre crop
NUM_CLASSES = 5
LR = 0.0001
BATCH_SIZE = 32
EPOCHS = 4
REPETITIONS = 5  # k used for the cross-validation run
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device {DEVICE}')

transform = transforms.Compose(
    [
        # Resize to 256 first, then crop the centre down to IMAGE_SIZE.
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
        # Driven by IMAGE_SIZE so the constant above actually controls the crop.
        transforms.CenterCrop(IMAGE_SIZE),
        transforms.ToTensor(),
        # NOTE(review): these look like the usual ImageNet channel statistics — confirm.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

Using device cuda


First, we use a single fold to find a good set of hyperparameters.

In [37]:
# Single-fold run on fold_1; the returned metrics are shown as the cell output.
train_test_vit(
    path='fold_1',
    num_classes=NUM_CLASSES,
    device=DEVICE,
    transform=transform,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    lr=LR,
)

Epoch [1/4], Loss: 0.2541
	Accuracy: 90.80%
	Recall: 90.80%
Epoch [2/4], Loss: 0.0837
	Accuracy: 97.22%
	Recall: 97.23%
Epoch [3/4], Loss: 0.0524
	Accuracy: 98.17%
	Recall: 98.18%
Epoch [4/4], Loss: 0.0496
	Accuracy: 98.38%
	Recall: 98.38%
Test Accuracy: 95.60%
Test Recall: 95.60%


{'train': {'accuracy': 0.98375, 'recall': 0.98375},
 'test': {'accuracy': 0.956, 'recall': 0.9559999999999998}}

Next, we apply cross-validation across all folds.

In [None]:
#cross_validate_vit(k=REPETITIONS)

## Train ConvNeXt

In [None]:
# ConvNeXt experiment settings.
# NOTE: this cell rebinds NUM_CLASSES, LR, BATCH_SIZE, EPOCHS, REPETITIONS,
# DEVICE and transform, which the ViT section also defines; any cell run after
# this one sees the values below.
CONVNEXT_VERSION = 'convnext_tiny'
PRETRAINED = True

NUM_CLASSES = 5
REPETITIONS = 5
EPOCHS = 2
BATCH_SIZE = 32
LR = 1e-4

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device {DEVICE}')

# Images are resized directly to 224x224 (no crop) and converted to tensors.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])