In [None]:
# Mount Google Drive at /content/drive; the helper scripts, dataset folders and
# saved-model paths used later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy helper_functions.py from Drive into /content so the later
# `from helper_functions import ...` lines can find it
# (assumes /content is the notebook's working directory - the Colab default; confirm).
!cp /content/drive/MyDrive/vit/helper_functions.py /content

In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
# Copy the going_modular helper scripts from Drive into /content.
# `engine` and `predictions` are imported further down in this notebook.
!cp /content/drive/MyDrive/vit/going_modular/engine.py /content
!cp /content/drive/MyDrive/vit/going_modular/model_builder.py /content
!cp /content/drive/MyDrive/vit/going_modular/predictions.py /content
!cp /content/drive/MyDrive/vit/going_modular/train.py /content
!cp /content/drive/MyDrive/vit/going_modular/utils.py /content

In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
from helper_functions import set_seeds

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# 1. Pick the default pretrained weights for ViT-Base/16
pretrained_vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

# 2. Build the model from those weights, then move it to the target device
pretrained_vit = torchvision.models.vit_b_16(weights=pretrained_vit_weights)
pretrained_vit = pretrained_vit.to(device)

# 3. Freeze every parameter of the pretrained model (no gradients will be computed for them)
pretrained_vit.requires_grad_(False)

# 4. Swap the classifier head for a new linear layer with one output per class
class_names = ['cnv','normal']

# set_seeds() runs right before the head is created
# (presumably so that its random initialisation is reproducible - see helper_functions.py)
set_seeds()
pretrained_vit.heads = nn.Linear(
    in_features=768,
    out_features=len(class_names),
).to(device)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:05<00:00, 68.9MB/s]


In [None]:
from torchinfo import summary

# Print a per-layer summary of the model with torchinfo
summary(
    model=pretrained_vit,
    input_size=(32, 3, 224, 224),  # (batch_size, color_channels, height, width)
    # pass col_names=["input_size"] instead for a smaller table
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 2]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 14, 14]    (590,592)            False
├─Encoder (encoder)                                          [32, 197, 768]       [32, 197, 768]       151,296              False
│    └─Dropout (dropout)                                     [32, 197, 768]       [32, 197, 768]       --                   --
│    └─Sequential (layers)                                   [32, 197, 768]       [32, 197, 768]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 768]       [32, 197, 768]       (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 768]       [32, 

#### Notice how only the output layer is trainable, whereas all of the other layers are untrainable (frozen).

In [None]:
# Setup directory paths to train and test images.
# Both paths are later handed to datasets.ImageFolder inside create_dataloaders().
train_dir = "/content/drive/MyDrive/dataset80-20/training"
test_dir = "/content/drive/MyDrive/dataset80-20/testing"

Remember, if you're going to use a pretrained model, it's generally important to ensure your own custom data is transformed/formatted in the same way as the data the original model was trained on.

In [None]:
# Get the preprocessing pipeline that ships with the pretrained ViT weights
# and print it to see the resize/crop sizes and normalisation statistics it applies
pretrained_vit_transforms = pretrained_vit_weights.transforms()
print(pretrained_vit_transforms)

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


## Now that we've got the transforms ready, we can turn our images into DataLoaders using the `create_dataloaders()` function

In [None]:
import os

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int=NUM_WORKERS
):
  """Create training and testing DataLoaders from two image folders.

  Args:
    train_dir: Path to the training image folder (read with datasets.ImageFolder).
    test_dir: Path to the testing image folder (read with datasets.ImageFolder).
    transform: Transform applied to every image of both datasets.
    batch_size: Number of samples per batch in both DataLoaders.
    num_workers: Worker count passed to both DataLoaders (defaults to NUM_WORKERS).

  Returns:
    A tuple (train_dataloader, test_dataloader, class_names), where class_names
    is the `classes` list of the training ImageFolder.
  """
  # Settings shared by both loaders; only `shuffle` differs between them
  loader_kwargs = dict(
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=True,
  )

  # Both datasets go through the same transform
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Training batches are shuffled, test batches keep the dataset order
  train_dataloader = DataLoader(train_data, shuffle=True, **loader_kwargs)
  test_dataloader = DataLoader(test_data, shuffle=False, **loader_kwargs)

  return train_dataloader, test_dataloader, train_data.classes

In [None]:
# Build the train/test DataLoaders with the preprocessing taken from the pretrained ViT weights.
# The batch size could be increased if we had more samples, such as here:
# https://arxiv.org/abs/2205.01580 (there are other improvements there too...)
(
    train_dataloader_pretrained,
    test_dataloader_pretrained,
    class_names,
) = create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=pretrained_vit_transforms,
    batch_size=32,
)


In [None]:
import engine

# Create optimizer and loss function.
# Only parameters that still require gradients are handed to Adam: every backbone
# parameter was frozen (requires_grad = False) when the model was set up, so the
# classifier head is the only part that should be updated.
trainable_parameters = [p for p in pretrained_vit.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params=trainable_parameters,
                             lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

# Train the classifier head of the pretrained ViT feature extractor model
set_seeds()
pretrained_vit_results = engine.train(model=pretrained_vit,
                                      train_dataloader=train_dataloader_pretrained,
                                      test_dataloader=test_dataloader_pretrained,
                                      optimizer=optimizer,
                                      loss_fn=loss_fn,
                                      epochs=50,
                                      device=device)

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0794 | train_acc: 0.9772 | test_loss: 0.0760 | test_acc: 0.9715
Epoch: 2 | train_loss: 0.0328 | train_acc: 0.9907 | test_loss: 0.0633 | test_acc: 0.9783
Epoch: 3 | train_loss: 0.0260 | train_acc: 0.9925 | test_loss: 0.0598 | test_acc: 0.9821
Epoch: 4 | train_loss: 0.0228 | train_acc: 0.9932 | test_loss: 0.0592 | test_acc: 0.9827
Epoch: 5 | train_loss: 0.0201 | train_acc: 0.9947 | test_loss: 0.0563 | test_acc: 0.9808
Epoch: 6 | train_loss: 0.0180 | train_acc: 0.9954 | test_loss: 0.0690 | test_acc: 0.9811


In [None]:
import torch
import h5py
import numpy as np

# Assuming you have a trained Vision Transformer model 'pretrained_vit'
import os  # local import: only needed here to make sure the output folder exists

# 1. Convert the PyTorch model to a dictionary (parameter/buffer name -> tensor)
model_state_dict = pretrained_vit.state_dict()

# Extra metadata stored alongside the weights in the HDF5 file
model_info = {
    'architecture': 'Vision Transformer',
    'input_size': (224, 224),  # Input image size
    'num_classes': pretrained_vit.heads.out_features,  # Number of output classes, read from the classifier head
    'other_info': 'Your additional information here',
}

# 2. Save the dictionary to an HDF5 file
h5_filename = "/content/drive/MyDrive/models 80-20/vit-model-80-20-50ep.h5"  # Specify the name of the HDF5 file

# Create the target folder first so a long training run is not lost to a missing directory
os.makedirs(os.path.dirname(h5_filename), exist_ok=True)

with h5py.File(h5_filename, 'w') as h5_file:
    # Save the model state_dict: one dataset per entry, named after its state_dict key
    for key, value in model_state_dict.items():
        h5_file.create_dataset(key, data=value.cpu().numpy())  # Convert tensors to numpy arrays

    # Save other model information as file attributes (every value is stored as a string)
    for key, value in model_info.items():
        h5_file.attrs[key] = str(value)

print(f"Model saved to {h5_filename}")

# 3. Save the model using torch.save
# The checkpoint is a plain dict, so loading it back requires rebuilding the model
# and calling load_state_dict(checkpoint['model_state_dict']).
checkpoint = {
    'model_state_dict': model_state_dict,
    # You can add more information if needed, such as optimizer state, hyperparameters, etc.
}

# Define the path where you want to save the model
save_path = '/content/drive/MyDrive/models 80-20/vit_model-80-20-50ep.pth'
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save the checkpoint to the specified path
torch.save(checkpoint, save_path)

print(f"Model saved to {save_path}")


The pretrained ViT performed far better than our custom ViT model trained from scratch (in the same amount of time).


In [None]:
# Plot the loss curves recorded during training
# (`pretrained_vit_results` is the value returned by engine.train in the training cell)
from helper_functions import plot_loss_curves

plot_loss_curves(pretrained_vit_results)

## That's the power of transfer learning!

We managed to get outstanding results with the same model architecture. The only difference is that our custom implementation was trained from scratch (worse performance), whereas this feature extractor model has the power of pretrained weights from ImageNet behind it.

# Let's make predictions:

In [None]:
import requests

# Import function to make predictions on images and plot them
from predictions import pred_and_plot_image

from PIL import Image
import tempfile

# Load the grayscale image and convert it to RGB format.
# The `with` block closes the source file as soon as the converted copy exists.
with Image.open("/content/drive/MyDrive/cnv sample/CNV-743852-1.jpeg") as gray_image:
    rgb_image = gray_image.convert("RGB")

# Write the RGB copy to a temporary .jpg that is deleted automatically when the block
# exits (previously delete=False left one stray file behind per run).
# Re-opening a NamedTemporaryFile by name while it is still open works on Unix, i.e. on Colab.
# NOTE(review): assumes pred_and_plot_image reads the file before returning - confirm in predictions.py.
with tempfile.NamedTemporaryFile(suffix=".jpg") as temp_rgb_path:
    rgb_image.save(temp_rgb_path.name)
    # Predict on custom image
    pred_and_plot_image(model=pretrained_vit,
                        image_path=temp_rgb_path.name,
                        class_names=class_names)

In [None]:
import requests

# Import function to make predictions on images and plot them
from predictions import pred_and_plot_image

from PIL import Image
import tempfile

# Load the grayscale image and convert it to RGB format.
# The `with` block closes the source file as soon as the converted copy exists.
with Image.open("/content/drive/MyDrive/normal sample/NORMAL-508852-2.jpeg") as gray_image:
    rgb_image = gray_image.convert("RGB")

# Write the RGB copy to a temporary .jpg that is deleted automatically when the block
# exits (previously delete=False left one stray file behind per run).
# Re-opening a NamedTemporaryFile by name while it is still open works on Unix, i.e. on Colab.
# NOTE(review): assumes pred_and_plot_image reads the file before returning - confirm in predictions.py.
with tempfile.NamedTemporaryFile(suffix=".jpg") as temp_rgb_path:
    rgb_image.save(temp_rgb_path.name)
    # Predict on custom image
    pred_and_plot_image(model=pretrained_vit,
                        image_path=temp_rgb_path.name,
                        class_names=class_names)

In [None]:
test1_dir = "/content/drive/MyDrive/testing"
import os

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    test1_dir: str,
    transform=None,
    batch_size: int=32,
    num_workers: int=NUM_WORKERS
):
  """Build a non-shuffled DataLoader over the held-out image folder `test1_dir`.

  NOTE: this definition replaces the earlier `create_dataloaders(train_dir, test_dir, ...)`
  in the kernel namespace, so re-running the train/test dataloader cell after this
  one requires re-running the cell that defines the original function first.

  Args:
    test1_dir: Path to the image folder (read with datasets.ImageFolder).
    transform: Transform applied to every image. When None, the notebook-level
      `pretrained_vit_transforms` (the preprocessing used for training) is used.
    batch_size: Number of samples per batch.
    num_workers: Worker count passed to the DataLoader (defaults to NUM_WORKERS).

  Returns:
    A tuple (test1_dataloader, class_names), where class_names is the
    `classes` list of the ImageFolder built from `test1_dir`.
  """
  if transform is None:
    # Fall back to the same preprocessing the training/testing loaders were built with
    transform = pretrained_vit_transforms

  # Use ImageFolder to create dataset(s)
  test1_data = datasets.ImageFolder(test1_dir, transform=transform)

  # Get class names from the dataset that was actually loaded
  class_names = test1_data.classes

  test1_dataloader = DataLoader(
      test1_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return test1_dataloader, class_names

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

def evaluate_model(model, test1_dataloader, device):
    """Evaluate a classifier on a DataLoader and print/return standard metrics.

    Args:
        model: Model whose output for a batch of images is a (batch, num_classes)
            score tensor; the predicted class is the arg-max along dim 1.
        test1_dataloader: DataLoader yielding (images, labels) batches. Its
            `dataset.classes` attribute supplies the class names for the report.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        dict with keys 'accuracy' (float), 'confusion_matrix' (array) and
        'classification_report' (str). The same values are also printed.
    """
    model.eval()
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for images, labels in test1_dataloader:
            # Only the images are needed on the device; labels are just collected
            outputs = model(images.to(device))
            predicted = outputs.argmax(dim=1)

            true_labels.extend(labels.cpu().tolist())
            predicted_labels.extend(predicted.cpu().tolist())

    # Fix the label set explicitly so the confusion matrix always has one row/column
    # per class and classification_report does not fail when a class is absent
    # from both the true and the predicted labels.
    class_names = test1_dataloader.dataset.classes
    label_ids = list(range(len(class_names)))

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    print("Accuracy:", accuracy)

    # Generate the confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels, labels=label_ids)
    print("Confusion Matrix:\n", cm)

    # Generate the classification report
    report = classification_report(true_labels, predicted_labels,
                                   labels=label_ids, target_names=class_names)
    print("Classification Report:\n", report)

    return {
        "accuracy": accuracy,
        "confusion_matrix": cm,
        "classification_report": report,
    }

# Evaluate the pretrained ViT model on the held-out images in `test1_dir`.
# `test1_dataloader_pretrained` was never created anywhere in the notebook, so it is
# built here directly, with the same preprocessing the training data went through.
test1_dataloader_pretrained = DataLoader(
    datasets.ImageFolder(test1_dir, transform=pretrained_vit_transforms),
    batch_size=32,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
evaluate_model(pretrained_vit, test1_dataloader_pretrained, device)


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, auc
import torch
from torchvision import transforms
from torch.utils.data import DataLoader

# Load your saved model
model_path = '/content/drive/MyDrive/models 80-20/vit_model-80-20-50ep.pth'

# The .pth file was written as a checkpoint dict ({'model_state_dict': ...}), not as a
# model object, so calling .eval() on the result of torch.load() would fail.
# Rebuild the architecture, size the head from the saved weights, then load them.
saved_checkpoint = torch.load(model_path, map_location=device)
saved_state_dict = saved_checkpoint['model_state_dict']

loaded_model = torchvision.models.vit_b_16(weights=None)
head_out_features, head_in_features = saved_state_dict['heads.weight'].shape
loaded_model.heads = nn.Linear(in_features=head_in_features,
                               out_features=head_out_features)
loaded_model.load_state_dict(saved_state_dict)
loaded_model = loaded_model.to(device)

# Set the model to evaluation mode
loaded_model.eval()

# Load the held-out test dataset (test1) together with its labels
# torchvision's datasets.ImageFolder derives each label from the image's sub-directory

# Helper that wraps an image folder in a DataLoader
def load_test_data(test1_dir, transform):
    """Return a non-shuffled DataLoader (batch size 32) over the ImageFolder at `test1_dir`.

    `transform` is applied to every image loaded from the folder.
    """
    return DataLoader(
        datasets.ImageFolder(root=test1_dir, transform=transform),
        batch_size=32,
        shuffle=False,
    )

# Use exactly the preprocessing the model was trained with. The train/test loaders were
# built with `pretrained_vit_transforms` (resize 256, centre-crop 224, ImageNet mean/std
# normalisation); a bare Resize + ToTensor skips the crop and the normalisation, so the
# model would be scored on inputs from a different distribution than it was trained on.
test1_transform = pretrained_vit_transforms

# Load the held-out test dataset
test1_dir = "/content/drive/MyDrive/testing"
test1_loader = load_test_data(test1_dir, test1_transform)

# Predict class probabilities on the held-out test set
y_true = []
y_probs = []

with torch.no_grad():  # inference only: no autograd graph needed
    for images, labels in test1_loader:
        images = images.to(device)  # Assuming 'device' is correctly set to GPU
        outputs = loaded_model(images)
        # Keep the softmax probabilities of every class, one row per sample
        probabilities = torch.softmax(outputs, dim=1)
        y_true.extend(labels.cpu().numpy())
        y_probs.extend(probabilities.cpu().numpy())

# Convert to numpy arrays
y_true = np.array(y_true)
y_probs = np.array(y_probs)  # one row per sample, one column per class

# Calculate ROC curve and AUC for each class (one-vs-rest)
fpr = {}
tpr = {}
roc_auc = {}

class_names = ['cnv', 'normal']

for i in range(len(class_names)):
    # Score class i with its OWN probability column. Scoring every class with the
    # class-1 probability would invert the class-0 curve (AUC becomes 1 - AUC).
    fpr[i], tpr[i], _ = roc_curve(y_true == i, y_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Draw one ROC curve per class plus the dashed chance diagonal
fig, ax = plt.subplots(figsize=(10, 6))
for i, class_name in enumerate(class_names):
    ax.plot(fpr[i], tpr[i], label=f'ROC curve ({class_name} AUC = {roc_auc[i]:.2f})')

ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves for Classifiers')
ax.legend(loc="lower right")
plt.show()
