In [None]:
from google.colab import drive
# Mount Google Drive at the relative path 'drive'; the dataset archive is read from drive/MyDrive below.
drive.mount('drive')

Mounted at drive


In [None]:
# Unpack the OCT2017 archive from the mounted Drive into the current working directory.
!tar -xvf 'drive/MyDrive/Harvard/BMI707/Data/OCT2017.tar.gz' -C './'

In [None]:
# --- Dataset configuration -------------------------------------------------
# get_data() loads at most training_size / num_class images from every class
# folder, so these two values together define the per-class cap.
num_class = 4
training_size = 4000

# Side length (pixels) every image is resized to before being stored.
imageSize = 224

# Root folders of the extracted archive; each contains one sub-folder per class.
train_dir = "./OCT2017/train/"
test_dir = "./OCT2017/test/"

import os

import cv2
import numpy as np
import skimage
import skimage.transform
from tqdm import tqdm

def get_data(folder):
    """
    Load the images and integer labels stored under ``folder``.

    ``folder`` must contain one sub-directory per class. The sub-directory
    name selects the label: NORMAL -> 0, CNV -> 1, DME -> 2, DRUSEN -> 3 and
    any other name -> 4. At most ``int(training_size / num_class)`` files are
    read from each class directory (both are module-level settings), and every
    readable image is resized to ``(imageSize, imageSize, 3)``.

    Directory listings are sorted so that the selected subset and the sample
    order are reproducible; ``os.listdir`` alone returns entries in arbitrary
    order.

    Args:
        folder: Path of the dataset split (with or without a trailing slash).

    Returns:
        Tuple ``(X, y)`` of numpy arrays: the stacked resized images and the
        matching integer labels. Files that OpenCV cannot decode are skipped.
    """
    label_by_class = {'NORMAL': 0, 'CNV': 1, 'DME': 2, 'DRUSEN': 3}
    per_class_limit = int(training_size / num_class)

    X = []
    y = []
    for folderName in sorted(os.listdir(folder)):
        class_dir = os.path.join(folder, folderName)
        # Skip hidden entries and stray files; only class directories can be listed.
        if folderName.startswith('.') or not os.path.isdir(class_dir):
            continue
        # Unknown class folders keep the original catch-all label 4.
        label = label_by_class.get(folderName, 4)
        for image_filename in tqdm(sorted(os.listdir(class_dir))[:per_class_limit]):
            img_file = cv2.imread(os.path.join(class_dir, image_filename))
            if img_file is None:
                # cv2.imread returns None for unreadable / non-image files.
                continue
            # OpenCV decodes to BGR; convert so downstream RGB-pretrained models
            # see the expected channel order (a no-op for grayscale scans).
            img_file = cv2.cvtColor(img_file, cv2.COLOR_BGR2RGB)
            # resize returns a floating-point array (presumably scaled to [0, 1]
            # for uint8 input -- confirm against the installed scikit-image).
            img_arr = skimage.transform.resize(img_file, (imageSize, imageSize, 3))
            X.append(img_arr)
            y.append(label)
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

# Load the capped subsets of the train and test splits into memory.
# The same per-class cap (training_size / num_class) is applied to both calls.
X_train, y_train = get_data(train_dir)
X_test, y_test= get_data(test_dir)

100%|██████████| 1000/1000 [00:38<00:00, 25.90it/s]
100%|██████████| 1000/1000 [00:47<00:00, 21.08it/s]
100%|██████████| 1000/1000 [00:43<00:00, 22.83it/s]
100%|██████████| 1000/1000 [00:47<00:00, 20.90it/s]
100%|██████████| 250/250 [00:09<00:00, 25.17it/s]
100%|██████████| 250/250 [00:07<00:00, 31.72it/s]
100%|██████████| 250/250 [00:08<00:00, 28.33it/s]
100%|██████████| 250/250 [00:09<00:00, 25.91it/s]


In [None]:
# Sanity check: array shapes first, then the number of training samples per label.
print(X_train.shape)
print(X_test.shape)

unique, counts = np.unique(y_train, return_counts=True)
for label, count in zip(unique, counts):
    print("The number of {} samples: {}".format(label, count))

(4000, 224, 224, 3)
(1000, 224, 224, 3)
The number of 0 samples: 1000
The number of 1 samples: 1000
The number of 2 samples: 1000
The number of 3 samples: 1000


In [None]:
from transformers import AutoImageProcessor, ViTModel
import torch
import torch.utils.data as Data
from tqdm import tqdm
import copy

In [None]:
class Vision_Transformer(torch.nn.Module):
    """Pretrained ViT backbone followed by a small MLP classification head.

    The head ends in a Softmax, so ``forward`` returns class *probabilities*,
    not raw logits. Do not feed the output to a loss that applies softmax
    itself (e.g. ``torch.nn.CrossEntropyLoss``); that would apply it twice.

    Args:
        link: Hugging Face model id (or path) of the pretrained ViT.
        device: Device the whole module is moved to after construction.
        freeze_embedding: If True, the backbone parameters get
            ``requires_grad = False`` so only the head is trained.
        num_classes: Number of output classes of the head (default 4).
    """

    def __init__(self, link="google/vit-base-patch16-224-in21k", device='cpu', freeze_embedding=True,
                 num_classes=4):
        super(Vision_Transformer, self).__init__()
        # do_rescale=False tells the processor not to rescale pixel values itself;
        # callers are presumably expected to pass already-scaled float images.
        self.image_processor = AutoImageProcessor.from_pretrained(link, do_rescale=False)
        self.embedding = ViTModel.from_pretrained(link)
        self.embedding_dim = self.embedding.config.hidden_size

        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(self.embedding_dim, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, num_classes),
            torch.nn.Softmax(dim=-1)
        )

        if freeze_embedding:
            for para in self.embedding.parameters():
                para.requires_grad = False

        self.to(device)

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes) for a batch of images."""
        inputs = self.image_processor(x, return_tensors='pt')
        # The processor returns fresh tensors; move them to wherever the model lives.
        target_device = next(self.parameters()).device
        encoded_input = {key: value.to(target_device) for key, value in inputs.items()}
        model_output = self.embedding(**encoded_input)

        # Use the hidden state of the first token (index 0) as the image embedding.
        embeddings = model_output.last_hidden_state[:, 0, :]
        probs = self.classifier(embeddings)

        return probs

In [None]:
# Smoke test: build the model with its default arguments (device='cpu') and
# run the first two training images through it.
model = Vision_Transformer()
out = model(X_train[:2])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [None]:
# Display the smoke-test output: one row of class scores per input image.
out

tensor([[0.2562, 0.2317, 0.2546, 0.2575],
        [0.2539, 0.2326, 0.2610, 0.2525]], grad_fn=<SoftmaxBackward0>)

In [None]:
class DLClassifier():
    """Minimal train / predict wrapper around a torch classification model.

    The wrapped model must return class *probabilities* (``predict_prob``
    hands its output back unchanged). The training loss is therefore the
    negative log-likelihood of ``log(probabilities)``. Using
    ``CrossEntropyLoss`` on probabilities would apply softmax a second time
    and flatten the gradients.

    Args:
        model: torch module mapping a batch of inputs to class probabilities.
        lr: Learning rate for Adam.
        minibatch: Batch size used for training and for batched inference.
        epoch: Number of training epochs.
        verbose: If True, print per-epoch losses, accuracy and learning rate.
        cuda: If True, use the GPU when one is available (falls back to CPU).
    """

    def __init__(self, model, lr, minibatch, epoch, verbose=True, cuda=False):
        self.model = model
        self.best_model = None
        self.loss = None
        self.val_loss = None
        self.epoch = epoch
        self.lr = lr
        self.minibatch = minibatch
        self.optimizer = None
        self.scheduler = None
        self.criterion = None
        self.verbose = verbose
        self.cuda = cuda

    def _device(self):
        """Return the device to run on: CUDA only if requested and actually available."""
        use_cuda = self.cuda and torch.cuda.is_available()
        return torch.device('cuda' if use_cuda else 'cpu')

    def _forward_in_batches(self, X):
        """Run the model over X in eval mode, without gradients, one minibatch at a time.

        Returns the concatenated model outputs as a CPU tensor.
        """
        device = self._device()
        self.model = self.model.to(device)
        self.model.eval()
        inputs = torch.as_tensor(X)
        outputs = []
        with torch.no_grad():
            for start in range(0, len(inputs), self.minibatch):
                batch = inputs[start:start + self.minibatch].to(device)
                outputs.append(self.model(batch).cpu())
        return torch.cat(outputs)

    def _nll_of_probs(self, probs, target):
        """Cross-entropy for models that already output probabilities."""
        # Clamp before the log so an exact 0 probability cannot produce -inf / NaN.
        log_probs = torch.log(probs.clamp_min(1e-12))
        return self.criterion(log_probs, target.long())

    def fit(self, x_train, y_train, x_test, y_test, save_best=True):
        """Train the model and track the validation loss on (x_test, y_test).

        Args:
            x_train, y_train: numpy arrays of training inputs and integer labels.
            x_test, y_test: numpy arrays used for per-epoch validation.
            save_best: If True, keep a deep copy of the model with the lowest
                validation loss seen after the first six epochs in
                ``self.best_model``.

        Side effects: sets ``self.loss`` / ``self.val_loss`` to the per-epoch
        loss histories and leaves the model in eval mode.
        """
        torch_dataset = Data.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
        loader = Data.DataLoader(
            dataset=torch_dataset,
            batch_size=self.minibatch,
            shuffle=True,
            drop_last=False
        )

        device = self._device()
        self.model = self.model.to(device)
        self.criterion = torch.nn.NLLLoss()

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        # `verbose` is omitted: it is deprecated in recent PyTorch and False was the default.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', factor=0.5, patience=40,
                                                               threshold=0.0001, threshold_mode='rel',
                                                               cooldown=0, min_lr=1e-05, eps=1e-08)

        loss_list = []
        valid_loss_list = []

        min_loss_val = 1e10
        y_valid = torch.as_tensor(y_test).long()

        for epoch in range(self.epoch):
            self.model.train()
            epoch_loss = 0
            loss_ = 0
            for idx, (x, target) in enumerate(tqdm(loader), 0):
                x = x.to(device)
                target = target.to(device)

                self.optimizer.zero_grad()
                predict = self.model(x)
                loss = self._nll_of_probs(predict, target)
                loss.backward()
                self.optimizer.step()

                epoch_loss += loss.item()
                # Running mean of the batch losses seen so far in this epoch.
                loss_ = epoch_loss / (idx + 1)
            loss_list.append(loss_)

            # Validation: eval mode, no gradients, batched so the whole test set
            # never has to fit on the device at once.
            pred = self._forward_in_batches(x_test)
            valid_loss = self._nll_of_probs(pred, y_valid).item()
            valid_loss_list.append(valid_loss)
            if self.verbose:
                correct = int(torch.sum(torch.argmax(pred, dim=1) == y_valid))
                total = len(y_valid)
                print("Epoch={}/{}, train_loss={}, valid_loss={}, valid_acc={}, lr={}".format(
                    epoch + 1, self.epoch, loss_, valid_loss, correct / total,
                    self.optimizer.state_dict()['param_groups'][0]['lr']))

            # ReduceLROnPlateau is driven by the monitored metric once per epoch.
            self.scheduler.step(valid_loss)

            if save_best:
                # Checkpointing deliberately starts only after the first six epochs.
                if valid_loss <= min_loss_val and epoch > 5:
                    min_loss_val = valid_loss
                    self.best_model = copy.deepcopy(self.model)
        self.loss = loss_list
        self.val_loss = valid_loss_list
        print('Training finished.')

    def predict(self, X):
        """Return the predicted class index (int64 numpy array) for every row of X."""
        outputs = self._forward_in_batches(X)
        pred = torch.argmax(outputs, dim=1).numpy().astype('int64')
        return pred

    def predict_prob(self, X):
        """Return the model's class probabilities for X as a numpy array."""
        outputs = self._forward_in_batches(X)
        pred = outputs.numpy()
        return pred

In [None]:
# Use the GPU when the runtime has one; otherwise fall back to the CPU instead
# of failing when the model is moved to a non-existent 'cuda' device.
base = Vision_Transformer(device='cuda' if torch.cuda.is_available() else 'cpu')
ViT_clf = DLClassifier(base, lr=0.001, minibatch=32, epoch=10, cuda=torch.cuda.is_available())

In [None]:
# Train the classifier. Note that fit() uses (X_test, y_test) for the per-epoch
# validation loss and for picking best_model, so the test split is not an
# independent hold-out here.
ViT_clf.fit(X_train, y_train, X_test, y_test)

100%|██████████| 125/125 [01:34<00:00,  1.33it/s]


Epoch=1/10, train_loss=1.1415690150260924, valid_loss=0.8692566156387329, valid_acc=0.893, lr=0.001


100%|██████████| 125/125 [01:17<00:00,  1.62it/s]


Epoch=2/10, train_loss=0.9779788122177124, valid_loss=0.8288081884384155, valid_acc=0.929, lr=0.001


100%|██████████| 125/125 [01:21<00:00,  1.54it/s]


Epoch=3/10, train_loss=0.9407933602333068, valid_loss=0.820317268371582, valid_acc=0.935, lr=0.001


100%|██████████| 125/125 [01:20<00:00,  1.56it/s]


Epoch=4/10, train_loss=0.9294708108901978, valid_loss=0.841715395450592, valid_acc=0.903, lr=0.001


100%|██████████| 125/125 [01:19<00:00,  1.58it/s]


Epoch=5/10, train_loss=0.9147429966926575, valid_loss=0.7933866381645203, valid_acc=0.956, lr=0.001


100%|██████████| 125/125 [01:19<00:00,  1.57it/s]


Epoch=6/10, train_loss=0.9014894495010376, valid_loss=0.8085428476333618, valid_acc=0.936, lr=0.001


100%|██████████| 125/125 [01:18<00:00,  1.59it/s]


Epoch=7/10, train_loss=0.8928033776283264, valid_loss=0.7873673439025879, valid_acc=0.957, lr=0.001


100%|██████████| 125/125 [01:19<00:00,  1.58it/s]


Epoch=8/10, train_loss=0.8864827075004578, valid_loss=0.8080419301986694, valid_acc=0.936, lr=0.001


100%|██████████| 125/125 [01:19<00:00,  1.58it/s]


Epoch=9/10, train_loss=0.882181803226471, valid_loss=0.7901687026023865, valid_acc=0.951, lr=0.001


100%|██████████| 125/125 [01:19<00:00,  1.58it/s]


Epoch=10/10, train_loss=0.879286135673523, valid_loss=0.7988764047622681, valid_acc=0.945, lr=0.001
Training finished.


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Evaluate the classifier trained above. The original cell referenced
# `ResNet_clf`, which is never defined in this notebook and raised NameError
# on a fresh Restart & Run All.
y_pred = ViT_clf.predict(X_test)
# fmt='d' prints the integer counts instead of seaborn's default '.2g' (e.g. 2.5e+02).
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')
print(classification_report(y_test, y_pred))