# In [None]:
# IMPORT LIBRARIES 
import numpy as np
import torch
import time
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

#######################################################################
# READ INPUT FILES
# Load the four arrays in a fixed order: train images, train labels,
# test images, test labels. The paths are placeholders to be replaced.
X_train, y_train, X_test, y_test = (
    np.load(npy_path)
    for npy_path in (
        "path-to-xtrain.npy",
        "path-to-ytrain.npy",
        "path-to-xtest.npy",
        "path-to-ytest.npy",
    )
)

#######################################################################
# PREPARE INPUT SHAPE
def _to_single_channel_nchw(images):
    """Return *images* laid out as ``(N, 1, H, W)`` where the layout is recognised.

    Handled layouts (after dropping singleton axes other than the sample axis):

    * ``(N, H, W)``      -> a channel axis of length 1 is inserted.
    * ``(N, 3, H, W)``   -> only the first channel is kept.
    * ``(N, H, W, 3)``   -> moved to channels-first, then the first channel is kept.

    Arrays of any other rank/shape are returned unchanged.
    """
    images = np.asarray(images)

    # Squeeze singleton axes, but never axis 0: a blanket np.squeeze would
    # also drop the sample axis when the array holds exactly one sample.
    singleton_axes = tuple(
        axis for axis in range(1, images.ndim) if images.shape[axis] == 1
    )
    if singleton_axes:
        images = np.squeeze(images, axis=singleton_axes)

    if images.ndim == 3:
        return images[:, None, :, :]

    if images.ndim == 4:
        # Channels-last 3-channel data: bring the channel axis to position 1.
        # NOTE(review): ambiguous if H is also 3; channels-first wins then.
        if images.shape[1] != 3 and images.shape[-1] == 3:
            images = np.transpose(images, (0, 3, 1, 2))
        # Keep only the first channel so the result has a single channel.
        if images.shape[1] == 3:
            images = images[:, :1, :, :]

    return images


# Each array is normalised according to its OWN shape, so a test set with a
# different number of samples (e.g. a single image) is handled correctly.
X_train = _to_single_channel_nchw(X_train)
X_test = _to_single_channel_nchw(X_test)

#######################################################################
# NORMALIZE VALUES
# Cast both image arrays to float32 and divide by 255.
# NOTE(review): presumably the raw pixels are in the 0..255 range — confirm.
X_train, X_test = (
    pixels.astype(np.float32) / 255.0 for pixels in (X_train, X_test)
)

#######################################################################
# BUILD DATASETS / LOADERS AND CONSTRUCT ViT ARCHITECTURE
def _as_class_indices(labels):
    """Return *labels* as an int64 tensor of class indices.

    ``nn.CrossEntropyLoss`` takes class-index targets of shape ``(N,)`` for
    ``(N, C)`` logits. A column vector ``(N, 1)`` would be rejected by the
    loss and would broadcast to ``(N, N)`` in an ``argmax == target``
    comparison, so that trailing singleton axis is dropped here. Labels of
    any other shape are converted as they are.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == 1:
        labels = labels[:, 0]
    return torch.tensor(labels, dtype=torch.long)


# Pair images with their class indices; only the training set is shuffled.
train_ds = TensorDataset(torch.tensor(X_train), _as_class_indices(y_train))
test_ds = TensorDataset(torch.tensor(X_test), _as_class_indices(y_test))
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)
class ViT(nn.Module):
    """Small Vision Transformer classifier.

    The image is cut into non-overlapping ``patch_size`` x ``patch_size``
    patches by a strided convolution, a learnable class token is prepended,
    learnable position embeddings are added, and the sequence is run through
    a stack of ``nn.TransformerEncoderLayer`` blocks. The class-token output
    is layer-normalised and projected to ``num_classes`` logits.

    Args:
        img_size: Side length of the (square) input used to size the
            position-embedding table. The default was previously the
            undefined name ``c`` (a ``NameError`` at class-definition time);
            28 is used because it is a multiple of the default patch size.
            NOTE(review): confirm 28 matches the dataset resolution.
        patch_size: Side length of each square patch.
        in_chans: Number of input channels.
        num_classes: Number of output logits.
        embed_dim: Token embedding width.
        depth: Number of encoder layers.
        num_heads: Attention heads per encoder layer.
        mlp_ratio: Feed-forward width as a multiple of ``embed_dim``.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, img_size=28, patch_size=7, in_chans=1, num_classes=2,
                 embed_dim=128, depth=6, num_heads=4, mlp_ratio=4.0, dropout=0.1):
        super().__init__()
        # Strided conv == linear projection of each non-overlapping patch.
        self.patch_embed = nn.Conv2d(in_chans, embed_dim,
                                     kernel_size=patch_size, stride=patch_size)
        num_patches = (img_size // patch_size) ** 2
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        # One position per patch plus one for the class token.
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=int(embed_dim * mlp_ratio),
            dropout=dropout,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(B, num_classes)`` for a batch ``x``.

        Args:
            x: Tensor of shape ``(B, in_chans, H, W)``.

        Raises:
            ValueError: If ``x`` produces more patches than the position
                embedding table was built for.
        """
        x = self.patch_embed(x)                 # (B, embed_dim, H', W')
        x = x.flatten(2).transpose(1, 2)        # (B, N, embed_dim)
        B, N, C = x.shape
        if N + 1 > self.pos_embed.shape[1]:
            # Without this check the addition below fails with an opaque
            # broadcasting error.
            raise ValueError(
                f"input yields {N} patches but the model was built for at most "
                f"{self.pos_embed.shape[1] - 1}; construct ViT with a matching img_size"
            )
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)   # (B, N + 1, embed_dim)
        x = x + self.pos_embed[:, :N + 1, :]
        x = self.transformer(x)
        x = self.norm(x[:, 0])                  # class-token representation
        return self.head(x)
        
#######################################################################
# DEFINE ViT MODEL AND TRAINING PARAMETERS
# Number of full passes over the training set.
epochs = 3

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the model with its default hyper-parameters and place it on the device.
model = ViT()
model = model.to(device)

# Cross-entropy over the logits, optimised with Adam at a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

#######################################################################
# PERFORM TRAINING AND TESTING OVER THE NUMBER OF EPOCHS
for epoch in range(epochs):
    # ---- training pass: one optimiser step per mini-batch ----
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        # Accumulate the per-batch loss; it is averaged over batches below.
        total_loss += loss.item()

    # ---- evaluation pass: gradients disabled, accuracy on the test loader ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in test_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            preds = model(xb)
            # Predicted class = index of the largest logit along dim 1.
            correct += preds.argmax(dim=1).eq(yb).sum().item()
            total += yb.shape[0]

    print(f"Epoch {epoch+1}: loss={total_loss/len(train_loader):.4f}, "
          f"test_acc={correct/total:.4f}")

# Drop the module-level reference to the model once all epochs are done.
del model