In [88]:
# -*- coding: utf-8 -*-
'''

Train CIFAR10 with PyTorch and Vision Transformers!
written by @kentaroy47, @arutema47
source : https://github.com/kentaroy47/vision-transformers-cifar10

'''

from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import numpy as np

import torchvision
import torchvision.transforms as transforms

import os
import argparse
import pandas as pd
import csv
import time




In [89]:
# Run bookkeeping: each training run writes its artefacts to a numbered
# sub-directory of base_dir.
base_dir = "results/runs"
run_number = 1

# os.makedirs is recursive, so this single call creates both "results" and
# "results/runs" when they are missing.
os.makedirs(base_dir, exist_ok=True)

In [90]:
torch.manual_seed(42)

<torch._C.Generator at 0x78b034bbc410>

In [91]:
# # setup for a read only personal access token
# # note : token expires 19 aug 2025
# token = os.environ['GITHUB_TOKEN']  # read the PAT from the environment; never commit a literal token (revoke any token that was pasted here)
# token_user = 'Asterisk07'
# repo_host = 'Asterisk07'
# repo_name = 'BTP-Transformer-explainability'

# url = f'https://{token_user}:{token}@github.com/{repo_host}/{repo_name}/'
# !git clone {url}

# !mv {repo_name}/models .
# !rm -rf BTP-Transformer-explainability # delete a file

In [92]:
!ls

__pycache__  data  log	results  utils.py  wandb


In [93]:
# !rm -rf models

In [94]:
# !npm install -g github-files-fetcher

In [95]:
# !fetcher --url=https://github.com/kentaroy47/vision-transformers-cifar10/tree/main/models
# !fetcher --url=https://https://github.com/Asterisk07/BTP-Transformer-explainability/main/models


In [96]:
2

2

In [97]:

import os

# Check if 'utils.py' exists in the current directory
if os.path.exists('utils.py'):
    print("utils.py exists in the current directory.")
else:
    print("utils.py does not exist in the current directory.")
    !wget https://raw.githubusercontent.com/kentaroy47/vision-transformers-cifar10/main/utils.py
    print("utils.py fetched")



utils.py exists in the current directory.


In [98]:
from utils import progress_bar

In [99]:
progress_bar

<function utils.progress_bar(current, total, msg=None)>

In [100]:


# from randomaug import RandAugment
from torchvision.transforms import RandAugment



In [101]:
!pip install einops



In [102]:
# from models import *
# from models.vit import ViT
# from models.convmixer import ConvMixer

In [103]:

import json

In [104]:

qkv_titles = ['q','k','v']

In [105]:
# https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/vit.py
# VIT.py
import torch
from torch import nn

from einops import rearrange, repeat
from einops.layers.torch import Rearrange
import numpy as np
# helpers

def pair(t):
    """Return `t` unchanged when it is a tuple, otherwise the 2-tuple `(t, t)`."""
    if isinstance(t, tuple):
        return t
    return (t, t)

# classes

class PreNorm(nn.Module):
    """Apply LayerNorm over the last `dim` features, then call the wrapped module.

    Any keyword arguments given to `forward` are passed through to `fn`.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        normed = self.norm(x)
        return self.fn(normed, **kwargs)

class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    `forward` optionally dumps part of its output to disk: when `save_flag`
    is True, `out[img_idx]` is written to `<run_dir>/ff_out.npy`.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x,save_flag=False, run_dir = None,img_idx = None):
        out = self.net(x)
        if save_flag == True:
            # Only the rows selected by img_idx are persisted, moved to CPU and
            # detached from the autograd graph first.
            selected = out[img_idx].detach().cpu().numpy()
            np.save(os.path.join(run_dir, 'ff_out.npy'), selected)
        return out

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    `forward` can additionally dump intermediate tensors: when `save_flag` is
    True, the per-head q, k and v tensors, the attention output (before the
    heads are merged) and the softmax attention scores are indexed with
    `img_idx` and saved as .npy files inside `run_dir`.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()

        inner_dim = heads * dim_head
        # With one head whose width already equals `dim` no output projection is used.
        single_full_width_head = heads == 1 and dim_head == dim

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        if single_full_width_head:
            self.to_out = nn.Identity()
        else:
            self.to_out = nn.Sequential(
                nn.Linear(inner_dim, dim),
                nn.Dropout(dropout)
            )

    def forward(self, x,save_flag=False, run_dir = None,img_idx = None):
        # One linear layer produces q, k and v side by side; split them and
        # move the head axis in front of the token axis.
        q, k, v = (
            rearrange(part, 'b n (h d) -> b h n d', h = self.heads)
            for part in self.to_qkv(x).chunk(3, dim = -1)
        )

        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        out = torch.matmul(attn, v)

        if save_flag == True:
            def _dump(stem, tensor):
                # np.save appends ".npy" itself when the name lacks that suffix.
                np.save(os.path.join(run_dir, stem), tensor[img_idx].detach().cpu().numpy())

            for title, tensor in zip(qkv_titles, (q, k, v)):
                _dump(f'{title}.npy', tensor)
            _dump('att_out', out)
            _dump('att_score', attn)

        merged = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(merged)

class Transformer(nn.Module):
    """Stack of `depth` pre-norm encoder layers (attention + feed-forward, each with a residual).

    When `save_flag` is truthy, `forward` creates `<run_dir>/layer NN` for every
    layer and passes that directory to the layer's attention and feed-forward
    modules so they can write their intermediate tensors there.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()

        self.layers = nn.ModuleList([])
        for _ in range(depth):
            attention = PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout))
            feed_forward = PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout))
            self.layers.append(nn.ModuleList([attention, feed_forward]))

    def forward(self, x,save_flag=False, run_dir = None, img_idx = None):
        for depth_idx, (attn, ff) in enumerate(self.layers):
            # Each layer gets its own dump directory, but only when saving.
            layer_dir = None
            if save_flag:
                layer_dir = os.path.join(run_dir, f"layer {depth_idx:02}")
                os.makedirs(layer_dir, exist_ok=True)

            # Residual connection around the attention sub-layer ...
            x = attn(x, save_flag=save_flag, run_dir = layer_dir, img_idx = img_idx) + x
            # ... and around the feed-forward sub-layer.
            x = ff(x, save_flag=save_flag, run_dir = layer_dir, img_idx = img_idx) + x

        return x


class ViT(nn.Module):
    """Vision Transformer classifier.

    The image is cut into non-overlapping patches, each patch is linearly
    embedded, a class token and a learned position embedding are added, the
    sequence goes through the Transformer, and the pooled token ('cls' or
    'mean') is classified by a LayerNorm + Linear head.

    `forward` passes `save_flag`, `run_dir` and `img_idx` on to the
    Transformer unchanged.
    """

    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        img_h, img_w = pair(image_size)
        p_h, p_w = pair(patch_size)

        divisible = img_h % p_h == 0 and img_w % p_w == 0
        assert divisible, 'Image dimensions must be divisible by the patch size.'
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        num_patches = (img_h // p_h) * (img_w // p_w)
        patch_dim = channels * p_h * p_w

        # Flatten every (p1 x p2 x c) patch into one vector, then project to `dim`.
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = p_h, p2 = p_w),
            nn.Linear(patch_dim, dim),
        )

        # One position per patch plus one for the class token.
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, img, save_flag=False, run_dir = None,img_idx = None):
        tokens = self.to_patch_embedding(img)
        batch, n_patches, _ = tokens.shape

        # Prepend one copy of the class token to every sequence in the batch.
        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = batch)
        tokens = torch.cat((cls_tokens, tokens), dim=1)
        tokens += self.pos_embedding[:, :(n_patches + 1)]
        tokens = self.dropout(tokens)

        tokens = self.transformer(tokens, save_flag, run_dir, img_idx)

        if self.pool == 'mean':
            pooled = tokens.mean(dim = 1)
        else:
            pooled = tokens[:, 0]

        return self.mlp_head(self.to_latent(pooled))

In [106]:
import argparse
import sys

# Define your arguments here
def parse_args(argv=None):
    """Parse the training options.

    Args:
        argv: optional list of argument strings (without the program name).
            When None (the default) argparse reads ``sys.argv[1:]``, exactly
            as before, so existing callers are unaffected. Passing a list lets
            a notebook parse options without overwriting ``sys.argv``.

    Returns:
        argparse.Namespace holding the parsed options. ``bs`` and ``size`` are
        now parsed as ints (they used to be strings); code that wraps them in
        ``int(...)`` keeps working.
    """
    # parsers
    parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
    parser.add_argument('--lr', default=1e-4, type=float, help='learning rate') # resnets.. 1e-3, Vit..1e-4
    parser.add_argument('--opt', default="adam")
    parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
    # store_false: args.noaug is True by default and becomes False when --noaug
    # is given, i.e. the attribute actually means "augmentation enabled".
    parser.add_argument('--noaug', action='store_false', help='disable use randomaug')
    parser.add_argument('--noamp', action='store_true', help='disable mixed precision training. for older pytorch versions')
    parser.add_argument('--nowandb', action='store_true', help='disable wandb')
    parser.add_argument('--mixup', action='store_true', help='add mixup augumentations')
    parser.add_argument('--net', default='vit')
    parser.add_argument('--dp', action='store_true', help='use data parallel')
    # Typed as int so a non-numeric value is rejected at parse time with a
    # clear argparse error instead of failing later in int(args.bs).
    parser.add_argument('--bs', default=512, type=int, help='batch size')
    parser.add_argument('--size', default=32, type=int, help='input image size')
    parser.add_argument('--n_epochs', type=int, default=200)
    parser.add_argument('--patch', default=4, type=int, help="patch for ViT")
    parser.add_argument('--dimhead', default=512, type=int)
    parser.add_argument('--convkernel', default=8, type=int, help="parameter for convmixer")

    return parser.parse_args(argv)




In [107]:
command = 'python train_cifar10.py --n_epochs 500 --lr 0.0005'
command.split()[1:]

['train_cifar10.py', '--n_epochs', '500', '--lr', '0.0005']

In [108]:
# Simulate command-line arguments
# sys.argv = ['your_script.py', '--lr', '0.2', '--opt', 'adam', '--net', 'vit', '--bs', '64','--dimhead','256']
# Dropping only the leading "python" keeps the script name as sys.argv[0],
# which argparse skips; the remaining items are the real options.
# This replaces the kernel's own sys.argv for the rest of the session.
sys.argv = command.split()[1:]

# parse_args() reads the sys.argv assigned just above.
args = parse_args()



In [109]:
# !pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu118 --upgrade --force-reinstall

In [110]:
# (2.0.1+cu117)
# Requirement already satisfied: torchvision in /opt/conda/lib/python3.10/site-packages (0.15.2+cu117)

In [111]:
# !pip show torchvision


In [112]:
2

2

In [113]:
# !pip show torch
# #

In [114]:
import torchvision
torchvision.__version__

'0.19.0'

In [115]:
import torch
torch.__version__

'2.4.0'

In [116]:
!pip install wandb



In [117]:

# take in args
# `not` is required here rather than bitwise `~`: on Python bools ~False == -1
# and ~True == -2, both truthy, so `~args.nowandb` kept wandb enabled even when
# --nowandb was passed.
usewandb = not args.nowandb
if usewandb:
    import wandb
    watermark = "{}_lr{}".format(args.net, args.lr)
    wandb.init(project="cifar10-challange",
            name=watermark)
    wandb.config.update(args)

bs = int(args.bs)
imsize = int(args.size)

use_amp = not args.noamp
# args.noaug comes from a store_false flag: True means augmentation stays on.
aug = args.noaug

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
if args.net=="vit_timm":
    size = 384
else:
    size = imsize

# Training pipeline: random crop (with padding) and flip, then tensor + per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.Resize(size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: resize and normalise only.
transform_test = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Add RandAugment with N, M(hyperparameter)
if aug:
    N = 2
    M = 14
    # Inserted at position 0 so it runs before every other training transform.
    transform_train.transforms.insert(0, RandAugment(N, M))

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▁▂▂▃▄▅▂▂▃▄▅▅▆▇▁▂▂▃▄▅▅▆▁▁▂▂▃▄▅▁▂▃▄▅▅▆▇█
epoch_time,▅▅▇▂▂▃▃▂▂▂▃▂▁▁▁▁▇▂▁▁▂▁▂▁█▇▂▁▂▂▂▇▁▃▂▂▂▂▂▇
lr,████▇▇▇▆█▇▇▇▆▅▄▃██▇▇▇▆▅▄███▇▇▇▆█▇▇▇▆▅▄▃▁
train_loss,█▆█▆▄▄▃▃▆▄▄▃▃▂▂▁█▆▄▄▃▃▂▂██▆▄▄▃▃█▄▄▃▃▂▂▁▁
val_acc,▁▂▁▂▄▅▅▆▂▄▅▅▆▆▇▇▁▂▄▅▅▆▆▇▁▁▂▄▅▅▆▁▄▅▅▆▆▇▇█
val_loss,█▇█▇▅▄▄▃▇▅▄▄▃▃▃▂█▇▅▄▄▃▃▃██▇▅▄▄▃█▅▄▄▃▃▃▂▁

0,1
epoch,10.0
epoch_time,31.31984
lr,0.0005
train_loss,1.29985
val_acc,63.19
val_loss,102.83137


==> Preparing data..


In [118]:
# wandb API key: do not paste it in the notebook. Authenticate with `wandb login`
# or the WANDB_API_KEY environment variable (revoke any key that was committed here).

In [119]:
# Number of worker processes each DataLoader uses.
NUM_WORKERS = 4

# Prepare dataset
# Training split: augmented transform, reshuffled on every pass.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=bs,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Test split: deterministic transform, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=bs,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Iterate through the first batch and print information about each image
first_batch = next(iter(trainloader))

images, labels = first_batch

# Print information about at most the first 10 images. The min() cap keeps the
# cell from raising IndexError when the batch holds fewer than 10 samples
# (for example with --bs 8).
for i in range(min(10, len(images))):
    print(f"Image {i+1}:")
    print(f"Label: {labels[i]}")
    print(f"Shape: {images[i].shape}")
    print(f"Max Pixel Value: {images[i].max()}")
    print(f"Min Pixel Value: {images[i].min()}")
    print()  # Newline for readability


In [120]:
# For Multi-GPU
# NOTE(review): no cell above this one assigns `net` (the model is first built
# further down), so on a fresh kernel the --dp branch would raise NameError.
# NOTE(review): the training cell later rebinds `net = get_vit()`, which would
# discard the DataParallel wrapper created here - confirm the intended order.
if 'cuda' in device:
    print(device)
    if args.dp:
        print("using data parallel")
        net = torch.nn.DataParallel(net) # make parallel
        cudnn.benchmark = True


cuda


In [121]:
!rm -rf results

In [122]:

def get_vit():
    """Build a fresh 10-class ViT from the notebook-level `size` and `args` settings.

    Note that `args.dimhead` is used as the model's embedding width (`dim`).
    """
    config = dict(
        image_size = size,
        patch_size = args.patch,
        num_classes = 10,
        dim = int(args.dimhead),
        depth = 6,
        heads = 8,
        # mlp_dim = 512,
        mlp_dim = 256,
        dropout = 0.1,
        emb_dropout = 0.1,
    )
    return ViT(**config)

In [123]:


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model factory..
print('==> Building model..')
# net = VGG('VGG19')
if args.net=="vit":
    # ViT for cifar10
    net = get_vit()
# NOTE(review): for any other --net value this cell leaves `net` unassigned.



if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    # map_location=device lets a checkpoint that was written on a GPU be
    # restored on a CPU-only machine (and vice versa).
    checkpoint = torch.load('./checkpoint/{}-ckpt.t7'.format(args.net), map_location=device)
    # NOTE(review): the commented-out checkpoint-saving code in test() stores the
    # keys "model"/"optimizer"/"scaler" under a file name that also contains the
    # patch size; the keys and path read here ('net', 'acc', 'epoch') do not
    # match that format - confirm which layout the checkpoints really use.
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

==> Building model..


In [124]:
from tqdm import tqdm

In [125]:

len(trainloader)

98

In [126]:

# trainloader[0]

In [127]:
MAX_EPOCHS = 90

In [152]:
import numpy as np

# Loss is CE
criterion = nn.CrossEntropyLoss()

# Re-seed right before building the model so every execution of this cell
# starts from the same initial weights.
torch.manual_seed(42)
net = get_vit()

if args.opt == "adam":
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
elif args.opt == "sgd":
    optimizer = optim.SGD(net.parameters(), lr=args.lr)
else:
    # Fail here with a clear message; previously an unknown --opt fell through
    # and surfaced as a NameError on `optimizer` in the scheduler line below.
    raise ValueError(f"unsupported --opt {args.opt!r}; expected 'adam' or 'sgd'")

# use cosine scheduling
# NOTE(review): the cosine period is args.n_epochs, whereas the training loop
# below runs for MAX_EPOCHS epochs; when the two differ the learning rate never
# reaches the end of the cosine curve - confirm this is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs)

##### Training
# Gradient scaler for mixed precision; disabled when use_amp is False.
scaler = torch.amp.GradScaler('cuda',enabled=use_amp)
def train(epoch,save_flag, run_dir = None, img_idx = None):
    """Run one training pass over `trainloader` and return the mean batch loss.

    Args:
        epoch: epoch number; not used inside the function body.
        save_flag: when True, the forward pass of batch 0 is run in "dump" mode
            so the model writes its intermediate tensors to disk.
        run_dir: directory under which `batch 0` is created for those dumps.
        img_idx: indices (within the batch) of the samples whose tensors are dumped.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    net.train()
    running_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        # Only the very first batch of the epoch is ever dumped.
        dump_this_batch = save_flag == True and batch_idx == 0

        # Train with amp
        with torch.amp.autocast('cuda',enabled=use_amp):
            if dump_this_batch:
                batch_dir = os.path.join(run_dir, f'batch {batch_idx}')
                os.makedirs(batch_dir, exist_ok=True)
                outputs = net(inputs, True, batch_dir, img_idx)
            else:
                outputs = net(inputs)
            loss = criterion(outputs, targets)

        # Scaled backward pass, optimizer step, scaler update, then clear the
        # gradients ready for the next batch.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        running_loss += loss.item()
        predicted = outputs.max(1)[1]
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    return running_loss/(batch_idx+1)
##### Validation
def test(epoch):
    """Evaluate `net` on `testloader` and append one summary line to the text log.

    Args:
        epoch: epoch number, used only in the log line.

    Returns:
        (test_loss, acc): `test_loss` is the SUM of the per-batch losses (it is
        not divided by the number of batches) and `acc` is the accuracy in percent.
    """
    # Only needed by the disabled checkpoint code below, which assigns best_acc.
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    acc = 100.*correct/total
#     if acc > best_acc:
#         print('Saving..')
#         state = {"model": net.state_dict(),
#               "optimizer": optimizer.state_dict(),
#               "scaler": scaler.state_dict()}
#         if not os.path.isdir('checkpoint'):
#             os.mkdir('checkpoint')
#         torch.save(state, './checkpoint/'+args.net+'-{}-ckpt.t7'.format(args.patch))
#         best_acc = acc

    # Create the directory the log file is actually written to. The previous
    # code created "results/log" (with a stray extra space of indentation that
    # is a syntax error) while the file below is opened under "log/".
    os.makedirs("log", exist_ok=True)
    content = f'Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, val loss: {test_loss:.5f}, acc: {(acc):.5f}'
    print(content)
    with open(f'log/log_{args.net}_patch{args.patch}.txt', 'a') as appender:
        appender.write(content + "\n")
    return test_loss, acc

import shutil
from IPython.display import FileLink

# ---- Tunable settings for this run ------------------------------------------
# These are defined before anything reads them. Previously img_save_count was
# assigned only after it had been used to pick img_idx, which raises NameError
# on a fresh kernel.
max_epochs = MAX_EPOCHS

# take_epoch_factor =
img_save_count = 50 #IMP  number of images of batch 0 whose tensors are dumped

save_epochs = 11 #IMP
# epoch_factor = max_epochs  // save_epochs #IMP

epoch_factor = 10 #IMP  dump every epoch_factor-th epoch

# ---- Per-run state -----------------------------------------------------------
# Validation loss / accuracy history, one entry per epoch.
list_loss = []
list_acc = []

if usewandb:
    wandb.watch(net)

# save_epochs-=1
batch_size = int(args.bs)
# max_epochs = args.n_epochs

if device == 'cuda':
  net.cuda()
main_list=list()
data_save=list()
n_param=5

# Every run gets its own numbered directory under base_dir.
run_dir = os.path.join(base_dir, f"run {run_number:02}")
os.makedirs(run_dir, exist_ok=True)
print("Run number ",run_number)

# Pick img_save_count distinct positions inside a batch, in ascending order.
# NOTE(review): these are positions within batch 0, and the training loader is
# built with shuffle=True, so the same position presumably refers to a
# different image in every epoch - confirm this is what the analysis expects.
img_idx = torch.randperm(batch_size)[:img_save_count]
img_idx= img_idx.sort()[0]
print("chosen images are of batch 0 and numbers : ",[x.item() for x in list(img_idx)])

# Store the chosen positions next to the dumps so they can be matched later.
file_path = os.path.join(run_dir, 'img_idx.npy')
np.save(file_path, img_idx.detach().cpu().numpy())

# Specify the directory you want to compress
directory_name = run_dir
zip_filename = f'{run_dir}.zip'

# Advance the counter so re-running this cell starts a new run directory.
run_number += 1

print(f"Saving every {epoch_factor} epochs ")


# Main loop: one train pass and one evaluation per epoch, with tensor dumps on
# selected epochs, then logging to wandb (optional) and to a CSV file.
print("Training started")
for i in tqdm(range(start_epoch, max_epochs), desc="Training"):
    # Epochs are reported 1-based.
    epoch = i+1
#     print('\nEpoch: %d' % epoch)
    start = time.time()
#     if(epoch%epoch_factor==0 or epoch == max_epochs):
    
    # Dump tensors on the first epoch, on every epoch_factor-th epoch and on
    # the last epoch.
    if(epoch%epoch_factor==0 or epoch == 1 or epoch == max_epochs):
      # Define the new run directory
        
        epoch_dir = os.path.join(run_dir, f"epoch {epoch:02}")
        # print("\n\tpassed into trainloss",run_dir)
        trainloss = train(epoch,True, run_dir = epoch_dir, img_idx = img_idx)
        print("saved epoch")
        # Compress the directory into a zip file, overwriting if it already exists
        # make_archive adds the ".zip" suffix itself, hence the suffix is
        # stripped from zip_filename first. The whole run directory is
        # re-archived at every dump.
        shutil.make_archive(zip_filename.replace('.zip', ''), 'zip', directory_name)

#         print(f"Directory '{directory_name}' has been zipped as '{zip_filename}'.")
        print("Click here to download run  : ")
        display(FileLink(zip_filename))
    
    else:
        trainloss = train(epoch,False)



#     if(epoch%n_param!=0 or epoch==0):
#         data_save.append(saved_data)
#     else:
#         data_save.append(saved_data)
#         main_list.append(data_save)
#         data_save=list()
    val_loss, acc = test(epoch)

    scheduler.step() # step cosine scheduling

    list_loss.append(val_loss)
    list_acc.append(acc)

    # Log training..
    if usewandb:
        wandb.log({'epoch': epoch, 'train_loss': trainloss, 'val_loss': val_loss, "val_acc": acc, "lr": optimizer.param_groups[0]["lr"],
        "epoch_time": time.time()-start})

    # Write out csv..
    # Opened in 'w' mode: the file is rewritten from scratch each epoch with the
    # full history (row 1 = validation losses, row 2 = accuracies).
    with open(f'log/log_{args.net}_patch{args.patch}.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(list_loss)
        writer.writerow(list_acc)
#     print(list_loss)
    print()
    
# writeout wandb
# NOTE(review): nothing visible in this notebook writes a "wandb_<net>.h5" file,
# so this call may have nothing to upload - confirm.
if usewandb:
    wandb.save("wandb_{}.h5".format(args.net))



Run number  7
chosen images are of batch 0 and numbers :  [11, 12, 17, 19, 23, 44, 56, 97, 99, 103, 147, 152, 154, 162, 183, 187, 197, 202, 212, 230, 241, 251, 256, 267, 274, 277, 311, 313, 314, 317, 320, 332, 343, 367, 371, 393, 394, 408, 410, 417, 422, 430, 437, 445, 459, 469, 479, 480, 490, 506]
Saving every 10 epochs 
Training started


Training:   0%|          | 0/90 [00:00<?, ?it/s]

saved epoch
Click here to download run  : 


Training:   1%|          | 1/90 [00:39<58:16, 39.29s/it]

Epoch 1, lr: 0.0005000, val loss: 161.45557, acc: 41.94000



Training:   2%|▏         | 2/90 [01:09<49:48, 33.96s/it]

Epoch 2, lr: 0.0005000, val loss: 150.88297, acc: 44.89000



Training:   3%|▎         | 3/90 [01:39<46:28, 32.06s/it]

Epoch 3, lr: 0.0005000, val loss: 136.06395, acc: 50.32000



Training:   4%|▍         | 4/90 [02:08<44:35, 31.11s/it]

Epoch 4, lr: 0.0005000, val loss: 130.18875, acc: 53.01000



Training:   6%|▌         | 5/90 [02:38<43:23, 30.63s/it]

Epoch 5, lr: 0.0004999, val loss: 125.29897, acc: 55.17000



Training:   7%|▋         | 6/90 [03:08<42:30, 30.36s/it]

Epoch 6, lr: 0.0004999, val loss: 117.74250, acc: 57.21000



Training:   8%|▊         | 7/90 [03:38<41:43, 30.16s/it]

Epoch 7, lr: 0.0004998, val loss: 117.32786, acc: 58.32000



Training:   9%|▉         | 8/90 [04:08<41:02, 30.03s/it]

Epoch 8, lr: 0.0004998, val loss: 115.41125, acc: 58.96000



Training:  10%|█         | 9/90 [04:37<40:26, 29.96s/it]

Epoch 9, lr: 0.0004997, val loss: 108.95495, acc: 61.55000

saved epoch
Click here to download run  : 


Training:  11%|█         | 10/90 [05:24<46:59, 35.25s/it]

Epoch 10, lr: 0.0004996, val loss: 105.39031, acc: 62.43000



Training:  12%|█▏        | 11/90 [05:55<44:25, 33.74s/it]

Epoch 11, lr: 0.0004995, val loss: 102.83137, acc: 63.19000



Training:  13%|█▎        | 12/90 [06:25<42:20, 32.57s/it]

Epoch 12, lr: 0.0004994, val loss: 99.40059, acc: 64.27000



Training:  14%|█▍        | 13/90 [06:54<40:38, 31.67s/it]

Epoch 13, lr: 0.0004993, val loss: 100.67912, acc: 63.98000



Training:  16%|█▌        | 14/90 [07:24<39:26, 31.14s/it]

Epoch 14, lr: 0.0004992, val loss: 95.82338, acc: 65.62000



Training:  17%|█▋        | 15/90 [07:54<38:27, 30.77s/it]

Epoch 15, lr: 0.0004990, val loss: 95.85497, acc: 66.08000



Training:  18%|█▊        | 16/90 [08:24<37:36, 30.49s/it]

Epoch 16, lr: 0.0004989, val loss: 93.15425, acc: 66.94000



Training:  19%|█▉        | 17/90 [08:54<36:53, 30.32s/it]

Epoch 17, lr: 0.0004987, val loss: 92.02762, acc: 67.11000



Training:  20%|██        | 18/90 [09:24<36:13, 30.18s/it]

Epoch 18, lr: 0.0004986, val loss: 90.25629, acc: 67.94000



Training:  21%|██        | 19/90 [09:54<35:38, 30.12s/it]

Epoch 19, lr: 0.0004984, val loss: 88.20175, acc: 69.29000

saved epoch
Click here to download run  : 


Training:  22%|██▏       | 20/90 [10:49<43:48, 37.55s/it]

Epoch 20, lr: 0.0004982, val loss: 84.39274, acc: 69.86000



Training:  23%|██▎       | 21/90 [11:19<40:36, 35.31s/it]

Epoch 21, lr: 0.0004980, val loss: 82.16359, acc: 70.66000



Training:  24%|██▍       | 22/90 [11:49<38:09, 33.68s/it]

Epoch 22, lr: 0.0004978, val loss: 83.45730, acc: 70.65000



Training:  26%|██▌       | 23/90 [12:19<36:23, 32.59s/it]

Epoch 23, lr: 0.0004976, val loss: 77.37180, acc: 72.39000



Training:  27%|██▋       | 24/90 [12:49<35:06, 31.91s/it]

Epoch 24, lr: 0.0004974, val loss: 79.79322, acc: 72.25000



Training:  28%|██▊       | 25/90 [13:19<33:55, 31.32s/it]

Epoch 25, lr: 0.0004972, val loss: 76.89740, acc: 72.39000



Training:  29%|██▉       | 26/90 [13:49<32:55, 30.86s/it]

Epoch 26, lr: 0.0004969, val loss: 76.23590, acc: 73.01000



Training:  30%|███       | 27/90 [14:18<32:03, 30.54s/it]

Epoch 27, lr: 0.0004967, val loss: 71.75855, acc: 74.26000



Training:  31%|███       | 28/90 [14:48<31:19, 30.31s/it]

Epoch 28, lr: 0.0004964, val loss: 72.44552, acc: 74.57000



Training:  32%|███▏      | 29/90 [15:18<30:38, 30.15s/it]

Epoch 29, lr: 0.0004961, val loss: 72.29670, acc: 74.55000

saved epoch
Click here to download run  : 


Training:  33%|███▎      | 30/90 [16:20<39:44, 39.74s/it]

Epoch 30, lr: 0.0004959, val loss: 69.48505, acc: 75.24000



Training:  34%|███▍      | 31/90 [16:50<36:18, 36.93s/it]

Epoch 31, lr: 0.0004956, val loss: 71.22585, acc: 74.50000



Training:  36%|███▌      | 32/90 [17:20<33:40, 34.84s/it]

Epoch 32, lr: 0.0004953, val loss: 68.30499, acc: 75.94000



Training:  37%|███▋      | 33/90 [17:50<31:36, 33.27s/it]

Epoch 33, lr: 0.0004950, val loss: 65.55686, acc: 76.99000



Training:  38%|███▊      | 34/90 [18:20<30:04, 32.23s/it]

Epoch 34, lr: 0.0004946, val loss: 67.70130, acc: 76.12000



Training:  39%|███▉      | 35/90 [18:50<28:51, 31.49s/it]

Epoch 35, lr: 0.0004943, val loss: 65.08933, acc: 77.50000



Training:  40%|████      | 36/90 [19:19<27:53, 30.98s/it]

Epoch 36, lr: 0.0004940, val loss: 65.68675, acc: 77.44000



Training:  41%|████      | 37/90 [19:49<27:03, 30.64s/it]

Epoch 37, lr: 0.0004936, val loss: 64.47322, acc: 77.25000



Training:  42%|████▏     | 38/90 [20:19<26:20, 30.40s/it]

Epoch 38, lr: 0.0004933, val loss: 63.30965, acc: 78.27000



Training:  43%|████▎     | 39/90 [20:49<25:44, 30.28s/it]

Epoch 39, lr: 0.0004929, val loss: 61.47676, acc: 78.34000

saved epoch
Click here to download run  : 


Training:  44%|████▍     | 40/90 [22:00<35:23, 42.47s/it]

Epoch 40, lr: 0.0004925, val loss: 59.79138, acc: 78.97000



Training:  46%|████▌     | 41/90 [22:30<31:40, 38.79s/it]

Epoch 41, lr: 0.0004921, val loss: 62.56235, acc: 77.96000



Training:  47%|████▋     | 42/90 [23:00<28:53, 36.10s/it]

Epoch 42, lr: 0.0004918, val loss: 61.18800, acc: 78.36000



Training:  48%|████▊     | 43/90 [23:30<26:44, 34.14s/it]

Epoch 43, lr: 0.0004913, val loss: 58.46163, acc: 79.74000



Training:  49%|████▉     | 44/90 [23:59<25:10, 32.84s/it]

Epoch 44, lr: 0.0004909, val loss: 59.28314, acc: 79.41000



Training:  50%|█████     | 45/90 [24:29<23:56, 31.91s/it]

Epoch 45, lr: 0.0004905, val loss: 55.91828, acc: 80.55000



Training:  51%|█████     | 46/90 [24:59<22:57, 31.30s/it]

Epoch 46, lr: 0.0004901, val loss: 55.76783, acc: 80.30000



Training:  52%|█████▏    | 47/90 [25:29<22:10, 30.93s/it]

Epoch 47, lr: 0.0004896, val loss: 56.68991, acc: 80.28000



Training:  53%|█████▎    | 48/90 [25:59<21:26, 30.64s/it]

Epoch 48, lr: 0.0004892, val loss: 56.62746, acc: 80.50000



Training:  54%|█████▍    | 49/90 [26:29<20:47, 30.42s/it]

Epoch 49, lr: 0.0004887, val loss: 58.80830, acc: 79.39000

saved epoch
Click here to download run  : 


Training:  56%|█████▌    | 50/90 [27:47<29:43, 44.58s/it]

Epoch 50, lr: 0.0004882, val loss: 56.37315, acc: 80.37000



Training:  57%|█████▋    | 51/90 [28:17<26:07, 40.20s/it]

Epoch 51, lr: 0.0004878, val loss: 52.76902, acc: 81.36000



Training:  58%|█████▊    | 52/90 [28:47<23:31, 37.14s/it]

Epoch 52, lr: 0.0004873, val loss: 53.09667, acc: 81.60000



Training:  59%|█████▉    | 53/90 [29:16<21:31, 34.90s/it]

Epoch 53, lr: 0.0004868, val loss: 52.03132, acc: 81.81000



Training:  60%|██████    | 54/90 [29:46<20:00, 33.34s/it]

Epoch 54, lr: 0.0004863, val loss: 52.79682, acc: 81.56000



Training:  61%|██████    | 55/90 [30:16<18:47, 32.23s/it]

Epoch 55, lr: 0.0004857, val loss: 52.41723, acc: 82.00000



Training:  62%|██████▏   | 56/90 [30:46<17:53, 31.58s/it]

Epoch 56, lr: 0.0004852, val loss: 52.56272, acc: 82.04000



Training:  63%|██████▎   | 57/90 [31:16<17:06, 31.12s/it]

Epoch 57, lr: 0.0004847, val loss: 51.10339, acc: 82.27000



Training:  64%|██████▍   | 58/90 [31:45<16:22, 30.69s/it]

Epoch 58, lr: 0.0004841, val loss: 53.71999, acc: 81.56000



Training:  66%|██████▌   | 59/90 [32:15<15:42, 30.41s/it]

Epoch 59, lr: 0.0004836, val loss: 49.16701, acc: 82.67000

saved epoch
Click here to download run  : 


Training:  67%|██████▋   | 60/90 [33:40<23:24, 46.81s/it]

Epoch 60, lr: 0.0004830, val loss: 50.26346, acc: 82.86000



Training:  68%|██████▊   | 61/90 [34:10<20:11, 41.77s/it]

Epoch 61, lr: 0.0004824, val loss: 49.38930, acc: 83.39000



Training:  69%|██████▉   | 62/90 [34:40<17:49, 38.20s/it]

Epoch 62, lr: 0.0004819, val loss: 47.34471, acc: 83.73000



Training:  70%|███████   | 63/90 [35:10<16:00, 35.58s/it]

Epoch 63, lr: 0.0004813, val loss: 46.48988, acc: 83.70000



Training:  71%|███████   | 64/90 [35:39<14:39, 33.82s/it]

Epoch 64, lr: 0.0004807, val loss: 48.42068, acc: 84.02000



Training:  72%|███████▏  | 65/90 [36:09<13:34, 32.60s/it]

Epoch 65, lr: 0.0004801, val loss: 46.12020, acc: 84.13000



Training:  73%|███████▎  | 66/90 [36:39<12:40, 31.69s/it]

Epoch 66, lr: 0.0004794, val loss: 45.72937, acc: 84.10000



Training:  74%|███████▍  | 67/90 [37:08<11:54, 31.08s/it]

Epoch 67, lr: 0.0004788, val loss: 45.97971, acc: 84.29000



Training:  76%|███████▌  | 68/90 [37:38<11:14, 30.67s/it]

Epoch 68, lr: 0.0004782, val loss: 45.75011, acc: 84.22000



Training:  77%|███████▋  | 69/90 [38:08<10:39, 30.46s/it]

Epoch 69, lr: 0.0004775, val loss: 46.08802, acc: 83.97000

saved epoch
Click here to download run  : 


Training:  78%|███████▊  | 70/90 [39:40<16:17, 48.85s/it]

Epoch 70, lr: 0.0004769, val loss: 46.61062, acc: 83.97000



Training:  79%|███████▉  | 71/90 [40:10<13:41, 43.22s/it]

Epoch 71, lr: 0.0004762, val loss: 45.16851, acc: 83.93000



Training:  80%|████████  | 72/90 [40:39<11:45, 39.18s/it]

Epoch 72, lr: 0.0004755, val loss: 44.51863, acc: 84.24000



Training:  81%|████████  | 73/90 [41:09<10:17, 36.31s/it]

Epoch 73, lr: 0.0004749, val loss: 43.64628, acc: 84.86000



Training:  82%|████████▏ | 74/90 [41:39<09:09, 34.37s/it]

Epoch 74, lr: 0.0004742, val loss: 44.73753, acc: 84.72000



Training:  83%|████████▎ | 75/90 [42:09<08:14, 32.95s/it]

Epoch 75, lr: 0.0004735, val loss: 43.05094, acc: 85.19000



Training:  84%|████████▍ | 76/90 [42:38<07:27, 31.94s/it]

Epoch 76, lr: 0.0004728, val loss: 43.42883, acc: 84.96000



Training:  86%|████████▌ | 77/90 [43:08<06:46, 31.26s/it]

Epoch 77, lr: 0.0004720, val loss: 43.67812, acc: 85.09000



Training:  87%|████████▋ | 78/90 [43:38<06:09, 30.78s/it]

Epoch 78, lr: 0.0004713, val loss: 43.36342, acc: 84.95000



Training:  88%|████████▊ | 79/90 [44:07<05:34, 30.44s/it]

Epoch 79, lr: 0.0004706, val loss: 42.98672, acc: 85.12000

saved epoch
Click here to download run  : 


Training:  89%|████████▉ | 80/90 [45:47<08:31, 51.15s/it]

Epoch 80, lr: 0.0004698, val loss: 40.60809, acc: 85.87000



Training:  90%|█████████ | 81/90 [46:17<06:43, 44.80s/it]

Epoch 81, lr: 0.0004691, val loss: 42.47739, acc: 85.12000



Training:  91%|█████████ | 82/90 [46:46<05:22, 40.28s/it]

Epoch 82, lr: 0.0004683, val loss: 41.10129, acc: 85.98000



Training:  92%|█████████▏| 83/90 [47:16<04:19, 37.05s/it]

Epoch 83, lr: 0.0004675, val loss: 39.57435, acc: 86.27000



Training:  93%|█████████▎| 84/90 [47:46<03:29, 34.85s/it]

Epoch 84, lr: 0.0004668, val loss: 41.62044, acc: 85.61000



Training:  94%|█████████▍| 85/90 [48:15<02:46, 33.24s/it]

Epoch 85, lr: 0.0004660, val loss: 40.01507, acc: 86.49000



Training:  96%|█████████▌| 86/90 [48:45<02:08, 32.16s/it]

Epoch 86, lr: 0.0004652, val loss: 40.38014, acc: 86.29000



Training:  97%|█████████▋| 87/90 [49:14<01:34, 31.42s/it]

Epoch 87, lr: 0.0004644, val loss: 40.06103, acc: 86.60000



Training:  98%|█████████▊| 88/90 [49:44<01:01, 30.95s/it]

Epoch 88, lr: 0.0004636, val loss: 40.01941, acc: 86.46000



Training:  99%|█████████▉| 89/90 [50:14<00:30, 30.61s/it]

Epoch 89, lr: 0.0004627, val loss: 40.54781, acc: 86.67000

saved epoch
Click here to download run  : 


Training: 100%|██████████| 90/90 [52:01<00:00, 34.69s/it]

Epoch 90, lr: 0.0004619, val loss: 40.15518, acc: 86.35000






In [151]:
# !rm -rf results
# !rm -rf log

In [160]:
import os

# Specify the file name
# NOTE(review): the run folder is hard-coded as 'run 07', whereas the training
# cell derives the folder name from run_number; this line has to be edited by
# hand to inspect a different run.
filename = base_dir + '/run 07.zip'

# Get the file size
file_size_bytes = os.path.getsize(filename)

# Convert to megabytes (MB) for readability
# 1 MB is taken as 1024 * 1024 bytes here.
file_size_mb = file_size_bytes / (1024 * 1024)

print(f"Size of '{filename}': {file_size_bytes} bytes ({file_size_mb:.2f} MB)")

Size of 'results/runs/run 07.zip': 1295205889 bytes (1235.20 MB)


'results/runs'

In [144]:
# img_idx.detach().numpy()

In [153]:
import shutil
from IPython.display import FileLink

# Specify the directory you want to compress
# These two names are also assigned by the training cell; running this cell
# rebinds them to the log directory until the training cell is run again.
directory_name = 'log'
zip_filename = 'log.zip'

# Compress the directory into a zip file, overwriting if it already exists
# make_archive appends ".zip" to the base name itself, so the suffix is
# stripped from zip_filename before the call.
shutil.make_archive(zip_filename.replace('.zip', ''), 'zip', directory_name)

# Optionally generate and display a download link
print(f"Directory '{directory_name}' has been zipped as '{zip_filename}'.")
# Last expression of the cell, so the link is rendered as the cell output.
FileLink(zip_filename)


Directory 'log' has been zipped as 'log.zip'.


In [None]:

net

In [None]:

/content/results/runs/run 04/epoch 00/batch 0/layer 01

In [None]:
!ls

In [None]:
cd 

In [None]:

import os

# Define the directory path you want to check
directory_path = r'results/runs/run 03/epoch 00/batch 0/layer 01/'

# Check if the directory exists
if os.path.isdir(directory_path):
    print(f"The directory '{directory_path}' exists.")
else:
    print(f"The directory '{directory_path}' does not exist.")


In [None]:
file_path = r'results/runs/run 03/epoch 00/batch 0/layer 01/01_attention_out.npy'

# Load the NumPy array from the file
data = np.load(file_path)

In [None]:
data.shape

In [None]:

data.shape
# shape : batch x head x X x Y