In [2]:
# Mount Google Drive so the dataset and log paths under /content/drive/MyDrive
# used by later cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install timm==0.4.12
!git clone https://github.com/Chungchih/ViT_Training.git
%cd ViT_Training

fatal: destination path 'ViT_Training' already exists and is not an empty directory.
/content/ViT_Training


In [4]:
# Pinned bitsandbytes release; imported below as `bnb` and needed for the
# `load_in_8bit=True` model load.
!pip install bitsandbytes==0.43.3



In [5]:
from transformers import AutoFeatureExtractor, AutoModelForImageClassification, BitsAndBytesConfig
import torch
import torch.nn as nn
import bitsandbytes as bnb
import time
import utils


In [6]:
# 8-bit weight quantization settings, handed to `from_pretrained` below.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Pretrained DeiT-tiny classifier, loaded with the 8-bit quantization config.
model = AutoModelForImageClassification.from_pretrained(
    "facebook/deit-tiny-patch16-224",
    quantization_config=quantization_config,
)

# Matching feature extractor; its `image_mean` / `image_std` are used to
# normalise the evaluation images.
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/deit-tiny-patch16-224")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [7]:
class NoiseQuantizedLinear8Bit(nn.Module):
    """Wrap a linear layer and inject multiplicative Gaussian noise on each call.

    On every forward pass the input activations and the wrapped layer's weight
    tensor are scaled element-wise by ``1 + noise_level * N(0, 1)``.

    Args:
        original_linear: Layer to wrap. It must expose a ``weight`` parameter and
            be callable on a tensor.
        x_noise_level: Relative standard deviation of the activation noise.
        w_noise_level: Relative standard deviation of the weight noise.
        quantization_bits: Stored on the instance; not used by the noise model.

    NOTE(review): the weight perturbation is written to ``weight.data``. The sweep
    output recorded in this notebook is identical for ``w_noise_level`` 0 and 0.3,
    which suggests the bitsandbytes ``Linear8bitLt`` kernel does not read
    ``weight.data`` once the layer is quantized -- confirm before trusting any
    weight-noise result obtained through this wrapper.
    """

    def __init__(self, original_linear, x_noise_level=0.01, w_noise_level=0.01, quantization_bits=8):
        super().__init__()
        self.original_linear = original_linear
        self.x_noise_level = x_noise_level
        self.w_noise_level = w_noise_level
        self.quantization_bits = quantization_bits

    def add_quantization_noise(self, x, noise_level=0.01):
        """Return ``x * (1 + noise)`` with ``noise ~ noise_level * N(0, 1)``.

        The noise is sampled in float32 with the shape and device of ``x``.
        No clipping is applied to the result.
        """
        x_float = x.float()
        noise = torch.randn_like(x_float) * noise_level
        return x * (1 + noise)

    def forward(self, x):
        """Run the wrapped layer on noisy inputs with temporarily noisy weights.

        The weight tensor seen before the call is put back afterwards, so every
        forward pass perturbs the same clean weights instead of stacking new
        noise on top of the noise left behind by earlier passes.
        """
        weight_param = self.original_linear.weight
        clean_weights = weight_param.data
        weight_param.data = self.add_quantization_noise(clean_weights, self.w_noise_level)
        try:
            x_noisy = self.add_quantization_noise(x, self.x_noise_level)
            return self.original_linear(x_noisy.float())
        finally:
            # Restore even if the wrapped layer raises, so the layer is never
            # left holding perturbed weights.
            weight_param.data = clean_weights

def add_noise_to_8bit_model(model, x_noise_level=0.01, w_noise_level=0.01, quantization_bits=8):
    """Recursively wrap every ``Linear8bitLt`` child in ``NoiseQuantizedLinear8Bit``.

    Layers are matched by class name (``'Linear8bitLt'``) and must have a
    ``weight`` attribute; all other children are descended into.

    Args:
        model: Module whose sub-modules are replaced in place.
        x_noise_level: Activation noise level given to each wrapper.
        w_noise_level: Weight noise level given to each wrapper.
        quantization_bits: Forwarded to each wrapper (previously a fixed 8 was
            passed regardless of this argument).

    Returns:
        The same ``model`` object, modified in place.
    """
    # Snapshot the children first: the loop replaces entries of the module dict.
    for name, module in list(model.named_children()):
        if hasattr(module, 'weight') and module.__class__.__name__ == 'Linear8bitLt':
            wrapper = NoiseQuantizedLinear8Bit(
                original_linear=module,
                x_noise_level=x_noise_level,
                w_noise_level=w_noise_level,
                quantization_bits=quantization_bits,
            )
            setattr(model, name, wrapper)
        else:
            add_noise_to_8bit_model(module, x_noise_level, w_noise_level, quantization_bits)
    return model

#model = add_noise_to_8bit_model(model, 0.1, 4)


In [8]:
import os

from torchvision import datasets, transforms

#from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.data import create_transform
import numpy as np


def imgFolder_mapping(data_path, mapping_file=r'/content/ViT_Training/LOC_synset_mapping.txt'):
    """Map class-folder indices under ``<data_path>/train`` to synset line indices.

    Args:
        data_path: Dataset root containing a ``train`` directory with one
            sub-directory per class, each named after a synset id.
        mapping_file: Text file whose first whitespace-separated token on each
            non-empty line is a synset id; a line's position (0-based, blank
            lines not counted) is the index returned for that id.

    Returns:
        dict mapping the position of each class folder in the sorted folder list
        to the position of its synset id in ``mapping_file``.

    Raises:
        ValueError: If a class folder name is not listed in ``mapping_file``.
    """
    train_dir = os.path.join(data_path, 'train')
    # Only directories count as classes: torchvision's ImageFolder numbers classes
    # by sorted sub-directory name, so a stray file (e.g. .DS_Store) in the listing
    # would shift every index after it.
    folder_classes = sorted(
        entry for entry in os.listdir(train_dir)
        if os.path.isdir(os.path.join(train_dir, entry))
    )

    # synset id -> line position, built once so each lookup is O(1).
    synset_to_model_idx = {}
    position = 0
    with open(mapping_file, encoding='utf-8') as fh:
        for line in fh:
            fields = line.split()
            if not fields:
                continue
            # Keep the first occurrence, matching list.index semantics.
            synset_to_model_idx.setdefault(fields[0], position)
            position += 1

    folder_to_model_idx = {}
    for idx, name in enumerate(folder_classes):
        if name not in synset_to_model_idx:
            raise ValueError('class folder {!r} is not listed in {}'.format(name, mapping_file))
        folder_to_model_idx[idx] = synset_to_model_idx[name]
    return folder_to_model_idx

# Per-process cache of folder-index -> model-index tables, keyed by dataset root.
_FOLDER_TO_MODEL_IDX_CACHE = {}

def trans_idx(idx):
    """Translate an ImageFolder class index into the model's class index.

    The lookup table comes from ``imgFolder_mapping`` for the mini-ImageNet root.
    It is built on first use and cached, because this function runs once per
    sample as a ``target_transform`` and rebuilding the table each time would
    re-list the dataset folder and re-read the synset file for every image.

    Args:
        idx: Class index assigned by the image-folder dataset.

    Returns:
        The corresponding index in the synset mapping.
    """
    data_path = r'/content/drive/MyDrive/mini_imagenet'
    mapping = _FOLDER_TO_MODEL_IDX_CACHE.get(data_path)
    if mapping is None:
        mapping = imgFolder_mapping(data_path)
        _FOLDER_TO_MODEL_IDX_CACHE[data_path] = mapping
    return mapping[idx]

def build_transform(is_train=False):
    """Build the image preprocessing pipeline for 224x224 inputs.

    Args:
        is_train: When truthy, return timm's training augmentation pipeline;
            otherwise return the deterministic evaluation pipeline.

    Returns:
        A transform object: the result of ``create_transform`` for training, or a
        ``transforms.Compose`` of resize, centre crop, tensor conversion and
        normalisation (using the global ``feature_extractor`` statistics) for
        evaluation.
    """
    input_size = 224
    resize_im = input_size > 32

    if not is_train:
        # Resize to 224 / 0.875 on the short side, then centre-crop back to 224,
        # so the crop keeps the same ratio w.r.t. 224 images.
        eval_steps = [
            transforms.Resize(int(input_size / 0.875), interpolation=3),  # 3 = PIL bicubic code
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(feature_extractor.image_mean, feature_extractor.image_std),
        ]
        return transforms.Compose(eval_steps)

    # Training branch: this should always dispatch to transforms_imagenet_train.
    transform = create_transform(
        input_size=input_size,
        is_training=True,
        color_jitter=0.3,
        auto_augment='rand-m9-mstd0.5-inc1',
        interpolation='bicubic',
        re_prob=0.25,
        re_mode='pixel',
        re_count=1,
    )
    if not resize_im:
        # Small inputs: swap RandomResizedCropAndInterpolation for a padded RandomCrop.
        transform.transforms[0] = transforms.RandomCrop(input_size, padding=4)
    return transform

def build_dataset(is_train, path):
    """Create the image-folder dataset for one split.

    Args:
        is_train: Truthy selects ``<path>/train`` with the training transform;
            falsy selects ``<path>/val`` with the evaluation transform.
        path: Dataset root directory.

    Returns:
        ``(dataset, nb_classes)`` where ``nb_classes`` is always 1000. Targets are
        remapped through ``trans_idx``.
    """
    split = 'train' if is_train else 'val'
    dataset = datasets.ImageFolder(
        os.path.join(path, split),
        transform=build_transform(is_train),
        target_transform=trans_idx,
    )
    return dataset, 1000

# Dataset root on the mounted Drive; `build_dataset` reads its `train` and `val`
# sub-folders.
path = '/content/drive/MyDrive/mini_imagenet'

dataset_train, nb_classes = build_dataset(is_train=True, path=path)
dataset_val, _ = build_dataset(is_train=False, path=path)

# Validation samples are visited in dataset order.
sampler_val = torch.utils.data.SequentialSampler(dataset_val)

In [9]:
from timm.utils import accuracy
def evaluate(data_loader_val, model, device):
    """Evaluate ``model`` on a validation loader and report loss / top-1 / top-5.

    Args:
        data_loader_val: Iterable yielding ``(images, target)`` tensor batches.
        model: Module whose output exposes a ``.logits`` tensor.
        device: Device (string or ``torch.device``) the batches are moved to. Its
            device type also selects the autocast backend, so a non-CUDA device
            no longer requests CUDA autocast.

    Returns:
        dict mapping each meter name (``loss``, ``acc1``, ``acc5``) to its global
        average over the loader.
    """
    criterion = torch.nn.CrossEntropyLoss()

    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'
    autocast_device = torch.device(device).type

    # switch to evaluation mode
    model.eval()

    for images, target in metric_logger.log_every(data_loader_val, 10, header):
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output without tracking gradients, under mixed precision
        with torch.no_grad(), torch.amp.autocast(autocast_device):
            output = model(images).logits
            loss = criterion(output, target)

        acc1, acc5 = accuracy(output, target, topk=(1, 5))

        # Accuracy meters are weighted by batch size so a smaller final batch
        # does not skew the global average.
        batch_size = images.shape[0]
        metric_logger.update(loss=loss.item())
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}'
          .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss))

    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}

from samplers import RASampler

# World size and rank as reported by the repo's `utils` helpers.
num_tasks = utils.get_world_size()
global_rank = utils.get_rank()

# Training sampler; built here but not consumed by the evaluation sweep below.
sampler_train = RASampler(dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True)

# Validation loader: batches of 512 drawn through `sampler_val`, keeping the
# final partial batch.
data_loader_val = torch.utils.data.DataLoader(
    dataset_val,
    sampler=sampler_val,
    batch_size=512,
    num_workers=8,
    pin_memory=True,
    drop_last=False,
)

device = 'cuda'
# Disabled single-run example, kept as an inert string literal.
'''
x_noise_level = 0.1
w_noise_level = 0.1

model = AutoModelForImageClassification.from_pretrained(
    "facebook/deit-tiny-patch16-224",  # example model name
    quantization_config=quantization_config)

model = add_noise_to_8bit_model(model=model, x_noise_level=x_noise_level, w_noise_level=w_noise_level, quantization_bits=8)

resevaluate(data_loader_val, model, device='cuda')
'''
import json

results = []
output_dir = '/content/drive/MyDrive/mini_imagenet/finetune'
# open(..., 'a') creates the file but not missing parent folders.
os.makedirs(output_dir, exist_ok=True)
per_run_log = os.path.join(output_dir, 'q8_log.txt')
all_runs_log = os.path.join(output_dir, 'q8_log_all.txt')

# Sweep over activation (x) and weight (w) noise levels. A freshly loaded
# quantized model is used for every setting so runs do not share modified layers.
for x_noise in np.linspace(0, 0, 1):
    for w_noise in np.linspace(0., 0.3, 2):
        model = AutoModelForImageClassification.from_pretrained(
            "facebook/deit-tiny-patch16-224", quantization_config=quantization_config)
        model = add_noise_to_8bit_model(
            model, x_noise_level=float(x_noise), w_noise_level=float(w_noise), quantization_bits=8)
        result = evaluate(data_loader_val, model, device=device)
        # Tag the record with its setting; otherwise log lines cannot be told apart.
        result['x_noise_level'] = float(x_noise)
        result['w_noise_level'] = float(w_noise)
        results.append(result)
        torch.cuda.empty_cache()

        # Append after every run so finished results survive an interrupted session.
        with open(per_run_log, 'a') as f:
            f.write(json.dumps(result) + '\n')

with open(all_runs_log, 'a') as f:
    f.write(json.dumps(results) + '\n')





`low_cpu_mem_usage` was None, now default to True since model is quantized.


Test:  [ 0/24]  eta: 0:46:53  loss: 1.0992 (1.0992)  acc1: 91.7969 (91.7969)  acc5: 98.0469 (98.0469)  time: 117.2112  data: 106.9824  max mem: 1911
Test:  [10/24]  eta: 0:03:56  loss: 1.6106 (1.5797)  acc1: 75.7812 (77.9652)  acc5: 94.3359 (93.4837)  time: 16.9214  data: 13.9041  max mem: 1934
Test:  [20/24]  eta: 0:00:47  loss: 1.7441 (1.6776)  acc1: 73.2422 (74.8419)  acc5: 91.4062 (92.2061)  time: 6.7185  data: 4.5620  max mem: 1934
Test:  [23/24]  eta: 0:00:10  loss: 1.7441 (1.6522)  acc1: 73.2422 (75.3500)  acc5: 91.4062 (92.6333)  time: 6.5042  data: 4.5597  max mem: 1934


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Test: Total time: 0:04:15 (10.6385 s / it)
* Acc@1 75.350 Acc@5 92.633 loss 1.652
Test:  [ 0/24]  eta: 0:32:56  loss: 1.0992 (1.0992)  acc1: 91.7969 (91.7969)  acc5: 98.0469 (98.0469)  time: 82.3531  data: 79.8022  max mem: 1935
Test:  [10/24]  eta: 0:03:38  loss: 1.6106 (1.5797)  acc1: 75.7812 (77.9652)  acc5: 94.3359 (93.4837)  time: 15.6115  data: 13.5312  max mem: 1958
Test:  [20/24]  eta: 0:00:45  loss: 1.7441 (1.6776)  acc1: 73.2422 (74.8419)  acc5: 91.4062 (92.2061)  time: 7.9334  data: 5.8882  max mem: 1958
Test:  [23/24]  eta: 0:00:10  loss: 1.7441 (1.6522)  acc1: 73.2422 (75.3500)  acc5: 91.4062 (92.6333)  time: 7.7137  data: 5.8881  max mem: 1958
Test: Total time: 0:04:04 (10.2071 s / it)
* Acc@1 75.350 Acc@5 92.633 loss 1.652


In [10]:
# Release PyTorch's cached CUDA memory, then print the GPU status table.
torch.cuda.empty_cache()
!nvidia-smi

Tue Dec  3 08:25:34 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P0              35W /  70W |    405MiB / 15360MiB |      2%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    