### cifar-10 数据集加载

In [4]:
from datasets import load_dataset
from transformers import ViTImageProcessor

In [5]:
# Both splits are read from the same local CIFAR-10 directory in streaming
# mode; the path is named once so the two calls cannot drift apart.
CIFAR10_DIR = "/data/jc/datasets/cifar-10"

train_dataset = load_dataset(CIFAR10_DIR, split="train", streaming=True)
val_dataset = load_dataset(CIFAR10_DIR, split="test", streaming=True)

In [6]:
# Image processor loaded from the fine-tuned checkpoint directory, so the
# preprocessing uses whatever configuration is stored alongside those weights.
PROCESSOR_DIR = '../weights/vit-base-patch16-224-in21k-finetuned-cifar10/'
processor = ViTImageProcessor.from_pretrained(PROCESSOR_DIR)

def preprocess_function(item):
    """Turn a batch of raw examples into model inputs.

    This function is applied with ``batched=True``, so ``item`` holds whole
    columns rather than a single example.

    Args:
        item: mapping with an "img" column (the images) and a "label" column
            (the class indices).

    Returns:
        The object returned by ``processor`` (requested as PyTorch tensors),
        with the incoming "label" column additionally stored under "labels".
    """
    # The processor applies the image preprocessing configured for the model
    # (presumably resize + normalisation — see the checkpoint's processor config).
    encoded = processor(images=item["img"], return_tensors="pt")
    encoded["labels"] = item["label"]
    return encoded

# Attach the preprocessing to both splits, dropping the raw "img" column.
# NOTE(review): the names are rebound in place, so re-running this cell stacks
# a second map on top of an already-mapped dataset; re-run the loading cell
# first if this cell has to be executed again.
train_dataset, val_dataset = (
    split.map(preprocess_function, remove_columns=["img"], batched=True)
    for split in (train_dataset, val_dataset)
)

### 模型测试

#### 1. helper

In [7]:
import torch 
import numpy as np 

def collect_fn(batch):
    """Collate a list of preprocessed examples into one batch dictionary.

    Args:
        batch: list of mappings, each providing 'pixel_values' (a tensor, or
            anything ``torch.as_tensor`` accepts, e.g. nested lists or a
            NumPy array) and 'labels' (the class index of that example).

    Returns:
        dict with
            'pixel_values': the per-example values stacked along a new
                leading dimension,
            'labels': a tensor with one entry per example.
    """
    # as_tensor leaves existing tensors untouched and converts list/array
    # values, so the collator also works when the dataset hands back plain
    # Python lists instead of tensors.
    pixel_values = torch.stack(
        [torch.as_tensor(example['pixel_values']) for example in batch],
        dim=0,
    )
    labels = torch.tensor([example['labels'] for example in batch])
    return {'pixel_values': pixel_values, 'labels': labels}


def compulate_metrics(eval_pred):
    """Compute top-1 accuracy from a (logits, labels) pair.

    Args:
        eval_pred: two-element sequence ``(logits, labels)``; the predicted
            class is the argmax of ``logits`` along axis 1.

    Returns:
        dict with a single key "accuracy": the fraction of predictions that
        equal the corresponding label.
    """
    scores, targets = eval_pred
    predicted = np.argmax(scores, axis=1)
    return {"accuracy": np.mean(predicted == targets)}

#### 2. 加载模型

In [1]:
from transformers import ViTForImageClassification

# Fine-tuned ViT classifier used as the full (uncompressed) baseline.
CHECKPOINT_DIR = '../weights/vit-base-patch16-224-in21k-finetuned-cifar10'
model = ViTForImageClassification.from_pretrained(CHECKPOINT_DIR)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
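# NOTE: disabled snippet, kept for reference only. When enabled it freezes every
# parameter and prints the names of those that do NOT match the omit list
# ('norm', 'head', 'patch_embed', 'downsample').
# for name, param in model.named_parameters():
#     param.requires_grad = False
#     if any(nd in name for nd in ['norm', 'head', 'patch_embed', 'downsample']):
#         continue
#     print(name, param.requires_grad)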

#### 3. 测试完整模型

In [15]:
from transformers import Trainer
from transformers import TrainingArguments

# Only `evaluate()` is called in this cell; the Trainer is used as an
# evaluation harness for the full model.
# NOTE(review): max_steps is presumably set because the streaming training
# split has no length — confirm before removing it.
eval_args = TrainingArguments(
    output_dir="./logs",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=400,
    max_steps=1000,
)

trainer = Trainer(
    model=model,
    args=eval_args,
    data_collator=collect_fn,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compulate_metrics,
)

trainer.evaluate()

max_steps is given, it will override any value given in num_train_epochs
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


{'eval_loss': 0.2564162611961365,
 'eval_accuracy': 0.9788,
 'eval_runtime': 30.7967,
 'eval_samples_per_second': 324.711,
 'eval_steps_per_second': 0.227}

#### 4. 测试低秩模型

In [1]:
import sys 
sys.path.append('../')
import low_rank
from transformers import ViTForImageClassification

# Reload the fine-tuned checkpoint so the low-rank transform starts from the
# same weights as the full-model evaluation.
model = ViTForImageClassification.from_pretrained(
    '../weights/vit-base-patch16-224-in21k-finetuned-cifar10'
)

# Count the original parameters first: whether the transform below modifies
# `model` in place is not visible from this notebook.
count_params = sum(map(lambda weight: weight.numel(), model.parameters()))

# Modules whose names match an entry in `name_omit` are passed as names to skip.
model_lr_transform = low_rank.ModuleLowRank(
    compress_ratio=2,
    name_omit=['norm', 'head', 'patch_embed', 'downsample'],
    is_approximate=True,
)
low_rank_model = model_lr_transform(model)

count_lr_params = sum(map(lambda weight: weight.numel(), low_rank_model.parameters()))

print(f'Original model params: {count_params}, Low rank model params: {count_lr_params}')

  from .autonotebook import tqdm as notebook_tqdm


applying low rank on vit.encoder.layer.0.attention.attention.query
applying low rank on vit.encoder.layer.0.attention.attention.key
applying low rank on vit.encoder.layer.0.attention.attention.value
applying low rank on vit.encoder.layer.0.attention.output.dense
applying low rank on vit.encoder.layer.0.intermediate.dense
applying low rank on vit.encoder.layer.0.output.dense
applying low rank on vit.encoder.layer.1.attention.attention.query
applying low rank on vit.encoder.layer.1.attention.attention.key
applying low rank on vit.encoder.layer.1.attention.attention.value
applying low rank on vit.encoder.layer.1.attention.output.dense
applying low rank on vit.encoder.layer.1.intermediate.dense
applying low rank on vit.encoder.layer.1.output.dense
applying low rank on vit.encoder.layer.2.attention.attention.query
applying low rank on vit.encoder.layer.2.attention.attention.key
applying low rank on vit.encoder.layer.2.attention.attention.value
applying low rank on vit.encoder.layer.2.attent

In [3]:
# Total number of elements in parameters whose name contains 'sv'
# (presumably the singular-value vectors introduced by the low-rank transform).
sv_params = sum(
    tensor.numel()
    for param_name, tensor in low_rank_model.named_parameters()
    if 'sv' in param_name
)

print(f'Singular value params: {sv_params}')

Singular value params: 16588


In [8]:
from transformers import Trainer
from transformers import TrainingArguments

# Same evaluation harness as for the full model, pointed at the low-rank
# model; only `evaluate()` is called in this cell.
# NOTE(review): max_steps is presumably set because the streaming training
# split has no length — confirm before removing it.
eval_args = TrainingArguments(
    output_dir="./logs",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=400,
    max_steps=1000,
)

trainer = Trainer(
    model=low_rank_model,
    args=eval_args,
    data_collator=collect_fn,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compulate_metrics,
)

trainer.evaluate()

max_steps is given, it will override any value given in num_train_epochs
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


{'eval_loss': 2.0316827297210693,
 'eval_accuracy': 0.6297,
 'eval_runtime': 30.5201,
 'eval_samples_per_second': 327.653,
 'eval_steps_per_second': 0.229}