# 测试Large完整模型的性能

## 载入数据和模型

In [1]:
from datasets import load_dataset

# Load the image dataset from a local folder using the "imagefolder" loader.
dataset = load_dataset(path="imagefolder", data_dir="E:/jupyter/VIT_example/dataset/hfdataset_Mini") # the path must not contain Chinese characters

Resolving data files:   0%|          | 0/3768 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/1612 [00:00<?, ?it/s]

In [2]:

from transformers import AutoImageProcessor

# Load the image processor for the "google/vit-large-patch32-384" checkpoint
image_processor = AutoImageProcessor.from_pretrained("google/vit-large-patch32-384")

def transforms(examples):
    """Attach model-ready pixel tensors to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB and resized to
    384x384. The resulting list is passed to the module-level
    ``image_processor`` (with ``return_tensors="pt"``), and its
    ``"pixel_values"`` output is stored under ``examples["pixel_values"]``.

    Args:
        examples: batch mapping that contains an ``"image"`` sequence.

    Returns:
        The same ``examples`` object, with the ``"pixel_values"`` key added.
    """
    target_size = (384, 384)
    rgb_images = []
    for picture in examples["image"]:
        # Force three channels first, then bring the image to the target size.
        rgb_images.append(picture.convert("RGB").resize(target_size))

    processed = image_processor(rgb_images, return_tensors="pt")
    examples["pixel_values"] = processed["pixel_values"]
    return examples

# Register `transforms` as the transform of every split in `dataset`.
dataset.set_transform(transforms)
# NOTE(review): more code follows in the same cell, so the value of this expression is not displayed.
dataset['train'][0].keys()

import torch

def collate_fn(batch):
    """Combine a list of samples into one batch dictionary.

    Args:
        batch: sequence of mappings, each with a ``'pixel_values'`` tensor
            and a ``'labels'`` value.

    Returns:
        dict with ``'pixel_values'`` (the per-sample tensors stacked along a
        new leading dimension) and ``'labels'`` (a tensor built from the
        per-sample label values).
    """
    # Stack the image tensors first, then gather the labels.
    stacked_images = torch.stack([sample['pixel_values'] for sample in batch])
    label_tensor = torch.tensor([sample['labels'] for sample in batch])
    return dict(pixel_values=stacked_images, labels=label_tensor)

import numpy as np
import evaluate

# Load the four metric objects that compute_metrics calls.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")

def compute_metrics(p):
    """Compute multi-class accuracy, F1, precision and recall.

    Args:
        p: object exposing ``predictions`` (per-class scores, argmax is
            taken over axis 1) and ``label_ids`` (reference labels).

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``; the last three use ``average='weighted'``.
    """
    # Highest-scoring class index per sample.
    predicted_classes = np.argmax(p.predictions, axis=1)

    # Arguments shared by every metric call.
    common = {'predictions': predicted_classes, 'references': p.label_ids}

    accuracy_result = accuracy_metric.compute(**common)
    f1_result = f1_metric.compute(average='weighted', **common)
    precision_result = precision_metric.compute(average='weighted', **common)
    recall_result = recall_metric.compute(average='weighted', **common)

    return dict(
        accuracy=accuracy_result['accuracy'],
        f1=f1_result['f1'],
        precision=precision_result['precision'],
        recall=recall_result['recall'],
    )


# collate_fn reads x['labels'], so rename the column in every split.
for split in dataset:
    dataset[split] = dataset[split].rename_column('label', 'labels') # rename the 'label' feature to 'labels'

In [3]:
from transformers import ViTForImageClassification

# NOTE(review): `model_name` is not referenced in any visible cell; the model is loaded from a local directory instead.
model_name = 'google/vit-large-patch32-384' # name of the ViT checkpoint

In [4]:
# NOTE(review): DiffusionPipeline is not used in any visible cell — confirm whether this import (and the diffusers dependency) is needed.
from diffusers import DiffusionPipeline
# Load the fine-tuned classifier from the local "./vit-large-covid-Full" directory.
model = ViTForImageClassification.from_pretrained("./vit-large-covid-Full")

In [5]:
from transformers import TrainingArguments

# Configuration for the Trainer built below. The visible cells only call
# trainer.evaluate(); trainer.train() is never called with these arguments.
training_args = TrainingArguments(
  output_dir="./vit-base-covid", # directory where outputs are saved
  per_device_train_batch_size=16, # training batch size per device; larger values need more GPU memory
  evaluation_strategy="steps", # evaluation schedule: "steps" evaluates every fixed number of steps
  num_train_epochs=100, # number of training epochs
  fp16=True, # half-precision floats; reduces memory use, requires device support
  save_steps=100, # save the model every this many steps
  eval_steps=100, # evaluate the model every this many steps
  logging_steps=3, # write a log entry every this many steps
  learning_rate=1e-5, # learning rate
  save_total_limit=3, # maximum number of saved checkpoints to keep
  remove_unused_columns=False, # keep all dataset columns; NOTE(review): presumably required so `transforms` can still read the "image" column — confirm
  push_to_hub=False, # do not publish the model to the Hugging Face Hub
  report_to='tensorboard', # where logs are reported
  load_best_model_at_end=True, # load the best model automatically at the end of training
  ignore_data_skip=True # NOTE(review): original comment read "resume training from a checkpoint"; confirm the intended effect of this flag
)

from transformers import Trainer

# Build the Trainer used for evaluation: the locally loaded model, the
# collate function and the 4-class compute_metrics defined in the cells above.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=image_processor,
    
)


## 基准测试

In [6]:
# Evaluate on the dataset's own test split, then log and save the metrics under the "eval" name.
metrics = trainer.evaluate(dataset['test'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)


***** eval metrics *****
  eval_accuracy           =     0.9615
  eval_f1                 =     0.9613
  eval_loss               =       0.19
  eval_precision          =     0.9613
  eval_recall             =     0.9615
  eval_runtime            = 0:00:33.31
  eval_samples_per_second =     48.384
  eval_steps_per_second   =      6.063


## 外部验证

In [7]:
from datasets import load_dataset

# Load the external validation images and prepare them the same way as `dataset`.
# NOTE(review): class ids are assigned by the loader for this directory; confirm they
# match the ids the model was trained with, otherwise the multi-class metrics are not comparable.
Outerdataset = load_dataset(path="imagefolder", data_dir="E:/jupyter/VIT_example/dataset/Outerdataset") # the path must not contain Chinese characters
Outerdataset.set_transform(transforms)
# Outerdataset['test'][0].keys()
for split in Outerdataset:
    Outerdataset[split] = Outerdataset[split].rename_column('label', 'labels') # rename the 'label' feature to 'labels'

Resolving data files:   0%|          | 0/312 [00:00<?, ?it/s]

In [8]:
# Evaluate the same trainer on the external test split.
# NOTE(review): this reuses the split name "eval" and the same trainer as the benchmark
# cell; presumably the saved metrics file from that cell is overwritten here — confirm.
metrics = trainer.evaluate(Outerdataset["test"])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** eval metrics *****
  eval_accuracy           =     0.3782
  eval_f1                 =     0.3879
  eval_loss               =     7.0652
  eval_precision          =     0.4138
  eval_recall             =     0.3782
  eval_runtime            = 0:00:15.36
  eval_samples_per_second =     20.306
  eval_steps_per_second   =      2.538


  _warn_prf(average, modifier, msg_start, len(result))


## 对特定图片进行预测

In [10]:
# Pick one sample of the external test split and keep its PIL image for the pipeline cell.
num=1
image=Outerdataset["test"][num]["image"]
# Display the whole sample dict; this includes the full pixel_values tensor, so the output is large.
Outerdataset["test"][num]

{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=1458x1303>,
 'labels': 0,
 'pixel_values': tensor([[[-0.6392, -0.6314, -0.6235,  ..., -0.9843, -0.9843, -0.9843],
          [-0.6078, -0.6000, -0.6000,  ..., -0.9843, -0.9843, -0.9843],
          [-0.5922, -0.5843, -0.5765,  ..., -0.9843, -0.9843, -0.9843],
          ...,
          [-0.1216, -0.0980, -0.0824,  ..., -0.3647, -0.3804, -0.3882],
          [-0.1137, -0.0980, -0.0902,  ..., -0.3725, -0.3804, -0.3961],
          [-0.1137, -0.0980, -0.0824,  ..., -0.3647, -0.3804, -0.3961]],
 
         [[-0.6392, -0.6314, -0.6235,  ..., -0.9843, -0.9843, -0.9843],
          [-0.6078, -0.6000, -0.6000,  ..., -0.9843, -0.9843, -0.9843],
          [-0.5922, -0.5843, -0.5765,  ..., -0.9843, -0.9843, -0.9843],
          ...,
          [-0.1216, -0.0980, -0.0824,  ..., -0.3647, -0.3804, -0.3882],
          [-0.1137, -0.0980, -0.0902,  ..., -0.3725, -0.3804, -0.3961],
          [-0.1137, -0.0980, -0.0824,  ..., -0.3647, -0.3804, -0.3961

In [16]:
from transformers import pipeline

# Build an image-classification pipeline from the local './vit-large-covid-Full'
# directory and classify the single image selected in the previous cell.
classifier = pipeline("image-classification", model='./vit-large-covid-Full' )
classifier(image)

[{'score': 0.9999901056289673, 'label': 'Covid'},
 {'score': 6.601129825867247e-06, 'label': 'Lung Opacity'},
 {'score': 2.090611815219745e-06, 'label': 'Normal'},
 {'score': 1.2494034535848186e-06, 'label': 'Viral Pneumonia'}]

## 进行Covid与非Covid的二分类预测

In [17]:
import numpy as np
def compute_metrics(p):
    """Compute binary (class 0 vs. all other classes) evaluation metrics.

    This definition replaces the multi-class ``compute_metrics`` defined in
    an earlier cell. Predicted classes (argmax over axis 1 of
    ``p.predictions``) and the reference labels ``p.label_ids`` are both
    collapsed so that class 0 (Covid) stays 0 and every other class becomes 1.

    NOTE(review): F1, precision and recall use ``average='binary'`` without
    an explicit positive label, so the metric's default applies — presumably
    label 1, i.e. the non-Covid group. Confirm this is the intended
    positive class.

    Args:
        p: object exposing ``predictions`` and ``label_ids``.

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)

    # Collapse both sides to two groups: 0 stays 0, anything else becomes 1.
    binary_predictions = np.where(predicted_classes == 0, 0, 1)
    binary_references = np.where(p.label_ids == 0, 0, 1)

    pair = {'predictions': binary_predictions, 'references': binary_references}

    scores = {}
    scores['accuracy'] = accuracy_metric.compute(**pair)['accuracy']
    # The remaining metrics share the same call shape and differ only in the result key.
    for key, metric in (('f1', f1_metric), ('precision', precision_metric), ('recall', recall_metric)):
        scores[key] = metric.compute(average='binary', **pair)[key]
    return scores
from transformers import TrainingArguments

# Second configuration, used by the Trainer for the binary evaluation. It differs from
# the first one in output_dir, learning_rate and save_total_limit. The visible cells
# only call trainer.evaluate() with it.
training_args = TrainingArguments(
  output_dir="./vit-simple-covid", # directory where outputs are saved
  per_device_train_batch_size=16, # training batch size per device; larger values need more GPU memory
  evaluation_strategy="steps", # evaluation schedule: "steps" evaluates every fixed number of steps
  num_train_epochs=100, # number of training epochs
  fp16=True, # half-precision floats; reduces memory use, requires device support
  save_steps=100, # save the model every this many steps
  eval_steps=100, # evaluate the model every this many steps
  logging_steps=3, # write a log entry every this many steps
  learning_rate=2e-4, # learning rate
  save_total_limit=10, # maximum number of saved checkpoints to keep
  remove_unused_columns=False, # keep all dataset columns; NOTE(review): presumably required so `transforms` can still read the "image" column — confirm
  push_to_hub=False, # do not publish the model to the Hugging Face Hub
  report_to='tensorboard', # where logs are reported
  load_best_model_at_end=True, # load the best model automatically at the end of training
  ignore_data_skip=True # NOTE(review): original comment read "resume training from a checkpoint"; confirm the intended effect of this flag
)
from transformers import Trainer

# Rebuild the Trainer so it picks up the binary compute_metrics and the new
# training_args; the default evaluation set is now the external test split.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=dataset["train"],
    eval_dataset=Outerdataset["test"],
    tokenizer=image_processor,
    
)


In [18]:
# Evaluate the binary (Covid vs. non-Covid) metrics on the external test split, then log and save them.
metrics = trainer.evaluate(Outerdataset['test'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)


***** eval metrics *****
  eval_accuracy           =     0.9199
  eval_f1                 =     0.9315
  eval_loss               =     7.0652
  eval_precision          =     0.9189
  eval_recall             =     0.9444
  eval_runtime            = 0:00:15.15
  eval_samples_per_second =     20.581
  eval_steps_per_second   =      2.573
