In [1]:
import torch
from PIL import Image
from torchvision import datasets, transforms
from transformers import Trainer, TrainingArguments, EvaluationStrategy

from src.lisa.model.efficientnet import EfficientNetModify

In [2]:
# Side length in pixels; the training transform resizes and center-crops images with this value.
image_size = 224

#### Read a sample image

In [3]:
# Path of a single sample image, used for a quick manual check.
image_path = "data/makeup color test/Train/face color test/3.jpg"
# Image.open() is lazy: it only reads the header and keeps the underlying file
# open until the pixel data is requested. Opening inside a context manager and
# calling load() reads the pixels right away and guarantees the file handle is
# closed, while `img` stays usable afterwards.
with Image.open(image_path) as img:
    img.load()

#### Image preprocessing

In [4]:
def imageclassify_collect_fn(batch):
    """Collate a list of ``(data, label)`` samples into one batch dictionary.

    Args:
        batch: sequence of two-element samples. The first elements are stacked
            with ``torch.stack`` (so they must be tensors of identical shape);
            the second elements are the labels.

    Returns:
        dict with two entries:
            ``"inputs"`` - the sample tensors stacked along a new leading dimension.
            ``"labels"`` - the labels gathered into a single tensor.
        NOTE(review): the key names presumably match the keyword arguments of the
        model's forward method - confirm against EfficientNetModify.
    """
    # Transpose [(x0, y0), (x1, y1), ...] into (x0, x1, ...) and (y0, y1, ...).
    images, targets = zip(*batch)
    stacked_images = torch.stack(images, 0)
    return {
        "inputs": stacked_images,
        "labels": torch.tensor(targets),
    }

In [5]:
# Prepare the training data.
# ImageFolder assumes the given root contains one sub-folder per class: every
# image inside a sub-folder belongs to the same class, and the folder name is
# used as the class name.
data_path = "data/makeup color test/Train/"
train_dataset = datasets.ImageFolder(
    root=data_path,
    transform=transforms.Compose(
        [
            # Geometry: resize, then cut an image_size x image_size center crop.
            transforms.Resize(size=image_size),
            transforms.CenterCrop(size=image_size),
            # Augmentation: small random translation (no rotation) and random mirroring.
            transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
            transforms.RandomHorizontalFlip(),
            # Convert to a tensor, then normalise each of the three channels.
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    ),
)

#### Load the modified EfficientNet

In [6]:
# Load the pretrained efficientnet-b0 weights through the project's EfficientNetModify class.
# NOTE(review): num_classes is hard-coded to 5 - presumably it has to equal the number
# of class folders ImageFolder finds under the training root; confirm.
model_new = EfficientNetModify.from_pretrained(
    "efficientnet-b0",
    num_classes=5,
    include_top=False,
)

Loaded pretrained weights for efficientnet-b0


#### Trainer

In [7]:
# Keyword arguments that are later unpacked into TrainingArguments.
# NOTE(review): the run logged at the end of this notebook stopped at global_step=5,
# so the 500-step warm-up / eval / save intervals below were never reached - confirm
# these values are meant for a larger dataset.
# NOTE(review): evaluation is scheduled by steps, but the Trainer built in this
# notebook receives no eval_dataset - confirm before training long enough to hit
# eval_steps.
args_dict = dict(
    num_train_epochs=1,                # number of training epochs
    per_device_train_batch_size=2,     # batch size per device during training
    per_device_eval_batch_size=2,      # batch size per device during evaluation
    gradient_accumulation_steps=1,     # number of batches per parameter update
    warmup_steps=500,                  # number of warm-up steps at the start of training
    weight_decay=0.00001,              # weight decay coefficient (this is not the learning rate)
    eval_steps=500,                    # evaluate every this many steps
    save_steps=500,                    # save every this many steps
    logging_steps=100,                 # log every this many steps
    evaluation_strategy=EvaluationStrategy.STEPS,  # decide when to evaluate by step count
    dataloader_num_workers=0,          # number of workers used by the dataloader
)

In [8]:
# Data-related settings. Only the training dataset is configured; the other
# entries sketched for this dict are not set up yet:
#   eval_dataset (valid_dataset), train_sampler (weight_sampler),
#   compute_metrics (metric.entity_mention_metric).
# NOTE(review): this dict is not passed to the Trainer constructed later in the
# notebook, which receives train_dataset directly.
train_setting = {"train_dataset": train_dataset}

In [9]:
# Build the TrainingArguments: "./results" is the folder the model output goes to,
# everything else comes from args_dict.
training_args = TrainingArguments(output_dir="./results", **args_dict)

In [10]:
# get trainer
trainer = Trainer(
                    model=model_new,
                    args=training_args,
                    train_dataset = train_dataset,
                    data_collator = imageclassify_collect_fn
                  )

In [11]:
# Run training; the TrainOutput returned by train() is displayed as the cell result.
trainer.train()

Step,Training Loss,Validation Loss


TrainOutput(global_step=5, training_loss=1.6374141693115234)