In [7]:
!pip install pandas transformers accelerate datasets evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
Installing collected packages: evaluate
Successfully installed evaluate-0.4.3
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


### 이미지를 클래스별로 폴더로 분류

In [1]:
import os
import shutil
import pandas as pd

In [2]:
def sort_images_by_label(csv_path, image_dir, output_base_dir, val_ratio=0.1, random_seed=42):
    """
    Sort images into per-class folders and split them into train/val sets.

    The CSV must contain a 'file_name' column (path of each image relative to
    ``image_dir``) and a 'bad' column (1 for a bad image, 0 for a good one).
    Rows whose 'bad' value is neither 0 nor 1 are ignored. The resulting layout is

        output_base_dir/{train,val}/{good,bad}/

    Existing leaf directories are deleted and recreated on every call, so the
    function can be re-run safely. Each class is shuffled and split
    independently, so both splits keep roughly the same class balance.

    Args:
        csv_path (str): Path to the CSV file holding the labels.
        image_dir (str): Directory containing the original images.
        output_base_dir (str): Base directory that receives the sorted images.
        val_ratio (float): Fraction of each class placed in the validation set
            (default: 0.1). Must lie in [0, 1].
        random_seed (int): Seed used when shuffling each class (default: 42).

    Raises:
        ValueError: If ``val_ratio`` is outside [0, 1].
        KeyError: If the CSV lacks the 'file_name' or 'bad' column.
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio}")

    # Read the label CSV
    df = pd.read_csv(csv_path)

    # Check the schema BEFORE touching the filesystem, so a malformed CSV
    # cannot wipe a previously sorted output directory.
    missing_columns = [col for col in ('file_name', 'bad') if col not in df.columns]
    if missing_columns:
        raise KeyError(f"CSV is missing required column(s): {missing_columns}")

    # Destination directory for every (split, label) pair
    directories = {
        split: {
            label: os.path.join(output_base_dir, split, label)
            for label in ('good', 'bad')
        }
        for split in ('train', 'val')
    }

    # Recreate every destination directory from scratch
    for split_dirs in directories.values():
        for dir_path in split_dirs.values():
            if os.path.exists(dir_path):
                shutil.rmtree(dir_path)
            os.makedirs(dir_path)

    # Separate the good and bad rows
    good_df = df[df['bad'] == 0]
    bad_df = df[df['bad'] == 1]

    # Source paths listed in the CSV that do not exist on disk
    missing_files = []

    def split_and_copy_files(sub_df, label):
        """Shuffle one class, split it into train/val and copy the files.

        Returns the number of files actually copied as (train_count, val_count).
        """
        shuffled = sub_df.sample(frac=1, random_state=random_seed)

        # The first `val_size` shuffled rows form the validation set
        val_size = int(len(shuffled) * val_ratio)
        splits = {
            'train': shuffled.iloc[val_size:],
            'val': shuffled.iloc[:val_size],
        }

        copied = {}
        for split_type, split_df in splits.items():
            dst_dir = directories[split_type][label]
            count = 0
            for file_name in split_df['file_name']:
                src_path = os.path.join(image_dir, file_name)
                # isfile (not exists) so a stray directory is never handed to copy2
                if os.path.isfile(src_path):
                    shutil.copy2(src_path, dst_dir)
                    count += 1
                else:
                    missing_files.append(src_path)
            copied[split_type] = count

        return copied['train'], copied['val']

    # Split and copy each class
    good_train_count, good_val_count = split_and_copy_files(good_df, 'good')
    bad_train_count, bad_val_count = split_and_copy_files(bad_df, 'bad')

    # Summary: counts reflect files actually copied, not CSV rows
    print("\nImage sorting and splitting completed!")
    print("\nTraining set:")
    print(f"- Good images: {good_train_count}")
    print(f"- Bad images: {bad_train_count}")
    print(f"- Total: {good_train_count + bad_train_count}")

    print("\nValidation set:")
    print(f"- Good images: {good_val_count}")
    print(f"- Bad images: {bad_val_count}")
    print(f"- Total: {good_val_count + bad_val_count}")

    # Surface skipped files instead of dropping them silently
    if missing_files:
        print(f"\nWarning: {len(missing_files)} file(s) listed in the CSV were not found and were skipped.")

    # Cross-check against what is really on disk
    print("\nActual files in directories:")
    for split_name, split_dirs in directories.items():
        print(f"\n{split_name.capitalize()} set:")
        for label, dir_path in split_dirs.items():
            print(f"- {label}: {len(os.listdir(dir_path))}")

In [3]:
# Input labels, raw image folder and destination root for the task-1 data.
csv_path = "./task1/train_labels.csv"
image_dir = "./task1/train/"
output_dir = "./task1/sorted_images"

# Build the train/val class folders, holding out 10% of each class for validation.
sort_images_by_label(
    csv_path=csv_path,
    image_dir=image_dir,
    output_base_dir=output_dir,
    val_ratio=0.1,
    random_seed=42,
)


Image sorting and splitting completed!

Training set:
- Good images: 1459
- Bad images: 1692
- Total: 3151

Validation set:
- Good images: 162
- Bad images: 187
- Total: 349

Actual files in directories:

Train set:
- good: 1459
- bad: 1692

Val set:
- good: 162
- bad: 187


In [8]:
import warnings
warnings.filterwarnings('ignore')
import random
import torch
import numpy as np
import os
os.environ['TRANSFORMERS_CACHE'] = './models'
from datasets import load_dataset
from evaluate import load
from transformers import ViTForImageClassification, ViTImageProcessor, ViTConfig, TrainingArguments, Trainer

In [9]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed (int): Seed value applied to every generator.
    """
    random.seed(seed)
    # Exported for any subprocess started later; it does not change hash
    # randomisation of the already-running interpreter.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms per run and can pick
    # different kernels each time, so it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False
seed_everything(42)

In [10]:
# Load the sorted train/val class folders and expose the class column as
# "labels", the key the later transform and collate function read.
dataset = load_dataset(
    "imagefolder",
    data_dir="./task1/sorted_images/",
).rename_column("label", "labels")

Resolving data files:   0%|          | 0/3151 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/349 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/3151 [00:00<?, ?files/s]

Downloading data:   0%|          | 0/349 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [11]:
# Checkpoint identifier shared by the image processor here and the model loaded later.
model_name = "google/vit-base-patch16-224"
# Image processor matching the checkpoint; used by `transform` to build pixel tensors.
processor = ViTImageProcessor.from_pretrained(model_name)

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

In [12]:
def transform(example_batch):
    """Turn a batch of raw examples into model-ready tensors.

    Args:
        example_batch (dict): Batch with an 'image' list and a 'labels' list.
            The images are assumed to be PIL images (as produced by the
            "imagefolder" loader) — confirm if another source is used.

    Returns:
        The processor output (including 'pixel_values' as PyTorch tensors)
        with the batch's 'labels' attached.
    """
    # Force three-channel RGB so grayscale / RGBA / palette files do not break
    # the processor's per-channel normalisation; a no-op for RGB images.
    images = [image.convert('RGB') for image in example_batch['image']]
    inputs = processor(images, return_tensors='pt')
    inputs['labels'] = example_batch['labels']
    return inputs

def collate_fn(batch):
    """Merge a list of per-example dicts into one batch dict.

    Args:
        batch (list[dict]): Examples, each holding a 'pixel_values' tensor and
            a 'labels' value.

    Returns:
        dict: 'pixel_values' stacked along a new leading dimension and
        'labels' gathered into a single tensor.
    """
    images = tuple(sample['pixel_values'] for sample in batch)
    stacked_images = torch.stack(images)
    targets = [sample['labels'] for sample in batch]
    return dict(pixel_values=stacked_images, labels=torch.tensor(targets))

In [13]:
# Apply `transform` lazily on access, then shuffle the splits.
prepared_ds = dataset.with_transform(transform).shuffle()

# Accuracy metric used by `compute_metrics` below.
metric = load("accuracy", trust_remote_code=True)
def compute_metrics(p):
    """Score one evaluation pass with the accuracy metric.

    Args:
        p: Prediction object exposing `predictions` (per-class scores) and
            `label_ids` (reference labels).

    Returns:
        The result of `metric.compute` for the arg-max class of each row.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_classes, references=p.label_ids)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [14]:
# Class names in the index order assigned by the dataset's label feature.
labels = dataset['train'].features['labels'].names

# Load the checkpoint with a classification head sized for our classes;
# ignore_mismatched_sizes lets the differently shaped original head be replaced.
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    # "0" -> first class name, "1" -> second, ...
    id2label=dict(zip(map(str, range(len(labels))), labels)),
    # reverse mapping: class name -> stringified index
    label2id=dict(zip(labels, map(str, range(len(labels))))),
    ignore_mismatched_sizes=True,
)

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
training_args = TrainingArguments(
    # --- output / bookkeeping ---
    output_dir="./results/vit-experience-1",
    seed=42,
    # keep the dataset's columns untouched; `transform` needs 'image'
    remove_unused_columns=False,

    # --- batching: 16 samples x 4 accumulation steps per optimizer update ---
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,

    # --- optimisation schedule ---
    num_train_epochs=200,
    learning_rate=5e-5,
    warmup_ratio=0.1,
    label_smoothing_factor=0.1,

    # --- evaluate, checkpoint and log once per epoch ---
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=2,

    # --- model selection by validation accuracy ---
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)

In [16]:
# Wire the model, data, collation and metric together for training.
trainer = Trainer(
    model=model,
    args=training_args,
    # data
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],
    data_collator=collate_fn,
    # evaluation
    compute_metrics=compute_metrics,
    # the image processor is passed through the `tokenizer` slot
    tokenizer=processor,
)

In [17]:
# Run fine-tuning; the returned object carries the training metrics used below.
train_results = trainer.train()
# save_model() is called without a path — presumably it writes to the
# output_dir configured in training_args; confirm if the location matters.
trainer.save_model()
# Log the training metrics and persist them, then save the trainer state.
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

Epoch,Training Loss,Validation Loss,Accuracy
0,0.6067,0.562575,0.753582
1,0.4291,0.368955,0.908309
2,0.272,0.247296,0.988539
4,0.2102,0.209612,0.997135
5,0.1986,0.207368,0.997135
6,0.1976,0.206602,0.997135
8,0.2051,0.205989,0.997135
9,0.1967,0.205719,0.997135
10,0.1966,0.205491,0.997135
12,0.2048,0.205357,0.997135


KeyboardInterrupt: 

In [18]:
# Evaluate the current model on the validation split, then log and persist
# the resulting metrics under the "eval" name.
metrics = trainer.evaluate(prepared_ds['validation'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** eval metrics *****
  eval_accuracy =    1.0
  eval_loss     = 0.2014
