In [10]:
import evaluate
import numpy as np
import torch
import wandb
from datasets import load_dataset
from sklearn.metrics import confusion_matrix, f1_score
from torchvision.transforms import (
    CenterCrop,
    ColorJitter,
    Compose,
    GaussianBlur,
    Normalize,
    RandomAffine,
    RandomHorizontalFlip,
    RandomResizedCrop,
    RandomRotation,
    RandomVerticalFlip,
    Resize,
    ToTensor,
)
from transformers import (
    AutoImageProcessor,
    Trainer,
    TrainingArguments,
    ViTHybridForImageClassification,
    ViTHybridImageProcessor,
)

In [2]:
# Fixed seed for repeatable runs. The commented line below suggests the value
# was presumably captured once from torch.random.initial_seed() — TODO confirm.
#seed = torch.random.initial_seed()
seed = 14835532971117265721
# Seed torch from the single `seed` variable so the two can never drift apart.
torch.manual_seed(seed)

#Parameters
testset_size = 0.2   # fraction of the data held out from training
epoch = 1            # number of training epochs
batch_size = 8       # per-device batch size for training and evaluation
lr = 5e-5            # learning rate
# Run name used for the output directory and the W&B run.
name = f"Test_{epoch}E_{batch_size}B_{lr}_{testset_size}"

# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
ds = load_dataset("imagefolder", data_dir="/home/dxd_jy/joel/Capstone/Dataset/Reject/Too Little")

Resolving data files:   0%|          | 0/6804 [00:00<?, ?it/s]

In [3]:
# Build the class-name <-> class-index lookups from the dataset's label feature.
labels1 = ds["train"].features["label"].names
label2id = {label: i for i, label in enumerate(labels1)}
id2label = {i: label for i, label in enumerate(labels1)}

model_checkpoint = "google/vit-hybrid-base-bit-384"
image_processor = ViTHybridImageProcessor.from_pretrained(model_checkpoint)

# Normalise with the mean/std stored in the checkpoint's image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Derive the resize target and crop size from whichever layout the processor's
# `size` dict uses.
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")
else:
    # Fail here with a clear message instead of a NameError further down,
    # where `size` / `crop_size` are first used.
    raise ValueError(
        f"Unsupported image_processor.size layout: {image_processor.size!r}"
    )

# def random_kernel_size():
#     return random.randint(1,3) * 2 + 1

In [4]:
# Training pipeline: random horizontal flip and random resized crop as
# augmentation, then tensor conversion and normalisation.
# The commented entries are augmentations that are currently switched off.
_train_steps = [
    # GaussianBlur(kernel_size=random_kernel_size(), sigma=(0.01, 1)),
    # ColorJitter(brightness=(0.5,2), contrast=(0.5,3), saturation=(0.1,3), hue=(-0.5,0.5)),
    # RandomAffine(degrees=90),
    # RandomVerticalFlip(),
    # RandomRotation(degrees=90),
    RandomHorizontalFlip(),
    RandomResizedCrop(crop_size),
    ToTensor(),
    normalize,
]
train_transforms = Compose(_train_steps)

# Validation pipeline: no random steps — resize, centre crop, tensor
# conversion and the same normalisation.
_val_steps = [
    Resize(size),
    CenterCrop(crop_size),
    ToTensor(),
    normalize,
]
val_transforms = Compose(_val_steps)

def preprocess_train(example_batch):
    """Add a "pixel_values" entry holding the training-transformed images.

    Each item of example_batch["image"] is converted to RGB and passed through
    `train_transforms`. The batch is modified in place and returned.
    """
    pixel_values = []
    for image in example_batch["image"]:
        rgb_image = image.convert("RGB")
        pixel_values.append(train_transforms(rgb_image))
    example_batch["pixel_values"] = pixel_values
    return example_batch

def preprocess_val(example_batch):
    """Add a "pixel_values" entry holding the validation-transformed images.

    Each item of example_batch["image"] is converted to RGB and passed through
    `val_transforms`. The batch is modified in place and returned.
    """
    pixel_values = []
    for image in example_batch["image"]:
        rgb_image = image.convert("RGB")
        pixel_values.append(val_transforms(rgb_image))
    example_batch["pixel_values"] = pixel_values
    return example_batch

In [5]:
# Stratified train/validation split driven by the notebook-level `seed`.
# NOTE(review): the next cell re-splits with seed=42 and rebinds train_ds /
# val_ds, so this split is superseded when the cells run in order.
splits = ds["train"].train_test_split(
    test_size=testset_size,
    stratify_by_column="label",
    seed=seed,
)
train_ds, val_ds = splits["train"], splits["test"]

# Transforms are applied lazily, each time examples are accessed.
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)

In [6]:
# Three-way stratified split: hold out `testset_size` of the data, then cut
# that hold-out in half into a validation split and an eval split.
# NOTE(review): uses a literal seed=42 rather than the notebook-level `seed`
# — confirm which one is intended.
splits = ds["train"].train_test_split(
    test_size=testset_size,
    stratify_by_column="label",
    seed=42,
)
splits2 = splits["test"].train_test_split(
    test_size=0.5,
    stratify_by_column="label",
    seed=42,
)
train_ds = splits["train"]
val_ds, eval_ds = splits2["train"], splits2["test"]

# Augmenting transforms for training; deterministic ones for both held-out splits.
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)
eval_ds.set_transform(preprocess_val)

In [7]:
# Display the summary (features and row count) of the held-out eval split.
eval_ds

Dataset({
    features: ['image', 'label'],
    num_rows: 681
})

In [None]:
# Preview the source image of the validation example at index 1.
val_ds[1]['image']

In [None]:
# Preview the source image of the eval example at index 1.
eval_ds[1]['image']

In [8]:
   
# Load the classification model (not the image processor) from the checkpoint.
# Passing the label maps sizes the classification head for this dataset's
# classes and makes predictions report the class names.
model = ViTHybridForImageClassification.from_pretrained(
    model_checkpoint,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,  # provide this in case you're planning to fine-tune an already fine-tuned checkpoint
)
# Output directory is named after the checkpoint plus the run parameters.
model_name = model_checkpoint.rsplit("/", 1)[-1]

args = TrainingArguments(
    output_dir=f"{model_name}-{name}",
    # Keep the "image" column: the on-the-fly transforms read it.
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Optimisation settings.
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=4,
    num_train_epochs=epoch,
    warmup_ratio=0.1,
    # Logging.
    # NOTE(review): logging_steps is presumably ignored while
    # logging_strategy is "epoch" — confirm against the Trainer docs.
    log_level="info",
    logging_steps=1,
    logging_strategy="epoch",
    report_to="wandb",
)


KeyError: 'vit-hybrid'

In [None]:
# Metric objects used by compute_metrics, fetched through evaluate.load().
# (Confusion-matrix metrics from `evaluate` were tried and are disabled;
# the confusion matrix is computed with scikit-learn instead.)
accuracy, f1 = (evaluate.load(metric_id) for metric_id in ("accuracy", "f1"))

def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for an evaluation pass and log extras to W&B.

    Args:
        eval_pred: object exposing `predictions` (per-class scores; the
            predicted class is the argmax over axis 1) and `label_ids`
            (the reference class indices).

    Returns:
        dict: the merged results of the `accuracy` and `f1` metrics.

    Side effects:
        Prints the confusion matrix and logs the weighted F1 score and a
        confusion-matrix plot to the active W&B run.
    """
    labels = eval_pred.label_ids
    predictions = np.argmax(eval_pred.predictions, axis=1)

    accuracy_results = accuracy.compute(predictions=predictions, references=labels)
    f1_results = f1.compute(predictions=predictions, references=labels, average="weighted")

    # scikit-learn expects (y_true, y_pred): rows are the true classes and
    # columns the predicted ones. The arguments were previously swapped,
    # which printed the transposed matrix.
    cm = confusion_matrix(labels, predictions)
    print(cm)

    # Reuse the weighted F1 computed above instead of computing it a second time.
    wandb.log({"f1_score": f1_results["f1"]})

    # W&B wants a sequence of label names ordered by class index. Passing the
    # id2label dict itself would iterate over its integer keys.
    class_names = [id2label[i] for i in sorted(id2label)]
    wandb.log({f"my_conf_mat_{name}": wandb.plot.confusion_matrix(
            preds=predictions, y_true=labels,
            class_names=class_names
    )})

    return {**accuracy_results, **f1_results}

def collate_fn(examples):
    """Collate a list of examples into a single batch dict.

    Args:
        examples: iterable of dicts, each with a "pixel_values" tensor and
            a "label" value.

    Returns:
        dict with "pixel_values" (the tensors stacked along a new first
        dimension) and "labels" (a tensor built from the label values).
    """
    images, targets = [], []
    for item in examples:
        images.append(item["pixel_values"])
        targets.append(item["label"])
    return {
        "pixel_values": torch.stack(images),
        "labels": torch.tensor(targets),
    }


In [None]:
# Trainer wiring: augmented training split, validation split for the
# per-epoch evaluation, custom metrics and batch collation.
# `Trainer` must be imported from transformers alongside TrainingArguments.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

# Open the W&B run explicitly so the run name and hyper-parameters are
# recorded; compute_metrics also logs to this run.
wandb.init(project="huggingface",
           name=name,
           config={
               "Model_Name": name,
               "Labels": id2label,
               "Epochs": epoch,
               "Batch_size": batch_size,
               "Learning_Rate": lr,
               "Test_Size": testset_size,
               "Train_Size": len(train_ds),
               "Total Test Data": len(val_ds),
               "Pytorch GPU": torch.cuda.is_available(),
               #"Seed": seed
           })

# Train, then persist the model, the training metrics and the trainer state.
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()


In [None]:

# Score the trained model on the held-out eval split. The validation split
# was already used during training to pick the best checkpoint, so
# re-evaluating on it would report an optimistically biased number.
metrics = trainer.evaluate(eval_dataset=eval_ds)
print(metrics)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)