In [1]:
import os
import shutil
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/dogvscat

Mounted at /content/drive
/content/drive/MyDrive/dogvscat


In [None]:
!pip install transformers peft torch datasets scikit-learn


In [3]:
!ls

 binary_classification	 images.tar.gz		     test_binary       train_multiclass
 dataset.ipynb		 multiclass_classification   test_multiclass   Untitled0.ipynb
 images			 resnet-E.ipynb（副本）      train_binary     'Vit Pet.ipynb'


In [4]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [20]:
import os
import time
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import ViTImageProcessor, AutoModelForImageClassification, AdamW
from peft import PeftConfig, PeftModel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm

# Load Google Drive paths
train_multiclass_data_dir = "./train_multiclass"
test_multiclass_data_dir = "./test_multiclass"

# Load the processor and model
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
repo_name = 'alanahmet/vit-base-patch16-224-finetuned-lora-oxfordPets'

classes = os.listdir(train_multiclass_data_dir)
multiclass_label2id = {c: idx for idx, c in enumerate(classes)}
multiclass_id2label = {idx: c for idx, c in enumerate(classes)}

# Load model configuration and inference model
config = PeftConfig.from_pretrained(repo_name)
model = AutoModelForImageClassification.from_pretrained(
    config.base_model_name_or_path,
    label2id=multiclass_label2id,
    id2label=multiclass_id2label,
    ignore_mismatched_sizes=True
)
inference_model = PeftModel.from_pretrained(model, repo_name)


# Custom Dataset class with limited data loading
class CustomDataset(Dataset):
    def __init__(self, data_dir, label2id, processor):
        self.data_dir = data_dir
        self.label2id = label2id
        self.processor = processor
        self.image_paths = []
        self.labels = []
        self.load_images_from_folder()

    def load_images_from_folder(self):
        for class_name in os.listdir(self.data_dir):
            label = self.label2id[class_name]
            folder_path = os.path.join(self.data_dir, class_name)
            # Limit the number of images loaded for each class
            for idx, filename in enumerate(os.listdir(folder_path)):
                img_path = os.path.join(folder_path, filename)
                self.image_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        inputs = self.processor(image, return_tensors="pt")
        return inputs["pixel_values"].squeeze(0), label

# Prepare training and testing datasets and dataloaders (with limited data)
train_dataset = CustomDataset(train_multiclass_data_dir, multiclass_label2id, processor)
test_dataset = CustomDataset(test_multiclass_data_dir, multiclass_label2id, processor)

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=16)  # Smaller batch size for testing
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=16)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([37]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([37, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training function with timing and logging
def train_model(model, dataloader, optimizer, loss_fn, epochs):
    model.train()
    epoch_losses = []  # To store loss for each epoch
    epoch_times = []  # To store time for each epoch
    for epoch in range(epochs):
        total_loss = 0
        epoch_start_time = time.time()

        for pixel_values, labels in tqdm(dataloader):
            pixel_values = pixel_values.to("cuda")
            labels = labels.to("cuda")

            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        epoch_end_time = time.time()
        avg_loss = total_loss / len(dataloader)
        epoch_time = epoch_end_time - epoch_start_time

        epoch_losses.append(avg_loss)
        epoch_times.append(epoch_time)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Time: {epoch_time:.2f} seconds")

    return epoch_losses, epoch_times

# Evaluation function with timing and logging
def evaluate_model(model, dataloader):
    model.eval()
    y_true = []
    y_pred = []
    total_time = 0

    with torch.no_grad():
        for pixel_values, labels in tqdm(dataloader):
            pixel_values = pixel_values.to("cuda")
            labels = labels.to("cuda")

            start_time = time.time()
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            predicted_class_idx = logits.argmax(dim=-1)
            end_time = time.time()

            total_time += (end_time - start_time)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted_class_idx.cpu().numpy())

    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted", zero_division=1)
    avg_inference_time = total_time / len(dataloader)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Total inference time: {total_time:.2f} seconds")
    print(f"Average inference time per batch: {avg_inference_time:.4f} seconds")

    return accuracy, precision, recall, f1, total_time, avg_inference_time

# Experiment function to run different configurations
def run_experiment(model, lr, epochs):
    # Copy base model and inference model
    model.to("cuda")

    optimizer = AdamW(model.parameters(), lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    print(f"Running experiment with lr={lr}, epochs={epochs}")

    # Train model and record losses and times
    epoch_losses, epoch_times = train_model(model, train_dataloader, optimizer, loss_fn, epochs)

    # Evaluate model
    accuracy, precision, recall, f1, total_time, avg_inference_time = evaluate_model(model, test_dataloader)

    return {
        "learning_rate": lr,
        "epochs": epochs,
        "epoch_losses": epoch_losses,
        "epoch_times": epoch_times,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "total_inference_time": total_time,
        "avg_inference_time_per_batch": avg_inference_time
    }

# Run multiple experiments with different configurations
experiments = [
    {"lr": 5e-5, "epochs": 10},
    {"lr": 1e-5, "epochs": 10},
    {"lr": 3e-5, "epochs": 10},
    {"lr": 5e-5, "epochs": 20},
    {"lr": 1e-5, "epochs": 20},
    {"lr": 3e-5, "epochs": 20}
]

results = []
for experiment in experiments:
    inference_model = PeftModel.from_pretrained(model, repo_name)
    print(f"Running experiment with lr={experiment['lr']}, epochs={experiment['epochs']}")
    result = run_experiment(inference_model, experiment["lr"], experiment["epochs"])
    results.append(result)



Running experiment with lr=5e-05, epochs=10
Running experiment with lr=5e-05, epochs=10


  self.pid = os.fork()
  self.pid = os.fork()
100%|██████████| 370/370 [01:28<00:00,  4.17it/s]


Epoch 1/10, Loss: 13.6932, Time: 88.84 seconds


100%|██████████| 370/370 [01:31<00:00,  4.03it/s]


Epoch 2/10, Loss: 8.5618, Time: 91.72 seconds


100%|██████████| 370/370 [01:24<00:00,  4.37it/s]


Epoch 3/10, Loss: 3.7830, Time: 84.70 seconds


100%|██████████| 370/370 [01:27<00:00,  4.25it/s]


Epoch 4/10, Loss: 1.0009, Time: 87.07 seconds


100%|██████████| 370/370 [01:31<00:00,  4.05it/s]


Epoch 5/10, Loss: 0.3863, Time: 91.40 seconds


100%|██████████| 370/370 [01:25<00:00,  4.32it/s]


Epoch 6/10, Loss: 0.2504, Time: 85.63 seconds


100%|██████████| 370/370 [01:25<00:00,  4.35it/s]


Epoch 7/10, Loss: 0.1920, Time: 85.13 seconds


100%|██████████| 370/370 [01:28<00:00,  4.19it/s]


Epoch 8/10, Loss: 0.1584, Time: 88.38 seconds


100%|██████████| 370/370 [01:25<00:00,  4.30it/s]


Epoch 9/10, Loss: 0.1361, Time: 86.01 seconds


100%|██████████| 370/370 [01:25<00:00,  4.32it/s]


Epoch 10/10, Loss: 0.1200, Time: 85.75 seconds


100%|██████████| 93/93 [00:21<00:00,  4.26it/s]


Accuracy: 0.9675
Precision: 0.9686
Recall: 0.9675
F1-score: 0.9675
Total inference time: 4.48 seconds
Average inference time per batch: 0.0482 seconds




Running experiment with lr=1e-05, epochs=10
Running experiment with lr=1e-05, epochs=10


  self.pid = os.fork()
100%|██████████| 370/370 [01:29<00:00,  4.13it/s]


Epoch 1/10, Loss: 15.7291, Time: 89.58 seconds


100%|██████████| 370/370 [01:24<00:00,  4.37it/s]


Epoch 2/10, Loss: 14.6974, Time: 84.59 seconds


100%|██████████| 370/370 [01:25<00:00,  4.34it/s]


Epoch 3/10, Loss: 13.6670, Time: 85.31 seconds


 14%|█▍        | 52/370 [00:13<01:15,  4.23it/s]

In [29]:
import os
import time
from PIL import Image
import torch
from transformers import ViTImageProcessor, AutoModelForImageClassification
from peft import PeftConfig, PeftModel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm

# Load Google Drive paths
train_multiclass_data_dir = "./train_multiclass"
multiclass_data_dir = "./test_multiclass"

# Load the processor and model
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
repo_name = 'alanahmet/vit-base-patch16-224-finetuned-lora-oxfordPets'

# Load model configuration and inference model
config = PeftConfig.from_pretrained(repo_name)
model = AutoModelForImageClassification.from_pretrained(
    config.base_model_name_or_path,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True
)
inference_model = PeftModel.from_pretrained(model, repo_name)

# Load a limited number of images for testing (use smaller batches)
def load_images_from_folder(folder, label):
    images = []
    labels = []
    for idx, filename in enumerate(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        try:
            image = Image.open(img_path).convert("RGB")
            images.append(image)
            labels.append(label)
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
    return images, labels


# Multiclass Classification (All Classes)
test_multiclass_images = []
test_multiclass_labels = []
classes = os.listdir(multiclass_data_dir)
multiclass_label2id = {c: idx for idx, c in enumerate(classes)}
multiclass_id2label = {idx: c for idx, c in enumerate(classes)}

# Load only 5 images per class for fast testing
for class_name in classes:
    print(f"Loading images for class: {class_name}")
    images, labels = load_images_from_folder(os.path.join(multiclass_data_dir, class_name), multiclass_label2id[class_name])
    test_multiclass_images.extend(images)
    test_multiclass_labels.extend(labels)

# Function to perform inference and calculate metrics
def evaluate_model(images, true_labels, id2label, binary=False):
    y_true = []
    y_pred = []
    start_time = time.time()

    for image, true_label in tqdm(zip(images, true_labels), total=len(images)):
        encoding = processor(image, return_tensors="pt")
        with torch.no_grad():
            outputs = inference_model(**encoding)
            logits = outputs.logits
        predicted_class_idx = logits.argmax(-1).item()
        y_true.append(true_label)
        y_pred.append(predicted_class_idx)

    end_time = time.time()
    avg_inference_time = (end_time - start_time) / len(images)

    # Ensure binary classification is handled correctly
    unique_true_labels = set(y_true)
    unique_pred_labels = set(y_pred)

    if binary and len(unique_true_labels) > 2:
        print(f"Warning: More than two unique classes found in binary classification: {unique_true_labels}. Proceeding with evaluation.")

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)

    if binary and len(unique_true_labels) <= 2:
        precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=1)
    else:
        precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted", zero_division=1)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Average inference time per image: {avg_inference_time:.4f} seconds")

    return accuracy, precision, recall, f1, avg_inference_time


# Evaluate on Binary Classification (Cats vs Dogs)
# print("\nBinary Classification (Cats vs Dogs):")
# evaluate_model(test_binary_images, test_binary_labels, binary_classes, binary=True)

# Evaluate on Multiclass Classification (All Classes)
print("\nMulticlass Classification (All Classes):")
evaluate_model(test_multiclass_images, test_multiclass_labels, multiclass_id2label, binary=False)




Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([37]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([37, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading images for class: pug
Loading images for class: american_bulldog
Loading images for class: yorkshire_terrier
Loading images for class: great_pyrenees
Loading images for class: shiba_inu
Loading images for class: Persian
Loading images for class: saint_bernard
Loading images for class: beagle
Loading images for class: Maine_Coon
Loading images for class: japanese_chin
Loading images for class: Bengal
Loading images for class: Sphynx
Loading images for class: english_cocker_spaniel
Loading images for class: newfoundland
Loading images for class: chihuahua
Loading images for class: english_setter
Loading images for class: miniature_pinscher
Loading images for class: Russian_Blue
Loading images for class: wheaten_terrier
Loading images for class: havanese
Loading images for class: american_pit_bull_terrier
Loading images for class: British_Shorthair
Loading images for class: Egyptian_Mau
Loading images for class: boxer
Loading images for class: leonberger
Loading images for class: 

100%|██████████| 1479/1479 [19:41<00:00,  1.25it/s]

Accuracy: 0.0264
Precision: 0.0277
Recall: 0.0264
F1-score: 0.9736
Average inference time per image: 0.7988 seconds





(0.02636916835699797,
 0.027688451326703938,
 0.02636916835699797,
 0.9735965915986199,
 0.7987600715681055)