In [None]:
!pip install evaluate accelerate -q
!pip install scikit-learn
!pip install transformers datasets torchvision

In [3]:
import evaluate
import numpy as np
import torch
from datasets import load_from_disk
from transformers import (
    AutoFeatureExtractor,
    AutoImageProcessor,
    AutoModelForImageClassification,
    Trainer,
    TrainingArguments,
)

# --- Notebook-wide configuration -------------------------------------------
# Hub checkpoint id; used to load both the image preprocessor and the model.
MODEL_NAME = "google/mobilenet_v2_1.0_224"
# Directory holding the dataset previously written to disk (read by load_from_disk).
DATA_PATH = "processed_bird_data"
# Directory handed to TrainingArguments as output_dir.
OUTPUT_DIR = "baseline_model_checkpoints"

In [None]:
print(f"Loading pre-processed data instantly from {DATA_PATH}...")
try:
    # Expected to be a DatasetDict exposing 'train' and 'validation' splits
    # (both are indexed later when the Trainer is built).
    dataset = load_from_disk(DATA_PATH)
except FileNotFoundError:
    # Give a actionable hint, then re-raise so the notebook stops here instead
    # of failing later with an undefined `dataset`.
    print(f"Data folder '{DATA_PATH}' not found. Do preprocessing first.")
    raise

# AutoImageProcessor is the supported loader for vision preprocessors; the
# AutoFeatureExtractor route resolves to a deprecated class for this checkpoint.
# The variable keeps its original name because later cells refer to it.
feature_extractor = AutoImageProcessor.from_pretrained(MODEL_NAME)

def transform(batch):
    """Preprocess one batch of examples on the fly.

    Args:
        batch: Mapping with an "image" column (presumably PIL images — verify
            against the preprocessing step) and a "label" column.

    Returns:
        The preprocessor's output (PyTorch tensors) with the batch's labels
        stored under the "label" key.
    """
    # Normalise every image to 3-channel RGB: a grayscale or RGBA image would
    # otherwise reach the preprocessor with the wrong number of channels.
    # Objects without a .convert method (e.g. arrays) are passed through as-is.
    images = [
        img.convert("RGB") if hasattr(img, "convert") else img
        for img in batch["image"]
    ]
    inputs = feature_extractor(images, return_tensors="pt")
    inputs["label"] = batch["label"]
    return inputs

# Attach the transformation logic to the dataset; it is applied lazily each
# time examples are accessed rather than materialised up front.
dataset = dataset.with_transform(transform)

Loading pre-processed data instantly from processed_bird_data...


In [None]:

# Baseline model: start from the pre-trained checkpoint and swap its classifier
# for a 200-way head (ignore_mismatched_sizes allows the shape change).
print("Initializing Baseline model.")

# The replacement classifier head is randomly initialised right here, which is
# before Trainer gets a chance to seed the RNGs. Seed torch explicitly so the
# starting weights (and therefore the run) are reproducible. 42 matches the
# default TrainingArguments seed.
torch.manual_seed(42)

model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
    num_labels=200,
    ignore_mismatched_sizes=True,
)

# Metrics:
accuracy = evaluate.load("accuracy")

def compute_metrics(p):
    """Compute accuracy for one evaluation pass.

    Args:
        p: Object exposing `predictions` (per-class scores, one row per
            example) and `label_ids` (reference labels).

    Returns:
        Whatever `accuracy.compute` returns for the predicted vs. reference labels.
    """
    scores, references = p.predictions, p.label_ids
    # Pick the highest-scoring class along axis 1 for every example.
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy.compute(references=references, predictions=predicted_classes)

# Pick the best available accelerator: CUDA first, then Apple-silicon MPS,
# falling back to the CPU. (Previously only MPS was considered.)
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

if device != "cpu":
    print("Activating GPU acceleration.")
model.to(device)

# Training configuration.
training_args = TrainingArguments(
    # Where checkpoints are written.
    output_dir=OUTPUT_DIR,
    # Optimisation schedule.
    num_train_epochs=5,
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    # Evaluate and checkpoint once per epoch; log every 10 steps.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    # Load batches in the main process (no worker subprocesses).
    dataloader_num_workers=0,
    # Keep columns the model does not consume so the on-the-fly transform can
    # still read the raw "image" column.
    remove_unused_columns=False,
)

# Wire the model, data splits and metric function into the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics,
    # `tokenizer=` is deprecated for Trainer and triggers a warning;
    # `processing_class=` is its replacement and likewise gets the
    # preprocessor saved alongside the model.
    processing_class=feature_extractor,
)

# Run the fine-tuning loop.
print("Starting finetuning process.")
trainer.train()

# Persist the final model to its own directory, separate from the
# per-epoch checkpoints.
final_model_dir = "new_baseline_model"
trainer.save_model(final_model_dir)
print("Baseline training complete.")

Initializing Baseline model.


model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

Some weights of MobileNetV2ForImageClassification were not initialized from the model checkpoint at google/mobilenet_v2_1.0_224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1001]) in the checkpoint and torch.Size([200]) in the model instantiated
- classifier.weight: found shape torch.Size([1001, 1280]) in the checkpoint and torch.Size([200, 1280]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script: 0.00B [00:00, ?B/s]

Activating GPU acceleration.
Starting finetuning process.


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,3.264,3.274218,0.247878
2,1.9267,2.332361,0.427844
3,1.1117,2.248989,0.448217
4,0.7188,1.94926,0.514431
5,0.5224,1.743432,0.584041




Baseline training complete.
