In [2]:
from datasets import load_dataset

#loading dataset via Hugging Face API
ds = load_dataset('ChrisGuarino/cats')

#Data Exploration
train_data = ds['train']
# test_data = ds['test']
validation_data = ds['validation']

In [3]:
train_data.features

{'image': Image(decode=True, id=None),
 'labels': ClassLabel(names=['prim', 'rupe', 'notcat'], id=None)}

In [4]:
#Load in the image processor from Hugging Face Hub 
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

In [5]:
def process_example(example):
    inputs = processor(example['image'], return_tensors='pt')
    inputs['labels'] = example['labels']
    return inputs
process_example(ds['train'][0])

{'pixel_values': tensor([[[[-0.7569, -0.7490, -0.6549,  ...,  0.4039,  0.3882,  0.3725],
          [-0.7490, -0.7490, -0.6549,  ...,  0.3961,  0.3804,  0.3882],
          [-0.7569, -0.7412, -0.6627,  ...,  0.4275,  0.4118,  0.4196],
          ...,
          [ 0.4980,  0.4980,  0.4902,  ...,  0.7490,  0.7176,  0.6863],
          [ 0.5059,  0.4980,  0.4902,  ...,  0.7804,  0.7255,  0.6706],
          [ 0.5216,  0.5059,  0.4902,  ...,  0.7569,  0.7255,  0.6706]],

         [[-0.7647, -0.7569, -0.6627,  ...,  0.1216,  0.1137,  0.1137],
          [-0.7569, -0.7490, -0.6627,  ...,  0.1137,  0.1059,  0.1137],
          [-0.7647, -0.7490, -0.6706,  ...,  0.1294,  0.1294,  0.1451],
          ...,
          [ 0.4353,  0.4353,  0.4275,  ...,  0.6157,  0.5765,  0.5529],
          [ 0.4353,  0.4353,  0.4275,  ...,  0.6392,  0.5843,  0.5294],
          [ 0.4353,  0.4275,  0.4275,  ...,  0.6157,  0.5765,  0.5373]],

         [[-0.7804, -0.7804, -0.6863,  ..., -0.1922, -0.2000, -0.2000],
          [-0

In [6]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

prepared_ds = ds.with_transform(transform)

In [7]:
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 342
    })
    validation: Dataset({
        features: ['image', 'labels'],
        num_rows: 38
    })
})

In [8]:
import torch
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [9]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy",trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


  metric = load_metric("accuracy",trust_remote_code=True)


In [10]:
from transformers import ViTForImageClassification

labels = ds['train'].features['labels'].names
# labels = {0: 'prim', 1: 'rupe'}  # Replace with your actual label mapping


model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

  return self.fget.__get__(instance, owner)()
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [12]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
  output_dir="model",
  per_device_train_batch_size=16,
  evaluation_strategy="epoch",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='none',
  load_best_model_at_end=True,
  save_strategy="epoch"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # Make sure you have a validation set
    tokenizer=processor,
)

## Training

In [13]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

 11%|█▏        | 10/88 [00:35<04:32,  3.49s/it]

{'loss': 0.8023, 'learning_rate': 0.00017727272727272728, 'epoch': 0.45}


 23%|██▎       | 20/88 [01:07<03:34,  3.16s/it]

{'loss': 0.241, 'learning_rate': 0.00015454545454545454, 'epoch': 0.91}


                                               
 25%|██▌       | 22/88 [01:17<02:56,  2.67s/it]

{'eval_loss': 0.08480961620807648, 'eval_accuracy': 1.0, 'eval_runtime': 5.5068, 'eval_samples_per_second': 6.901, 'eval_steps_per_second': 0.908, 'epoch': 1.0}


 34%|███▍      | 30/88 [01:47<03:15,  3.37s/it]

{'loss': 0.0851, 'learning_rate': 0.0001318181818181818, 'epoch': 1.36}


 45%|████▌     | 40/88 [02:20<02:39,  3.32s/it]

{'loss': 0.0447, 'learning_rate': 0.00010909090909090909, 'epoch': 1.82}


                                               
 50%|█████     | 44/88 [02:35<01:45,  2.40s/it]

{'eval_loss': 0.034477006644010544, 'eval_accuracy': 1.0, 'eval_runtime': 5.2309, 'eval_samples_per_second': 7.265, 'eval_steps_per_second': 0.956, 'epoch': 2.0}


 57%|█████▋    | 50/88 [02:58<02:09,  3.41s/it]

{'loss': 0.0718, 'learning_rate': 8.636363636363637e-05, 'epoch': 2.27}


 68%|██████▊   | 60/88 [03:33<01:31,  3.27s/it]

{'loss': 0.0423, 'learning_rate': 6.363636363636364e-05, 'epoch': 2.73}


                                               
 75%|███████▌  | 66/88 [03:54<00:50,  2.28s/it]

{'eval_loss': 0.026531875133514404, 'eval_accuracy': 1.0, 'eval_runtime': 4.7654, 'eval_samples_per_second': 7.974, 'eval_steps_per_second': 1.049, 'epoch': 3.0}


 80%|███████▉  | 70/88 [04:11<01:09,  3.84s/it]

{'loss': 0.0262, 'learning_rate': 4.0909090909090915e-05, 'epoch': 3.18}


 91%|█████████ | 80/88 [04:41<00:24,  3.01s/it]

{'loss': 0.0236, 'learning_rate': 1.8181818181818182e-05, 'epoch': 3.64}


                                               
100%|██████████| 88/88 [05:09<00:00,  2.34s/it]

{'eval_loss': 0.02446766011416912, 'eval_accuracy': 1.0, 'eval_runtime': 4.7811, 'eval_samples_per_second': 7.948, 'eval_steps_per_second': 1.046, 'epoch': 4.0}


100%|██████████| 88/88 [05:12<00:00,  3.56s/it]


{'train_runtime': 313.815, 'train_samples_per_second': 4.359, 'train_steps_per_second': 0.28, 'train_loss': 0.15409146113829178, 'epoch': 4.0}
***** train metrics *****
  epoch                    =        4.0
  train_loss               =     0.1541
  train_runtime            = 0:05:13.81
  train_samples_per_second =      4.359
  train_steps_per_second   =       0.28


In [14]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/chrisguarino/.cache/huggingface/token
Login successful


In [15]:
trainer.push_to_hub("ChrisGuarino/yummy_model")

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]
training_args.bin: 100%|██████████| 4.66k/4.66k [00:00<00:00, 28.2kB/s]
model.safetensors: 100%|██████████| 343M/343M [00:23<00:00, 14.7MB/s]
Upload 2 LFS files: 100%|██████████| 2/2 [00:23<00:00, 11.77s/it]


CommitInfo(commit_url='https://huggingface.co/ChrisGuarino/model/commit/4b8e96b1112b41d04be185d2ee516371892a9062', commit_message='ChrisGuarino/yummy_model', commit_description='', oid='4b8e96b1112b41d04be185d2ee516371892a9062', pr_url=None, pr_revision=None, pr_num=None)