In [2]:
from datasets import load_dataset

#loading dataset via Hugging Face API
ds = load_dataset('ChrisGuarino/cats')

#Data Exploration
train_data = ds['train']
# test_data = ds['test']
validation_data = ds['validation']

In [3]:
train_data.features

{'image': Image(decode=True, id=None),
 'labels': ClassLabel(names=['prim', 'rupe'], id=None)}

In [4]:
#Load in the image processor from Hugging Face Hub 
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

In [5]:
def process_example(example):
    inputs = processor(example['image'], return_tensors='pt')
    inputs['labels'] = example['labels']
    return inputs
process_example(ds['train'][0])

{'pixel_values': tensor([[[[-0.9216, -0.8824, -0.7725,  ..., -0.8980, -0.8902, -0.8980],
          [-0.8824, -0.8431, -0.6627,  ..., -0.8824, -0.8980, -0.8980],
          [-0.5765, -0.6078, -0.6000,  ..., -0.8745, -0.8902, -0.8902],
          ...,
          [-0.5373, -0.6000, -0.5373,  ..., -0.5922, -0.6235, -0.6784],
          [-0.5137, -0.5137, -0.5451,  ..., -0.5843, -0.6078, -0.7490],
          [-0.6235, -0.4902, -0.6157,  ..., -0.5608, -0.5922, -0.6863]],

         [[-0.9765, -0.9529, -0.9059,  ..., -0.9294, -0.9216, -0.9216],
          [-0.9608, -0.9373, -0.8902,  ..., -0.9137, -0.9294, -0.9294],
          [-0.8588, -0.8902, -0.8980,  ..., -0.9137, -0.9216, -0.9294],
          ...,
          [-0.7176, -0.7725, -0.7176,  ..., -0.7647, -0.7882, -0.8353],
          [-0.6784, -0.6784, -0.7255,  ..., -0.7647, -0.7882, -0.8745],
          [-0.7882, -0.6549, -0.7804,  ..., -0.7412, -0.7725, -0.8353]],

         [[-1.0000, -0.9922, -0.9373,  ..., -0.9451, -0.9451, -0.9451],
          [-0

In [6]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

prepared_ds = ds.with_transform(transform)

In [9]:
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 207
    })
    validation: Dataset({
        features: ['image', 'labels'],
        num_rows: 24
    })
})

In [10]:
import torch
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [11]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy",trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


  metric = load_metric("accuracy",trust_remote_code=True)


In [12]:
from transformers import ViTForImageClassification

labels = ds['train'].features['labels'].names
# labels = {0: 'prim', 1: 'rupe'}  # Replace with your actual label mapping


model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

  return self.fget.__get__(instance, owner)()
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [14]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
  output_dir="cat_ds",
  per_device_train_batch_size=16,
  evaluation_strategy="epoch",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='none',
  load_best_model_at_end=True,
  save_strategy="epoch"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # Make sure you have a validation set
    tokenizer=processor,
)

## Training

In [15]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

  0%|          | 0/52 [00:00<?, ?it/s]

 19%|█▉        | 10/52 [01:04<03:31,  5.03s/it]

{'loss': 0.4194, 'learning_rate': 0.00016153846153846155, 'epoch': 0.77}


 25%|██▌       | 13/52 [01:16<02:39,  4.09s/it]
 25%|██▌       | 13/52 [01:21<02:39,  4.09s/it]

{'eval_loss': 0.07365512102842331, 'eval_accuracy': 1.0, 'eval_runtime': 5.5846, 'eval_samples_per_second': 4.298, 'eval_steps_per_second': 0.537, 'epoch': 1.0}


 38%|███▊      | 20/52 [02:03<02:46,  5.21s/it]

{'loss': 0.063, 'learning_rate': 0.0001230769230769231, 'epoch': 1.54}


 50%|█████     | 26/52 [02:27<01:38,  3.79s/it]
 50%|█████     | 26/52 [02:33<01:38,  3.79s/it]

{'eval_loss': 0.026600712910294533, 'eval_accuracy': 1.0, 'eval_runtime': 5.1625, 'eval_samples_per_second': 4.649, 'eval_steps_per_second': 0.581, 'epoch': 2.0}


 58%|█████▊    | 30/52 [02:58<02:05,  5.68s/it]

{'loss': 0.051, 'learning_rate': 8.461538461538461e-05, 'epoch': 2.31}


                                               
 75%|███████▌  | 39/52 [03:42<00:49,  3.77s/it]

{'eval_loss': 0.015066702850162983, 'eval_accuracy': 1.0, 'eval_runtime': 4.9529, 'eval_samples_per_second': 4.846, 'eval_steps_per_second': 0.606, 'epoch': 3.0}


 77%|███████▋  | 40/52 [03:53<01:28,  7.40s/it]

{'loss': 0.0308, 'learning_rate': 4.615384615384616e-05, 'epoch': 3.08}


 96%|█████████▌| 50/52 [04:39<00:09,  4.71s/it]

{'loss': 0.014, 'learning_rate': 7.692307692307694e-06, 'epoch': 3.85}


                                               
100%|██████████| 52/52 [04:50<00:00,  3.76s/it]

{'eval_loss': 0.013692905195057392, 'eval_accuracy': 1.0, 'eval_runtime': 4.8268, 'eval_samples_per_second': 4.972, 'eval_steps_per_second': 0.622, 'epoch': 4.0}


100%|██████████| 52/52 [04:54<00:00,  5.67s/it]


{'train_runtime': 294.988, 'train_samples_per_second': 2.807, 'train_steps_per_second': 0.176, 'train_loss': 0.11166981543199374, 'epoch': 4.0}
***** train metrics *****
  epoch                    =        4.0
  train_loss               =     0.1117
  train_runtime            = 0:04:54.98
  train_samples_per_second =      2.807
  train_steps_per_second   =      0.176


In [19]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/chrisguarino/.cache/huggingface/token
Login successful


In [20]:
trainer.push_to_hub("ChrisGuarino/yummy_model")


model.safetensors:   0%|          | 16.4k/343M [00:00<1:53:21, 50.5kB/s]
training_args.bin: 100%|██████████| 4.66k/4.66k [00:00<00:00, 10.7kB/s] 
model.safetensors: 100%|██████████| 343M/343M [00:29<00:00, 11.4MB/s] 
Upload 2 LFS files: 100%|██████████| 2/2 [00:30<00:00, 15.18s/it]


CommitInfo(commit_url='https://huggingface.co/ChrisGuarino/cat_ds/commit/641b55632b5aa79142fccce983ad8c89cadc9cd2', commit_message='ChrisGuarino/yummy_model', commit_description='', oid='641b55632b5aa79142fccce983ad8c89cadc9cd2', pr_url=None, pr_revision=None, pr_num=None)