In [3]:
from datasets import load_dataset

#loading dataset via Hugging Face API
ds = load_dataset('ChrisGuarino/cats')

#Data Exploration
train_data = ds['train']
# test_data = ds['test']
validation_data = ds['validation']

In [4]:
train_data.features

{'image': Image(decode=True, id=None),
 'labels': ClassLabel(names=['prim', 'rupe'], id=None)}

In [5]:
#Load in the image processor from Hugging Face Hub 
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

In [6]:
def process_example(example):
    inputs = processor(example['image'], return_tensors='pt')
    inputs['labels'] = example['labels']
    return inputs
process_example(ds['train'][0])

{'pixel_values': tensor([[[[-0.9216, -0.8824, -0.7725,  ..., -0.8980, -0.8902, -0.8980],
          [-0.8824, -0.8431, -0.6627,  ..., -0.8824, -0.8980, -0.8980],
          [-0.5765, -0.6078, -0.6000,  ..., -0.8745, -0.8902, -0.8902],
          ...,
          [-0.5373, -0.6000, -0.5373,  ..., -0.5922, -0.6235, -0.6784],
          [-0.5137, -0.5137, -0.5451,  ..., -0.5843, -0.6078, -0.7490],
          [-0.6235, -0.4902, -0.6157,  ..., -0.5608, -0.5922, -0.6863]],

         [[-0.9765, -0.9529, -0.9059,  ..., -0.9294, -0.9216, -0.9216],
          [-0.9608, -0.9373, -0.8902,  ..., -0.9137, -0.9294, -0.9294],
          [-0.8588, -0.8902, -0.8980,  ..., -0.9137, -0.9216, -0.9294],
          ...,
          [-0.7176, -0.7725, -0.7176,  ..., -0.7647, -0.7882, -0.8353],
          [-0.6784, -0.6784, -0.7255,  ..., -0.7647, -0.7882, -0.8745],
          [-0.7882, -0.6549, -0.7804,  ..., -0.7412, -0.7725, -0.8353]],

         [[-1.0000, -0.9922, -0.9373,  ..., -0.9451, -0.9451, -0.9451],
          [-0

In [7]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

prepared_ds = ds.with_transform(transform)

In [8]:
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 207
    })
    validation: Dataset({
        features: ['image', 'labels'],
        num_rows: 24
    })
})

In [9]:
import torch
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [10]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy",trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


  metric = load_metric("accuracy",trust_remote_code=True)


In [11]:
from transformers import ViTForImageClassification

labels = ds['train'].features['labels'].names
# labels = {0: 'prim', 1: 'rupe'}  # Replace with your actual label mapping


model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

  return self.fget.__get__(instance, owner)()
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [13]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
  output_dir="cat_ds",
  per_device_train_batch_size=16,
  evaluation_strategy="epoch",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='none',
  load_best_model_at_end=True,
  save_strategy="epoch"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # Make sure you have a validation set
    tokenizer=processor,
)

## Training

In [14]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

 19%|█▉        | 10/52 [02:10<08:42, 12.43s/it]

{'loss': 0.5106, 'learning_rate': 0.00016153846153846155, 'epoch': 0.77}


 25%|██▌       | 13/52 [02:45<07:18, 11.25s/it]
 25%|██▌       | 13/52 [02:55<07:18, 11.25s/it]

{'eval_loss': 0.09747540950775146, 'eval_accuracy': 1.0, 'eval_runtime': 10.6319, 'eval_samples_per_second': 2.257, 'eval_steps_per_second': 0.282, 'epoch': 1.0}


 38%|███▊      | 20/52 [04:29<06:48, 12.78s/it]

{'loss': 0.0781, 'learning_rate': 0.0001230769230769231, 'epoch': 1.54}


 50%|█████     | 26/52 [05:37<04:39, 10.75s/it]
 50%|█████     | 26/52 [05:47<04:39, 10.75s/it]

{'eval_loss': 0.02915300987660885, 'eval_accuracy': 1.0, 'eval_runtime': 10.2096, 'eval_samples_per_second': 2.351, 'eval_steps_per_second': 0.294, 'epoch': 2.0}


 58%|█████▊    | 30/52 [06:44<05:05, 13.88s/it]

{'loss': 0.035, 'learning_rate': 8.461538461538461e-05, 'epoch': 2.31}


 75%|███████▌  | 39/52 [08:29<02:24, 11.12s/it]
 75%|███████▌  | 39/52 [08:40<02:24, 11.12s/it]

{'eval_loss': 0.015924831852316856, 'eval_accuracy': 1.0, 'eval_runtime': 10.3279, 'eval_samples_per_second': 2.324, 'eval_steps_per_second': 0.29, 'epoch': 3.0}


 77%|███████▋  | 40/52 [08:57<03:14, 16.19s/it]

{'loss': 0.0171, 'learning_rate': 4.615384615384616e-05, 'epoch': 3.08}


 96%|█████████▌| 50/52 [11:01<00:24, 12.28s/it]

{'loss': 0.0129, 'learning_rate': 7.692307692307694e-06, 'epoch': 3.85}


                                               
100%|██████████| 52/52 [11:31<00:00, 10.74s/it]

{'eval_loss': 0.013783477246761322, 'eval_accuracy': 1.0, 'eval_runtime': 10.0744, 'eval_samples_per_second': 2.382, 'eval_steps_per_second': 0.298, 'epoch': 4.0}


100%|██████████| 52/52 [11:33<00:00, 13.33s/it]


{'train_runtime': 693.3707, 'train_samples_per_second': 1.194, 'train_steps_per_second': 0.075, 'train_loss': 0.12617188778061134, 'epoch': 4.0}
***** train metrics *****
  epoch                    =        4.0
  train_loss               =     0.1262
  train_runtime            = 0:11:33.37
  train_samples_per_second =      1.194
  train_steps_per_second   =      0.075


In [16]:
trainer.push_to_hub("ChrisGuarino/cats")

CommitInfo(commit_url='https://huggingface.co/ChrisGuarino/cat_ds/commit/e01cac9b9a43cbcda3d5fbc6147ef2a2deacec83', commit_message='ChrisGuarino/cats', commit_description='', oid='e01cac9b9a43cbcda3d5fbc6147ef2a2deacec83', pr_url=None, pr_revision=None, pr_num=None)