In [25]:
from datasets import load_dataset

#loading dataset via Hugging Face API
ds = load_dataset('ChrisGuarino/cats')

#Data Exploration
train_data = ds['train']
# test_data = ds['test']
validation_data = ds['validation']


Downloading readme: 100%|██████████| 542/542 [00:00<00:00, 104kB/s]

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Downloading data: 100%|██████████| 471M/471M [00:25<00:00, 18.2MB/s]

[A
[A
[A
[A
[A
[A
[A
[A
Downloading data: 100%|██████████| 60.7M/60.7M [00:02<00:00, 22.0MB/s]

[A
Generating train split: 100%|██████████| 342/342 [00:02<00:00, 149.52 examples/s]

[A
Generating validation split: 100%|██████████| 38/38 [00:00<00:00, 162.63 examples/s]


In [26]:
train_data.features

{'image': Image(decode=True, id=None),
 'labels': ClassLabel(names=['prim', 'rupe', 'notcat'], id=None)}

In [27]:
#Load in the image processor from Hugging Face Hub 
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

In [28]:
def process_example(example):
    inputs = processor(example['image'], return_tensors='pt')
    inputs['labels'] = example['labels']
    return inputs
process_example(ds['train'][0])

{'pixel_values': tensor([[[[-0.7569, -0.7490, -0.6549,  ...,  0.4039,  0.3882,  0.3725],
          [-0.7490, -0.7490, -0.6549,  ...,  0.3961,  0.3804,  0.3882],
          [-0.7569, -0.7412, -0.6627,  ...,  0.4275,  0.4118,  0.4196],
          ...,
          [ 0.4980,  0.4980,  0.4902,  ...,  0.7490,  0.7176,  0.6863],
          [ 0.5059,  0.4980,  0.4902,  ...,  0.7804,  0.7255,  0.6706],
          [ 0.5216,  0.5059,  0.4902,  ...,  0.7569,  0.7255,  0.6706]],

         [[-0.7647, -0.7569, -0.6627,  ...,  0.1216,  0.1137,  0.1137],
          [-0.7569, -0.7490, -0.6627,  ...,  0.1137,  0.1059,  0.1137],
          [-0.7647, -0.7490, -0.6706,  ...,  0.1294,  0.1294,  0.1451],
          ...,
          [ 0.4353,  0.4353,  0.4275,  ...,  0.6157,  0.5765,  0.5529],
          [ 0.4353,  0.4353,  0.4275,  ...,  0.6392,  0.5843,  0.5294],
          [ 0.4353,  0.4275,  0.4275,  ...,  0.6157,  0.5765,  0.5373]],

         [[-0.7804, -0.7804, -0.6863,  ..., -0.1922, -0.2000, -0.2000],
          [-0

In [29]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

prepared_ds = ds.with_transform(transform)

In [30]:
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 342
    })
    validation: Dataset({
        features: ['image', 'labels'],
        num_rows: 38
    })
})

In [31]:
import torch
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [32]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy",trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


In [33]:
from transformers import ViTForImageClassification

labels = ds['train'].features['labels'].names
# labels = {0: 'prim', 1: 'rupe'}  # Replace with your actual label mapping


model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [35]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
  output_dir="model",
  per_device_train_batch_size=16,
  evaluation_strategy="epoch",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='none',
  load_best_model_at_end=True,
  save_strategy="epoch"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # Make sure you have a validation set
    tokenizer=processor,
)

## Training

In [36]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

 15%|█▍        | 13/88 [22:33:26<130:08:16, 6246.62s/it]
 11%|█▏        | 10/88 [04:44<37:33, 28.89s/it]
 11%|█▏        | 10/88 [04:44<37:33, 28.89s/it]

{'loss': 0.7764, 'learning_rate': 0.00017727272727272728, 'epoch': 0.45}


 23%|██▎       | 20/88 [10:22<31:53, 28.14s/it]
 23%|██▎       | 20/88 [10:22<31:53, 28.14s/it]

{'loss': 0.1941, 'learning_rate': 0.00015454545454545454, 'epoch': 0.91}


 25%|██▌       | 22/88 [10:55<23:46, 21.61s/it]
[A
[A
[A
[A
[A

[A[A                                       
                                               
 25%|██▌       | 22/88 [11:19<23:46, 21.61s/it]
[A

{'eval_loss': 0.06949551403522491, 'eval_accuracy': 1.0, 'eval_runtime': 23.3963, 'eval_samples_per_second': 1.624, 'eval_steps_per_second': 0.214, 'epoch': 1.0}


 34%|███▍      | 30/88 [15:38<27:13, 28.17s/it]
 34%|███▍      | 30/88 [15:38<27:13, 28.17s/it]

{'loss': 0.0688, 'learning_rate': 0.0001318181818181818, 'epoch': 1.36}


 45%|████▌     | 40/88 [19:56<20:55, 26.16s/it]
 45%|████▌     | 40/88 [19:56<20:55, 26.16s/it]

{'loss': 0.0528, 'learning_rate': 0.00010909090909090909, 'epoch': 1.82}


 50%|█████     | 44/88 [21:21<15:04, 20.56s/it]
[A
[A
[A
[A
[A

[A[A                                       
                                               
 50%|█████     | 44/88 [21:44<15:04, 20.56s/it]
[A

{'eval_loss': 0.032007306814193726, 'eval_accuracy': 1.0, 'eval_runtime': 23.4714, 'eval_samples_per_second': 1.619, 'eval_steps_per_second': 0.213, 'epoch': 2.0}


 57%|█████▋    | 50/88 [24:56<22:32, 35.59s/it]
 57%|█████▋    | 50/88 [24:56<22:32, 35.59s/it]

{'loss': 0.0323, 'learning_rate': 8.636363636363637e-05, 'epoch': 2.27}


 68%|██████▊   | 60/88 [29:34<11:59, 25.71s/it]
 68%|██████▊   | 60/88 [29:34<11:59, 25.71s/it]

{'loss': 0.0262, 'learning_rate': 6.363636363636364e-05, 'epoch': 2.73}


 75%|███████▌  | 66/88 [31:58<07:39, 20.89s/it]
[A
[A
[A
[A
[A

[A[A                                       
                                               
 75%|███████▌  | 66/88 [32:22<07:39, 20.89s/it]
[A

{'eval_loss': 0.02552206628024578, 'eval_accuracy': 1.0, 'eval_runtime': 23.2412, 'eval_samples_per_second': 1.635, 'eval_steps_per_second': 0.215, 'epoch': 3.0}


 80%|███████▉  | 70/88 [34:30<09:28, 31.56s/it]
 80%|███████▉  | 70/88 [34:30<09:28, 31.56s/it]

{'loss': 0.0246, 'learning_rate': 4.0909090909090915e-05, 'epoch': 3.18}


 91%|█████████ | 80/88 [38:56<03:36, 27.05s/it]
 91%|█████████ | 80/88 [38:56<03:36, 27.05s/it]

{'loss': 0.0224, 'learning_rate': 1.8181818181818182e-05, 'epoch': 3.64}


100%|██████████| 88/88 [42:07<00:00, 20.89s/it]
[A
[A
[A
[A
[A

[A[A                                       
                                                
100%|██████████| 88/88 [42:30<00:00, 20.89s/it]
[A

{'eval_loss': 0.02296990528702736, 'eval_accuracy': 1.0, 'eval_runtime': 23.4419, 'eval_samples_per_second': 1.621, 'eval_steps_per_second': 0.213, 'epoch': 4.0}



100%|██████████| 88/88 [42:33<00:00, 29.02s/it] 


{'train_runtime': 2553.5531, 'train_samples_per_second': 0.536, 'train_steps_per_second': 0.034, 'train_loss': 0.1380413117056543, 'epoch': 4.0}
***** train metrics *****
  epoch                    =        4.0
  train_loss               =      0.138
  train_runtime            = 0:42:33.55
  train_samples_per_second =      0.536
  train_steps_per_second   =      0.034


In [37]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/christopherguarino/.cache/huggingface/token
Login successful


In [38]:
trainer.push_to_hub("ChrisGuarino/yummy_model")

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]
[A

[A[A

[A[A
training_args.bin: 100%|██████████| 4.66k/4.66k [00:00<00:00, 15.4kB/s]

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
model.safetensors: 100%|██████████| 343M/343M [00:13<00:00, 26.0MB/s]
Upload 2 LFS files: 100%|██████████| 2/2 [00:13<00:00,  6.82s/it]


CommitInfo(commit_url='https://huggingface.co/ChrisGuarino/model/commit/069c1515e52554abfbce5f47df49b532c186566f', commit_message='ChrisGuarino/yummy_model', commit_description='', oid='069c1515e52554abfbce5f47df49b532c186566f', pr_url=None, pr_revision=None, pr_num=None)