In [13]:
from datasets import load_dataset

#loading dataset via Hugging Face API
ds = load_dataset('ChrisGuarino/cats')

#Data Exploration
train_data = ds['train']
# test_data = ds['test']
validation_data = ds['validation']

Downloading readme: 100%|██████████| 542/542 [00:00<00:00, 1.00MB/s]
Downloading data: 100%|██████████| 477M/477M [00:33<00:00, 14.3MB/s]
Downloading data: 100%|██████████| 55.5M/55.5M [00:03<00:00, 15.4MB/s]
Generating train split: 100%|██████████| 342/342 [00:02<00:00, 120.56 examples/s]
Generating validation split: 100%|██████████| 39/39 [00:00<00:00, 178.38 examples/s]


In [14]:
train_data.features

{'image': Image(decode=True, id=None),
 'labels': ClassLabel(names=['prim', 'rupe', 'notcat'], id=None)}

In [15]:
#Load in the image processor from Hugging Face Hub 
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

In [16]:
def process_example(example):
    inputs = processor(example['image'], return_tensors='pt')
    inputs['labels'] = example['labels']
    return inputs
process_example(ds['train'][0])

{'pixel_values': tensor([[[[-0.5765, -0.5608, -0.5294,  ...,  0.1529,  0.1529,  0.1529],
          [-0.5294, -0.5373, -0.5529,  ...,  0.1529,  0.1529,  0.1529],
          [-0.5216, -0.5216, -0.5216,  ...,  0.1608,  0.1529,  0.1529],
          ...,
          [-0.7569, -0.7725, -0.7961,  ..., -0.8353, -0.8431, -0.8431],
          [-0.7725, -0.7725, -0.7961,  ..., -0.8353, -0.8353, -0.8275],
          [-0.7882, -0.7569, -0.7569,  ..., -0.8353, -0.8275, -0.8353]],

         [[-0.6627, -0.6549, -0.6235,  ...,  0.0431,  0.0431,  0.0431],
          [-0.6314, -0.6235, -0.6471,  ...,  0.0431,  0.0431,  0.0431],
          [-0.6235, -0.6157, -0.6157,  ...,  0.0510,  0.0510,  0.0510],
          ...,
          [-0.7255, -0.7490, -0.7804,  ..., -0.8275, -0.8196, -0.8196],
          [-0.7333, -0.7333, -0.7804,  ..., -0.8275, -0.8275, -0.8118],
          [-0.7490, -0.7255, -0.7412,  ..., -0.8275, -0.8196, -0.8039]],

         [[-0.6157, -0.6000, -0.5765,  ...,  0.2078,  0.2157,  0.2157],
          [-0

In [17]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

prepared_ds = ds.with_transform(transform)

In [18]:
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 342
    })
    validation: Dataset({
        features: ['image', 'labels'],
        num_rows: 39
    })
})

In [19]:
import torch
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [20]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy",trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


In [21]:
from transformers import ViTForImageClassification

labels = ds['train'].features['labels'].names
# labels = {0: 'prim', 1: 'rupe'}  # Replace with your actual label mapping


model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [23]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
  output_dir="model",
  per_device_train_batch_size=16,
  evaluation_strategy="epoch",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='none',
  load_best_model_at_end=True,
  save_strategy="epoch"
) 

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # Make sure you have a validation set
    tokenizer=processor,
)

## Training

In [24]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()



ValueError: mean must have 1 elements if it is an iterable, got 3

In [19]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/chrisguarino/.cache/huggingface/token
Login successful


In [20]:
trainer.push_to_hub("ChrisGuarino/yummy_model")


model.safetensors:   0%|          | 16.4k/343M [00:00<1:53:21, 50.5kB/s]
training_args.bin: 100%|██████████| 4.66k/4.66k [00:00<00:00, 10.7kB/s] 
model.safetensors: 100%|██████████| 343M/343M [00:29<00:00, 11.4MB/s] 
Upload 2 LFS files: 100%|██████████| 2/2 [00:30<00:00, 15.18s/it]


CommitInfo(commit_url='https://huggingface.co/ChrisGuarino/cat_ds/commit/641b55632b5aa79142fccce983ad8c89cadc9cd2', commit_message='ChrisGuarino/yummy_model', commit_description='', oid='641b55632b5aa79142fccce983ad8c89cadc9cd2', pr_url=None, pr_revision=None, pr_num=None)