In [1]:
%cd E:\Github\Product_classifier\Notebooks\Food_classifier

E:\Github\Product_classifier\Notebooks\Food_classifier


# Loading our dataset

In [2]:
from datasets import load_dataset

food = load_dataset("food101", split="train[:10000]")

## Previewing the data we've obtained

In [3]:
food

Dataset({
    features: ['image', 'label'],
    num_rows: 10000
})

## Splitting our data into train and test sets

In [4]:
food_splitted = food.train_test_split(0.2)

In [5]:
food_splitted

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 8000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 2000
    })
})

## Identifying the labels in correlation to its ID and vice versa

In [6]:
labels = food_splitted["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

In [7]:
label2id

{'apple_pie': '0',
 'baby_back_ribs': '1',
 'baklava': '2',
 'beef_carpaccio': '3',
 'beef_tartare': '4',
 'beet_salad': '5',
 'beignets': '6',
 'bibimbap': '7',
 'bread_pudding': '8',
 'breakfast_burrito': '9',
 'bruschetta': '10',
 'caesar_salad': '11',
 'cannoli': '12',
 'caprese_salad': '13',
 'carrot_cake': '14',
 'ceviche': '15',
 'cheesecake': '16',
 'cheese_plate': '17',
 'chicken_curry': '18',
 'chicken_quesadilla': '19',
 'chicken_wings': '20',
 'chocolate_cake': '21',
 'chocolate_mousse': '22',
 'churros': '23',
 'clam_chowder': '24',
 'club_sandwich': '25',
 'crab_cakes': '26',
 'creme_brulee': '27',
 'croque_madame': '28',
 'cup_cakes': '29',
 'deviled_eggs': '30',
 'donuts': '31',
 'dumplings': '32',
 'edamame': '33',
 'eggs_benedict': '34',
 'escargots': '35',
 'falafel': '36',
 'filet_mignon': '37',
 'fish_and_chips': '38',
 'foie_gras': '39',
 'french_fries': '40',
 'french_onion_soup': '41',
 'french_toast': '42',
 'fried_calamari': '43',
 'fried_rice': '44',
 'frozen

# Identifying the image processor 

In [8]:
from transformers import AutoImageProcessor

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k", use_fast=True)

In [9]:
processor.size

size = (processor.size['height'], processor.size['width'])
size

(224, 224)

## Augementing our images with the properties set in our processor

In [10]:
from torchvision.transforms import RandomResizedCrop, Compose, ToTensor, Normalize

transform = Compose([
    RandomResizedCrop(size),
    ToTensor(),
    Normalize(mean=processor.image_mean, std=processor.image_std)
])

### Preprocessing our images and deleting the original image files

In [11]:
def preprocess_images(samples):
    samples['pixel_values'] = [transform(img.convert("RGB")) for img in samples["image"]]
    del samples['image']
    return samples

In [12]:
food_splitted_trans = food_splitted.with_transform(preprocess_images)       # Preprocess images with when calling the dataset

In [13]:
food_splitted_trans['train'][0]

{'label': 53,
 'pixel_values': tensor([[[-0.7804, -0.7569, -0.7020,  ..., -0.3020, -0.3412, -0.3490],
          [-0.7412, -0.7176, -0.6863,  ..., -0.2706, -0.2941, -0.3098],
          [-0.7255, -0.7020, -0.6863,  ..., -0.2471, -0.2471, -0.2627],
          ...,
          [ 0.5451,  0.5451,  0.4980,  ..., -0.4275, -0.4510, -0.4510],
          [ 0.5608,  0.5529,  0.5294,  ..., -0.4353, -0.4431, -0.4275],
          [ 0.5608,  0.5451,  0.5451,  ..., -0.4667, -0.4667, -0.4510]],
 
         [[-0.7647, -0.7804, -0.7647,  ..., -0.4824, -0.5216, -0.5529],
          [-0.7333, -0.7490, -0.7569,  ..., -0.4902, -0.5294, -0.5451],
          [-0.7412, -0.7412, -0.7647,  ..., -0.5059, -0.5216, -0.5373],
          ...,
          [ 0.5686,  0.5686,  0.5216,  ..., -0.5137, -0.5294, -0.5294],
          [ 0.5765,  0.5765,  0.5529,  ..., -0.5137, -0.5137, -0.5059],
          [ 0.5765,  0.5686,  0.5686,  ..., -0.5373, -0.5373, -0.5137]],
 
         [[-0.7412, -0.7490, -0.7333,  ..., -0.6784, -0.7176, -0.7176]

# Importing our model for image classification

In [14]:
from transformers import AutoModelForImageClassification, Trainer, TrainingArguments

model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=len(labels),id2label=id2label,label2id=label2id)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Identifying the data collator (Data Loader)

In [15]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator()

## Identifying the evaulation metrics for the model

In [16]:
import evaluate 
import numpy as np

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

## Identifying the training arguments, number of epochs, batch size, learning rate, and other hyperparameters for training the model.

### Identifying the data trainer using our model, training arguments, data loader, evaulation metrics, our datasets and images processor.

In [17]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    remove_unused_columns=False,
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    learning_rate=1e-4,
    per_device_eval_batch_size=16,   # batch size for evaluation
    weight_decay=0.01,               # strength of weight decay
    save_strategy='epoch',
    logging_steps=50,
    push_to_hub=False,
    metric_for_best_model='accuracy',
)

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    data_collator=data_collator,
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    train_dataset=food_splitted_trans['train'],         # training dataset
    eval_dataset=food_splitted_trans['test'],            # evaluation dataset
    processing_class = processor
)


# Model Training

In [18]:
trainer.train()

Step,Training Loss
50,3.6391
100,2.319
150,1.5281
200,1.2219
250,0.9043
300,0.7759
350,0.6271
400,0.5813
450,0.5195
500,0.5113


TrainOutput(global_step=1500, training_loss=0.6021390005747477, metrics={'train_runtime': 607.6896, 'train_samples_per_second': 39.494, 'train_steps_per_second': 2.468, 'total_flos': 1.8614579687424e+18, 'train_loss': 0.6021390005747477, 'epoch': 3.0})

In [22]:
from sklearn.metrics import classification_report
import numpy as np

# Get predictions
predictions = trainer.predict(food_splitted_trans['test'])
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)

# Get all possible label names from full training set
all_label_names = food_splitted["train"].features["label"].names

# Get only the labels that appear in the current predictions
unique_labels = np.unique(np.concatenate([y_true, y_pred]))

# Print classification report with the correct subset of label names
print("Classification Report:")
print(classification_report(
    y_true,
    y_pred,
    labels=unique_labels,
    target_names=[all_label_names[i] for i in unique_labels]
))


Classification Report:
                precision    recall  f1-score   support

      beignets       0.96      0.95      0.95       156
    bruschetta       0.87      0.86      0.86       168
 chicken_wings       0.98      0.95      0.96       174
chocolate_cake       0.94      0.96      0.95        51
 eggs_benedict       0.92      0.95      0.93       151
   greek_salad       0.91      0.94      0.92       125
     guacamole       0.97      0.96      0.96       158
     hamburger       0.89      0.91      0.90       146
         pizza       0.91      0.93      0.92       166
     pork_chop       0.83      0.83      0.83       144
     prime_rib       0.87      0.87      0.87       133
         ramen       0.95      0.93      0.94       151
       risotto       0.95      0.92      0.93       138
       sashimi       0.95      0.97      0.96       139

      accuracy                           0.92      2000
     macro avg       0.92      0.92      0.92      2000
  weighted avg       0.