<a href="https://colab.research.google.com/github/Fuad3004/5-Classes-of-Food-101-Dataset/blob/main/Python_file.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
import zipfile
from pathlib import Path

import requests

# Where the dataset lives on disk.
data_path=Path("data/")
image_path=data_path/"5_Classes_of_food_101"
# Temporary archive location; always removed once this cell finishes.
zip_path=data_path/"5_Classes_of_food_101.zip"
dataset_url="https://github.com/Fuad3004/5-Classes-of-Food-101-Dataset/raw/main/5_Classes_of_Food101_Dataset.zip"


if image_path.is_dir():
  # The extracted folder is used as the "already downloaded" marker.
  print(f"file exist...Skipping downloading....")

else:
  print(f"Downloading")
  # Only the parent folder is created up front: creating image_path before the
  # download has succeeded would make a failed run look like a finished one.
  data_path.mkdir(parents=True, exist_ok=True)

  try:
    request=requests.get(dataset_url, timeout=60)
    # Fail loudly on 4xx/5xx instead of writing an error page to the .zip file.
    request.raise_for_status()
    print("Downloading...")
    with open(zip_path,"wb") as f:
      f.write(request.content)

    image_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path,"r") as zip_ref:
      print("Unzipping...")
      zip_ref.extractall(image_path)

  except BaseException:
    # Drop any partial extraction so the next run retries the download
    # instead of skipping it because the directory exists.
    shutil.rmtree(image_path, ignore_errors=True)
    raise

  finally:
    # The archive is only a temporary artifact; remove it on success and failure.
    if zip_path.exists():
      os.remove(zip_path)

Downloading
Downloading...
Unzipping...


In [2]:
import os 
# exist_ok=True keeps this cell idempotent: re-running it (or Restart & Run All
# with the folder already on disk) no longer raises FileExistsError.
os.makedirs("python_file", exist_ok=True)

In [3]:
%%writefile python_file/data_setup.py

import os 
import torch
from torchvision import datasets, transforms 
from torch.utils.data import DataLoader


# Default worker count: one per CPU core. os.cpu_count() may return None when
# the count cannot be determined; fall back to 0 (load in the main process).
NUM_WORKERS=os.cpu_count() or 0
def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int=NUM_WORKERS):
  """Creates training and testing DataLoaders from image folders.

  Both directories are read with torchvision's ImageFolder and the same
  transform is applied to each.

  Args:
    train_dir: Path to the training directory.
    test_dir: Path to the testing directory.
    transform: torchvision transforms to apply to every image.
    batch_size: Number of samples per batch in each DataLoader.
    num_workers: Number of worker processes per DataLoader.

  Returns:
    A tuple of (train_dataloader, test_dataloader, class_names), where
    class_names is the list of classes found in train_dir.
  """

  # Use ImageFolder to create the datasets
  train_data= datasets.ImageFolder(train_dir, transform=transform)
  test_data= datasets.ImageFolder(test_dir, transform=transform)

  class_names= train_data.classes

  # Turn the datasets into DataLoaders
  train_dataloader= DataLoader(
                                dataset=train_data,
                                batch_size= batch_size,
                                shuffle= True,
                                num_workers= num_workers,
                                pin_memory=True
  )

  # Evaluate on the held-out test set (not the training data), in a fixed order.
  test_dataloader= DataLoader(
                                dataset=test_data,
                                batch_size= batch_size,
                                shuffle= False,
                                num_workers= num_workers,
                                pin_memory=True
  )

  return train_dataloader, test_dataloader, class_names

Writing python_file/data_setup.py


In [4]:
%%writefile python_file/tinyvggmodel.py
"""
Contains PyTorch model code to instantiate a TinyVGG model.
"""
import torch
from torch import nn 

class TinyVGG(nn.Module):
  """Creates the TinyVGG architecture.

  Replicates the TinyVGG architecture from the CNN explainer website in PyTorch.
  See the original architecture here: https://poloclub.github.io/cnn-explainer/

  Args:
    input_shape: An integer indicating number of input channels.
    hidden_units: An integer indicating number of hidden units between layers.
    output_shape: An integer indicating number of output units.
    image_size: Height/width (in pixels) of the square input images. Defaults
      to 64, which reproduces the original ``hidden_units*16*16`` classifier.

  Raises:
    ValueError: If image_size is smaller than 4, since the two 2x2 max-pool
      layers would leave no spatial features for the classifier.
  """
  def __init__(self, input_shape: int, hidden_units: int, output_shape: int,
               image_size: int = 64) -> None:
      super().__init__()
      # Every convolution below uses a 3x3 kernel with stride 1 and padding 1,
      # so height/width are preserved; only the two max-pool layers (each a
      # floor-halving) shrink the feature map: image_size -> image_size // 4.
      feature_map_size = image_size // 4
      if feature_map_size < 1:
          raise ValueError(
              f"image_size must be at least 4, got {image_size}")

      self.conv_block_1 = nn.Sequential(
          nn.Conv2d(in_channels=input_shape,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=1),
          nn.ReLU(),
          nn.Conv2d(in_channels=hidden_units,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=1),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2,
                        stride=2)
      )
      self.conv_block_2 = nn.Sequential(
          nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
          nn.ReLU(),
          nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
          nn.ReLU(),
          nn.MaxPool2d(2)
      )
      self.classifier = nn.Sequential(
          nn.Flatten(),
          # in_features is the flattened output of conv_block_2:
          # hidden_units channels of feature_map_size x feature_map_size each.
          nn.Linear(in_features=hidden_units*feature_map_size*feature_map_size,
                    out_features=output_shape)
      )

  def forward(self, x: torch.Tensor):
      """Runs x through both convolutional blocks and the classifier, returning logits."""
      x = self.conv_block_1(x)
      x = self.conv_block_2(x)
      x = self.classifier(x)
      return x

Writing python_file/tinyvggmodel.py


In [5]:
%%writefile python_file/traintest.py
import torch
from tqdm.auto import tqdm
from typing import Dict, List, Tuple


def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """Trains a PyTorch model for a single epoch.

  Puts the model in training mode and, for every batch, runs the forward
  pass, computes the loss, backpropagates and steps the optimizer.

  Args:
    model: A PyTorch model to be trained.
    dataloader: An iterable of (inputs, labels) batches that supports len().
    loss_fn: A PyTorch loss function to minimize.
    optimizer: A PyTorch optimizer that updates the model's parameters.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of (train_loss, train_acc), each averaged over the number of
    batches in the dataloader.
  """
  model.train()

  train_loss, train_acc= 0, 0

  for X, y in dataloader:

    X, y= X.to(device), y.to(device)

    # Forward pass and loss
    y_pred =model(X)

    loss= loss_fn(y_pred, y)
    train_loss +=loss.item()

    # Backward pass and parameter update
    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    # Softmax is monotonic, so the predicted class is simply the argmax of the
    # raw logits (the same way test_step computes it).
    y_pred_class= y_pred.argmax(dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  # Average the per-batch metrics over the number of batches
  train_loss =train_loss/ len(dataloader)
  train_acc = train_acc / len(dataloader)

  return train_loss, train_acc

def test_step(
              model : torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple [float, float]:

  model.eval()

  test_loss, test_acc = 0, 0

  with torch.inference_mode():
    for batch, (X,y) in enumerate(dataloader):

      X, y = X.to(device), y.to(device)

      test_pred_logits=model(X)

      loss= loss_fn(test_pred_logits,y)
      test_loss += loss.item()

      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)

  return test_loss, test_acc

  # Combine the train and test loops

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
  """Runs the full train/test loop for a PyTorch model.

  For each epoch, calls train_step() on the training DataLoader and then
  test_step() on the testing DataLoader, prints the four metrics and
  records them.

  Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A dictionary with the keys "train_loss", "train_acc", "test_loss" and
    "test_acc". Each key maps to a list holding one value per epoch, e.g.
    for epochs=2:
                 {train_loss: [2.0616, 1.0537],
                  train_acc: [0.3945, 0.3945],
                  test_loss: [1.2641, 1.5706],
                  test_acc: [0.3400, 0.2973]}
  """
  # One empty history list per tracked metric
  metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
  history = {name: [] for name in metric_names}

  for epoch_idx in tqdm(range(epochs)):
      # Train for one epoch, then evaluate on the test data
      epoch_train_loss, epoch_train_acc = train_step(
          model=model,
          dataloader=train_dataloader,
          loss_fn=loss_fn,
          optimizer=optimizer,
          device=device)
      epoch_test_loss, epoch_test_acc = test_step(
          model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

      # Report this epoch's metrics
      print(
          f"Epoch: {epoch_idx+1} | "
          f"train_loss: {epoch_train_loss:.4f} | "
          f"train_acc: {epoch_train_acc:.4f} | "
          f"test_loss: {epoch_test_loss:.4f} | "
          f"test_acc: {epoch_test_acc:.4f}"
      )

      # Record this epoch's metrics in the same order as metric_names
      epoch_values = (epoch_train_loss, epoch_train_acc,
                      epoch_test_loss, epoch_test_acc)
      for name, value in zip(metric_names, epoch_values):
          history[name].append(value)

  return history

Writing python_file/traintest.py


In [9]:
%%writefile python_file/saving.py
import torch

from pathlib import Path


def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Saves a model's state_dict() to a file inside a target directory.

  The target directory (and any missing parents) is created first.

  Args:
    model: The PyTorch model whose state_dict() is saved.
    target_dir: Directory to save the model into.
    model_name: File name for the saved model; must end with ".pt" or ".pth".

  Raises:
    AssertionError: If model_name does not end with ".pt" or ".pth".
  """
  # Make sure the destination folder exists
  output_dir = Path(target_dir)
  output_dir.mkdir(parents=True, exist_ok=True)

  # Validate the file extension, then build the full save path
  has_valid_suffix = model_name.endswith((".pth", ".pt"))
  assert has_valid_suffix, "model_name should end with '.pt' or '.pth'"
  checkpoint_path = output_dir / model_name

  # Write only the learned parameters, not the whole model object
  print(f"[INFO] Saving model to: {checkpoint_path}")
  torch.save(obj=model.state_dict(), f=checkpoint_path)
          

Overwriting python_file/saving.py


In [22]:
%%writefile python_file/evaluate.py

import os
import torch

from timeit import default_timer as timer 
from python_file import data_setup, traintest, tinyvggmodel, saving

from torchvision import transforms

# Training script: builds the DataLoaders and a TinyVGG model, trains it for
# NUM_EPOCHS epochs, reports the elapsed time and saves the trained weights.

# Setup hyperparameters
NUM_EPOCHS = 20
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.0001

# Setup directories
train_dir = "data/5_Classes_of_food_101/train"
test_dir = "data/5_Classes_of_food_101/test"

# Setup target device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Create transforms
# NOTE: TinyVGG's classifier is sized for 64x64 inputs by default, so the
# resize below must stay in sync with the model's expected image size.
data_transform = transforms.Compose([
  transforms.Resize((64, 64)),
  transforms.ToTensor()
])

# Create DataLoaders with help from data_setup.py
# Use the BATCH_SIZE hyperparameter so changing it above actually takes effect.
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=data_transform,
    batch_size=BATCH_SIZE,
    num_workers=os.cpu_count()
)

# Create model with help from tinyvggmodel.py
model = tinyvggmodel.TinyVGG(
    input_shape=3,
    hidden_units=HIDDEN_UNITS,
    output_shape=len(class_names)
).to(device)

# Set loss and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=LEARNING_RATE)

start_time = timer()

# Start training with help from traintest.py
traintest.train(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=NUM_EPOCHS,
             device=device)

end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")     

# Save the trained model's state_dict under models/
saving.save_model(model=model,
               target_dir="models",
               model_name="food101_model_with_tinyvgg.pth")



Overwriting python_file/evaluate.py
