### Imports 

In [74]:
# Imports
import torch as pt
from torch import nn

print(f"Torch version: {pt.__version__}")

# if pt.cuda.is_available():
#     device = 'cuda'
# if pt.backends.mps.is_available():
#     device = 'mps'
# else:
#     device= 'cpu'
device = 'cpu'
print(f'device: {device}')

Torch version: 2.0.1
device: cpu


### Downloading a custom dataset

In [75]:
from pathlib import Path
import importLib
from sys import path
import zipfile


# Create directory
data_path = Path(f"{path[0]}/data")
image_path = data_path / 'pizza_steak_sushi'
if image_path.exists():
    print('Already exists')
else:
    image_path.mkdir(parents=True)


# Download pizza, steak and sushi data
# open skapar en zip fil som sedan fylls genom request
importLib.import_from_github('https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip',directory=data_path)
with zipfile.ZipFile(data_path/'pizza_steak_sushi.zip', 'r') as zip_ref:
    print('Unzipping pizza, steak and sushi data')
    zip_ref.extractall(image_path)
Path.unlink(data_path/'pizza_steak_sushi.zip')


Already exists
/Users/gustavgamstedt/Desktop/github to hemma/PyTorch/04/data/pizza_steak_sushi.zip doesn't exist, download
Unzipping pizza, steak and sushi data


In [76]:
# Setup training and testing paths
train_dir = image_path / 'train'
test_dir = image_path / 'test'

train_dir, test_dir

(PosixPath('/Users/gustavgamstedt/Desktop/github to hemma/PyTorch/04/data/pizza_steak_sushi/train'),
 PosixPath('/Users/gustavgamstedt/Desktop/github to hemma/PyTorch/04/data/pizza_steak_sushi/test'))

### Create dataset and dataloaders

In [77]:
from torchvision import transforms
simple_transform = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor()
])

In [78]:
from torch.utils.data import DataLoader
from torchvision import datasets

train_dataset = datasets.ImageFolder(root = train_dir, transform=simple_transform)
test_dataset = datasets.ImageFolder(root = test_dir, transform=simple_transform)

In [79]:
import os
BATCH_SIZE = 32
NUM_WORKERS = round(os.cpu_count()*(3/4))
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    num_workers=NUM_WORKERS,
    shuffle=True
)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    num_workers=NUM_WORKERS,
    shuffle=False
)

### Create model

In [80]:
from torch import nn

class ModelWithoutAugmentation(nn.Module):
    def __init__(self, input_features:int,output_features:int, hidden_units:int=10):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(input_features, hidden_units,
                      kernel_size=3, stride=1, padding=0),
            nn.ReLU(),


            nn.Conv2d(hidden_units, hidden_units,
                      kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units,
                      kernel_size=3, stride=1, padding=0),
            nn.ReLU(),


            nn.Conv2d(hidden_units, hidden_units,
                      kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_units*13*13, output_features)
        )
    def forward(self, X:pt.Tensor) -> pt.Tensor:
        X_change = self.conv_block_1(X)
        X_change = self.conv_block_2(X_change)
        # print(X_change.shape)
        X_change = self.classifier(X_change)
        return X_change

In [81]:
pt.manual_seed(42)
model0 = ModelWithoutAugmentation(input_features=3, output_features=len(train_dataset.classes), hidden_units=10).to(device)
model0

ModelWithoutAugmentation(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=1690, out_features=3, bias=True)
  )
)

#### Testing model with random data

In [82]:
imgs, labels = next(iter(train_dataloader))
print(imgs.shape, len(labels))
# model0(imgs[0].unsqueeze(0))
model0(imgs.to(device))

torch.Size([32, 3, 64, 64]) 32


tensor([[ 2.0795e-02, -1.9510e-03,  9.5212e-03],
        [ 1.8440e-02,  2.4668e-03,  6.6609e-03],
        [ 1.7695e-02,  1.0282e-03,  9.4975e-03],
        [ 2.4446e-02, -3.3489e-03,  9.5877e-03],
        [ 1.9939e-02,  6.9129e-04,  1.0778e-02],
        [ 2.1281e-02,  2.0434e-03,  5.0047e-03],
        [ 2.0996e-02,  1.6414e-04,  1.2481e-02],
        [ 2.1566e-02, -1.9607e-03,  9.7175e-03],
        [ 2.4500e-02, -4.7904e-03,  8.5394e-03],
        [ 2.0239e-02, -4.7979e-04,  1.0907e-02],
        [ 2.2219e-02, -4.1815e-04,  9.8173e-03],
        [ 2.2318e-02, -2.1642e-03,  9.4428e-03],
        [ 2.1851e-02, -3.7225e-03,  8.3784e-03],
        [ 2.2881e-02, -1.7559e-03,  1.0299e-02],
        [ 2.1635e-02, -4.3995e-03,  9.4989e-03],
        [ 2.2101e-02, -4.1470e-03,  9.3903e-03],
        [ 2.1226e-02, -4.4215e-03,  1.1476e-02],
        [ 2.1698e-02, -2.7458e-03,  8.4966e-03],
        [ 1.9974e-02, -3.2037e-07,  8.4496e-03],
        [ 1.8308e-02,  1.6378e-03,  8.5490e-03],
        [ 2.0768e-02

### Summarize a model

In [83]:
try:
    import torchinfo
except ModuleNotFoundError:
    print('Module not found, installing module')
    !pip3 install torchinfo
    import torchinfo

In [84]:
torchinfo.summary(model0, input_size=[32,3,64,64],device=device)

Layer (type:depth-idx)                   Output Shape              Param #
ModelWithoutAugmentation                 [32, 3]                   --
├─Sequential: 1-1                        [32, 10, 30, 30]          --
│    └─Conv2d: 2-1                       [32, 10, 62, 62]          280
│    └─ReLU: 2-2                         [32, 10, 62, 62]          --
│    └─Conv2d: 2-3                       [32, 10, 60, 60]          910
│    └─ReLU: 2-4                         [32, 10, 60, 60]          --
│    └─MaxPool2d: 2-5                    [32, 10, 30, 30]          --
├─Sequential: 1-2                        [32, 10, 13, 13]          --
│    └─Conv2d: 2-6                       [32, 10, 28, 28]          910
│    └─ReLU: 2-7                         [32, 10, 28, 28]          --
│    └─Conv2d: 2-8                       [32, 10, 26, 26]          910
│    └─ReLU: 2-9                         [32, 10, 26, 26]          --
│    └─MaxPool2d: 2-10                   [32, 10, 13, 13]          --
├─Sequentia

### Create train and test loop functions

In [85]:
from torch.utils.data import DataLoader
def train_step(model: pt.nn.Module, 
               dataloader:DataLoader, 
               loss_fn: pt.nn.Module, 
               optimizer:pt.optim.Optimizer, 
               device:pt.device, 
               show:bool=False):
    """Performs a training step with model trying to learn on data_loader

    args:
        model: the model which will be trained on
        dataloader: A generator like loader for the data
        optimizer: Optimizer which optimizes the code through gradient descend
        loss_fn: function which calculates how far from the right answer each of the predictions were
        accuracy_fn: function which calculates how meny predictions were right
        device: chosen device for the neural network to run on (cpu/gpu/tpu)
        show: if true display the loss and acc in console 
        
    returns:
        (loss, accuracy)"""
    # Put model in training mode
    model.train()
     
    # Setup train loss and train accuracy values
    train_loss, train_acc = 0,0

    # Loop through data loader batches
    for X,y in dataloader:
        # Send data to target device
        X,y = X.to(device), y.to(device)

        y_logits = model(X)
        
        loss = loss_fn(y_logits, y)
        train_loss+=loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_preds = pt.argmax(pt.softmax(y_logits, dim=1), dim=1) # Softmax is actually unnecessary, but can be useful for visualization and also to give completeness
        train_acc += (y_preds == y).sum().item()/len(y_preds)
    train_loss /= len(dataloader)
    train_acc  /= len(dataloader)

    if show:
        print(f'Train loss: {train_loss} | Train acc: {train_acc}')
    return train_loss, train_acc

In [86]:
def test_step(model: pt.nn.Module, 
              dataloader:DataLoader, 
              loss_fn: pt.nn.Module, 
              device:pt.device, 
              show:bool=False):
    """Performs a testing loop step on model going over data_loader.

    args:
        model: the model which will be trained on
        dataloader: A generator like loader for the data
        loss_fn: function which calculates how far from the right answer each of the predictions were
        accuracy_fn: function which calculates how meny predictions were right
        device: chosen device for the neural network to run on (cpu/gpu/tpu)
        show: if true display the loss and acc in console 

    returns:
        (loss, accuracy)"""
    test_acc, test_loss = 0,0
    
    model.eval()
    with pt.inference_mode():
        for X,y in dataloader:
            X,y = X.to(device), y.to(device)
            y_logits = model(X)
            loss = loss_fn(y_logits, y)
            test_loss+=loss.item()

            y_preds = pt.argmax(pt.softmax(y_logits, dim=1), dim=1)
            test_acc += (y_preds== y).sum().item()/len(y_preds)
    test_loss /= len(dataloader)
    test_acc  /= len(dataloader)
    
    print(f'Test loss: {test_loss} | Test acc: {test_acc}') if show else None
    return test_loss, test_acc

In [87]:

from tqdm.notebook import tqdm
def train(epochs:int, model:pt.nn.Module, train_dataloader:DataLoader, test_dataloader:DataLoader, loss_fn, optimizer:pt.optim.Optimizer, device:pt.device,show:bool):
    # Create an empty dictionary to hold results in
    results = {
        "train_loss":[], 
        "train_acc":[], 
        "test_loss":[], 
        "test_acc":[]
        }
    
    for epoch in tqdm(range(epochs)):
        print(f'Epoch: {epoch}') if show else None
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
            show=show)
            
        test_loss, test_acc = test_step(
            model=model, 
            dataloader= test_dataloader, 
            loss_fn= loss_fn, 
            device= device, 
            show=show
            )
        
        # Append our values to results
        results["train_loss"].append(train_loss); results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss); results["test_acc"].append(test_acc)

In [88]:
pt.manual_seed(42)
pt.cuda.manual_seed(42)

# Set number of epochs
NUM_EPOCHS = 5

# Recreate an instance of TinyVGG
model_0 = ModelWithoutAugmentation(3, len(train_dataset.classes), 10).to(device)

loss_fn = nn.CrossEntropyLoss()

optimizer = pt.optim.Adam(params=model_0.parameters(), lr=0.001)

from ml_funcs import Timer
timer = Timer()

# Train model_0
model_0_results = train(
    epochs=5,
    model=model_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    show=True)
timer.show_as_print()


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 0


In [59]:
from ml_funcs import Model_operations
Model_operations.eval_model(model_0, test_dataloader, loss_fn, device)

{'model_name': 'ModelWithoutAugmentation',
 'model_loss': 1.0976074934005737,
 'model_acc': 0.2604166666666667}