In [15]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torchinfo import summary

from GoingModular import data_setup, engine

device = "cuda" if torch.cuda.is_available() else "cpu"


In [16]:
# EfficientNet_B0_Weights 

weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT 
# weights
auto_transforms=weights.transforms()
train_dataloader,test_dataloader,class_names=data_setup.create_dataloaders(train_dir=r'../data/pizza_steak_sushi/test',
                                                                           test_dir= r'../data/pizza_steak_sushi/train',
                                                                           transform=auto_transforms,
                                                                           batch_size=32)
train_dataloader,test_dataloader,class_names

model=torchvision.models.efficientnet_b0(weights=weights).to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to C:\Users\Jakub Machura/.cache\torch\hub\checkpoints\efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 104MB/s] 


In [19]:
model.state_dict
summary(model=model,
        input_size=(32,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

We can freeze all of the layers/parameters in the features section by setting the attribute requires_grad=False
For parameters with requires_grad=False, PyTorch doesn't track gradient updates and in turn, these parameters won't be changed by our optimizer during training.

In essence, a parameter with requires_grad=False is "untrainable" or "frozen" in place.

In [23]:
for param in model.features.parameters():
    param.requires_grad = False

output_shape=len(class_names)

model.classifier=torch.nn.Sequential(
    torch.nn.Dropout(p=0.2,inplace=True),
    torch.nn.Linear(in_features=1280,
                    out_features=output_shape,
                    bias=True
                    )
)
# # Do a summary *after* freezing the features and changing the output classifier layer (uncomment for actual output)
summary(model, 
        input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape" (batch_size, color_channels, height, width)
        verbose=0,
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])


Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [27]:
loss_fn=nn.CrossEntropyLoss()
optim=torch.optim.Adam(model.parameters(),lr=0.01)


from timeit import default_timer as Timer

star_time=Timer()

results=engine.train(model=model,
                     train_dataloader=train_dataloader,
                     test_dataloader=test_dataloader,
                     optimizer=optim,
                     loss_fn=loss_fn,
                     epochs=10,
                     device=device)
end_time=Timer()

print(f"[INFO] Total training time: {end_time-star_time:.3f} seconds")


 10%|█         | 1/10 [00:05<00:52,  5.79s/it]

Epoch: 1 | train_loss: 0.3532 | train_acc: 0.8239 | test_loss: 0.3279 | test_acc: 0.8867


 20%|██        | 2/10 [00:11<00:46,  5.82s/it]

Epoch: 2 | train_loss: 0.0700 | train_acc: 0.9896 | test_loss: 0.5646 | test_acc: 0.7305


 30%|███       | 3/10 [00:17<00:42,  6.07s/it]

Epoch: 3 | train_loss: 0.0925 | train_acc: 0.9688 | test_loss: 0.4976 | test_acc: 0.7461


 40%|████      | 4/10 [00:23<00:36,  6.03s/it]

Epoch: 4 | train_loss: 0.0484 | train_acc: 0.9896 | test_loss: 0.4078 | test_acc: 0.7695


 50%|█████     | 5/10 [00:29<00:29,  5.83s/it]

Epoch: 5 | train_loss: 0.0193 | train_acc: 1.0000 | test_loss: 0.4116 | test_acc: 0.8828


 60%|██████    | 6/10 [00:34<00:22,  5.60s/it]

Epoch: 6 | train_loss: 0.0846 | train_acc: 0.9896 | test_loss: 0.4270 | test_acc: 0.8711


 70%|███████   | 7/10 [00:40<00:17,  5.69s/it]

Epoch: 7 | train_loss: 0.0111 | train_acc: 1.0000 | test_loss: 0.4082 | test_acc: 0.8711


 80%|████████  | 8/10 [00:46<00:11,  5.77s/it]

Epoch: 8 | train_loss: 0.0283 | train_acc: 1.0000 | test_loss: 0.4129 | test_acc: 0.8711


 90%|█████████ | 9/10 [00:51<00:05,  5.62s/it]

Epoch: 9 | train_loss: 0.1046 | train_acc: 0.9697 | test_loss: 0.4277 | test_acc: 0.8750


100%|██████████| 10/10 [00:57<00:00,  5.75s/it]

Epoch: 10 | train_loss: 0.0106 | train_acc: 1.0000 | test_loss: 0.4363 | test_acc: 0.8555
[INFO] Total training time: 57.525 seconds





In [30]:
from torch.utils.tensorboard import SummaryWriter

writer=SummaryWriter()


modify standart train function used before to use SummaryWtiretr

In [38]:
from typing import Dict, List
from tqdm.auto import tqdm

from GoingModular.engine import train_step, test_step

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:

    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        
        
        
        ### New: Experiment tracking ###
        # Add loss results to SummaryWriter

        writer.add_scalars(main_tag="Loss",
        tag_scalar_dict={"train_loss":train_loss,
        "test_loss":test_loss},
        global_step=epoch
        )
        writer.add_scalars(main_tag="Accuracy",
        tag_scalar_dict={"train_acc":train_acc,
        "test_acc":test_acc},
        global_step=epoch
        )

        writer.add_graph(model=model,
                        # Pass in an example input
                        input_to_model=torch.randn(32,3,224,224).to(device)
        )
        
    writer.close()

    return results


In [40]:
# Note: Not using engine.train() since the original script isn't updated to use writer

results = train(model=model,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optim,
                loss_fn=loss_fn,
                epochs=5,
                device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.0139 | train_acc: 1.0000 | test_loss: 0.4932 | test_acc: 0.7461


 20%|██        | 1/5 [00:09<00:38,  9.64s/it]

Epoch: 2 | train_loss: 0.0883 | train_acc: 0.9697 | test_loss: 0.4444 | test_acc: 0.8828


 40%|████      | 2/5 [00:19<00:28,  9.58s/it]

Epoch: 3 | train_loss: 0.0093 | train_acc: 1.0000 | test_loss: 0.4521 | test_acc: 0.8438


 60%|██████    | 3/5 [00:28<00:19,  9.69s/it]

Epoch: 4 | train_loss: 0.0104 | train_acc: 1.0000 | test_loss: 0.5085 | test_acc: 0.8047


 80%|████████  | 4/5 [00:39<00:09,  9.91s/it]

Epoch: 5 | train_loss: 0.0525 | train_acc: 0.9792 | test_loss: 0.4914 | test_acc: 0.8203


100%|██████████| 5/5 [00:49<00:00,  9.97s/it]


In [43]:
%load_ext tensorboard
%tensorboard --logdir runs