In [1]:
data_dir = r"C:\Users\Nasra\OneDrive\سطح المكتب\project AI\Ants-beesClassifier\Data\raw"



In practice, very few people train an entire Convolutional Network from scratch (with random initialization), because it is relatively rare to have a dataset of sufficient size. Instead, it is common to pretrain a ConvNet on a very large dataset (e.g. ImageNet, which contains 1.2 million images with 1000 categories), and then use the ConvNet either as an initialization or a fixed feature extractor for the task of interest.



In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory

cudnn.benchmark = True
plt.ion()   # interactive mode

<contextlib.ExitStack at 0x14dc5e009b0>

## Load Data

We will use torchvision and torch.utils.data packages for loading the
data.

The problem we're going to solve today is to train a model to classify
**ants** and **bees**. We have about 120 training images each for ants and bees.
There are 75 validation images for each class. Usually, this is a very
small dataset to generalize upon, if trained from scratch. Since we
are using transfer learning, we should be able to generalize reasonably
well.

This dataset is a very small subset of imagenet.

.. Note ::
   Download the data from
   [here](https://download.pytorch.org/tutorial/hymenoptera_data.zip)
   and extract it to the current directory.



In [4]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}



 ImageFolder is a generic data loader where the images are arranged in this way:

root/dog/xxx.png

root/dog/xxy.png

root/dog/xxz.png

root/cat/123.png

root/cat/nsdf3.png

root/cat/asd932_.png

In [5]:
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}

In [6]:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}

In [7]:
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
dataset_sizes

{'train': 243, 'val': 153}

In [8]:
dataset_sizes["train"]

243

In [9]:
class_names = image_datasets['train'].classes
class_names

['ants', 'bees']

In [10]:
models.list_models()

['alexnet',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'deeplabv3_mobilenet_v3_large',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_v2_l',
 'efficientnet_v2_m',
 'efficientnet_v2_s',
 'fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcn_resnet101',
 'fcn_resnet50',
 'fcos_resnet50_fpn',
 'googlenet',
 'inception_v3',
 'keypointrcnn_resnet50_fpn',
 'lraspp_mobilenet_v3_large',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'maxvit_t',
 'mc3_18',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'mvit_v1_b',
 'mvit_v2_s',
 'quantized_googlenet',
 '

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
device

device(type='cpu')

## Finetuning the ConvNet

Load a pretrained model and reset final fully connected layer.




In [13]:
models.list_models()

['alexnet',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'deeplabv3_mobilenet_v3_large',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_v2_l',
 'efficientnet_v2_m',
 'efficientnet_v2_s',
 'fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcn_resnet101',
 'fcn_resnet50',
 'fcos_resnet50_fpn',
 'googlenet',
 'inception_v3',
 'keypointrcnn_resnet50_fpn',
 'lraspp_mobilenet_v3_large',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'maxvit_t',
 'mc3_18',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'mvit_v1_b',
 'mvit_v2_s',
 'quantized_googlenet',
 '

In [14]:
model_ft = models.resnet18(weights='IMAGENET1K_V1')

In [15]:
model_ft.modules

<bound method Module.modules of ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

In [16]:
num_ftrs = model_ft.fc.in_features
num_ftrs

512

In [17]:
# Here the size of each output sample is set to 2.
model_ft.fc = nn.Linear(num_ftrs, 2)

# Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

len(class_names)

2

In [18]:
model_ft = model_ft.to(device)

setting up the loss function, optimizer, and learning rate scheduler for a PyTorch model

This line is defining the loss function for the model. CrossEntropyLoss is commonly used for classification tasks. It combines LogSoftMax and Negative Log Likelihood loss in one single class. It's useful when training a classification problem with multiple classes

In [19]:
criterion = nn.CrossEntropyLoss()

This line is defining the optimizer for the model. Stochastic Gradient Descent (SGD) is a popular optimization algorithm in deep learning. It updates the model parameters iteratively in the direction that minimally reduces the loss function. The learning rate (lr) is set to 0.001, and momentum is set to 0.9. Momentum is a term added to the weight update that helps accelerate SGD in the relevant direction and dampens oscillations

In [20]:
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

This line is defining the learning rate scheduler for the model. A learning rate scheduler adjusts the learning rate according to a schedule. In this case, a StepLR scheduler is used, which decays the learning rate by a factor of gamma every step_size epochs. Here, step_size is set to 7, and gamma is set to 0.1. This means that the learning rate will be multiplied by 0.1 every 7 epochs

In [21]:
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

## Training the model

Now, let's write a general function to train a model. Here, we will
illustrate:

-  Scheduling the learning rate
-  Saving the best model

In the following, parameter ``scheduler`` is an LR scheduler object from
``torch.optim.lr_scheduler``.


---




This code block is creating a temporary directory and saving the state of a PyTorch model to a file within that directory. It also initializes a variable to keep track of the best accuracy achieved during training.



---

    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0


---
with **TemporaryDirectory()** as tempdir: This line creates a temporary directory. The with statement is used here to ensure that the directory is properly cleaned up afterwards.

 The **TemporaryDirectory()** function from the tempfile module generates a temporary directory in a safe way, and it's automatically deleted when it's no longer needed 1.

**best_model_params_path = os.path.join(tempdir, 'best_model_params.pt'):**
This line joins the path of the temporary directory with the filename 'best_model_params.pt' to create a full path for the file where the model's parameters will be saved.

**torch.save(model.state_dict(), best_model_params_path):** This line saves the state of the model to a file. **The state_dict()** function returns a dictionary containing a whole state of the model. This includes the model parameters (weights and biases), the optimizer state, the epoch number, etc.

 The **torch.save()** function is then used to save this state to a file 3.

**best_acc = 0.0:** This line initializes a variable to keep track of the best accuracy achieved during training.


---




The code you've provided is performing a forward pass through a model, computing the loss, and updating the model parameters in the case of training. Here's a detailed explanation:


---


    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()


---


**with torch.set_grad_enabled(phase == 'train'):**:
 This line sets the computation graph to build during the forward pass. If phase is 'train', the computation graph will be built, allowing us to perform backpropagation during the backward pass.

  **If phase is not 'train' (i.e., it's 'val' or 'test')**, no computation graph will be built, which is useful when we want to evaluate the model without tracking the intermediate computations 1.

**outputs = model(inputs)**: This line performs a forward pass through the model with the given inputs. The model's parameters are used to compute the output.

**_, preds = torch.max(outputs, 1):** This line finds the maximum value along dimension 1 of the outputs tensor (which corresponds to the highest predicted class probabilities) and returns the indices of the maximum values. The underscore _ is a convention in Python to indicate that we're ignoring the first returned value (the actual maximum values), and we're only interested in the second returned value (the indices of the maximums).

**loss = criterion(outputs, labels):** This line computes the loss between the model's outputs and the true labels. The criterion (loss function) measures how well the model's predictions match the true labels.

if phase == 'train':: This line checks if the current phase is training. If it is, the model parameters are updated based on the computed loss.

**loss.backward():** This line performs backpropagation, which computes the gradients of the loss with respect to the model parameters.

**optimizer.step():** This line updates the model parameters using the computed gradients. The optimizer determines how the model parameters should be updated in order to minimize the loss


---



In [22]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    #save the best model
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model

### Train and evaluate

It should take around 15-25 min on CPU. On GPU though, it takes less than a
minute.




In [26]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=5)

Epoch 0/4
----------
train Loss: 0.3572 Acc: 0.8354
val Loss: 0.3671 Acc: 0.8889

Epoch 1/4
----------
train Loss: 0.4253 Acc: 0.8148
val Loss: 0.8751 Acc: 0.7451

Epoch 2/4
----------
train Loss: 0.3870 Acc: 0.8683
val Loss: 0.3837 Acc: 0.8497

Epoch 3/4
----------
train Loss: 0.5249 Acc: 0.7901
val Loss: 0.2734 Acc: 0.8758

Epoch 4/4
----------
train Loss: 0.3747 Acc: 0.8354
val Loss: 0.3544 Acc: 0.8758

Training complete in 2m 17s
Best val Acc: 0.888889


In [27]:
model_conv = torchvision.models.resnet18(weights='IMAGENET1K_V1')
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

### Train and evaluate

On CPU this will take about half the time compared to previous scenario.
This is expected as gradients don't need to be computed for most of the
network. However, forward does need to be computed.




In [28]:
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 0.6665 Acc: 0.6461
val Loss: 0.2532 Acc: 0.9216

Epoch 1/9
----------
train Loss: 0.4197 Acc: 0.8230
val Loss: 0.2093 Acc: 0.9346

Epoch 2/9
----------
train Loss: 0.5221 Acc: 0.7572
val Loss: 0.2172 Acc: 0.9346

Epoch 3/9
----------
train Loss: 0.5682 Acc: 0.7531
val Loss: 0.4860 Acc: 0.8039

Epoch 4/9
----------
train Loss: 0.3473 Acc: 0.8560
val Loss: 0.3170 Acc: 0.8627

Epoch 5/9
----------
train Loss: 0.6142 Acc: 0.7613
val Loss: 0.3432 Acc: 0.8693

Epoch 6/9
----------
train Loss: 0.4555 Acc: 0.8066
val Loss: 0.1726 Acc: 0.9608

Epoch 7/9
----------
train Loss: 0.3578 Acc: 0.8189
val Loss: 0.1777 Acc: 0.9542

Epoch 8/9
----------
train Loss: 0.3814 Acc: 0.8230
val Loss: 0.1808 Acc: 0.9477

Epoch 9/9
----------
train Loss: 0.3443 Acc: 0.8765
val Loss: 0.1727 Acc: 0.9542

Training complete in 2m 49s
Best val Acc: 0.960784


## Inference on custom images

Use the trained model to make predictions on custom images and visualize
the predicted class labels along with the images.




In [29]:
def visualize_model_predictions(model,img_path):
    was_training = model.training
    model.eval()

    img = Image.open(img_path)
    img = data_transforms['val'](img)
    img = img.unsqueeze(0)
    img = img.to(device)

    with torch.no_grad():
        outputs = model(img)
        _, preds = torch.max(outputs, 1)

        ax = plt.subplot(2,2,1)
        ax.axis('off')
        ax.set_title(f'Predicted: {class_names[preds[0]]}')
        imshow(img.cpu().data[0])

        model.train(mode=was_training)

In [30]:
def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)

In [36]:
# الحل الصحيح - استخدم raw string للمسار
import os

# خيار 1: raw string
img_path = r'C:\Users\Nasra\OneDrive\سطح المكتب\project AI\Ants-beesClassifier\Data\raw\val\ants\800px-Meat_eater_ant_qeen_excavating_hole.jpgg'

# خيار 2: forward slashes (أسهل)
# img_path = 'C:/Users/Nasra/OneDrive/سطح المكتب/project AI/Ants-beesClassifier/Data/valduition/bees/OIP (1).jpeg'

# خيار 3: os.path.join (الأفضل)
#img_path = os.path.join('"C:\Users\Nasra\OneDrive\سطح المكتب\project AI\project AI222\valduition\bees\OIP.jpeg"')

visualize_model_predictions(
    model_conv,
    img_path=img_path
)

plt.ioff()
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Nasra\\OneDrive\\سطح المكتب\\project AI\\Ants-beesClassifier\\Data\\raw\\val\\ants\\800px-Meat_eater_ant_qeen_excavating_hole.jpgg'

In [45]:
visualize_model_predictions(
    model_conv,
    img_path='C:\Users\Nasra\OneDrive\سطح المكتب\project AI\Ants-beesClassifier\Data\valduition\ants\download.jpeg'
)

plt.ioff()
plt.show()

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (2932952506.py, line 3)

In [46]:
torch.save(model_conv, "ant_bee_model2.pt")


In [47]:
torch.save(model_conv.state_dict(), "ant_bee_model.pt")

In [48]:
model_c = torch.load("ant_bee_model.pt")

In [52]:

# الحل الصحيح - استخدم raw string
img_path = r"C:\Users\Nasra\OneDrive\سطح المكتب\project AI\Ants-beesClassifier\Data\valduition\bees\OIP.jpeg"
img = Image.open(img_path)
img = data_transforms['val'](img)
img = img.unsqueeze(0)
img = img.to(device)

with torch.no_grad():
    outputs = model_conv(img)
    _, preds = torch.max(outputs, 1)
    print(f'Predicted: {class_names[preds[0]]}')

Predicted: bees
