In [2]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [3]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [4]:
# Hyperparameters
batch_size = 32
learning_rate = 0.001
num_epochs = 50

In [5]:
# Data Transformations
#تصویر ورودی رو تبدیل میکنیم به 224 پیکسل در 224 پیکسل
#بعد تبدیل به تنسور میکنیم
#و بعد نرمالایز اش میکنیم
#این نرمالایز کردن باعث میشه اعداد تو ی رنج خاص باشن و شبکمون راحتر و بهتر اموزش ببینه
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
# Load Food-101 Dataset
#دانلود داده تربن و جدا سازی پنج هزار داده
#  و اختصاص دادن چهارهزار برای ترین و هزار تا برای اعتبار سنجی
dataset = datasets.Food101(root='./data', split='train', download=True, transform=transform)
dataset = torch.utils.data.Subset(dataset, list(np.random.choice(range(len(dataset)), 5000, replace=False)))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [4000, 1000])

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

#دانلود تست و لود کردنش
test_dataset = datasets.Food101(root='./data', split='test', download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to ./data/food-101.tar.gz


100%|██████████| 5.00G/5.00G [04:11<00:00, 19.9MB/s]


Extracting ./data/food-101.tar.gz to ./data


In [8]:
#مشاهده طول  ترین لودر  ولیدیشن لودر  و تست لودر
# طول ترین لودر باید چهار هزار تقسیم بر اندازه بچ که 32 هست باشه
len(train_loader), len(val_loader), len(test_loader)

(125, 32, 790)

In [9]:
# Define Training Function with Validation
#حلقه اموزش
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        #مقادیر اولیه صفر
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        #حلقه فور برای هر بچ
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device) # Move data to the GPU
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            #مقدار لاس
            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            #مقدار درست بودن داده ها
            correct_train += (predicted == labels).sum().item()

        # Calculate the average loss and accuracy for the epoch
        avg_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct_train / total_train

        # Evaluate the model on the validation set
        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)

        # Report the training loss, training accuracy, and validation metrics at the end of the epoch
        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {avg_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

In [10]:
# Define Evaluation Function
def evaluate_model(model, data_loader, criterion):
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device) # Move data to the GPU
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
    val_loss /= len(data_loader)
    accuracy = 100 * correct / len(data_loader.dataset)
    return val_loss, accuracy

In [11]:
# Function to count trainable parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [12]:
# 1. Train from scratch (Random Weights)
#مدلی که پریترین نیست
#از افیشنت استفاده میکنیم که از لایه های کانولوشنی قوی استفاده شده
#نسخه بی 3 افیشنت بی صفر سبک تربن و بی 7 قوی ترین
model_scratch = models.efficientnet_b3(pretrained=False)
num_ftrs = model_scratch.classifier[1].in_features
#اضافه کردن یک لایه لینیر به شبکمون برای انجام دسته بندی به تعداد کلاس ها
model_scratch.classifier[1] = nn.Linear(num_ftrs, len(test_dataset.classes))
#تعداد پارامتر شبکه
num_params_scratch = count_parameters(model_scratch)
print(f'Number of trainable parameters (scratch): {num_params_scratch}')
model_scratch.to(device) # Move model to GPU



Number of trainable parameters (scratch): 10851469


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_scratch.parameters(), lr=learning_rate)

print("Training from scratch (Random Weights)")
train_model(model_scratch, train_loader, val_loader, criterion, optimizer, num_epochs)
test_loss_scratch, accuracy_scratch = evaluate_model(model_scratch, test_loader, criterion)
print(f'Test Loss: {test_loss_scratch:.4f}, Test Accuracy: {accuracy_scratch:.4f}')

Training from scratch (Random Weights)
Epoch [1/50], Training Loss: 4.7128, Training Accuracy: 1.32%, Validation Loss: 4.6515, Validation Accuracy: 1.30%
Epoch [2/50], Training Loss: 4.6146, Training Accuracy: 1.50%, Validation Loss: 4.6841, Validation Accuracy: 1.40%
Epoch [3/50], Training Loss: 4.5694, Training Accuracy: 1.57%, Validation Loss: 4.6078, Validation Accuracy: 1.30%
Epoch [4/50], Training Loss: 4.5358, Training Accuracy: 1.57%, Validation Loss: 4.6863, Validation Accuracy: 1.80%
Epoch [5/50], Training Loss: 4.5171, Training Accuracy: 1.85%, Validation Loss: 4.5219, Validation Accuracy: 1.30%
Epoch [6/50], Training Loss: 4.4862, Training Accuracy: 2.17%, Validation Loss: 4.8034, Validation Accuracy: 2.90%
Epoch [7/50], Training Loss: 4.4330, Training Accuracy: 2.65%, Validation Loss: 4.4625, Validation Accuracy: 2.90%
Epoch [8/50], Training Loss: 4.3919, Training Accuracy: 3.15%, Validation Loss: 4.4148, Validation Accuracy: 4.00%
Epoch [9/50], Training Loss: 4.3559, Trai

KeyboardInterrupt: 

 مدل از دقت پایین شروع کرده دقت اموزش میتونه افزایش پیدا کنه و به 90 هم برسه
 ولی دقت داده اعتبار سنجی از یجا بیشتر نمیشه چرا چون با این داده محدود داریم اموزش میدیم اورفیت میکنه و مدل نمیتونه به نقطه ای همگرا بشه که برا داده هایی که ندیده هم عملکرد مناسبی داشته باشه

In [14]:
# 2. Fine-tune Pretrained Model
#اینجا مدل پریترین شده است
#در اینجا مدل از وزن های تصادفی شروع نمیکنه و میره وزن های پیش اموخته رو دانلود و استفاده میکنه
#وزن های این شبکه روی ایمیج نت هزار کلاسه انجام شده
model_pretrained = models.efficientnet_b3(pretrained=True)
model_pretrained.classifier[1] = nn.Linear(num_ftrs, len(test_dataset.classes))
num_params_pretrained = count_parameters(model_pretrained)
print(f'Number of trainable parameters (pretrained): {num_params_pretrained}')
model_pretrained.to(device) # Move model to GPU

Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 199MB/s]


Number of trainable parameters (pretrained): 10851469


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [15]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_pretrained.parameters(), lr=learning_rate)

print("Fine-tuning Pretrained Model")
train_model(model_pretrained, train_loader, val_loader, criterion, optimizer, num_epochs)
test_loss_pretrained, accuracy_pretrained = evaluate_model(model_pretrained, test_loader, criterion)
print(f'Test Loss: {test_loss_pretrained:.4f}, Test Accuracy: {accuracy_pretrained:.4f}')

Fine-tuning Pretrained Model
Epoch [1/50], Training Loss: 3.5996, Training Accuracy: 19.07%, Validation Loss: 2.7374, Validation Accuracy: 31.20%
Epoch [2/50], Training Loss: 1.9156, Training Accuracy: 51.70%, Validation Loss: 2.2767, Validation Accuracy: 43.60%
Epoch [3/50], Training Loss: 0.9497, Training Accuracy: 75.05%, Validation Loss: 2.2850, Validation Accuracy: 45.30%
Epoch [4/50], Training Loss: 0.4842, Training Accuracy: 86.83%, Validation Loss: 2.3827, Validation Accuracy: 45.90%
Epoch [5/50], Training Loss: 0.3066, Training Accuracy: 91.42%, Validation Loss: 2.6243, Validation Accuracy: 44.80%
Epoch [6/50], Training Loss: 0.2010, Training Accuracy: 94.30%, Validation Loss: 2.7728, Validation Accuracy: 44.50%


KeyboardInterrupt: 

تفاوت مدل پریترین شده در دقت داده ترین و دقت داده اعتبار سنجی خیلی بهتر شده

In [16]:
# 3. Freeze some layers and Fine-tune others
#مدل پریترین هست ولی یکسری لایه هارو منجمد میکنیم
model_frozen = models.efficientnet_b3(pretrained=True)
                        #فیچر ینی لایه های کانولوشنیه و ویژگی استخراج مینکه
                        #دو نقطه منهای یک ینی همه بلوک ها به غیر از بلوک اخر
                        #به جز بلوک اخر همه لایه ها منجمد و فقط لایه اخر اموزش میبینه
for layer in model_frozen.features[:-1]:
    for param in layer.parameters():
      #برای اینکه یک پارامتری اموزش داده نشه اینو فالس میذاریم
        param.requires_grad = False

model_frozen.classifier[1] = nn.Linear(num_ftrs, len(test_dataset.classes))
num_params_frozen = count_parameters(model_frozen)
print(f'Number of trainable parameters (frozen): {num_params_frozen}')
model_frozen.to(device) # Move model to GPU

Number of trainable parameters (frozen): 748133


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

در اینجا باید حواسمون باشه که منجمد کردن لایه های زیاد باعث اندرفیت میشه

In [17]:
# Only parameters of final layer are being updated
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_frozen.classifier[1].parameters(), lr=learning_rate)

print("Freezing some layers and Fine-tuning others")
train_model(model_frozen, train_loader, val_loader, criterion, optimizer, num_epochs)
test_loss_frozen, accuracy_frozen = evaluate_model(model_frozen, test_loader, criterion)
print(f'Test Loss: {test_loss_frozen:.4f}, Test Accuracy: {accuracy_frozen:.4f}')

Freezing some layers and Fine-tuning others
Epoch [1/50], Training Loss: 4.2876, Training Accuracy: 10.38%, Validation Loss: 3.9248, Validation Accuracy: 22.70%
Epoch [2/50], Training Loss: 3.3470, Training Accuracy: 36.35%, Validation Loss: 3.5078, Validation Accuracy: 26.70%
Epoch [3/50], Training Loss: 2.7967, Training Accuracy: 47.67%, Validation Loss: 3.2543, Validation Accuracy: 28.80%
Epoch [4/50], Training Loss: 2.4224, Training Accuracy: 53.20%, Validation Loss: 3.1082, Validation Accuracy: 30.90%
Epoch [5/50], Training Loss: 2.1598, Training Accuracy: 58.00%, Validation Loss: 3.0333, Validation Accuracy: 30.50%
Epoch [6/50], Training Loss: 1.9322, Training Accuracy: 62.25%, Validation Loss: 2.9289, Validation Accuracy: 31.30%
Epoch [7/50], Training Loss: 1.7514, Training Accuracy: 66.00%, Validation Loss: 2.8805, Validation Accuracy: 32.70%
Epoch [8/50], Training Loss: 1.6189, Training Accuracy: 67.35%, Validation Loss: 2.8609, Validation Accuracy: 33.30%
Epoch [9/50], Traini

KeyboardInterrupt: 

در اینجا مدل ما زودتر ایپاک رو طی میکنه چون لایه های زیادی منجمده و طبیغتا از درصد دقت پایین تری ترین میشه

In [18]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [19]:
from torchinfo import summary

In [21]:
summary(model_frozen,(batch_size,3,224,224))

Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [32, 101]                 --
├─Sequential: 1-1                                       [32, 1536, 7, 7]          --
│    └─Conv2dNormActivation: 2-1                        [32, 40, 112, 112]        --
│    │    └─Conv2d: 3-1                                 [32, 40, 112, 112]        (1,080)
│    │    └─BatchNorm2d: 3-2                            [32, 40, 112, 112]        (80)
│    │    └─SiLU: 3-3                                   [32, 40, 112, 112]        --
│    └─Sequential: 2-2                                  [32, 24, 112, 112]        --
│    │    └─MBConv: 3-4                                 [32, 24, 112, 112]        (2,298)
│    │    └─MBConv: 3-5                                 [32, 24, 112, 112]        (1,206)
│    └─Sequential: 2-3                                  [32, 32, 56, 56]          --
│    │    └─MBConv: 3-6                    

ساختار مدل

In [None]:
# Compare Results
print("Comparison of Different Training Approaches:")
print(f'From Scratch - Test Loss: {test_loss_scratch:.4f}, Test Accuracy: {accuracy_scratch:.4f}, Trainable Parameters: {num_params_scratch}')
print(f'Pretrained Fine-tuning - Test Loss: {test_loss_pretrained:.4f}, Test Accuracy: {accuracy_pretrained:.4f}, Trainable Parameters: {num_params_pretrained}')
print(f'Freezing and Fine-tuning - Test Loss: {test_loss_frozen:.4f}, Test Accuracy: {accuracy_frozen:.4f}, Trainable Parameters: {num_params_frozen}')