In [1]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
import torch
import os
import torchvision
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from google.colab import drive
warnings.filterwarnings("ignore")
drive.mount('/content/drive')

Mounted at /content/drive


# Get mean and STD of RGB channel for Normalize

In [2]:
# Initialize sums and counts
mean_sum = np.zeros(3)  # For R, G, B
std_sum = np.zeros(3)
pixel_count = 0


dataset_path = "/content/drive/MyDrive/flowers/"
folders = ['train','test']

for folder in folders:
  folder_path = os.path.join(dataset_path+folder)
  for class_name in os.listdir(folder_path):
    class_path = os.path.join(folder_path, class_name)

    for image in os.listdir(class_path):
      image_path = os.path.join(class_path, image)
      image = Image.open(image_path).convert("RGB")  # Ensure RGB format
      image_np = np.array(image) / 255.0 # Normalize pixel values to [0, 1]

      pixel_count += image_np.shape[0] * image_np.shape[1]  # Total pixels
      mean_sum += np.sum(image_np, axis=(0, 1))  # Sum for each channel
      std_sum += np.sum(image_np ** 2, axis=(0, 1))  # Sum of squares for each channel

# Calculate mean and std
mean = mean_sum / pixel_count
std = np.sqrt(std_sum / pixel_count - mean ** 2)

In [3]:
print(mean, std)

[0.4666734  0.42472738 0.30415504] [0.30175002 0.27238393 0.29652723]


In [4]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),  # Slight rotation for variety
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

In [5]:
img_dataset = {}

img_dataset['train'] = torchvision.datasets.ImageFolder(root='/content/drive/MyDrive/flowers/train', transform=train_transform)
img_dataset['test'] = torchvision.datasets.ImageFolder(root='/content/drive/MyDrive/flowers/test', transform=test_transform)

In [6]:
class_names = img_dataset['train'].classes
print(class_names)

['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


In [7]:
class_idx = img_dataset['train'].class_to_idx
print(class_idx)

{'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}


In [8]:
train_loader = DataLoader(img_dataset['train'], batch_size=64, shuffle=True)
test_loader = DataLoader(img_dataset['test'], batch_size=64, shuffle=False)

In [9]:
data, label = next(iter(train_loader))
print(data.shape, label.shape)

torch.Size([64, 3, 224, 224]) torch.Size([64])


In [10]:
model = torchvision.models.efficientnet_b0(pretrained=True)
model

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 94.2MB/s]


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [11]:
num_of_input = model.classifier[1].in_features
num_of_input

1280

In [24]:
model.classifier = nn.Sequential(
    nn.Linear(num_of_input, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 5)
)

model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [25]:
for param in model.parameters():
    param.requires_grad = False

for param in model.classifier.parameters():
    param.requires_grad = True

for param in model.parameters():
    print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
Fals

In [26]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [27]:
model.to(device)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [28]:
loss = nn.CrossEntropyLoss()
# lambda function will return model.parameters() if p.requires_grad is true
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

In [29]:
n_epochs = 10


for epoch in range(n_epochs):
    train_loss = 0.0
    train_accuracy = 0

    model.train()
    for data, label in train_loader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()

        output = model(data)

        loss_val = loss(output, label)

        loss_val.backward()

        optimizer.step()

        train_loss += loss_val.item()*data.size(0)
        #calculate accuracy
        _, pred = torch.max(output, dim=1)

        train_accuracy += torch.sum(pred==label).item()

    # calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
            epoch+1,
            train_loss
            ))
    print(f"Train accuracy: {train_accuracy/len(train_loader.dataset):.3f}")


Epoch: 1 	Training Loss: 0.589642
Train accuracy: 0.809
Epoch: 2 	Training Loss: 0.338644
Train accuracy: 0.876
Epoch: 3 	Training Loss: 0.297216
Train accuracy: 0.899
Epoch: 4 	Training Loss: 0.272377
Train accuracy: 0.899
Epoch: 5 	Training Loss: 0.252054
Train accuracy: 0.902
Epoch: 6 	Training Loss: 0.207773
Train accuracy: 0.924
Epoch: 7 	Training Loss: 0.187394
Train accuracy: 0.932
Epoch: 8 	Training Loss: 0.183982
Train accuracy: 0.933
Epoch: 9 	Training Loss: 0.180753
Train accuracy: 0.936
Epoch: 10 	Training Loss: 0.147157
Train accuracy: 0.947


In [30]:
model.eval()
with torch.no_grad():
  test_loss = 0.0
  test_accuracy = 0

  for data, label in test_loader:
    data, label = data.to(device), label.to(device)

    output = model(data)

    loss_val = loss(output, label)

    test_loss += loss_val.item()*data.size(0)

    _, pred = torch.max(output, dim=1)

    test_accuracy += torch.sum(pred==label).item()

  test_loss = test_loss/len(test_loader.dataset)
  print('Test Loss: {:.6f}'.format(test_loss))
  print(f"Test accuracy: {test_accuracy/len(test_loader.dataset):.3f}")

Test Loss: 0.569258
Test accuracy: 0.840
