In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
from torch.utils.data import ConcatDataset
from PIL import Image
import os
import torchvision.models as models
import time
import copy
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import random
from collections import defaultdict
import pandas as pd

import torch.nn.functional as F



device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_indexes(arr, value):
    indexes = []
    for i in range(len(arr)):
        if arr[i] == value:
            indexes.append(i)
    return indexes

def get_length_per_class(dataloader, classes):
    class_counts = defaultdict(int)
    total = 0
    for batch in dataloader:
        _, labels = batch 
        labels = labels.numpy().tolist()
        for label in labels:
            class_counts[label] += 1
            total +=1

    class_counts = dict(sorted(class_counts.items()))
    for class_label, count in class_counts.items():
        print(f"Class {classes[class_label]}: {count} samples out of {total}")
def load_data(data_dir,
                           batch_size,
                           data_type,
                           noise_type,
                           noise_percentage,                           
                           transform,                           
                           data_percentage=1,
                           show_classes = False, random_seed=21):
    
    if noise_type == "None":
        noise_type = ""
        noise_percentage = ""
    else:
        noise_type = "/" + str(noise_type)
        noise_percentage = "/" + str(noise_percentage)
    path = data_dir + noise_type + "/" + data_type + noise_percentage
    print("path: ", path)
    dataset = ImageFolder(root=path, transform=transform)
    original_classes = dataset.classes 
    num_samples = len(dataset)
    indices = list(range(num_samples))

    labels = dataset.targets
    class_to_idx = dataset.class_to_idx
    needed_length = int(num_samples*data_percentage/100)
    expected_length_per_class = int(needed_length/len(original_classes))
    print(f"needed_length: {needed_length}, expected_length_per_class: {expected_length_per_class}")
    if data_percentage != 100:
        new_indices = []
        for key, value in class_to_idx.items():
            all_indixes_of_class = get_indexes(labels, value)
            new_indices.extend(all_indixes_of_class[:expected_length_per_class])
    else:
        new_indices = indices
    length_dataset = len(new_indices)
    print("length of final dataset:", length_dataset)

    
    # sampler = SubsetRandomSampler(new_indices)

    dataloader = DataLoader(dataset, sampler=new_indices, batch_size=batch_size)

    if show_classes:
        get_length_per_class(dataloader, original_classes)
        
    random.shuffle(new_indices)

   
    dataloader = DataLoader(dataset, sampler=new_indices, batch_size=batch_size)

    return dataloader, length_dataset, original_classes


def train_model(model, criterion, optimizer, scheduler, num_epochs=25, batch_show = 1792):
    since = time.time()
    valid_acc = []
    train_acc = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()  

            running_loss = 0.0
            running_corrects = 0
            l = 0

            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.clone().detach().to(device)
                labels = labels.clone().detach().to(device)

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    if not isinstance(outputs, torch.Tensor):
                        outputs = outputs.logits
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                l += len(inputs)
                
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
                
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            if phase == 'train':
                scheduler.step()
                train_acc.append(epoch_acc.item())
            else:
                valid_acc.append(epoch_acc.item())
                
            
            
            
            print('\n{} Loss: {:.4f} Acc: {:.4f}\n'.format(
                phase, epoch_loss, epoch_acc))

            
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    
   

    accuracy_history = [train_acc, valid_acc]
    model.load_state_dict(best_model_wts)
    return model, best_acc.item(), accuracy_history
    
    




device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4680, 0.4647, 0.3441], std=[0.2322, 0.2272, 0.2394]) 
])   

noise_type = "gaussian_noise"
noise_percentage = 10
data_percentage = 100
total_size = 21000

train_size = data_percentage*total_size/100
data_dir = '/kaggle/input/vegetableimages/vegetable_images'

train_loader, train_size, classes = load_data(data_dir = data_dir,
                           batch_size = 64,
                           data_type = "train",
                           noise_type = "None",
                           noise_percentage = 0,                           
                           transform = transform,                           
                           data_percentage=data_percentage)

valid_loader, valid_size, _ = load_data(data_dir = data_dir,
                           batch_size = 64,
                           data_type = "validation",
                           noise_type = "None",
                           noise_percentage = 0,                           
                           transform = transform,                           
                           data_percentage=data_percentage)

valid_loader_with_noise, _, _ = load_data(data_dir = data_dir,
                           batch_size = 64,
                           data_type = "validation",
                           noise_type = noise_type,
                           noise_percentage = noise_percentage,                           
                           transform = transform,                           
                           data_percentage=data_percentage)
dataloaders = {'train':  train_loader, 
               'val': valid_loader_with_noise
               }
dataloaders_with_noise = {'train':  train_loader, 
               'val': valid_loader_with_noise
               }


test_loader,test_size_, _ = load_data(data_dir = data_dir,
                           batch_size = 64,
                           data_type = "test",
                           noise_type = "gaussian_noise",
                           noise_percentage = noise_percentage,                           
                           transform = transform,                           
                           data_percentage=data_percentage)


test_loader_without_noise, _, _ = load_data(data_dir =data_dir,
                           batch_size = 64,
                           data_type = "test",
                           noise_type = "None",
                           noise_percentage = 0,                           
                           transform = transform,                           
                           data_percentage=data_percentage)
dataset_sizes = {'train':  train_size, 
        'val': valid_size,
        'test': test_size_
       }




path:  /kaggle/input/vegetableimages/vegetable_images/train
needed_length: 15000, expected_length_per_class: 1000
length of final dataset: 15000
path:  /kaggle/input/vegetableimages/vegetable_images/validation
needed_length: 3000, expected_length_per_class: 200
length of final dataset: 3000
path:  /kaggle/input/vegetableimages/vegetable_images/gaussian_noise/validation/10
needed_length: 3000, expected_length_per_class: 200
length of final dataset: 3000
path:  /kaggle/input/vegetableimages/vegetable_images/gaussian_noise/test/10
needed_length: 3000, expected_length_per_class: 200
length of final dataset: 3000
path:  /kaggle/input/vegetableimages/vegetable_images/test
needed_length: 3000, expected_length_per_class: 200
length of final dataset: 3000


In [3]:
def load_vgg16():
    vgg16 = models.vgg16(weights = True)
    in_features = vgg16._modules['classifier'][-1].in_features
    out_features = len(classes)
    vgg16._modules['classifier'][-1] = nn.Linear(in_features, out_features, bias=True)
    vgg16 = vgg16.to(device)
    for param in vgg16.parameters():
        param.requires_grad = True
    return vgg16
    
    
    

In [4]:
criterion = nn.CrossEntropyLoss()
learning_rates = [0.005]
momentums = [0, 0.1]
steps = [3, 4]
gammas = [0.1,  0.5]
c = 0
columns = ['model', 'num_epochs', 'learning_rate', 'momentum', 'step', 'gamma', 'accuracy', 'history']
num_epochs = 10
ht_results = pd.DataFrame(columns = columns)
best_valid_score = 0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for lr in learning_rates:
    for m in momentums:
        for s in steps:
            for g in gammas: 
                c+=1
                print(f"STARTING {c} ITERATION")
                vgg16_cr = load_vgg16()
                print(f"Starting to test lr = {lr}, m = {m}, s = {s}, g = {g}")
                optimizer = optim.SGD(vgg16_cr.parameters(), lr= lr, momentum = m)
                step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
                vgg16_cr, valid_score, history = train_model(vgg16_cr, criterion, optimizer, 
                                                      step_lr_scheduler, num_epochs=num_epochs)
                if best_valid_score < valid_score:
                    best_valid_score = valid_score
                    best_vgg16 = vgg16_cr
                ht_results.loc[len(ht_results.index)] = ['vgg16', num_epochs, lr, m, s, g, valid_score, history]                
                print(ht_results.loc[len(ht_results.index) - 1])
                
        
        



STARTING 1 ITERATION


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 258MB/s] 


Starting to test lr = 0.005, m = 0, s = 3, g = 0.1
Epoch 1/10
----------

train Loss: 0.2291 Acc: 0.9321


val Loss: 0.1520 Acc: 0.9490

Epoch 2/10
----------

train Loss: 0.0235 Acc: 0.9941


val Loss: 0.0759 Acc: 0.9743

Epoch 3/10
----------

train Loss: 0.0097 Acc: 0.9977


val Loss: 0.0724 Acc: 0.9777

Epoch 4/10
----------

train Loss: 0.0089 Acc: 0.9969


val Loss: 0.0605 Acc: 0.9813

Epoch 5/10
----------

train Loss: 0.0049 Acc: 0.9984


val Loss: 0.0811 Acc: 0.9800

Epoch 6/10
----------

train Loss: 0.0026 Acc: 0.9992


val Loss: 0.0685 Acc: 0.9817

Epoch 7/10
----------

train Loss: 0.0019 Acc: 0.9994


val Loss: 0.0792 Acc: 0.9797

Epoch 8/10
----------

train Loss: 0.0019 Acc: 0.9997


val Loss: 0.0672 Acc: 0.9823

Epoch 9/10
----------

train Loss: 0.0021 Acc: 0.9996


val Loss: 0.0731 Acc: 0.9810

Epoch 10/10
----------

train Loss: 0.0019 Acc: 0.9993


val Loss: 0.0786 Acc: 0.9807

Training complete in 52m 7s
Best val Acc: 0.982333
model                                



Starting to test lr = 0.005, m = 0, s = 4, g = 0.1
Epoch 1/10
----------

train Loss: 0.2180 Acc: 0.9353


val Loss: 0.0712 Acc: 0.9780

Epoch 2/10
----------

train Loss: 0.0208 Acc: 0.9947


val Loss: 0.0979 Acc: 0.9667

Epoch 3/10
----------

train Loss: 0.0131 Acc: 0.9959


val Loss: 0.0873 Acc: 0.9697

Epoch 4/10
----------

train Loss: 0.0058 Acc: 0.9983


val Loss: 0.0634 Acc: 0.9780

Epoch 5/10
----------

train Loss: 0.0051 Acc: 0.9984


val Loss: 0.1154 Acc: 0.9647

Epoch 6/10
----------

train Loss: 0.0030 Acc: 0.9992


val Loss: 0.0635 Acc: 0.9770

Epoch 7/10
----------

train Loss: 0.0026 Acc: 0.9993


val Loss: 0.0678 Acc: 0.9767

Epoch 8/10
----------

train Loss: 0.0022 Acc: 0.9996


val Loss: 0.0644 Acc: 0.9787

Epoch 9/10
----------

train Loss: 0.0021 Acc: 0.9994


val Loss: 0.0586 Acc: 0.9797

Epoch 10/10
----------

train Loss: 0.0022 Acc: 0.9994


val Loss: 0.0576 Acc: 0.9803

Training complete in 50m 8s
Best val Acc: 0.980333
model                                



Starting to test lr = 0.005, m = 0, s = 4, g = 0.5
Epoch 1/10
----------

train Loss: 0.2321 Acc: 0.9315


val Loss: 0.0852 Acc: 0.9730

Epoch 2/10
----------

train Loss: 0.0219 Acc: 0.9937


val Loss: 0.1599 Acc: 0.9583

Epoch 3/10
----------

train Loss: 0.0133 Acc: 0.9960


val Loss: 0.0400 Acc: 0.9860

Epoch 4/10
----------

train Loss: 0.0056 Acc: 0.9982


val Loss: 0.0720 Acc: 0.9803

Epoch 5/10
----------

train Loss: 0.0043 Acc: 0.9987


val Loss: 0.0622 Acc: 0.9767

Epoch 6/10
----------

train Loss: 0.0027 Acc: 0.9993


val Loss: 0.0486 Acc: 0.9830

Epoch 7/10
----------

train Loss: 0.0023 Acc: 0.9995


val Loss: 0.0510 Acc: 0.9837

Epoch 8/10
----------

train Loss: 0.0016 Acc: 0.9996


val Loss: 0.0414 Acc: 0.9877

Epoch 9/10
----------

train Loss: 0.0019 Acc: 0.9996


val Loss: 0.0439 Acc: 0.9873

Epoch 10/10
----------

train Loss: 0.0013 Acc: 0.9998


val Loss: 0.0443 Acc: 0.9870

Training complete in 51m 32s
Best val Acc: 0.987667
model                               



Starting to test lr = 0.005, m = 0.1, s = 3, g = 0.1
Epoch 1/10
----------

train Loss: 0.2131 Acc: 0.9370


val Loss: 0.0965 Acc: 0.9700

Epoch 2/10
----------

train Loss: 0.0242 Acc: 0.9926


val Loss: 0.0729 Acc: 0.9770

Epoch 3/10
----------

train Loss: 0.0114 Acc: 0.9967


val Loss: 0.0566 Acc: 0.9817

Epoch 4/10
----------

train Loss: 0.0058 Acc: 0.9983


val Loss: 0.0770 Acc: 0.9763

Epoch 5/10
----------

train Loss: 0.0042 Acc: 0.9989


val Loss: 0.0754 Acc: 0.9777

Epoch 6/10
----------

train Loss: 0.0026 Acc: 0.9993


val Loss: 0.0333 Acc: 0.9910

Epoch 7/10
----------

train Loss: 0.0020 Acc: 0.9996


val Loss: 0.0317 Acc: 0.9907

Epoch 8/10
----------

train Loss: 0.0018 Acc: 0.9996


val Loss: 0.0327 Acc: 0.9907

Epoch 9/10
----------

train Loss: 0.0015 Acc: 0.9999


val Loss: 0.0313 Acc: 0.9913

Epoch 10/10
----------

train Loss: 0.0017 Acc: 0.9997


val Loss: 0.0313 Acc: 0.9907

Training complete in 51m 60s
Best val Acc: 0.991333
model                             



Starting to test lr = 0.005, m = 0.1, s = 3, g = 0.5
Epoch 1/10
----------

train Loss: 0.2150 Acc: 0.9371


val Loss: 0.1048 Acc: 0.9627

Epoch 2/10
----------

train Loss: 0.0192 Acc: 0.9945


val Loss: 0.1531 Acc: 0.9597

Epoch 3/10
----------

train Loss: 0.0109 Acc: 0.9971


val Loss: 0.0456 Acc: 0.9847

Epoch 4/10
----------

train Loss: 0.0042 Acc: 0.9987


val Loss: 0.0376 Acc: 0.9877

Epoch 5/10
----------

train Loss: 0.0045 Acc: 0.9989


val Loss: 0.1058 Acc: 0.9700

Epoch 6/10
----------

train Loss: 0.0024 Acc: 0.9993


val Loss: 0.0500 Acc: 0.9857

Epoch 7/10
----------

train Loss: 0.0018 Acc: 0.9994


val Loss: 0.0449 Acc: 0.9870

Epoch 8/10
----------

train Loss: 0.0021 Acc: 0.9995


val Loss: 0.0460 Acc: 0.9863

Epoch 9/10
----------

train Loss: 0.0015 Acc: 0.9998


val Loss: 0.0493 Acc: 0.9857

Epoch 10/10
----------

train Loss: 0.0020 Acc: 0.9992


val Loss: 0.0486 Acc: 0.9860

Training complete in 51m 9s
Best val Acc: 0.987667
model                              



Starting to test lr = 0.005, m = 0.1, s = 4, g = 0.1
Epoch 1/10
----------

train Loss: 0.2074 Acc: 0.9378


val Loss: 0.1422 Acc: 0.9540

Epoch 2/10
----------

train Loss: 0.0243 Acc: 0.9931


val Loss: 0.0492 Acc: 0.9827

Epoch 3/10
----------

train Loss: 0.0097 Acc: 0.9972


val Loss: 0.0541 Acc: 0.9807

Epoch 4/10
----------

train Loss: 0.0077 Acc: 0.9978


val Loss: 0.0502 Acc: 0.9830

Epoch 5/10
----------

train Loss: 0.0057 Acc: 0.9983


val Loss: 0.0619 Acc: 0.9800

Epoch 6/10
----------

train Loss: 0.0032 Acc: 0.9992


val Loss: 0.0389 Acc: 0.9873

Epoch 7/10
----------

train Loss: 0.0024 Acc: 0.9993


val Loss: 0.0334 Acc: 0.9900

Epoch 8/10
----------

train Loss: 0.0020 Acc: 0.9995


val Loss: 0.0336 Acc: 0.9900

Epoch 9/10
----------

train Loss: 0.0018 Acc: 0.9995


val Loss: 0.0332 Acc: 0.9897

Epoch 10/10
----------

train Loss: 0.0013 Acc: 0.9997


val Loss: 0.0368 Acc: 0.9893

Training complete in 52m 2s
Best val Acc: 0.990000
model                              



Starting to test lr = 0.005, m = 0.1, s = 4, g = 0.5
Epoch 1/10
----------

train Loss: 0.2109 Acc: 0.9381


val Loss: 0.0955 Acc: 0.9653

Epoch 2/10
----------

train Loss: 0.0195 Acc: 0.9944


val Loss: 0.1050 Acc: 0.9653

Epoch 3/10
----------

train Loss: 0.0123 Acc: 0.9959


val Loss: 0.0712 Acc: 0.9770

Epoch 4/10
----------

train Loss: 0.0071 Acc: 0.9979


val Loss: 0.0369 Acc: 0.9870

Epoch 5/10
----------

train Loss: 0.0032 Acc: 0.9990


val Loss: 0.0823 Acc: 0.9730

Epoch 6/10
----------

train Loss: 0.0021 Acc: 0.9995


val Loss: 0.0703 Acc: 0.9757

Epoch 7/10
----------

train Loss: 0.0019 Acc: 0.9994


val Loss: 0.0672 Acc: 0.9767

Epoch 8/10
----------

train Loss: 0.0012 Acc: 0.9998


val Loss: 0.0659 Acc: 0.9767

Epoch 9/10
----------

train Loss: 0.0015 Acc: 0.9995


val Loss: 0.0666 Acc: 0.9777

Epoch 10/10
----------

train Loss: 0.0013 Acc: 0.9997


val Loss: 0.0644 Acc: 0.9780

Training complete in 52m 7s
Best val Acc: 0.987000
model                              

In [5]:
ht_results.to_csv('vgg16_results.csv', index=False)
torch.save(best_vgg16, "best_vgg16.pth")

In [6]:
!zip -r file.zip /kaggle/working

from IPython.display import FileLink
FileLink(r'file.zip')

  adding: kaggle/working/ (stored 0%)
  adding: kaggle/working/vgg16_results.csv (deflated 82%)
  adding: kaggle/working/.virtual_documents/ (stored 0%)
  adding: kaggle/working/best_vgg16.pth (deflated 7%)
