# Unzipping the datasets prior to training/testing

### Importing necessary libraries


In [1]:
import torchvision
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from torchvision import transforms
import pickle

In [2]:
# Mount Google Drive at /content/gdrive so the archives stored there can be read
from google.colab import drive

drive.mount("/content/gdrive", force_remount=True)

Mounted at /content/gdrive


In [3]:
# Number of images per batch; passed to every DataLoader built in this notebook
batch_size = 32

In [4]:
# Preprocessing applied to every image: resize to 256x256, then convert to a tensor
transformer = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
)

## Unzipping the augmented BRACOL datasets (training and validation)

In [5]:
!pip install patool
import patoolib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting patool
  Downloading patool-1.12-py2.py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.5/77.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: patool
Successfully installed patool-1.12


In [6]:
# Location on the mounted Drive of the augmented BRACOL archive
zip_path = "/content/gdrive/MyDrive/BRACOL_split_augmented.zip"

In [7]:
# Unpack the archive into /content. The call is deliberately the cell's last expression,
# so its return value is what the notebook displays as the cell output.
patoolib.extract_archive(zip_path, outdir="/content")

patool: Extracting /content/gdrive/MyDrive/BRACOL_split_augmented.zip ...
patool: running /usr/bin/7z x -o/content -- /content/gdrive/MyDrive/BRACOL_split_augmented.zip
patool: ... /content/gdrive/MyDrive/BRACOL_split_augmented.zip extracted to `/content'.


'/content'

In [8]:
def load_dataset(d_path):
    """Build an ImageFolder over ``d_path`` and return it wrapped in a DataLoader.

    Every image goes through the module-level ``transformer`` pipeline. The class
    names found by ImageFolder are printed as a side effect.

    Args:
        d_path: Root directory handed to ``torchvision.datasets.ImageFolder``.

    Returns:
        A ``DataLoader`` created with default settings; the ImageFolder itself is
        reachable through the loader's ``dataset`` attribute.
    """
    image_folder = torchvision.datasets.ImageFolder(d_path, transform=transformer)
    print("Follwing classes are there : \n", image_folder.classes)
    return DataLoader(image_folder)

In [9]:
# NOTE: load_dataset returns a DataLoader, so at this point both names hold loaders,
# not datasets, despite what the variable names suggest.
train_dataset = load_dataset("/content/Dataset/training_dataset")
val_dataset = load_dataset("/content/Dataset/validation_dataset")

Follwing classes are there : 
 ['Cercospora', 'Healthy', 'Miner', 'Phoma', 'Rust']
Follwing classes are there : 
 ['Cercospora', 'Healthy', 'Miner', 'Phoma', 'Rust']


In [10]:
# load_dataset() returned DataLoader wrappers; keep only the datasets they wrap.
# The isinstance guards make this cell safe to re-run: once a name has been unwrapped
# it no longer refers to a DataLoader and is left untouched, instead of attempting a
# second `.dataset` lookup on the dataset itself.
if isinstance(train_dataset, DataLoader):
    train_dataset = train_dataset.dataset
if isinstance(val_dataset, DataLoader):
    val_dataset = val_dataset.dataset

In [11]:
# Batched loaders for both splits, sharing the notebook-wide batch_size.
# NOTE(review): the validation loader is also created with shuffle=True; confirm that a
# non-deterministic batch order is intended for validation.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [12]:
# Report how many images each split holds and how many batches that yields
print(f'Train Set- {len(train_dataset)} images in {len(train_loader)} batches')
print(f'Validation Set - {len(val_dataset)} images in {len(val_loader)} batches')

Train Set- 5020 images in 157 batches
Validation Set - 2500 images in 79 batches


In [13]:
# Sanity check: print the tensor shapes of a single training batch
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break  # stop after the first batch; one is enough to inspect the shapes

Image batch dimensions: torch.Size([32, 3, 256, 256])
Image label dimensions: torch.Size([32])


In [14]:
# Sanity check: print the tensor shapes of a single validation batch
for images, labels in val_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break  # stop after the first batch; one is enough to inspect the shapes

Image batch dimensions: torch.Size([32, 3, 256, 256])
Image label dimensions: torch.Size([32])


In [15]:
# Show the label tensor of a single training batch
for images, labels in train_loader:
    print (labels)  # printing to observe the labels present in one batch of the training dataloader object
    break  # only the first batch is inspected

tensor([2, 0, 4, 1, 2, 0, 2, 3, 4, 0, 1, 4, 3, 3, 3, 2, 3, 3, 2, 4, 0, 4, 3, 1,
        3, 4, 3, 1, 3, 4, 2, 1])


In [16]:
# Show the label tensor of a single validation batch
for images, labels in val_loader:
    print (labels)  # printing to observe the labels present in one batch of the validation dataloader object
    break  # only the first batch is inspected

tensor([0, 2, 4, 4, 3, 2, 4, 2, 4, 1, 2, 0, 1, 0, 4, 0, 4, 4, 4, 0, 0, 3, 4, 4,
        3, 3, 3, 1, 0, 1, 1, 0])


### Saving the dataloaders as pickle files

In [16]:
# Serialize the whole training DataLoader object to train_dloader.pkl (relative path, so it
# lands in the current working directory).
# NOTE(review): presumably the pickled dataset stores image file paths rather than pixel
# data, so the extracted /content/Dataset tree must exist wherever this file is loaded - TODO confirm.
with open('train_dloader.pkl','wb') as f:
    pickle.dump(train_loader, f)

In [16]:
# Serialize the whole validation DataLoader object to val_dloader.pkl (relative path, so it
# lands in the current working directory).
# NOTE(review): presumably the pickled dataset stores image file paths rather than pixel
# data, so the extracted /content/Dataset tree must exist wherever this file is loaded - TODO confirm.
with open('val_dloader.pkl','wb') as f:
    pickle.dump(val_loader, f)

## Unzipping the test dataset (unaugmented)

In [17]:
# Location on the mounted Drive of the (unaugmented) test archive
test_zip_path = "/content/gdrive/MyDrive/new_test.zip"

In [18]:
# Unpack the test archive into /content. The call is deliberately the cell's last
# expression, so its return value is what the notebook displays as the cell output.
patoolib.extract_archive(test_zip_path, outdir="/content")

patool: Extracting /content/gdrive/MyDrive/new_test.zip ...
patool: running /usr/bin/7z x -o/content -- /content/gdrive/MyDrive/new_test.zip
patool: ... /content/gdrive/MyDrive/new_test.zip extracted to `/content'.


'/content'

In [19]:
# NOTE: load_dataset returns a DataLoader, so test_dataset holds a loader at this point
test_dataset = load_dataset("/content/new_test")

Follwing classes are there : 
 ['Cercospora', 'Healthy', 'Miner', 'Phoma', 'Rust']


In [20]:
# load_dataset() returned a DataLoader wrapper; keep only the dataset it wraps.
# The isinstance guard makes this cell safe to re-run: once unwrapped, test_dataset no
# longer refers to a DataLoader and is left untouched, instead of attempting a second
# `.dataset` lookup on the dataset itself.
if isinstance(test_dataset, DataLoader):
    test_dataset = test_dataset.dataset

In [21]:
# Batched loader for the test split, using the notebook-wide batch_size.
# NOTE(review): created with shuffle=True; confirm that a non-deterministic batch order
# is intended for the test set.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [22]:
# Report how many images the test split holds and how many batches that yields
print(f'Testing Set - {len(test_dataset)} images in {len(test_loader)} batches')

Testing Set - 350 images in 11 batches


In [23]:
# Sanity check: print the tensor shapes of a single test batch
for images, labels in test_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break  # stop after the first batch; one is enough to inspect the shapes

Image batch dimensions: torch.Size([32, 3, 256, 256])
Image label dimensions: torch.Size([32])


In [24]:
# Show the label tensor of a single test batch
for images, labels in test_loader:
    print (labels)  # printing to observe the labels present in one batch of the test dataloader object
    break  # only the first batch is inspected

tensor([4, 1, 2, 0, 0, 0, 2, 4, 4, 2, 3, 1, 0, 4, 2, 2, 0, 3, 2, 3, 0, 0, 3, 1,
        4, 4, 3, 3, 3, 1, 0, 2])


### Saving the test dataloader using pickle

In [24]:
# Serialize the whole test DataLoader object to test_dloader.pkl (relative path, so it
# lands in the current working directory).
# NOTE(review): presumably the pickled dataset stores image file paths rather than pixel
# data, so the extracted /content/new_test tree must exist wherever this file is loaded - TODO confirm.
with open('test_dloader.pkl','wb') as f:
    pickle.dump(test_loader, f)