In [1]:
import os
import shutil
import urllib.request as req
from zipfile import ZipFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader
from torchvision import models, datasets, transforms

## Download data

In [2]:
# Remote location of the zipped hymenoptera image archive used by this notebook.
data_url = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"

In [5]:
# create a directory

def create_dirs(dir_path):
    """Make sure ``dir_path`` exists and report what happened.

    Missing parent directories are created as well. The call is safe to
    repeat (e.g. on "Restart & Run All"): an existing directory is left
    untouched and the printed message says so instead of claiming it was
    created.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    # Check before creating so the message reflects what actually happened.
    already_present = os.path.isdir(dir_path)
    os.makedirs(dir_path, exist_ok=True)
    status = "already exists" if already_present else "created"
    print(f"{dir_path} directory {status}")

# Folder (relative to the working directory) that holds everything this
# notebook downloads or extracts.
ROOT_DATA_DIR = 'hymenoptera_data'
create_dirs(ROOT_DATA_DIR)

hymenoptera_data directory created


In [26]:
data_zip_file = 'data.zip'
data_zip_path = os.path.join(ROOT_DATA_DIR, data_zip_file)

# Download the archive only if it is not on disk yet. The download goes to a
# temporary ".part" file that is renamed once complete, so an interrupted
# download never leaves a truncated data.zip that later runs would mistake
# for a finished one.
if not os.path.isfile(data_zip_path):
    print('downlaoding file...')
    partial_zip_path = data_zip_path + '.part'
    try:
        _, headers = req.urlretrieve(data_url, partial_zip_path)
        # Same directory, so the rename is atomic.
        os.replace(partial_zip_path, data_zip_path)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(partial_zip_path):
            os.remove(partial_zip_path)
    filename = data_zip_path
    print(f'filename: {filename} created with info \n{headers}')
else:
    print('file is already present')

file is already present


# Unzip data

In [29]:

unzip_data_dirname = "unzip_data_dir"
unzip_data_dir = os.path.join(ROOT_DATA_DIR, unzip_data_dirname)

# Extract the archive once. The existence of `unzip_data_dir` is what marks
# the data as extracted, so if extraction fails or is interrupted the
# directory is removed again; otherwise the next run would skip extraction
# and work on a partial tree.
if not os.path.exists(unzip_data_dir):
    os.makedirs(unzip_data_dir, exist_ok=True)
    try:
        with ZipFile(data_zip_path) as f:
            f.extractall(unzip_data_dir)
    except BaseException:
        # BaseException also covers a kernel interrupt (KeyboardInterrupt).
        shutil.rmtree(unzip_data_dir, ignore_errors=True)
        raise
else:
    print("data already extacted")

# Create data loaders

In [30]:
from pathlib import Path

In [31]:
# Derive the split folders from `unzip_data_dir` rather than repeating the
# directory names, so changing ROOT_DATA_DIR or the unzip folder name cannot
# leave these paths pointing at a stale location.
# The archive unpacks into a top-level "hymenoptera_data" folder.
dataset_root = Path(unzip_data_dir) / "hymenoptera_data"
train_path = dataset_root / "train"
# The "val" folder is what this notebook uses as its test split.
test_path = dataset_root / "val"

In [32]:

# Size every image is resized to (passed to transforms.Resize in the
# transform pipelines).
img_size = (224, 224)

In [33]:
# Per-channel normalisation statistics for 3-channel images: the same 0.5
# is used for every channel, for both the mean and the standard deviation.
mean = torch.tensor([0.5] * 3)
std = torch.tensor([0.5] * 3)

# Transformation

In [34]:
def _build_pipeline(*augmentations):
    """Compose resize -> optional augmentations -> tensor -> normalise.

    Every pipeline resizes to `img_size` first and finishes with
    `ToTensor` followed by `Normalize(mean, std)`; any `augmentations`
    passed in are applied between the resize and the tensor conversion.
    """
    return transforms.Compose([
        transforms.Resize(img_size),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])


# Training images additionally get a random rotation of up to 20 degrees.
train_transforms = _build_pipeline(transforms.RandomRotation(degrees=20))

# Test images go through the fixed steps only.
test_transforms = _build_pipeline()

In [35]:

# One ImageFolder dataset per split, each paired with its own transform
# pipeline.
train_data, test_data = (
    datasets.ImageFolder(root=split_root, transform=split_transforms)
    for split_root, split_transforms in (
        (train_path, train_transforms),
        (test_path, test_transforms),
    )
)

In [36]:
# Inspect the class-name -> integer-label mapping of the training set.
train_data.class_to_idx

{'ants': 0, 'bees': 1}

In [37]:
# Keep the class-name -> integer-label mapping under a shorter name and
# display it.
label_map = train_data.class_to_idx
label_map

{'ants': 0, 'bees': 1}

In [38]:
# Display the dataset summary (number of datapoints, root folder, transforms).
train_data

Dataset ImageFolder
    Number of datapoints: 244
    Root location: hymenoptera_data/unzip_data_dir/hymenoptera_data/train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
               RandomRotation(degrees=[-20.0, 20.0], interpolation=nearest, expand=False, fill=0)
               ToTensor()
               Normalize(mean=tensor([0.5000, 0.5000, 0.5000]), std=tensor([0.5000, 0.5000, 0.5000]))
           )

In [39]:
# Number of images per mini-batch, shared by both loaders.
batch_size = 64

# Shuffle the training data; keep the test data in a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [41]:
# Pull a single batch from the training loader to sanity-check its contents.
data = next(iter(train_loader))

In [42]:
# Number of elements in the batch (unpacked as images and labels below).
len(data)

2

In [43]:
# Split the batch into its two parts: the image tensor and the labels.
images, labels = data

In [45]:
# Check the shape of the image batch.
images.shape

torch.Size([64, 3, 224, 224])