### Lesson 06 - Transfer Learning

#### 1. Download Data, Unzip and Create Dataloader

In [6]:
# import libraries
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import urllib.request as req

In [7]:
# URL of the zipped hymenoptera dataset; the actual download happens in a later cell
data_url = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"

In [8]:
# create a directory
def create_dirs(dir_path):
    """Make sure ``dir_path`` exists, then print a confirmation line.

    Missing parent directories are created as well, and a directory that is
    already present is accepted silently (``exist_ok=True``).

    Args:
        dir_path: Path of the directory to create.
    """
    os.makedirs(dir_path, exist_ok=True)
    message = f"{dir_path} directory created"
    print(message)

# Root folder that holds the downloaded archive and the extracted images
ROOT_DATA_DIR = "hymenoptera_data"
create_dirs(dir_path=ROOT_DATA_DIR)

hymenoptera_data directory created


In [10]:
# Download the dataset archive into ROOT_DATA_DIR unless it is already there
data_zip_file = "data.zip"
data_zip_path = os.path.join(ROOT_DATA_DIR, data_zip_file)

# Check the full destination path. The bare file name would be resolved
# against the current working directory, never match, and trigger a fresh
# download on every run.
if not os.path.isfile(data_zip_path):
    print("downloading data")
    # urlretrieve returns the local file name and the HTTP response headers
    filename, headers = req.urlretrieve(data_url, data_zip_path)
    print(f"filename: {filename} created with info \n {headers}")

else:
    print("file is already present")

downloading data
filename: hymenoptera_data\data.zip created with info 
 Content-Type: application/zip
Content-Length: 47286322
Connection: close
Last-Modified: Wed, 15 Mar 2017 18:46:00 GMT
x-amz-version-id: null
Accept-Ranges: bytes
Server: AmazonS3
Date: Wed, 24 Jan 2024 11:34:37 GMT
ETag: "5f8c32a6554f6acb4d649776e7735e48"
X-Cache: Hit from cloudfront
Via: 1.1 f193db4ca15282854bb68270a34c2db2.cloudfront.net (CloudFront)
X-Amz-Cf-Pop: BOM78-P4
X-Amz-Cf-Id: kFjVP4wTVkx79fpZbVvBrXipWqwozASwRrj--k0ETL-nDLpFmxVVTw==
Age: 52




In [12]:
# Unzip data: extract the downloaded archive once and skip the work on re-runs
from zipfile import ZipFile

unzip_data_dirname = "unzip_data_dir"
unzip_data_dir = os.path.join(ROOT_DATA_DIR, unzip_data_dirname)

# The target is a directory, so os.path.isfile() is never true for it and the
# archive would be re-extracted on every run. Treat the data as extracted only
# when the directory exists and is non-empty, so an empty folder left behind by
# a failed run is still filled.
already_extracted = os.path.isdir(unzip_data_dir) and bool(os.listdir(unzip_data_dir))

if not already_extracted:
    os.makedirs(unzip_data_dir, exist_ok=True)
    with ZipFile(data_zip_path) as f:
        f.extractall(unzip_data_dir)

else:
    print("data already extracted")

In [14]:
# create dataloaders
from pathlib import Path

In [15]:
# Both splits live under the same extracted folder; the held-out split is the "val" sub-folder
_dataset_root = Path("hymenoptera_data/unzip_data_dir/hymenoptera_data")
train_path = _dataset_root / "train"
test_path = _dataset_root / "val"

In [16]:
# Target size handed to transforms.Resize in both the train and test pipelines
img_size = (224, 224)

In [17]:
# Per-channel mean and std (three channels) passed to transforms.Normalize.
# NOTE(review): if a pretrained backbone is used later, it may expect the
# statistics it was trained with rather than 0.5 / 0.5 — confirm.
mean = torch.tensor([0.5] * 3)
std = mean.clone()

In [18]:
# Transformations
def _make_transforms():
    """Return a fresh resize -> to-tensor -> normalize pipeline."""
    steps = [
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    return transforms.Compose(steps)


# Train and test images go through the same preprocessing; each split still
# gets its own Compose instance.
train_transforms = _make_transforms()
test_transforms = _make_transforms()

In [19]:
# Build one ImageFolder dataset per split.
# The test set must read from test_path: pointing it at train_path would make
# every later evaluation run on the training images.
train_data = datasets.ImageFolder(root=train_path, transform=train_transforms)
test_data = datasets.ImageFolder(root=test_path, transform=test_transforms)

In [21]:
# Show the class-name -> label-index mapping of the training dataset
train_data.class_to_idx

{'ants': 0, 'bees': 1}

In [22]:
# Keep the class-name -> label-index mapping under a shorter name, then display it
label_map = train_data.class_to_idx
label_map

{'ants': 0, 'bees': 1}

In [23]:
# Display the dataset summary (number of datapoints, root location, transform pipeline)
train_data

Dataset ImageFolder
    Number of datapoints: 244
    Root location: hymenoptera_data\unzip_data_dir\hymenoptera_data\train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
               Normalize(mean=tensor([0.5000, 0.5000, 0.5000]), std=tensor([0.5000, 0.5000, 0.5000]))
           )

In [24]:
# Mini-batch size shared by both loaders
batch_size = 64

# NOTE(review): the test loader is shuffled too; evaluation usually does not
# need shuffling — confirm this is intended.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

In [25]:
# Pull a single batch from the training loader to inspect what it yields
data = next(iter(train_loader))

In [26]:
# Number of items in the batch: 2, which a later cell unpacks as images and labels
len(data)

2

In [27]:
# Split the batch into its image tensor and its label tensor
images, labels = data

In [29]:
# Image batch shape; with batch_size=64 and img_size=(224, 224) the recorded output is [64, 3, 224, 224]
images.shape

torch.Size([64, 3, 224, 224])

In [30]:
# Label batch shape: one label per image in the batch
labels.shape

torch.Size([64])