In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file available under the Kaggle input directory.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split


In [3]:
# Download the dataset archive into the current working directory
dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(url=dataset_url, root='.')

In [4]:
# Extract the downloaded archive into ./data.
# The archive comes from the network, so where the running Python supports
# extraction filters we use the 'data' filter, which refuses members that
# would land outside the destination (absolute paths, '..' components) and
# special files. Older interpreters fall back to the unfiltered call.
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(path='./data', filter='data')
    else:
        tar.extractall(path='./data')

In [5]:
# Directory the archive was extracted to
data_dir = './data/cifar10'

# Top-level contents of the dataset directory
print(os.listdir(data_dir))

# One sub-directory per class inside the training folder
classes = os.listdir(f"{data_dir}/train")
print(classes)

In [6]:
# Count the training images of the "airplane" class and peek at a few file names
airplane_files = os.listdir(f"{data_dir}/train/airplane")
print('No. of training examples for airplanes:', len(airplane_files))
print(airplane_files[0:5])

In [7]:
# Count the test images of the "ship" class and peek at a few file names
ship_test_files = os.listdir(f"{data_dir}/test/ship")
print("No. of test examples for ship:", len(ship_test_files))
print(ship_test_files[0:5])

In [8]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor

In [9]:
# Training set: one sub-folder per class, images converted to tensors on access
dataset = ImageFolder(root=f"{data_dir}/train", transform=ToTensor())

In [10]:
# Fetch a single (image tensor, class index) sample from the training set
img, label = dataset[40000]
print(img.shape, label)
# Bare last expression: the notebook displays the full image tensor
img

In [11]:
# Class names as discovered by ImageFolder; a sample's label is an index into this list
print(dataset.classes)


In [12]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Use a white figure background for all plots
# (presumably so figures stay legible on dark notebook themes)
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [13]:
def show_example(img, label):
    """Print the class name and index of `label`, then plot `img`.

    The class name is looked up in the notebook-level `dataset.classes`.
    `img` has its axes reordered with permute(1, 2, 0) before plotting
    (presumably channels-first -> channels-last, as matplotlib expects).
    """
    class_name = dataset.classes[label]
    print('Label: ', class_name, f"({label!s})")
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last)

In [14]:
# Display the first training sample; * unpacks the (img, label) tuple into the two arguments
show_example(*dataset[0])


In [15]:
# Test set: same folder-per-class layout as the training set
datasetTest = ImageFolder(root=f"{data_dir}/test", transform=ToTensor())

In [16]:
# Fetch a single (image tensor, class index) sample from the test set
img, label = datasetTest[100]
print(img.shape, label)
# Bare last expression: the notebook displays the full image tensor
img

In [17]:
# The * unpacks the (img, label) tuple into separate arguments, i.e. this is
# equivalent to: img, label = datasetTest[0]; show_example(img, label)
show_example(*datasetTest[0])



In [18]:
# Display another test sample (index 1000)
show_example(*datasetTest[1000])

In [19]:
# Number of samples in the test set
len(datasetTest)

In [20]:
import numpy as np

def split_indices(n, val_pct=0.1, seed=549):
    """Randomly partition the indices 0..n-1 into training and validation sets.

    Args:
        n: Total number of samples.
        val_pct: Fraction of the samples to reserve for validation; must lie
            in [0, 1]. The validation size is int(n * val_pct) (rounded down).
        seed: Seed for the shuffle, so the split is reproducible.

    Returns:
        A tuple (train_indices, val_indices) of NumPy integer arrays that are
        disjoint and together contain every index in range(n) exactly once.

    Raises:
        ValueError: If val_pct is outside [0, 1].
    """
    if not 0 <= val_pct <= 1:
        raise ValueError(f"val_pct must be between 0 and 1, got {val_pct!r}")
    n_val = int(n * val_pct)
    # A private generator yields the same permutation as seeding the global
    # NumPy RNG would, but without resetting random state used elsewhere.
    rng = np.random.RandomState(seed)
    idxs = rng.permutation(n)
    return idxs[n_val:], idxs[:n_val]
    

In [21]:
# Hold out 20% of the training images for validation, with a fixed seed for reproducibility
val_pct = 0.2
rand_seed = 42

train_indices, val_indices = split_indices(n=len(dataset), val_pct=val_pct, seed=rand_seed)
print(len(train_indices), len(val_indices))

print('Sample Validation Indices: ', val_indices[0:10])

In [22]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [23]:
batch_size = 100

# Both loaders read from the same underlying dataset; each sampler restricts
# its loader to one side of the train/validation index split.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_dl = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_dl = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

In [24]:
from torchvision.utils import make_grid

def show_batch(dl):
    """Plot the first batch produced by `dl` as one image grid, 10 images per row.

    Only the first (images, labels) batch is consumed; the labels are ignored.
    If `dl` yields nothing, no figure is created.
    """
    _no_batch = object()
    first_batch = next(iter(dl), _no_batch)
    if first_batch is _no_batch:
        return
    images, _ = first_batch

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_xticks([])
    ax.set_yticks([])
    grid = make_grid(images, nrow=10)
    # Reorder axes with permute(1, 2, 0) before handing the grid to imshow
    ax.imshow(grid.permute(1, 2, 0))

In [25]:
# Visualise the first batch drawn from the validation loader
show_batch(val_dl)

In [26]:
import torch.nn as nn
import torch.nn.functional as F

In [27]:
# Show the documentation of nn.Conv2d (IPython help syntax; not valid in plain Python)
?nn.Conv2d

In [28]:
# Minimal demo network: one 3x3 convolution (3 -> 8 channels, padding keeps
# height/width unchanged) followed by 2x2 max-pooling (halves height/width).
simple_model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),
)

In [32]:
# Run a single training batch through simple_model to compare input and output shapes;
# the break stops after the first batch.
for images, labels in train_dl:
    print('images.shape: ', images.shape)
    # "out" is the feature map
    out = simple_model(images) 
    print('out.shape', out.shape)
    break

In [33]:
# CNN classifier: five conv -> ReLU -> max-pool stages followed by a linear layer.
# Tensors are laid out as batch_size x channels x height x width (PyTorch puts
# channels before the spatial dimensions). Shape comments assume 32 x 32 RGB input.
# Every conv after the first takes the previous stage's 16 feature maps as input,
# so its in_channels must be 16 (not 3), otherwise the forward pass fails.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # output: batch_size x 16 x 16 x 16 (spatial size halved)

    nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # output: batch_size x 16 x 8 x 8

    nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # output: batch_size x 16 x 4 x 4

    nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # output: batch_size x 16 x 2 x 2

    nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # output: batch_size x 16 x 1 x 1

    nn.Flatten(),      # output: batch_size x 16
    nn.Linear(16, 10)  # output: batch_size x 10 (one score per class)
)