In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm # Displays a progress bar

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, Subset, DataLoader, random_split

In [3]:
# Load the dataset and train, val, test splits
print("Loading datasets...")
# Root folder for the FashionMNIST files. Set the FASHION_DATA_DIR environment
# variable to relocate it on another machine; when it is unset the original
# location is used, so existing downloads keep working.
dataset_path = os.environ.get("FASHION_DATA_DIR", "C:/Users/Admin/Desktop/cse803_hw5")

FASHION_transform = transforms.Compose([
    transforms.ToTensor(), # Transform from [0,255] uint8 to [0,1] float
    transforms.Normalize([0.2859], [0.3530]) # Normalize to zero mean and unit variance
])
FASHION_trainval = datasets.FashionMNIST(
    dataset_path,
    download=True,
    train=True,
    transform=FASHION_transform
)
# First 50000 training images are used for fitting, the remaining 10000 for validation
FASHION_train = Subset(FASHION_trainval, range(50000))
FASHION_val = Subset(FASHION_trainval, range(50000,60000))
FASHION_test = datasets.FashionMNIST(
    dataset_path,
    download=True,
    train=False,
    transform=FASHION_transform
)
print("Done!")

# Create dataloaders
# TODO: Experiment with different batch sizes
BATCH_SIZE = 64
# Only the training loader is shuffled. Validation / test data is iterated in a
# fixed order so that the reported losses are reproducible: the last batch is
# smaller than the others, and with shuffling its contents (and therefore the
# mean of the per-batch losses) would change from one evaluation to the next.
trainloader = DataLoader(FASHION_train, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(FASHION_val, batch_size=BATCH_SIZE, shuffle=False)
testloader = DataLoader(FASHION_test, batch_size=BATCH_SIZE, shuffle=False)

Loading datasets...
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|████████████████████████████████████████| 26421880/26421880 [15:58<00:00, 27564.32it/s]


Extracting C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\train-images-idx3-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████| 29515/29515 [00:00<00:00, 73563.12it/s]


Extracting C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\train-labels-idx1-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|█████████████████████████████████████████| 4422102/4422102 [00:36<00:00, 122735.87it/s]


Extracting C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|███████████████████████████████████████████████████████████| 5148/5148 [00:00<?, ?it/s]

Extracting C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to C:/Users/Admin/Desktop/cse803_hw5\FashionMNIST\raw

Done!





In [28]:
"""
Network class.
"""
class Network(nn.Module):
    """Small convolutional classifier with three conv stages and two linear layers.

    Every stage is ``Conv2d -> ReLU -> 2x2 MaxPool2d -> Dropout``. The
    convolutions use stride 1 with "same" padding, so only the pooling layers
    shrink the feature maps. The first linear layer expects ``4 * 4 * 128``
    features, which is what a 1x28x28 input produces (28 -> 14 -> 7 -> 4; the
    third pooling layer pads by 1).

    Args:
        p_keep: Probability of keeping an activation in the dropout layers.
            The default of 1.0 turns dropout into a no-op.
        k_size: Side length of the square convolution kernels. Must be a
            positive odd number so that ``k_size // 2`` padding preserves the
            spatial size.

    Raises:
        ValueError: If ``k_size`` is not a positive odd integer.
    """

    def __init__(self, p_keep=1.0, k_size=3):
        super().__init__()
        if k_size < 1 or k_size % 2 == 0:
            raise ValueError(
                "k_size must be a positive odd integer, got {}".format(k_size)
            )
        drop_p = 1.0 - p_keep

        # convolutional layers (all three share the same kernel size)
        self.layer1 = self._conv_stage(1, 32, k_size, drop_p)
        self.layer2 = self._conv_stage(32, 64, k_size, drop_p)
        # pool_padding=1 turns the 7x7 maps into 4x4 instead of 3x3
        self.layer3 = self._conv_stage(64, 128, k_size, drop_p, pool_padding=1)

        # linear layers
        self.fc1 = nn.Linear(4 * 4 * 128, 625, bias=True)
        self.fc2 = nn.Linear(625, 10, bias=True)

    @staticmethod
    def _conv_stage(in_channels, out_channels, k_size, drop_p, pool_padding=0):
        """Build one Conv2d -> ReLU -> MaxPool2d -> Dropout stage."""
        return nn.Sequential(
            # k_size // 2 padding keeps height/width unchanged for odd kernels
            nn.Conv2d(in_channels, out_channels, kernel_size=k_size, stride=1, padding=k_size // 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=pool_padding),
            nn.Dropout(p=drop_p),
        )

    def forward(self,x):
        """Return the 10 unnormalised class scores for each image in ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        # functional ReLU: no module is constructed on every forward pass
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [29]:
"""
Hyperparameters.
"""
# TODO: choose an appropriate number of training epochs
num_epoch = 15

# Pick the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the network, then move its parameters onto the chosen device
model = Network()
model = model.to(device)

# Loss layer used for both training and evaluation
criterion = nn.CrossEntropyLoss()

# TODO: Modify the line below, experiment with different
# optimizers and parameters (such as learning rate)
# The optimizer receives the model's trainable parameters;
# weight_decay is the L2 regularization strength
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-4)

In [30]:
"""
Train & evaluation functions.
"""
def train(model, train_loader, val_loader, num_epoch = 10): # Train the model
    """Fit ``model`` for ``num_epoch`` passes over ``train_loader``.

    Uses the notebook-level ``device``, ``optimizer`` and ``criterion``
    objects. After every epoch the model is scored on ``val_loader`` through
    ``evaluate``.

    Returns:
        A ``(train_losses, val_losses)`` pair of lists with one entry per
        epoch. Each training entry is the mean of that epoch's per-batch
        losses; each validation entry is the loss reported by ``evaluate``.
    """
    print("Start training...")
    train_losses, val_losses = [], []

    for i in range(num_epoch):
        # evaluate() leaves the model in eval mode, so re-enable training mode
        model.train()
        batch_losses = []

        for inputs, targets in tqdm(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            # Drop gradients accumulated by the previous step
            optimizer.zero_grad()

            # Forward pass (runs Network.forward) followed by the loss
            loss = criterion(model(inputs), targets)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        # Average training loss over the batches of this epoch
        train_loss = np.mean(batch_losses)
        train_losses.append(train_loss)

        # Validation loss (the accuracy is printed inside evaluate)
        val_loss = evaluate(model, val_loader)[1]
        val_losses.append(val_loss)

        # Report the average losses for this epoch
        print(f"Epoch {i+1}: train_loss={train_loss}, val_loss={val_loss}")

    print("Done!")
    return train_losses, val_losses

def evaluate(model, val_loader): # Evaluate accuracy on validation / test set
    """Score ``model`` on every batch of ``val_loader`` without updating it.

    Uses the notebook-level ``device`` and ``criterion`` objects, switches the
    model to evaluation mode and prints the resulting accuracy.

    Returns:
        ``(acc, val_loss)`` where ``acc`` is the number of correct predictions
        divided by ``len(val_loader.dataset)`` and ``val_loss`` is the mean of
        the per-batch losses.
    """
    model.eval()
    batch_losses = []
    n_correct = 0

    # Gradients are not needed for scoring, so skip tracking them
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            batch_losses.append(criterion(logits, targets).item())

            # Predicted class = index of the largest score in each row
            hits = logits.argmax(dim=1).eq(targets)
            n_correct += hits.sum().item()

    # Fraction of correctly classified samples
    acc = n_correct / len(val_loader.dataset)

    # Loss averaged over batches
    val_loss = np.mean(batch_losses)

    print("Evaluation accuracy: {}".format(acc))
    return acc, val_loss

In [None]:
"""
Train and evaluate model.
"""
# Fit the model and keep the per-epoch loss histories
train_losses, val_losses = train(
    model,
    trainloader,
    valloader,
    num_epoch=num_epoch,
)

# Score the trained model on the test split
print("Evaluate on test set")
test_acc, test_loss = evaluate(model, testloader)

Start training...


  4%|██▎                                                   | 34/782 [00:02<00:56, 13.17it/s]

In [27]:
"""
Analyze training & evaluation results.
"""
# Show the per-epoch training and validation loss lists returned by train()
print(train_losses, val_losses)

[0.439466334107663] [0.3011674018707245]
