# APS360 Font Project 
## By: Giro and Sagnik

In [0]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import os
import random

# Mount Google Drive at /content/drive (Colab only); the dataset root used by
# get_data_loader is a path underneath this mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
class LargeNet(nn.Module):
    """Small CNN that maps a 3-channel image to 9 raw class scores.

    Two conv(5x5) + max-pool(2x2) stages feed three fully connected layers.
    ``fc1`` is sized for a 10 x 53 x 53 feature map, which is what the two
    stages produce for a 224 x 224 input ((224-4)/2 = 110, (110-4)/2 = 53).
    """

    def __init__(self):
        super(LargeNet, self).__init__()
        self.name = "large"  # used by get_model_name() when saving checkpoints
        self.conv1 = nn.Conv2d(3, 5, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(5, 10, 5)
        self.fc1 = nn.Linear(10 * 53 * 53, 6200)
        self.fc2 = nn.Linear(6200, 128)
        self.fc3 = nn.Linear(128, 9)

    def forward(self, x):
        """Return logits of shape [batch_size, 9] for a batch of images.

        Args:
            x: Tensor of shape [batch_size, 3, H, W]; H and W must yield a
               10 x 53 x 53 feature map (e.g. 224 x 224).
        Raises:
            RuntimeError: if the flattened feature size does not match fc1.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension fixed means a
        # wrongly sized image fails loudly at fc1 instead of being silently
        # reinterpreted as extra batch rows (as view(-1, 10*53*53) would do).
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No squeeze here: the output is [batch_size, 9], which is the shape
        # nn.CrossEntropyLoss and torch.max(outputs, 1) expect.
        return self.fc3(x)

In [0]:
###############################################################################
# Training
def get_model_name(name, batch_size, learning_rate, epoch):
    """Build the checkpoint file name for one training configuration.

    Args:
        name: Model name (e.g. the network's ``name`` attribute)
        batch_size: Batch size used for training
        learning_rate: Learning rate used for training
        epoch: Epoch index the checkpoint belongs to
    Returns:
        A string with each hyperparameter name and value concatenated
    """
    template = "model_{0}_bs{1}_lr{2}_epoch{3}"
    fields = (name, batch_size, learning_rate, epoch)
    return template.format(*fields)

def normalize_label(labels):
    """
    Min-max rescale a label tensor so its smallest value maps to 0 and its
    largest to 1.

    Args:
        labels: a 1D tensor containing two possible scalar values
    Returns:
        A tensor of the same shape with values rescaled to the 0/1 range
    """
    lowest = torch.min(labels)
    span = torch.max(labels) - lowest
    return (labels - lowest) / span

def evaluate(net, loader, criterion):
    """ Evaluate the network on a data set without tracking gradients.

     The network's train/eval mode is left as the caller set it.

     Args:
         net: PyTorch neural network object
         loader: Iterable yielding (inputs, labels) batches, e.g. a PyTorch
                 data loader for the validation set
         criterion: The loss function
     Returns:
         err: A scalar for the avg classification error over all samples
         loss: A scalar for the loss averaged over batches
     Raises:
         ValueError: if the loader yields no samples
     """
    total_loss = 0.0
    total_err = 0
    num_samples = 0
    num_batches = 0
    # Evaluation never calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # Predicted class = index of the largest score in each row.
            _, predicted = torch.max(outputs, 1)
            total_err += (predicted != labels).sum().item()
            total_loss += loss.item()
            num_samples += len(labels)
            num_batches += 1
    if num_samples == 0:
        # Previously surfaced as an unrelated ZeroDivisionError.
        raise ValueError("evaluate() received a loader with no samples")
    err = float(total_err) / num_samples
    loss = float(total_loss) / num_batches
    return err, loss

###############################################################################
# Training Curve
def plot_training_curve(path):
    """ Plots the training curve for a model run, given the csv files
    containing the train/validation error/loss.

    Reads ``{path}_train_err.csv``, ``{path}_val_err.csv``,
    ``{path}_train_loss.csv`` and ``{path}_val_loss.csv`` and shows two
    figures: error vs. epoch and loss vs. epoch.

    Args:
        path: The base path of the csv files produced during training
    """
    import matplotlib.pyplot as plt

    # ndmin=1 keeps a single-epoch run as a length-1 array; without it
    # np.loadtxt returns a 0-d array and len() below raises TypeError.
    train_err = np.loadtxt("{}_train_err.csv".format(path), ndmin=1)
    val_err = np.loadtxt("{}_val_err.csv".format(path), ndmin=1)
    train_loss = np.loadtxt("{}_train_loss.csv".format(path), ndmin=1)
    val_loss = np.loadtxt("{}_val_loss.csv".format(path), ndmin=1)

    epochs = range(1, len(train_err) + 1)  # x axis: epochs numbered from 1
    figures = (
        ("Train vs Validation Error", "Error", train_err, val_err),
        ("Train vs Validation Loss", "Loss", train_loss, val_loss),
    )
    for title, ylabel, train_curve, val_curve in figures:
        plt.title(title)
        plt.plot(epochs, train_curve, label="Train")
        plt.plot(epochs, val_curve, label="Validation")
        plt.xlabel("Epoch")
        plt.ylabel(ylabel)
        plt.legend(loc='best')
        plt.show()


def get_data_loader(batch_size, root='/content/drive/My Drive/fontData/fontDataSmallSet'):
    """ Shuffle the font image folder and split it 60% / 20% / 20% into
    training, validation and test partitions.

    Despite the name, this returns sample lists, not DataLoader objects;
    that matches what the function has always returned to its callers.

    The previous implementation built three loaders over ONE shared dataset
    and reassigned ``dataset.imgs`` three times; each reassignment sliced
    the already-truncated list, so all three return values ended up being
    the same empty list. The split is now taken from a single shuffled copy.

    NOTE(review): callers that need image tensors should wrap index ranges
    in torch.utils.data.Subset / DataLoader instead -- confirm intended use.

    Args:
        batch_size: Unused; kept so existing calls keep working.
        root: Directory laid out for torchvision.datasets.ImageFolder
              (one sub-directory per class).
    Returns:
        train_samples: first 60% of the shuffled samples
        val_samples: next 20% of the shuffled samples
        test_samples: remaining samples
        Each is a list of the entries found in ``ImageFolder.imgs``.
    """
    dataset = torchvision.datasets.ImageFolder(root=root)

    # Shuffle a copy so the dataset object itself is never mutated.
    samples = list(dataset.imgs)
    random.shuffle(samples)

    train_i = int(0.6 * len(samples))
    val_i = int(0.8 * len(samples))

    return samples[:train_i], samples[train_i:val_i], samples[val_i:]

# Build the train/validation/test partitions of the font dataset.
# NOTE(review): the name `train` is rebound by the `train()` function defined
# in a later cell, so this variable is lost once that cell runs -- consider
# renaming one of the two.
#train,val,test = get_data_loader(32)
train,val,test = get_data_loader(16)


FileNotFoundError: ignored

In [0]:
train

In [0]:
def train(model, data, batch_size=4, num_epochs=100, learning_rate=0.01, small=False):
    # Fixed PyTorch random seed for reproducible result
    torch.manual_seed(1000) 
    # load batches of the datasets
    train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size)
    # Define the Loss function and optimizer
    # The loss function will be Cross Entropy
    # Optimizer will adam
    criterion = nn.CrossEntropyLoss()     
    optimizer=optim.Adam(model.parameters(), lr=learning_rate)

    ########################################################################
    # Set up some numpy arrays to store the training/test loss/erruracy
    train_err = np.zeros(num_epochs)
    train_loss = np.zeros(num_epochs)
    val_err = np.zeros(num_epochs)
    val_loss = np.zeros(num_epochs)
    ########################################################################
    # Train the network
    # Loop over the data iterator and sample a new batch of training data
    # Get the output from the network, and optimize our loss function.
    start_time = time.time()
    
    iters, losses, train_acc, val_acc = [], [], [], []
    
    # training
    n = 0 # the number of iterations
    for epoch in range(num_epochs): # loop over the dataset multiple times
        for imgs, labels in iter(train_loader):
            #print("labels: ", labels)
            out = model(imgs)             # forward pass
            #print("outs: ", out)
            loss = criterion(out, labels) # compute the total loss
           
            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch

            # save the current training information
            iters.append(n)
       
            losses.append(float(loss)/batch_size)             # compute *average* loss
            #
            train_acc.append(get_accuracy(model, True, batch_size,small)) # compute training accuracy 
            val_acc.append(get_accuracy(model, False, batch_size,small))  # compute validation accuracy
            n += 1
        print("-")
        
        #creating setpoints every epouch
        model_path = get_model_name(model.name, batch_size, learning_rate, epoch)
        torch.save(model.state_dict(), model_path)