<a href="https://colab.research.google.com/github/RexSword/1112-New-Learning-Algorithm/blob/main/hw3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HW3
## weight-tuning_LG

Model 6 \
hidden nodes: 11 \
epochs: 300 \
init: xavier \
active: relu \
optimize: sgd \
schedule: None \
weight decay: 0.0

## Define the functions

In [87]:
import torch
from torch import nn, optim, Generator, backends
from torch.utils.data import DataLoader, Dataset, random_split
from typing import Iterable, Callable, Type, Optional, Union, Tuple, List

In [88]:
from operator import mul

def product(nums: Iterable[Type], func: Callable[[Type, Type], Type] = mul):
    def _product(nums):
        nonlocal func
        if len(nums) == 1:
            return nums[0]
        return func(nums[-1], _product(nums[:-1]))
    try:
        return _product(nums)
    except Exception as e:
        raise e

### Model

In [89]:
class TwoLayerNetwork(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_classes: int, init_method: Callable, active_func: nn.modules.module.Module) -> None:
        super(TwoLayerNetwork, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # full connected first layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        # activation
        self.active_func = active_func()
        # initialize
        for param in self.parameters():
            init_method(param)
        # full connected second layer
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.active_func(out)
        out = self.fc2(out)
        return out


### Training_LG

In [90]:
from array import array

def train(model: TwoLayerNetwork, opt: nn.Module, device: str, epochs: Union[int, float], learning_rate: float, trainloader: DataLoader, valloader: DataLoader, criterion: nn.modules.loss._Loss, sched: optim.lr_scheduler._LRScheduler, weight_decay: float, learning_goal: float):
    """
    Args:
        learning goal: the desire ratio of [validation loss / initiate validation loss] to early stop
    """
    if epochs < 1:
        raise ValueError("Invalid epoch!!")

    model.to(device)
    optimizer = opt(model.parameters(), lr=learning_rate,
                    weight_decay=weight_decay)
    scheduler = sched(optimizer) if sched else None
    history = []

    epoch = 0
    # Train the model
    while epoch < epochs:
        train_loss = 0.0
        train_correct = 0
        model.train()
        for X, y in trainloader:
            X = X.view(-1, model.input_size).to(device)
            y = y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * X.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == y).sum().item()
        train_loss /= len(trainloader.dataset)
        train_accuracy = train_correct / len(trainloader.dataset)

        # Validate the model
        val_loss = 0.0
        val_correct = 0
        model.eval()
        with torch.no_grad():
            for X, y in valloader:
                X = X.view(-1, model.input_size).to(device)
                y = y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                val_loss += loss.item() * X.size(0)
                _, predicted = torch.max(outputs.data, 1)
                val_correct += (predicted == y).sum().item()
            val_loss /= len(valloader.dataset)
            val_accuracy = val_correct / len(valloader.dataset)
        if scheduler:
            scheduler.step()
        
        # Log Statics
        history.append((train_loss, train_accuracy, val_loss, val_accuracy))
        # sys.stdout.write('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Val Loss: {:.4f}, Val Accuracy: {:.2f}%\n'
        #       .format(epoch+1, epochs, train_loss, train_accuracy, val_loss, val_accuracy))

        if learning_goal < val_accuracy:
            return history
       
        epoch += 1
        print(epoch)

    return history

### Training_LG_UA

In [91]:
from array import array

def train_UA(model: TwoLayerNetwork, opt: nn.Module, device: str, epochs: Union[int, float], learning_rate: float, lr_border: float, trainloader: DataLoader, valloader: DataLoader, criterion: nn.modules.loss._Loss, sched: optim.lr_scheduler._LRScheduler, weight_decay: float, learning_goal: float):
    """
    Args:
        learning goal: the desire ratio of [validation loss / initiate validation loss] to early stop
    """
    if epochs < 1:
        raise ValueError("Invalid epoch!!")

    model.to(device)
    optimizer = opt(model.parameters(), lr=learning_rate,
                    weight_decay=weight_decay)
    scheduler = sched(optimizer) if sched else None
    history = []

    epoch = 0
    # Train the model
    while epoch < epochs:
        train_loss = 0.0
        train_correct = 0
        model.train()

        cond = True
        best_loss = float("inf")

        for X, y in trainloader:
            X = X.view(-1, model.input_size).to(device)
            y = y.to(device)

            # clear grad buffer for new batch
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()

            while cond:
              optimizer.step()
              if loss.item() < best_loss:
                learning_rate *= 1.2
                best_loss = loss.item()
                break
              else:
                if learning_rate > lr_border:
                  learning_rate *= 0.7
                else:
                  print("lr is {} lower that {}. Stuck in UA. Model needs to be tune.".format(learning_rate, lr_border))
                  return history

              # Update optimizer learning rate
              for param_group in optimizer.param_groups:
                      param_group['lr'] = learning_rate

              outputs = model(X)
              loss = criterion(outputs, y)         
                  
            # add batch loss to epoch total
            train_loss += loss.item() * X.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == y).sum().item()
        
        # count epoch avg loss
        train_loss /= len(trainloader.dataset)
        train_accuracy = train_correct / len(trainloader.dataset)

        # Validate the model
        val_loss = 0.0
        val_correct = 0
        model.eval()
        with torch.no_grad():
            for X, y in valloader:
                X = X.view(-1, model.input_size).to(device)
                y = y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                val_loss += loss.item() * X.size(0)
                _, predicted = torch.max(outputs.data, 1)
                val_correct += (predicted == y).sum().item()
            val_loss /= len(valloader.dataset)
            val_accuracy = val_correct / len(valloader.dataset)
        if scheduler:
            scheduler.step()

        # Log Statics
        history.append((train_loss, train_accuracy, val_loss, val_accuracy))
        # sys.stdout.write('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Val Loss: {:.4f}, Val Accuracy: {:.2f}%\n'
        #       .format(epoch+1, epochs, train_loss, train_accuracy, val_loss, val_accuracy))

        if learning_goal < val_accuracy:
            return history
        # sys.stdout.write('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Val Loss: {:.4f}, Val Accuracy: {:.2f}%\n'
        #       .format(epoch+1, epochs, train_loss, train_accuracy, val_loss, val_accuracy))
        
        epoch += 1
        print(epoch)

    return history

### Testing

In [92]:
def test(model: nn.Module, device: str, testloader: DataLoader):
    val_correct = 0
    model.to(device)
    model.eval()
    with torch.no_grad():
        for X, y in testloader:
            X = X.view(-1, model.input_size).to(device)
            y = y.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            val_correct += (predicted == y).sum().item()
        val_accuracy = val_correct / len(testloader.dataset)
    return val_accuracy

## Dataset

In [93]:
# load pytorch dataset
from torchvision import datasets, transforms

def getPytorchData(train: float = 0.8, remain: float = 0.1):
    """
    Args:
        train: train_amount / total_amount or 1 - valid_amount / total_amount
        remain: reduce data amount to save time
    """
    # preprocess: flatten, normalize, drop 90%, split
    transform = transforms.transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    if 0 >= train or train >= 1:
        raise ValueError()
    if 0 > remain or remain > 1:
        raise ValueError()
    # Split the training set into training and validation sets
    trainset = datasets.FashionMNIST(
        root="./data/", train=True, download=True, transform=transform)
    train_count = int(train * remain * len(trainset))
    valid_count = int((1 - train) * remain * len(trainset))
    if train_count * valid_count == 0:
        raise ValueError()
    datum_size = product(trainset[0][0].size())
    class_amount = len(trainset.classes)
    testset = datasets.FashionMNIST(
        root="./data/", train=False, download=True, transform=transform)
    print(train_count, valid_count, len(testset))
    trainset, valset, _ = random_split(
        trainset, (train_count, valid_count, len(trainset) - train_count - valid_count), Generator().manual_seed(42))
    # Create dataloaders to load the data in batches
    trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
    valloader = DataLoader(valset, batch_size=32, shuffle=True)
    testloader = DataLoader(testset, batch_size=32, shuffle=True)
    return trainloader, valloader, testloader, datum_size, class_amount

## Training process

In [94]:
device = "cuda" if torch.cuda.is_available(
) else "mps" if torch.backends.mps.is_available() else "cpu"

# model spec
trainloader, valloader, testloader, input_size, output_size = getPytorchData()
hidden_size = 11
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()

# set the hyperparameter
init = lambda x: nn.init.xavier_uniform_(tensor=x) if len(x.shape) > 1 else None
active = nn.ReLU
optimize = optim.SGD
schedule = None
weight_decay = 0.0


4800 1199 10000


### weigth-tuning_LG

In [96]:
epochs = float("inf")
learning_goal = 0.8

model = TwoLayerNetwork(input_size, hidden_size,
                        output_size, init, active)
LG_baseline = test(model, device, testloader)
LG_history = train(model, optimize, device, epochs, learning_rate,
                   trainloader, valloader, criterion, schedule, weight_decay, learning_goal)
LG_result = test(model, device, testloader)
print(LG_baseline, LG_history, LG_result, sep="\n")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


KeyboardInterrupt: ignored

### weight-tuning_LG_UA

In [None]:
epochs = float("inf")
learning_goal = 0.8
lr_border = 1e-5


model = TwoLayerNetwork(input_size, hidden_size,
                        output_size, init, active)
LG_UA_baseline = test(model, device, testloader)
LG_UA_history = train_UA(model, optimize, device, epochs, learning_rate, lr_border,
                   trainloader, valloader, criterion, schedule, weight_decay, learning_goal)
LG_UA_result = test(model, device, testloader)
print(LG_UA_baseline, LG_UA_history, LG_UA_result, sep="\n")