### Scratch Optimizer

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import torch
from torch import nn
from torch.nn import functional as F
from sklearn.model_selection import train_test_split

In [2]:
class MLP(nn.Module):
    """Multi-layer perceptron with a single ReLU hidden layer.

    Args:
        num_in: size of each input feature vector.
        num_hidden: number of units in the hidden layer.
        num_out: number of output units.
    """

    def __init__(self, num_in, num_hidden, num_out):
        super().__init__()
        # Layers are created in the same order (l1, then l2) so that parameter
        # initialisation consumes the RNG identically.
        self.l1 = nn.Linear(num_in, num_hidden)
        self.l2 = nn.Linear(num_hidden, num_out)

    def forward(self, X):
        """Apply l1, ReLU, then l2; no activation is applied to the final output."""
        hidden = F.relu(self.l1(X))
        return self.l2(hidden)
        

In [3]:
# Train the MLP on the scikit-learn digits dataset with a hand-written SGD update.
dataset = datasets.load_digits()
images = dataset['images']
target = dataset['target']

X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)

# normalization: statistics come from the training split only and are reused
# for the validation split
x_mean = X_train.mean()
x_std = X_train.std()
X_train = (X_train - x_mean)/x_std
X_val = (X_val - x_mean)/x_std

# flatten every image into a row of 64 features
X_train = torch.tensor(X_train.reshape(-1,64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1,64), dtype=torch.float32)
# F.cross_entropy expects int64 class indices; pin the dtype so it does not
# depend on the integer dtype of the NumPy label array.
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

epochs = 30
num_feature = 64
num_hidden = 30
num_class = 10

learning_rate = 0.03

batch_size = 30
# ceil so that the final, possibly smaller, batch is still visited
num_batches = np.ceil(len(y_train)/batch_size).astype(int)
model = MLP(num_feature, num_hidden, num_class)

train_losses = []
val_losses = []
val_accuracies = []

for epoch in range(epochs):

    # new sample order every epoch
    shuffled_indices = np.random.permutation(len(y_train))
    running_loss = 0

    for i in range(num_batches):

        start = i * batch_size
        end = start + batch_size
        batch_indices = shuffled_indices[start:end]

        y = y_train[batch_indices]
        X = X_train[batch_indices]

        preds = model(X)
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # plain SGD step; no_grad keeps the in-place update out of autograd
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate *param.grad

        # clear gradients so the next backward() does not accumulate onto them
        model.zero_grad()


    # validation
    with torch.no_grad():
        preds_val = model(X_val)
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=1)==y_val) / y_val.shape[0]


    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    # record this epoch's accuracy as a float (the list used to be appended to itself)
    val_accuracies.append(val_accuracy.item())

    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')



epoch: 0: train error: 2.0988816569248834, validation error: 1.8303459882736206, validation accuracy: 0.7166666388511658
epoch: 1: train error: 1.5495978842178981, validation error: 1.2628381252288818, validation accuracy: 0.8083333373069763
epoch: 2: train error: 1.0355976211527984, validation error: 0.8267065286636353, validation accuracy: 0.8805555701255798
epoch: 3: train error: 0.7067110873758793, validation error: 0.5882936716079712, validation accuracy: 0.8861111402511597
epoch: 4: train error: 0.5270976231743892, validation error: 0.45997074246406555, validation accuracy: 0.9083333611488342
epoch: 5: train error: 0.4212843493248026, validation error: 0.3806031048297882, validation accuracy: 0.9166666865348816
epoch: 6: train error: 0.35396986672033864, validation error: 0.3254891633987427, validation accuracy: 0.9222221970558167
epoch: 7: train error: 0.30587718232224387, validation error: 0.289912611246109, validation accuracy: 0.9222221970558167
epoch: 8: train error: 0.27074

### Optimizer class

```python
with torch.no_grad():
    for param in model.parameters():
        param -= learning_rate *param.grad

model.zero_grad()
```

In [4]:
class Optimizer():
    """Minimal stochastic-gradient-descent optimizer.

    Args:
        parameters: iterable of tensors to update (e.g. ``model.parameters()``).
            It is materialised into a list so that a generator can be walked
            again on every ``step`` / ``zero_grad`` call.
        lr: learning rate multiplied with each gradient. Defaults to 0.03.
    """

    def __init__(self, parameters, lr=0.03):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Update every parameter in place: ``param -= lr * param.grad``.

        Parameters whose ``grad`` is ``None`` (nothing was back-propagated to
        them) are left untouched instead of raising a ``TypeError``.
        """
        # no_grad keeps the in-place update from being tracked by autograd
        with torch.no_grad():
            for param in self.parameters:
                if param.grad is None:
                    continue
                param -= self.lr*param.grad

    def zero_grad(self):
        """Drop the stored gradients by resetting each ``param.grad`` to ``None``."""
        for param in self.parameters:
            param.grad = None
        

In [5]:
# Train the MLP on the scikit-learn digits dataset using the custom Optimizer class.
dataset = datasets.load_digits()
images = dataset['images']
target = dataset['target']

X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)

# normalization: statistics come from the training split only and are reused
# for the validation split
x_mean = X_train.mean()
x_std = X_train.std()
X_train = (X_train - x_mean)/x_std
X_val = (X_val - x_mean)/x_std

# flatten every image into a row of 64 features
X_train = torch.tensor(X_train.reshape(-1,64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1,64), dtype=torch.float32)
# F.cross_entropy expects int64 class indices; pin the dtype so it does not
# depend on the integer dtype of the NumPy label array.
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

epochs = 30
num_feature = 64
num_hidden = 30
num_class = 10

learning_rate = 0.03

batch_size = 30
# ceil so that the final, possibly smaller, batch is still visited
num_batches = np.ceil(len(y_train)/batch_size).astype(int)

model = MLP(num_feature, num_hidden, num_class)
optimizer = Optimizer(model.parameters(), learning_rate)

train_losses = []
val_losses = []
val_accuracies = []

for epoch in range(epochs):

    # new sample order every epoch
    shuffled_indices = np.random.permutation(len(y_train))
    running_loss = 0

    for i in range(num_batches):

        start = i * batch_size
        end = start + batch_size
        batch_indices = shuffled_indices[start:end]

        y = y_train[batch_indices]
        X = X_train[batch_indices]

        preds = model(X)
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # apply the update, then clear gradients so the next backward()
        # does not accumulate onto them
        optimizer.step()
        optimizer.zero_grad()


    # validation
    with torch.no_grad():
        preds_val = model(X_val)
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=1)==y_val) / y_val.shape[0]


    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    # record this epoch's accuracy as a float (the list used to be appended to itself)
    val_accuracies.append(val_accuracy.item())

    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')

epoch: 0: train error: 2.150591194629669, validation error: 1.948250651359558, validation accuracy: 0.43888887763023376
epoch: 1: train error: 1.6810334051648776, validation error: 1.402390718460083, validation accuracy: 0.7111111283302307
epoch: 2: train error: 1.1431196344395478, validation error: 0.8884793519973755, validation accuracy: 0.875
epoch: 3: train error: 0.7510314037402471, validation error: 0.6075643301010132, validation accuracy: 0.8916666507720947
epoch: 4: train error: 0.5428537577390671, validation error: 0.46119800209999084, validation accuracy: 0.8972222208976746
epoch: 5: train error: 0.4269363411391775, validation error: 0.37742623686790466, validation accuracy: 0.9138888716697693
epoch: 6: train error: 0.35368542714665335, validation error: 0.3194444477558136, validation accuracy: 0.9194444417953491
epoch: 7: train error: 0.3048794260248542, validation error: 0.28259915113449097, validation accuracy: 0.925000011920929
epoch: 8: train error: 0.26772382110357285, 

### torch.optim

In [6]:
from torch import optim

In [7]:
# Train the MLP on the scikit-learn digits dataset using torch.optim.SGD.
dataset = datasets.load_digits()
images = dataset['images']
target = dataset['target']

X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)

# normalization: statistics come from the training split only and are reused
# for the validation split
x_mean = X_train.mean()
x_std = X_train.std()
X_train = (X_train - x_mean)/x_std
X_val = (X_val - x_mean)/x_std

# flatten every image into a row of 64 features
X_train = torch.tensor(X_train.reshape(-1,64), dtype=torch.float32)
X_val = torch.tensor(X_val.reshape(-1,64), dtype=torch.float32)
# F.cross_entropy expects int64 class indices; pin the dtype so it does not
# depend on the integer dtype of the NumPy label array.
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

epochs = 30
num_feature = 64
num_hidden = 30
num_class = 10

learning_rate = 0.03

batch_size = 30
# ceil so that the final, possibly smaller, batch is still visited
num_batches = np.ceil(len(y_train)/batch_size).astype(int)

model = MLP(num_feature, num_hidden, num_class)
opt = optim.SGD(params=model.parameters(), lr=learning_rate) # SGD : stochastic gradient descent algorithm

train_losses = []
val_losses = []
val_accuracies = []

for epoch in range(epochs):

    # new sample order every epoch
    shuffled_indices = np.random.permutation(len(y_train))
    running_loss = 0

    for i in range(num_batches):

        start = i * batch_size
        end = start + batch_size
        batch_indices = shuffled_indices[start:end]

        y = y_train[batch_indices]
        X = X_train[batch_indices]

        preds = model(X)
        loss = F.cross_entropy(preds, y)
        loss.backward()
        running_loss += loss.item()

        # apply the update, then clear gradients so the next backward()
        # does not accumulate onto them
        opt.step()
        opt.zero_grad()


    # validation: run once per epoch, after the last batch update. It used to
    # sit inside the batch loop, re-evaluating the whole validation set after
    # every mini-batch even though only the final values were kept.
    with torch.no_grad():
        preds_val = model(X_val)
        val_loss = F.cross_entropy(preds_val, y_val)
        val_accuracy = torch.sum(torch.argmax(preds_val, dim=1)==y_val) / y_val.shape[0]


    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    # record this epoch's accuracy as a float (the list used to be appended to itself)
    val_accuracies.append(val_accuracy.item())

    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}')

epoch: 0: train error: 2.1434737046559653, validation error: 1.9405169486999512, validation accuracy: 0.5138888955116272
epoch: 1: train error: 1.7096788783868153, validation error: 1.432951807975769, validation accuracy: 0.7222222089767456
epoch: 2: train error: 1.2081543480356534, validation error: 0.9521835446357727, validation accuracy: 0.8638888597488403
epoch: 3: train error: 0.8216578053931395, validation error: 0.6471744179725647, validation accuracy: 0.8888888955116272
epoch: 4: train error: 0.5921219655623039, validation error: 0.48532405495643616, validation accuracy: 0.9027777910232544
epoch: 5: train error: 0.45843809470534325, validation error: 0.38829079270362854, validation accuracy: 0.9083333611488342
epoch: 6: train error: 0.3731627017259598, validation error: 0.32477113604545593, validation accuracy: 0.9138888716697693
epoch: 7: train error: 0.317837620464464, validation error: 0.281171053647995, validation accuracy: 0.9277777671813965
epoch: 8: train error: 0.275716