## Optimizing Model Parameters

### Now that we have a model and data, it’s time to train, validate, and test our model by optimizing its parameters on our data. Training a model is an iterative process: in each iteration the model makes a guess about the output, calculates the error in its guess (the loss), collects the derivatives of the error with respect to its parameters (as we saw in the previous section), and optimizes these parameters using gradient descent.

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
# FashionMNIST splits. Both are stored under ./data, requested with
# download=True, and have ToTensor() applied to each image.
# `train` selects the split: True for training, False for test.
train_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

In [3]:
# Wrap each split in a DataLoader that yields mini-batches of 32 samples.
train_dataloader = DataLoader(dataset=train_data, batch_size=32)
test_dataloader = DataLoader(dataset=test_data, batch_size=32)

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 512 -> 64 -> 10.

    Inputs are flattened, then passed through four linear layers with a
    ReLU between consecutive layers. The final layer has no activation, so
    ``forward`` returns raw logits (one row of 10 values per sample).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Consecutive pairs of widths define the linear layers, in order.
        widths = (28 * 28, 512, 512, 64, 10)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.pop()  # the output layer is not followed by an activation
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced for the batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))


# Single module-level instance used by the cells that follow.
model = NeuralNetwork()
            

In [5]:
# Hyperparameters used by the training cells below.
learning_rate = 1e-3  # step size handed to the optimizer
batch_size = 32       # samples per mini-batch
epochs = 10           # number of full passes of the train/test cycle

In [6]:
# Cross-entropy loss on the model's logits, minimised with plain SGD over
# all of the model's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
)

In [7]:
# training loop 

def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and print progress.

    For every batch the model's predictions are scored with ``loss_fn``,
    the loss is back-propagated, the optimizer takes a step, and the
    gradients are cleared for the next batch. Every 100th batch (starting
    with the first) a progress line is printed.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; must expose ``.dataset``
            so the total sample count can be reported.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable taking ``(pred, y)`` and returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    # Count the samples actually processed instead of deriving the number
    # from a module-level ``batch_size``, which may not match this loader.
    seen = 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 100 == 0:
            print(f"Loss->{loss.item()} current->{seen} size->{size}")

In [8]:
# testing loop (validation)

def test(dataloader,model,loss_fn):
    model.eval()
    size=len(dataloader.dataset)
    num_batches=len(dataloader)
    test_loss,correct=0,0
    
    with torch.no_grad():
        for X,y in dataloader:
            pred=model(X)
            test_loss+=loss_fn(pred,y)
            correct+=(pred.argmax(1)==y).type(torch.float).sum().item()
            
    test_loss=test_loss/num_batches
    correct=correct/size
    
    print(f"test accuracy is {100*correct}% and average loss is {test_loss}")        
            

In [9]:
# Alternate one training pass with an evaluation pass for each epoch.
# Evaluation uses the held-out test split; scoring on the training loader
# would report how well the model fits data it has already been trained on.
for t in range(epochs):
    print(f"epoch {t}---------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!!!")

epoch 0---------------------------
Loss->2.292773962020874 current->32 size->60000
Loss->2.2895328998565674 current->3232 size->60000
Loss->2.2935073375701904 current->6432 size->60000
Loss->2.3061904907226562 current->9632 size->60000
Loss->2.3014676570892334 current->12832 size->60000
Loss->2.2843220233917236 current->16032 size->60000
Loss->2.2888054847717285 current->19232 size->60000
Loss->2.2803986072540283 current->22432 size->60000
Loss->2.288588762283325 current->25632 size->60000
Loss->2.263746738433838 current->28832 size->60000
Loss->2.2708828449249268 current->32032 size->60000
Loss->2.2709312438964844 current->35232 size->60000
Loss->2.2794458866119385 current->38432 size->60000
Loss->2.2631986141204834 current->41632 size->60000
Loss->2.261119842529297 current->44832 size->60000
Loss->2.257246971130371 current->48032 size->60000
Loss->2.2695815563201904 current->51232 size->60000
Loss->2.250166654586792 current->54432 size->60000
Loss->2.234530448913574 current->57632 si