In [70]:
import torch
import os
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
"""
StepLR->step learning rate->every step what happen with multiplied with some constant term so that it will keep on reducing the the overall learning rates once its starts approaching to the minima. 
"""
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch

In [71]:
class Config:
    EPOCHS=10
    BATCH_SIZE=32
    LR=0.01
    DEVICE="cuda" if torch.cuda.is_available() else "cpu"
    GAMMA=0.7
    """
    GAMM value associated with StepLR-> after certain number of steps this GAMMA value will be multiplied with the learning rate LR. So that further reducing the LR
    So lets say after 10 number of steps we can multiply 0.7 with LR=0.001. So this will give us further reduced value. Learning rate actually gets slowed down once we ieterate over the EPOCHS.
    
    """
    SEED=42
    LOG_INTERVAL=10
    """
    LOG_INTERval->how many intervals you want to log the output or print the outcome
    """
    TEST_BATCH_SIZE=1000
    """
    TEST_BATCH_SIZE->we can keep different batch size for the training and testing. So for testing we keep 1000 data point at once.
    """
    DRY_RUN=True
    """
    DRY_RUN->It can be True or False. Just to check wheter you are going thrugh entire training or the function that you implemented just once without iterating over the entire epochs
    """
    

In [72]:
config=Config()

# Implement the NN

In [73]:
class ConvNet(nn.Module):
    def __init__(self):
        """
        import the super class all the possible constructor are there or the functions which are used or arguments which can be initialized here as a parent Networks.
        
        """
        super(ConvNet, self).__init__()
        self.conv1=nn.Conv2d(1,32,3,1)#1is image. 32 is filter size, 3 is kernel size, 1 is stride
        self.conv2=nn.Conv2d(32,64,3,1)
        self.dropout1=nn.Dropout(0.25)
        self.dropout2=nn.Dropout(0.50)
        self.fc1=nn.Linear(9216,128)
        self.fc2=nn.Linear(128,10)
    def forward(self,x):
        """
        connect with convolution layer one
        
        """
        x=self.conv1(x)
        """
        after convolution we are going to use relu activation function. relu will take the x and it will pass to the next convolution layer
        
        """
        x=F.relu(x)
        x=self.conv2(x)
        x=F.relu(x)
        x=F.max_pool2d(x,2)
        x=self.dropout1(x)
        x=torch.flatten(x,1)# 1 is dimension here
        x=self.fc1(x)
        x=F.relu(x)
        x=self.dropout2(x)
        x=self.fc2(x)
        """
        log_softmax will take the target value. thats mean whatever is the value is coming the raw value is  saying 
        apply a softmax and we are passing x and also dimension
        """
        output=F.log_softmax(x, dim=1)
        return output
        
        

# Define the training phase

In [86]:
def train_(config, model, device,train_loader, optimizer, epochs):
    """
    we will take the model and we will call the train
    """
    model.train()
    for batch_idx, (data,target) in enumerate(train_loader):
        # bring the data into our device
        data, target=data.to(device), target.to(device)
        """
        before using optimizer we have to zero the gradient if there is any residual gradient from the past iteration.
        """
        optimizer.zero_grad()
        # Calculate the output or prediction outcomes. It will take the model and then it will take the data and then pass on the outcome
        pred=model(data)
        #calculate the loss with cross_entropy() which will take the actual value thats is the target and predicted value which is the prediction value
        
        loss=F.cross_entropy(pred, target)
        #once the loss is calculated we actully take a differentiation of it or gradient of it
        loss.backward()
        # after calculating all the gradient we can go for the optimizer.step->that means next step of the optimizer can be taken that is the weight update
        optimizer.step()
        if batch_idx % config.LOG_INTERVAL==0:
            print(f"train epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 *batch_idx / len(train_loader):.0f})]\t Loss:  {loss.item():.6f}")
            if config.DRY_RUN:
                break

# Define the testing phase

In [87]:
def test(model,device, test_loader):
    pass

In [88]:
torch.manual_seed(config.SEED)

<torch._C.Generator at 0x2255f52c090>

In [89]:
train_kwargs={"batch_size":config.BATCH_SIZE}
test_kwargs={"batch_size":config.TEST_BATCH_SIZE}

In [90]:
if config.DEVICE=="cuda":
    """
    cuda_kwargs->apply what are the number of workers that means how many cpu members which will be enagaged
    """
    cuda_kwargs={"num_workers":1,"pin_memory":True,"shupple":True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
    

In [91]:
transform = transforms.Compose(
    [transforms.ToTensor()]
)

In [92]:
train=datasets.MNIST("../demo", train=True, download=True, transform=transforms)
test=datasets.MNIST("../demo", train=False, download=True, transform=transforms)
train_loader=torch.utils.data.DataLoader(train, **train_kwargs)
test_loader=torch.utils.data.DataLoader(test, **test_kwargs)

In [93]:
model=ConvNet().to(config.DEVICE)
scripted_model = torch.jit.script(model)
print(scripted_model)
"""
we have to pass the all the parameters of the model because adam is an optimizer which works on the model parameters.
so model parameters are actually get updated here. All the trainable parameters will be update here.
"""
optimizer=torch.optim.Adam(model.parameters(), lr=config.LR)
scheduler=StepLR(optimizer, step_size=1,gamma=config.GAMMA)

RecursiveScriptModule(
  original_name=ConvNet
  (conv1): RecursiveScriptModule(original_name=Conv2d)
  (conv2): RecursiveScriptModule(original_name=Conv2d)
  (dropout1): RecursiveScriptModule(original_name=Dropout)
  (dropout2): RecursiveScriptModule(original_name=Dropout)
  (fc1): RecursiveScriptModule(original_name=Linear)
  (fc2): RecursiveScriptModule(original_name=Linear)
)


In [94]:
# Training loop

for epoch in range(1,config.EPOCHS+1):
    train_(config, model, config.DEVICE,train_loader, optimizer, epoch)
    """
    one step of training is done we have to take the scheduler step
    """
    scheduler.step()# every step after the training is passed we are going to take scheduler step. That means we will be multiplying with the gamma
    

TypeError: 'module' object is not callable

In [95]:
for epoch in range(1, config.EPOCHS + 1):
    train_(config, model, config.DEVICE, train_loader, optimizer, epoch)
    scheduler.step()

TypeError: 'module' object is not callable

In [96]:
# Changed from unsqeeze to test_datapoints[0].reshape(1,1,28,28)
with mlflow.start_run() as run:
    mlflow.pytorch.log_model(model, "model")# save the model in mlflow
    model_path = mlflow.get_artifact_uri("model")# load the model in mlflow for the pytorch
    loaded_torch_model = mlflow.pytorch.load_model(model_path)# load the model again
    model.eval()#for evaluationg a model
    with torch.no_grad():# we are not going to calculate gradient because while evaluating you are not suppose to update the weightsor trainable parameters
        test_datapoints, test_target = next(iter(test_loader))
        pred = model(test_datapoints[1].reshape(1,1,28,28).to(config.DEVICE))
        actual = test_target[1].item()
        predicted = torch.argmax(pred).item()
        print(f"actual: {actual}, predicted: {predicted}")



TypeError: 'module' object is not callable