In [119]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from datetime import datetime
from typing import Sequence
from torch.utils.data import random_split
from d2l import torch as d2l

torch.manual_seed(123)



<torch._C.Generator at 0x7f9849218c50>

In [121]:
train_data = torch.load("localization_train.pt") 
val_data = torch.load("localization_val.pt")
test_data= torch.load("localization_test.pt")



In [123]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    
    n_batch = len(train_loader)
    
    # We'll store there the training loss for each epoch
    losses_train = []
    
    # Set the network in training mode
    model.train()
    
    # Re-initialize gradients, just in case the model has been inappropriately 
    # manipulated before the training
    optimizer.zero_grad(set_to_none=True)
    
    for epoch in range(1, n_epochs + 1): 
        
        # Training loss for the current epoch
        loss_train = 0
        


        # Loop over our dataset (in batches the data loader creates for us)
        for imgs, labels in train_loader:
            
            
            #print("hello")
            
            # Feed a batch into our model
            outputs = model(imgs)
            
            # Compute the loss we wish to minimize 
            # Note that by default, it is the mean loss that is computed
            # (so entire_batch_loss / batch_size)
            
            
            loss = loss_fn(labels, outputs)
        
                
            #print(loss)
            
            # Perform the backward step. That is, compute the gradients of all parameters we want the network to learn
            loss.backward()
            
            # Update the model
            optimizer.step() 
            
            # Zero out gradients before the next round (or the end of training)
            optimizer.zero_grad() 

            # Update loss for this epoch
            # It is important to transform the loss to a number with .item()
            loss_train += loss.item()
            
        # Store current epoch loss. 
        losses_train.append(loss_train / n_batch)
        

        if epoch == 1 or epoch % 10 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, loss_train / n_batch))
            
    return losses_train

In [125]:
class MNIST_model(nn.Module):
    def __init__(self):
        super().__init__()  # to inherit the '__init__' method from the 'nn.Module' class
        # Add whatever you want here (e.g layers and activation functions)
        # The order and names don't matter here but it is easier to understand
        # if you go for Layer1, fun1, layer2, fun2, etc
        # Some conventions:
        # - conv stands for convolution
        # - pool for pooling
        # - fc for fully connected

        # 32*32*3: determined by our dataset: 32x32 RGB images
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5, stride = 1, padding = 0)
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2, padding = 0)
        
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = 5, stride = 1, padding = 0)
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2, padding = 0)
        
        self.conv3 = nn.Conv2d(in_channels = 16, out_channels = 120, kernel_size = 5, stride = 1, padding = 0)
        
        self.flat = nn.Flatten()
        
        self.fc1 = nn.Linear(in_features = 5*8*120 , out_features = 1000) # 5 is height 8 is width and 120 is nr channels.
        self.fc2 = nn.Linear(in_features = 1000 , out_features = 100)
        self.fc3 = nn.Linear(in_features = 100 , out_features = 10 +5) #C + 5 components
         
        
             

    def forward(self, x):
        # Now the order matters! 
        out = F.relu(self.conv1(x)) #F.relu is the activation function
        out = self.pool1(out)
        out = F.relu(self.conv2(out))
        out = self.pool2(out)
        out = F.relu(self.conv3(out))
        out = self.flat(out)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        
        return out
        

In [127]:
def loss_fn(y_true, y_pred):
    
    LA = nn.BCEWithLogitsLoss(reduction='none')
    A = LA(y_pred[:,0], y_true[:,0])
        
    LB = nn.MSELoss(reduction='none')
    B = torch.sum(LB(y_pred[:,1:5], y_true[:,1:5]), dim = 1)
    LC = nn.CrossEntropyLoss(reduction='none')
    C = LC(y_pred[:,5:], y_true[:,5].long())
    
    #print(y_true[:,5])

    
        
    #print(A.shape)
    #print(B.shape)
    #print(y_true.shape)
    
        
        
        
        
   
        
    L_loc = torch.where(y_true[:,0] == 1, A+B+C, A)
    L_loc = torch.mean(L_loc)

    
    
    
    #print("A",A)
    #print("B",B)
    #print("C",C)
    
    
    return L_loc
        
        
        

In [129]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)

In [136]:
torch.manual_seed(123)
model = MNIST_model()

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)

optimizer = optim.SGD(model.parameters(), lr=1e-2) #lr=1e-2 is the same as lr=0.01

loss_train = train(1, optimizer, model, loss_fn, train_loader)
loss_train

18:52:18.492397  |  Epoch 1  |  Training loss 2.513


[2.513425717302548]

In [187]:
def compute_iou(bb1, bb2):
    A_x0 = bb1[:,0] - bb1[:,2]/2
    A_y0 = bb1[:,1] - bb1[:,3]/2
    A_x1 = bb1[:,0] + bb1[:,2]/2
    A_y1 = bb1[:,1] + bb1[:,3]/2
    
    B_x0 = bb2[:,0] - bb2[:,2]/2
    B_y0 = bb2[:,1] - bb2[:,3]/2
    B_x1 = bb2[:,0] + bb2[:,2]/2
    B_y1 = bb2[:,1] + bb2[:,3]/2
    
    #compare each element of the tensors between A and B
    
    I_x0 = torch.max(A_x0,B_x0)
    I_y0 = torch.max(A_y0,B_y0)
    I_x1 = torch.min(A_x1,B_x1)
    I_y1 = torch.min(A_y1,B_y1)
    
    I_area = (I_x1 - I_x0)*(I_y1 - I_y0)
    
    A_area = (A_x1 - A_x0)*(A_y1 - A_y0)
    B_area = (B_x1 - B_x0)*(B_y1 - B_y0)
    
    U_area = A_area + B_area - I_area
    
    IoU = I_area / U_area
    
    return IoU
    
    
    
    
    
     

In [None]:
compute_iou([1,1,2,1,2], [1,2,4,3,4])

In [152]:
def plot_boundingbox:
    

SyntaxError: invalid syntax (2999646975.py, line 1)

In [228]:
def compute_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    tot_iou = 0
    totalpc1 = 0
    
    totalpcright = 0

    # We do not want gradients here, as we will not want to update the parameters.
    with torch.no_grad():
        for imgs, labels in loader:

            outputs = model(imgs)
            
            
            """
            if labels[0] == 0:
                if outputs[0] == 0:
                    correct += 1
            else:
                if outputs[0] == 0:
                    continue
                else:
                    if argmax(outputs[6:16]) == label[6]
                        correct += 1
                total_iou = iou(output[1:5], label[1:5]) 
                
                correct/ba
            
            """
            
            
            pc1 = torch.where(labels[:,0] == 1)[0]
            #print(pc1)
            
            
            _, predicted = torch.max(outputs[:,5:], dim=1) 
            predicted = torch.unsqueeze(predicted, 1)
            #print(labels.shape[0])
            total += labels.shape[0]
            totalpc1 += pc1.shape[0]
            correct += int((predicted[pc1] == labels[pc1,5:]).sum())
        
            
            #print(f"{outputs[:,1:5].shape} {labels[:,1:5].shape}")
        
            iou = compute_iou(outputs[pc1,1:5], labels[pc1,1:5])
            
            correctpc = predicted[:,0] == labels[:,0]
            totalpcright += correctpc.shape[0]
            
            print(total.shape[0])
            

    accpc = totalpcright/total
    print(accpc)

    acc =  ((correct / total) + (iou/totalpc1))/2

    #print(acc.shape)

    #print("Accuracy: {:.2f}".format(sum(acc)/acc.shape[0]))
          
    return acc

In [205]:
torch.manual_seed(123)

model_1 = MNIST_model()

train(5, optimizer, model, loss_fn, train_loader)

compute_accuracy(model_1, val_loader)


10:57:51.094225  |  Epoch 1  |  Training loss 2.415
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) torch.Size([128, 4])
torch.Size([128, 4]) tor

tensor([0.0451, 0.0448, 0.0448, 0.0448, 0.0454, 0.0448, 0.0452, 0.0449, 0.0449,
        0.0449, 0.0449, 0.0448, 0.0452, 0.0448, 0.0449, 0.0447, 0.0448, 0.0451,
        0.0450, 0.0389, 0.0449, 0.0448, 0.0451, 0.0448, 0.0448, 0.0455, 0.0456,
        0.0447, 0.0447, 0.0449, 0.0449, 0.0448, 0.0445, 0.0448, 0.0446, 0.0449,
        0.0444, 0.0449, 0.0453, 0.0450, 0.0447, 0.0449, 0.0449, 0.0449, 0.0449,
        0.0447, 0.0449, 0.0441, 0.0450, 0.0449, 0.0450, 0.0448, 0.0449, 0.0450,
        0.0449, 0.0451, 0.0451, 0.0443, 0.0449, 0.3895, 0.0447, 0.0448, 0.0448,
        0.0450, 0.0433, 0.0449, 0.0485, 0.0449, 0.0449, 0.0452, 0.0448, 0.0448])

In [229]:
compute_accuracy(model_1, val_loader)


AttributeError: 'int' object has no attribute 'shape'