### Simple Convultion Implementation.

In [1]:
import numpy as np

In [30]:
def conv( x, w, s, p ):
    x_pad = np.array(x)
    w_rot = np.array(w[::-1])
    if (p > 0):
        pad = np.zeros( shape = p )
        x_pad = np.concatenate( [pad, x, pad] )
    res = []
    for i in range( 0, int( len(x_pad) - len(w) ) + 1, s ):
        res.append( np.sum(x_pad[i:i+w_rot.shape[0]] * w_rot))
        
    return res

In [109]:
x = [1,2,3,12]
w = [2,1]

In [125]:
np.zeros( shape = (2,2) )

array([[0., 0.],
       [0., 0.]])

In [126]:
def conv2d( x, w, s = (1,1), p = (0,0) ):
    w_rot = w[::-1,::-1]
    x_orig = np.array(x)
    
    n1 = x_orig.shape[0] + 2*p[0]
    n2 = x_orig.shape[1] + 2*p[1]
    
    x_padded = np.zeros(shape=(n1,n2))
    x_padded[ p[0]:p[0] + x_orig.shape[0] , p[1]:p[1] + x_orig.shape[1] ] = x_orig

    res = []
    for i in range( 0, int( ( x_pad.shape[0] - w_rot.shape[0] ) / s[0] ) + 1, s[0] ):
        res.append([])
        for j in range( 0, int( ( x_pad.shape[1] - w_rot.shape[1] ) / s[1] ) + 1, s[1] ):
            x_sub = x_padded[i:i+w_rot.shape[0], j:j+w_rot.shape[1]]
            ret[-1].append(np.sum(x_sub * w_rot))
    return res

### Conv W. PyTorch

In [None]:
'''
    While small NNs are insufficient to capture the complexity of a real-word dataset, big NNs can be prone
    to overfitting. To address this issue, we use regularization. There are 2 types: L1 & L2, while the latter
    being more famous than the former. Furthermore, we have another method to restrict the weights which is
    called `Dropout`. We will look in more detail for it.
'''

In [1]:
import torch
import torch.nn as nn

In [12]:
loss_func = nn.BCELoss()
loss = loss_func( torch.tensor([0.9]), torch.tensor([1.0]) )
l2_lambda = 0.001
conv_layer = nn.Conv2d( in_channels=3, out_channels=5, kernel_size=5 ) 
l2_penalty = l2_lambda * sum( [ ( p**2 ).sum() for p in conv_layer.parameters() ] )
loss_with_penalty = l2_penalty + loss
loss_with_penalty

tensor(0.1070, grad_fn=<AddBackward0>)

In [13]:
linear = nn.Linear( in_features= 10, out_features= 16 )
l2_penalty = l2_lambda * sum([( p**2 ).sum() for p in linear.parameters()])
loss_with_penalty = loss + l2_lambda
loss_with_penalty

tensor(0.1064)

In [None]:
'''
    When it comes to dropout, we tend to turn off some of the units so the recieving node does not
    rely completely on one of them. Theis technique is quietly used in Computer Vision since
    the data, almost always, is insufficient and the model is likely to overfit. In other applications
    we dont bother using this technique until we see a potential of overfitting, because we
    loose the ability to debug the NN or the code.
'''

In [19]:
logits = torch.tensor([0.8])
proba  = torch.sigmoid(logits)
truth  = torch.tensor([1.0])
bce_loss = nn.BCELoss()
logits_bce = nn.BCEWithLogitsLoss()
print("BCELoss:", bce_loss(proba, truth ))
print("logits_BCELoss:", logits_bce(logits, truth))

BCELoss: tensor(0.3711)
logits_BCELoss: tensor(0.3711)


In [32]:
logits = torch.tensor([[0.8, 2.1, 3.2]])
proba  = torch.softmax(logits, dim = 1)
truth  = torch.tensor([2])
ce_loss = nn.CrossEntropyLoss()
nll_loss = nn.NLLLoss()
print("CE_loss:", ce_loss(logits, truth ))
print("NLL_loss:", nll_loss(torch.log(proba), truth ))

CE_loss: tensor(0.3532)
NLL_loss: tensor(0.3532)
