In [44]:
import numpy as np
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from IPython import display
import matplotlib.pylab as plt

Convolution

In [45]:
# apply a convolutional kernel to an image matrix
def convolve2d(X, W):
    h, w = X.shape
    m, n = W.shape
    Y = np.zeros((h-m+1, w-n+1))
    for i in range(h-m+1):
        for j in range(w-n+1):
            Y[i][j] = np.sum(X[i:i+m, j:j+n]*W)
    return Y

# apply a max pooling to an image matrix
def maxpool2d(X, size=2):
    h, w = X.shape
    Y = np.zeros((h//size, w//size))
    for i in range(h//size):
        for j in range(w//size):
            Y[i][j] = np.max(X[i*size:i*size+size, j*size:j*size+size])
    return Y

In [46]:
X = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [6, 7, 8, 9, 10], [16, 17, 18, 19, 20], [21, 22, 23, 24, 25]])
W = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
print(X)
Y = convolve2d(X, W)
print(Y)
Y = maxpool2d(X)
print(Y)


[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [ 6  7  8  9 10]
 [16 17 18 19 20]
 [21 22 23 24 25]]
[[8. 8. 8.]
 [8. 8. 8.]
 [8. 8. 8.]]
[[ 7.  9.]
 [17. 19.]]


In [47]:
X = torch.from_numpy(X).unsqueeze(0).unsqueeze(0).float()
W = torch.from_numpy(W).unsqueeze(0).unsqueeze(0).float()
print(X)
Y = F.conv2d(X, W, bias=None, stride=1, padding=0, dilation=1, groups=1)
print(Y)

Y = F.conv_transpose2d(X, W, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1)
print(Y)


Y = F.conv_transpose2d(X, W, bias=None, stride=1, padding=1, output_padding=0, groups=1, dilation=1)
print(Y)

Y = F.conv_transpose2d(X, W, bias=None, stride=2, padding=0, output_padding=1, groups=1, dilation=1)
print(Y)

Y = F.conv_transpose2d(X, W, bias=None, stride=2, padding=1, output_padding=1, groups=1, dilation=1)
print(Y)
print(Y.shape)
Y = F.max_pool2d(X, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
print(Y)


tensor([[[[ 1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10.],
          [ 6.,  7.,  8.,  9., 10.],
          [16., 17., 18., 19., 20.],
          [21., 22., 23., 24., 25.]]]])
tensor([[[[8., 8., 8.],
          [8., 8., 8.],
          [8., 8., 8.]]]])
tensor([[[[ -1.,  -2.,  -2.,  -2.,  -2.,   4.,   5.],
          [ -8., -11.,  -6.,  -6.,  -6.,  17.,  20.],
          [-19., -23.,  -8.,  -8.,  -8.,  31.,  35.],
          [-34., -38.,  -8.,  -8.,  -8.,  46.,  50.],
          [-59., -63.,  -8.,  -8.,  -8.,  71.,  75.],
          [-58., -61.,  -6.,  -6.,  -6.,  67.,  70.],
          [-21., -22.,  -2.,  -2.,  -2.,  24.,  25.]]]])
tensor([[[[-11.,  -6.,  -6.,  -6.,  17.],
          [-23.,  -8.,  -8.,  -8.,  31.],
          [-38.,  -8.,  -8.,  -8.,  46.],
          [-63.,  -8.,  -8.,  -8.,  71.],
          [-61.,  -6.,  -6.,  -6.,  67.]]]])
tensor([[[[ -1.,   0.,  -1.,   0.,  -1.,   0.,  -1.,   0.,  -1.,   0.,   5.,
             0.],
          [ -2.,   0.,  -2.,   0.,  -2.,   0.,  

Output dimension of convolution layer

In [48]:
# make a function that outputs the dimensions of the output of a convolutional layer
def conv_output_shape(h,w, kernel_size=1, stride=1, pad=0, dilation=1):
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h_out = floor(((h + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1)
    w_out = floor(((w + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1)
    return h_out, w_out

def trans_conv_output_shape(h,w, kernel_size=1, stride=1, pad=0, output_padding=0, dilation=1):
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h_out = floor((h-1)*stride - 2*pad + dilation*(kernel_size[0]-1) + output_padding + 1)
    w_out = floor((w-1)*stride - 2*pad + dilation*(kernel_size[0]-1) + output_padding + 1)
    return h_out, w_out


def padding_needed(kernel_size):
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    if kernel_size[0]%2 == 0:
        pad = (kernel_size[0]-1)//2
    else:
        pad = kernel_size[0]//2
    return pad
    

In [49]:
print(X)
print(X.shape)
h = X.shape[2]
w = X.shape[3]
print(h,w)
h_out, w_out = conv_output_shape(h,w, kernel_size=1, stride=1, pad=0, dilation=1)
print(h_out, w_out)

h_out, w_out = conv_output_shape(h,w, kernel_size=3, stride=1, pad=1, dilation=1)
print(h_out, w_out)

h_out, w_out = conv_output_shape(6,6, kernel_size=2, stride=2, pad=0, dilation=1)
print(h_out, w_out)

h_out, w_out = trans_conv_output_shape(h,w, kernel_size=1, stride=1, pad=0, output_padding=0, dilation=1)
print(h_out, w_out)

h_out, w_out = trans_conv_output_shape(h,w, kernel_size=3, stride=1, pad=1, output_padding=0, dilation=1)
print(h_out, w_out)

h_out, w_out = trans_conv_output_shape(6,6, kernel_size=2, stride=2, pad=0, output_padding=0, dilation=1)
print(h_out, w_out)

print(padding_needed(3))
print(padding_needed(7))

tensor([[[[ 1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10.],
          [ 6.,  7.,  8.,  9., 10.],
          [16., 17., 18., 19., 20.],
          [21., 22., 23., 24., 25.]]]])
torch.Size([1, 1, 5, 5])
5 5
5 5
5 5
3 3
5 5
5 5
12 12
1
3


Reseptive field 
Tegn. Start bag fra.

function virker ikke! men tror måske den giver størrelsen eller noget, så kan måske bruges lidt.

In [50]:
#Make a function that can determinate the reseptive field of a given pixel (x,y), given a list of kernel sizes, strides, paddings and dilations and number of layers
# and return the receptive field of the pixel (x,y) as min_x, min_y, max_x, max_y 
# such that the reseptive field of the pixel (x,y) is the rectangle with corners (min_x, min_y) and (max_x, max_y)
def receptive_field(x,y,kernel_size, stride, padding, dilation, layers):
    from math import floor
    if type(kernel_size) is not list:
        kernel_size = [kernel_size]*layers
    if type(stride) is not list:
        stride = [stride]*layers
    if type(padding) is not list:
        padding = [padding]*layers
    if type(dilation) is not list:
        dilation = [dilation]*layers
    if len(kernel_size) != layers or len(stride) != layers or len(padding) != layers or len(dilation) != layers:
        print("Error: The number of layers does not match the number of kernel sizes, strides, paddings and dilations")
        return
    min_x = x
    min_y = y
    max_x = x
    max_y = y
    for i in range(layers):
        min_x = floor((min_x - 1) * stride[i] - 2 * padding[i] + dilation[i] * (kernel_size[i] - 1) + 1)
        min_y = floor((min_y - 1) * stride[i] - 2 * padding[i] + dilation[i] * (kernel_size[i] - 1) + 1)
        max_x = floor((max_x - 1) * stride[i] - 2 * padding[i] + dilation[i] * (kernel_size[i] - 1) + 1)
        max_y = floor((max_y - 1) * stride[i] - 2 * padding[i] + dilation[i] * (kernel_size[i] - 1) + 1)
    return min_x, min_y, max_x, max_y

    

In [51]:
# make a function that kan calculate the size of the receptive field
def receptive_field_size(kernel_size, stride, padding, dilation, layers):
    # the final output size corresponds to the receptive field of the first pixel
    from math import floor
    if type(kernel_size) is not list:
        kernel_size = [kernel_size]*layers
    if type(stride) is not list:
        stride = [stride]*layers
    if type(padding) is not list:
        padding = [padding]*layers
    if type(dilation) is not list:
        dilation = [dilation]*layers
    if len(kernel_size) != layers or len(stride) != layers or len(padding) != layers or len(dilation) != layers:
        print("Error: The number of layers does not match the number of kernel sizes, strides, paddings and dilations")
        return
    size = 1
    for i in range(layers):
        size = floor((size - 1) * stride[i] - 2 * padding[i] + dilation[i] * (kernel_size[i] - 1) + 1)
    return size
    

In [52]:
rf = receptive_field(1,2,kernel_size=[3,3], stride=[1,2], padding=[0,1], dilation=[1,1], layers=2)
print(rf)
rf = receptive_field(1,2,kernel_size=[3], stride=[1], padding=[1], dilation=[1], layers=1)
print(rf)

rf = receptive_field(1,1,kernel_size=[3,2,3,2,3], stride=[1,2,1,2,1], padding=[1,0,1,0,1], dilation=[1,1,1,1,1], layers=5)
print(rf)
rf = receptive_field(10,13,kernel_size=[3,2,3,2,3], stride=[1,2,1,2,1], padding=[1,0,1,0,1], dilation=[1,1,1,1,1], layers=5)
print(rf)

(5, 7, 5, 7)
(1, 2, 1, 2)
(4, 4, 4, 4)
(40, 52, 40, 52)


In [53]:
rf = receptive_field_size(kernel_size=[3,2,3,2,3], stride=[1,2,1,2,1], padding=[1,0,1,0,1], dilation=[1,1,1,1,1], layers=5)
print(rf)

rf = receptive_field_size(kernel_size=[3,2,3,2,3,2,3,2,3,1], stride=[1,2,1,2,1,2,1,2,1,1], padding=[1,0,1,0,1,0,1,0,1,0], dilation=[1,1,1,1,1,1,1,1,1,1], layers=10)
print(rf)

4
16


activation functions

In [54]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def softmax(x):
    return np.exp(x)/np.sum(np.exp(x), axis=0)

def relu(x):
    return np.maximum(0,x)


In [55]:
x = np.array([-1.,2.,3.,4.,5.])
print(softmax(x))
print(sigmoid(x))
print(relu(x))

x = torch.from_numpy(x)
print(nn.Softmax(dim=0)(x))
print(nn.Sigmoid()(x))
print(nn.ReLU()(x))
print(nn.Tanh()(x))


[0.00159356 0.03200752 0.08700545 0.23650533 0.64288814]
[0.26894142 0.88079708 0.95257413 0.98201379 0.99330715]
[0. 2. 3. 4. 5.]
tensor([0.0016, 0.0320, 0.0870, 0.2365, 0.6429], dtype=torch.float64)
tensor([0.2689, 0.8808, 0.9526, 0.9820, 0.9933], dtype=torch.float64)
tensor([0., 2., 3., 4., 5.], dtype=torch.float64)
tensor([-0.7616,  0.9640,  0.9951,  0.9993,  0.9999], dtype=torch.float64)


Loss functions

In [56]:
def binary_cross_entropy(x, y):
    return -np.mean(y*np.log(x)+(1-y)*np.log(1-x))

def cross_entropy(x, y):
    return -np.sum(y*np.log(x))

def focal_loss(x, y, gamma=2):
    return -np.mean(y*np.power(1-x, gamma)*np.log(x))

def binary_focal_loss(y_real, y_pred, gamma=2):
    loss = -torch.mean(y_real*(1-y_pred)**gamma*torch.log(y_pred) + (1-y_real)*torch.log(1-y_pred))
    return loss

#def dice_loss(x, y):
#    return -np.mean(2*x*y/(x+y))

def dice_loss(y_real, y_pred):
    loss = 1 - torch.mean(2*y_real*y_pred+1)/(torch.mean(y_real+y_pred) + 1)
    return loss

def bce_total_variation(y_real, y_pred):
    y_pred2 = F.sigmoid(y_pred)
    total_variation = torch.mean(torch.abs(y_pred2[:, :, :, :-1] - y_pred2[:, :, :, 1:])) + torch.mean(torch.abs(y_pred2[:, :, :-1, :] - y_pred2[:, :, 1:, :]))
    return bce_loss(y_real, y_pred) + 0.1*total_variation



In [58]:
x = np.array([0.1,0.2,0.3,0.4,0.5])
y = np.array([0.,0.,1.,0.,0.])
print(binary_cross_entropy(x,y))
print(focal_loss(x,y))

x = torch.from_numpy(x)
y = torch.from_numpy(y)
print(nn.BCELoss()(x,y))
print(dice_loss(x,y))
x = np.array([-1.,2.,3.,4.,5.])
y = np.array([0.,0.,1.,0.,0.])

print(cross_entropy(softmax(x),y))

x = torch.from_numpy(x)
y = torch.from_numpy(y)

print(nn.BCEWithLogitsLoss()(x,y))
print(nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.,1.,1.,1.,1.]))(x,y))

print(nn.CrossEntropyLoss()(x,y))


0.5472899351247816
0.11798933482394172
tensor(0.5473, dtype=torch.float64)
tensor(0.2533, dtype=torch.float64)
2.441784530014455
tensor(2.3027, dtype=torch.float64)
tensor(2.3027, dtype=torch.float64)
tensor(2.4418, dtype=torch.float64)


Metrics

In [76]:
def IoU_bbox(box1,box2,mode="xywh"):
    # if mode == "xyxy":
    # box1 = [x11,y11,x12,y12]
    # box2 = [x21,y21,x22,y22]
    # elif mode == "xywh":
    # box1 = [x11,y11,w,h]
    # box2 = [x21,y21,w,h]
    # returns the intersection over union of
    if mode == "xyxy":
        x11 = box1[0]
        y11 = box1[1]
        x12 = box1[2]
        y12 = box1[3]
        w1 = abs(x12 - x11)
        h1 = abs(y12 - y11)
        x21 = box2[0]
        y21 = box2[1]
        x22 = box2[2]
        y22 = box2[3]
        w2 = abs(x22 - x21)
        h2 = abs(y22 - y21)
    elif mode == "xywh":
        x11 = box1[0]
        y11 = box1[1]
        w1 = box1[2]
        h1 = box1[3]
        x22 = box2[0]
        y22 = box2[1]
        w2 = box2[2]
        h2 = box2[3]
    else:
        print("Error: unknown mode")
        return
    # calculate the intersection
    xI1 = max(x11, x22)
    yI1 = max(y11, y22)
    xI2 = min(x11 + w1, x22 + w2)
    yI2 = min(y11 + h1, y22 + h2)
    interArea = max(0, xI2 - xI1) * max(0, yI2 - yI1)
    # calculate the union
    boxAArea = w1 * h1
    boxBArea = w2 * h2
    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou


import numpy as np
import torch

def x1y1x2y2_to_x1y1wh(x):
    # Convert bounding box format from [x1, y1, x2, y2] to [x1, y1, w ,h]
    y = x.clone()
    y[:, 0] = x[:, 0]
    y[:, 1] = x[:, 1]
    x1 = x[:, 0]
    y1 = x[:, 1]
    x2 = x[:, 2]
    y2 = x[:, 3]
    w = abs(x2 - x1)
    h = abs(y2 - y1)
    y[:, 2] = w
    y[:, 3] = h
    return y

def x1y1wh_to_x1y1x2y2(x):
    # Convert bounding box
    y = x.clone()
    y[:, 0] = x[:, 0]
    y[:, 1] = x[:, 1]
    x1 = x[:, 0]
    y1 = x[:, 1]
    w = x[:, 2]
    h = x[:, 3]
    x2 = x1 + w
    y2 = y1 + h
    y[:, 2] = x2
    y[:, 3] = y2
    return y



def xyxy2xywh(x):
    # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] x,y is the center of the box
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)

    y[:, 0] = (x[:, 0] + x[:, 2]) / 2.0
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2.0
    y[:, 2] = x[:, 2] - x[:, 0]
    y[:, 3] = x[:, 3] - x[:, 1]
    return y


def xywh2xyxy(x):
    # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y


def bbox_iou(box1, box2, mode="xyxy"):
    """
    numpy version iou, and use for nms
    """
    # Get the coordinates of bounding boxes

    if mode == "xyxy":
        # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[..., 0], box1[..., 1], box1[..., 2], box1[..., 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[..., 0], box2[..., 1], box2[..., 2], box2[..., 3]
    else:
        # x, y, w, h = box1
        b1_x1, b1_x2 = box1[..., 0] - box1[..., 2] / 2, box1[..., 0] + box1[..., 2] / 2
        b1_y1, b1_y2 = box1[..., 1] - box1[..., 3] / 2, box1[..., 1] + box1[..., 3] / 2
        b2_x1, b2_x2 = box2[..., 0] - box2[..., 2] / 2, box2[..., 0] + box2[..., 2] / 2
        b2_y1, b2_y2 = box2[..., 1] - box2[..., 3] / 2, box2[..., 1] + box2[..., 3] / 2

    # Intersection area
    inter_area = np.maximum((np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)), 0.0) * \
                 np.maximum(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1), 0.0)

    # Union Area
    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area

    return inter_area / union_area  # iou


def iou_xywh_numpy(boxes1, boxes2):
    """
    :param boxes1: boxes1和boxes2的shape可以不相同，但是需要满足广播机制
    :param boxes2: 且需要保证最后一维为坐标维，以及坐标的存储结构为(x,y,w,h)，其中(x,y)是bbox的中心坐标
    :return: 返回boxes1和boxes2的IOU，IOU的shape为boxes1和boxes2广播后的shape[:-1]
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    print(boxes1[..., 0],boxes1[..., 1],boxes1[..., 2],boxes1[..., 3])

    # 分别计算出boxes1和boxes2的左上角坐标、右下角坐标
    # 存储结构为(xmin, ymin, xmax, ymax)，其中(xmin,ymin)是bbox的左上角坐标，(xmax,ymax)是bbox的右下角坐标
    boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                             boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                             boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # 计算出boxes1与boxes1相交部分的左上角坐标、右下角坐标
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # 因为两个boxes没有交集时，(right_down - left_up) < 0，所以maximum可以保证当两个boxes没有交集时，它们之间的iou为0
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area
    return IOU


def iou_xyxy_numpy(boxes1, boxes2):

    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area
    return IOU


def iou_xyxy_torch(boxes1, boxes2):
    """
    :param boxes1: boxes1和boxes2的shape可以不相同，但是需要满足广播机制，且需要是Tensor
    :param boxes2: 且需要保证最后一维为坐标维，以及坐标的存储结构为(xmin, ymin, xmax, ymax)
    :return: 返回boxes1和boxes2的IOU，IOU的shape为boxes1和boxes2广播后的shape[:-1]
    """
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # 计算出boxes1与boxes1相交部分的左上角坐标、右下角坐标
    left_up = torch.max(boxes1[..., :2], boxes2[..., :2])
    right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:])

    # 因为两个boxes没有交集时，(right_down - left_up) < 0，所以maximum可以保证当两个boxes没有交集时，它们之间的iou为0
    inter_section = torch.max(right_down - left_up, torch.zeros_like(right_down))
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area
    return IOU


def iou_xywh_torch(boxes1, boxes2):
    """
    :param boxes1: boxes1和boxes2的shape可以不相同，但是需要满足广播机制，且需要是Tensor
    :param boxes2: 且需要保证最后一维为坐标维，以及坐标的存储结构为(x, y, w, h)
    :return: 返回boxes1和boxes2的IOU，IOU的shape为boxes1和boxes2广播后的shape[:-1]
    """
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # 分别计算出boxes1和boxes2的左上角坐标、右下角坐标
    # 存储结构为(xmin, ymin, xmax, ymax)，其中(xmin,ymin)是bbox的左上角坐标，(xmax,ymax)是bbox的右下角坐标
    boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1)
    boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1)
    # 计算出boxes1与boxes1相交部分的左上角坐标、右下角坐标
    left_up = torch.max(boxes1[..., :2], boxes2[..., :2])
    right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:])
    # 因为两个boxes没有交集时，(right_down - left_up) < 0，所以maximum可以保证当两个boxes没有交集时，它们之间的iou为0
    inter_section = torch.max(right_down - left_up, torch.zeros_like(right_down))
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    IOU = 1.0 * inter_area / union_area

    return IOU


def nms(bboxes, score_threshold, iou_threshold, sigma=0.3, method='nms'):

    classes_in_img = list(set(bboxes[:, 5].astype(np.int32)))
    best_bboxes = []

    for cls in classes_in_img:
        cls_mask = (bboxes[:, 5].astype(np.int32) == cls)
        cls_bboxes = bboxes[cls_mask]
        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
            iou = iou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])

            weight = np.ones((len(iou),), dtype=np.float32)
            iou_mask = iou > iou_threshold
            weight[iou_mask] = 0.0
            
            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            score_mask = cls_bboxes[:, 4] > score_threshold
            cls_bboxes = cls_bboxes[score_mask]
    return np.array(best_bboxes)


def Accuray_seg(y_pred, y_true):
    return np.mean(y_pred == y_true)

def Precision_seg(y_pred, y_true):
    return np.sum((y_pred == 1) & (y_true == 1)) / np.sum(y_pred == 1)

def Recall_seg(y_pred, y_true):
    return np.sum((y_pred == 1) & (y_true == 1)) / np.sum(y_true == 1)

def F1_seg(y_pred, y_true):
    precision = Precision(y_pred, y_true)
    recall = Recall(y_pred, y_true)
    return 2*precision*recall/(precision+recall)


def Sensitivity_seg(y_pred, y_true):
    return np.sum((y_pred == 1) & (y_true == 1)) / np.sum(y_true == 1)

def Specificity_seg(y_pred, y_true):
    return np.sum((y_pred == 0) & (y_true == 0)) / np.sum(y_true == 0)
    

In [60]:
def dice_seg(target,preds):
    X = target.view(-1)
    Y = preds.view(-1)
    return 2*torch.mean(torch.mul(X,Y))/torch.mean(X+Y)


def IoU_seg(target,preds):
    X = target.view(-1)
    Y = preds.view(-1)
    return torch.mean(torch.mul(X,Y))/(torch.mean(X+Y)-torch.mean(torch.mul(X,Y)))

def Accuracy_seg(target,preds):
    X = target.view(-1)
    Y = preds.view(-1)
    X = X.cpu().numpy()
    Y = Y.cpu().numpy()
    TN, FP, FN, TP = confusion_matrix(X, Y).ravel()     
    
    # Accuracy
    accuracy =  (TP+TN)/(TP+FP+TN+FN)
    return accuracy
        
def sensitivity_seg(target,preds):
    X = target.view(-1)
    Y = preds.view(-1)
    X = X.numpy()
    Y = Y.numpy()
    TN, FP, FN, TP = confusion_matrix(X, Y).ravel()     
    return TP/(TP+FN)

def specificity_seg(target,preds):
    X = target.view(-1)
    Y = preds.view(-1)
    X = X.cpu().numpy()
    Y = Y.cpu().numpy()
    TN, FP, FN, TP = confusion_matrix(X, Y).ravel()     
    return TN/(TN+FP)

In [78]:

def nms(bboxes, iou_threshold):
    classes_in_img = list(set(bboxes[:, 4].astype(np.int32)))

    best_bboxes = []

    for cls in classes_in_img:
        cls_mask = (bboxes[:, 4].astype(np.int32) == cls)
        cls_bboxes = bboxes[cls_mask]
        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)
            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
            iou = iou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])

            weight = np.ones((len(iou),), dtype=np.float32)
            iou_mask = iou > iou_threshold
            weight[iou_mask] = 0.0
            
            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
    return np.array(best_bboxes)


In [62]:
(572-388)/2

92.0

In [63]:
7*7*4*20/2

1960.0

In [64]:
3*3*1

9

In [65]:
pred_box_1 = np.array([10,50,20,20])
pred_box_2 = np.array([40,10,40,30])
gt_box_1 = np.array([40,60,40,20])
gt_box_2 = np.array([15,60,15,10])

pred_box_1 = torch.from_numpy(pred_box_1)
pred_box_2 = torch.from_numpy(pred_box_2)
gt_box_1 = torch.from_numpy(gt_box_1)
gt_box_2 = torch.from_numpy(gt_box_2)


print(IoU_bbox(pred_box_1,gt_box_1))
print(IoU_bbox(pred_box_1,gt_box_2))
print(IoU_bbox(pred_box_2,gt_box_1))
print(IoU_bbox(pred_box_2,gt_box_2))

print(iou_xywh_torch(pred_box_1,gt_box_1))
print(iou_xywh_torch(pred_box_1,gt_box_2))
print(iou_xywh_torch(pred_box_2,gt_box_1))
print(iou_xywh_torch(pred_box_2,gt_box_2))


tensor(0.)
tensor(0.3750)
tensor(0.)
0.0
tensor(0.)
tensor(0.1282)
tensor(0.)
tensor(0.)


In [67]:
pred_box_1 = np.array([10,20,10,20])
#pred_box_2 = np.array( [60,60,60,60])
gt_box_1 = np.array([10,30,10,10])
gt_box_2 = np.array( [60,60,60,60])

pred_box_1 = torch.from_numpy(pred_box_1)
#pred_box_2 = torch.from_numpy(pred_box_2)
gt_box_1 = torch.from_numpy(gt_box_1)
gt_box_2 = torch.from_numpy(gt_box_2)


print(IoU_bbox(pred_box_1,gt_box_1))
print(IoU_bbox(pred_box_1,gt_box_2))
print(IoU_bbox(pred_box_2,gt_box_1))
print(IoU_bbox(pred_box_2,gt_box_2))

print(iou_xywh_torch(pred_box_1,gt_box_1))
print(iou_xywh_torch(pred_box_1,gt_box_2))
print(iou_xywh_torch(pred_box_2,gt_box_1))
print(iou_xywh_torch(pred_box_2,gt_box_2))


tensor(0.5000)
0.0
tensor(0.)
tensor(0.)
tensor(0.2000)
tensor(0.)
tensor(0.)
tensor(0.)


In [79]:
B0 = np.array([65,65,50,50,0.4])
B1 = np.array([125,120,25,30,0.6])
B2 = np.array([60,60,60,60,0.9])
B3 = np.array([120,120,30,30,0.7])

B0 = torch.from_numpy(B0)
B1 = torch.from_numpy(B1)
B2 = torch.from_numpy(B2)
B3 = torch.from_numpy(B3)


bboxes = torch.stack([B0,B1,B2,B3],dim=0)
bboxes = x1y1wh_to_x1y1x2y2(bboxes)

bboxes = bboxes.numpy()

bboxes_out = nms(bboxes, iou_threshold=0.7)

bboxes_out = torch.from_numpy(bboxes_out)
bboxes_out = x1y1x2y2_to_x1y1wh(bboxes_out)

print(bboxes_out)



tensor([[ 60.0000,  60.0000,  60.0000,  60.0000,   0.9000],
        [120.0000, 120.0000,  30.0000,  30.0000,   0.7000],
        [ 65.0000,  65.0000,  50.0000,  50.0000,   0.4000],
        [125.0000, 120.0000,  25.0000,  30.0000,   0.0000]],
       dtype=torch.float64)
