In [1]:
import math
import pdb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torch.autograd import Variable
import numpy as np
import pandas as pd
from tqdm import tqdm 
import pickle

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  
print("Use device:",device)

Use device: cuda:0


In [3]:
batch_size = 8
lr = 0.001
pretrain = False
mode = 'train'
epoch = 40

# dataloader

In [4]:
meta = pd.read_csv('/kaggle/input/lidcidri-250-500/data/Meta/meta_info.csv')

# test 1-30 valid 31-40 test 41-50
train_img = []
valid_img = []
test_img = []
train_bbox = []
valid_bbox = []
test_bbox = []
train_label = []
valid_label = []
test_label = []


# bbox_list = np.load('/content/drive/MyDrive/fasterRCNN/data/bbox.pkl')
with open('/kaggle/input/lidcidri-250-500/data/bbox.pkl', "rb") as file:
    bbox_list = pickle.load(file)

# for i in range(len(bbox_list)):
#   for j in range(len(bbox_list[i])):
#     bbox_list[i][j][0] = bbox_list[i][j][0]*600/512
#     bbox_list[i][j][1] = bbox_list[i][j][1]*600/512
#     bbox_list[i][j][2] = bbox_list[i][j][2]*600/512
#     bbox_list[i][j][3] = bbox_list[i][j][3]*600/512

for i in tqdm(range(len(meta.index))):
    if meta.iloc[i]['is_clean'] == False:
        img = np.load('/kaggle/input/lidcidri-250-500/data/Image/LIDC-IDRI-' + meta.iloc[i]['original_image'][:4] + '/' + meta.iloc[i]['original_image'] + '.npy') 
    else :
        img = np.load('/kaggle/input/lidcidri-250-500/data/Clean/Image/LIDC-IDRI-' + meta.iloc[i]['original_image'][:4] + '/' + meta.iloc[i]['original_image'] + '.npy')
    if img.shape[0] != 512 or img.shape[1] != 512:
        print(img.shape, meta.iloc[i]['patient_id'])
    img = np.clip(img, -1200,1200)
    img = torch.Tensor(img)
    img = img.to(torch.float32) 
    min_value = img.min()
    max_value = img.max()    

    # Shift the tensor to make all values positive
    shifted_tensor = img - min_value   
    # Normalize the tensor to the range [0, 1]
    img = shifted_tensor / (max_value - min_value)

    if meta.iloc[i]['patient_id'] <= 450: 
        train_img.append(img)
        train_bbox.append(bbox_list[i])
        if meta.iloc[i]['is_clean'] == False:
            train_label.append(torch.ones(len(bbox_list[i])).type(torch.int64))
        else:
            train_label.append(torch.zeros(len(bbox_list[i])).type(torch.int64))
    elif meta.iloc[i]['patient_id'] <= 475: 
        valid_img.append(img)
        valid_bbox.append(bbox_list[i])
        if meta.iloc[i]['is_clean'] == False:
            valid_label.append(torch.ones(len(bbox_list[i])).type(torch.int64))
        else:
            valid_label.append(torch.zeros(len(bbox_list[i])).type(torch.int64))
    else : 
        test_img.append(img)
        test_bbox.append(bbox_list[i])
        if meta.iloc[i]['is_clean'] == False:
            test_label.append(torch.ones(len(bbox_list[i])).type(torch.int64))
        else:
            test_label.append(torch.zeros(len(bbox_list[i])).type(torch.int64))


100%|██████████| 3395/3395 [00:11<00:00, 286.79it/s]


In [5]:
print(len(train_img[:-1]), len(valid_img), len(test_img), len(train_bbox[:-2]), len(valid_bbox), len(test_bbox), len(train_label), len(valid_label), len(test_label))

2564 408 422 2563 408 422 2565 408 422


In [6]:
import torch
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class FasterRCNNDataset(Dataset):
    def __init__(self, image_list, bboxes_list, labels_list, transform=None):
        self.image_list = image_list
        self.bboxes_list = bboxes_list
        self.labels_list = labels_list
        self.transform = transform

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        image = self.image_list[idx]
        bboxes = self.bboxes_list[idx]
        labels = self.labels_list[idx]

        image = torch.tensor(image)
        image = torch.unsqueeze(image, 0)
        if self.transform is not None:
            image = self.transform(image)
        bboxes = torch.tensor(bboxes)
        labels = labels

        return image, bboxes, labels


def collate_fn(batch):
    image_list = []
    bboxes_list = []
    labels_list = []
    for item in batch:
        image_list.append(item[0])
        bboxes_list.append(item[1])
        labels_list.append(item[2])

    # Pad the lists of bounding boxes with -1
    max_num_bboxes = max(len(bboxes) for bboxes in bboxes_list)
    padded_bboxes_list = []
    for bboxes in bboxes_list:
        padded_bboxes = torch.cat((bboxes, torch.tensor([[-1, -1, -1, -1]]).expand((max_num_bboxes - len(bboxes)), -1)), dim=0)
        padded_bboxes_list.append(padded_bboxes)

    # Pad label
    max_num_labels = max_num_bboxes
    padded_labels_list = []
    for labels in labels_list:
        padded_labels = torch.cat((labels, torch.zeros(max_num_labels - len(labels)).type(torch.int64)), dim=0)
        padded_labels_list.append(padded_labels)
    # Convert images, bboxes, and labels to tensors
    image_list = torch.stack(image_list)
    padded_bboxes_list = torch.stack(padded_bboxes_list)
    padded_labels_list = torch.stack(padded_labels_list)

    return image_list, padded_bboxes_list, padded_labels_list

# transform  = transforms.Resize([600,600]) 
train_dataset = FasterRCNNDataset(train_img, train_bbox, train_label)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)

valid_dataset = FasterRCNNDataset(valid_img, valid_bbox, valid_label)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
valid_iou_dataloader = DataLoader(valid_dataset, batch_size=1, collate_fn=collate_fn, shuffle=False)

test_dataset = FasterRCNNDataset(test_img, test_bbox, test_label)
test_dataloader = DataLoader(test_dataset, batch_size=1, collate_fn=collate_fn, shuffle=False)

for images, bboxes, labels in train_dataloader:
    print("Images shape:", images.shape)
    print("Bounding boxes shape:", bboxes.shape)
    print("Labels shape:", labels.shape, labels)
    break


Images shape: torch.Size([8, 1, 512, 512])
Bounding boxes shape: torch.Size([8, 2, 4])
Labels shape: torch.Size([8, 2]) tensor([[1, 0],
        [1, 1],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [0, 0]])


  image = torch.tensor(image)


In [7]:
def bbox_iou(bbox_a, bbox_b):
    if bbox_a.shape[1]!=4 or bbox_b.shape[1]!=4:
        raise IndexError
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])

    area_i = np.prod(br-tl, axis=2) * (tl<br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)

    return area_i / (area_a[:, None] + area_b - area_i)

In [8]:
import torch
import torchvision.ops as ops

# 定義bbox和scores
boxes = torch.tensor([[10, 20, 50, 60], [30, 40, 70, 80], [15, 25, 55, 65]])
scores = torch.tensor([0.9, 0.75, 0.95])
boxes = boxes.float()
scores = scores.float()
# 執行NMS
keep = ops.nms(boxes, scores, iou_threshold=0.5)

# 打印保留的bbox索引
print(keep)


tensor([2, 1])


In [9]:
def per_bbox_iou(bbox_a, bbox_b):
    if bbox_a.shape[0]!=4 or bbox_b.shape[0]!=4:
        raise IndexError
    tl_x = max(bbox_a[0], bbox_b[0])
    tl_y = max(bbox_a[1], bbox_b[1])
    br_x = min(bbox_a[2], bbox_b[2])
    br_y = min(bbox_a[3], bbox_b[3])
    
    area_i = (br_x - tl_x) * (br_y - tl_y)
    area_a = (bbox_a[2] - bbox_a[0]) * (bbox_a[3] - bbox_a[1])
    area_b = (bbox_b[2] - bbox_b[0]) * (bbox_b[3] - bbox_b[1])

    return area_i / (area_a + area_b - area_i)

def combine_two_boxes(bbox_a, bbox_b):
    if bbox_a.shape[0]!=4 or bbox_b.shape[0]!=4:
        raise IndexError
    tl_x = min(bbox_a[0], bbox_b[0])
    tl_y = min(bbox_a[1], bbox_b[1])
    br_x = max(bbox_a[2], bbox_b[2])
    br_y = max(bbox_a[3], bbox_b[3])
    
    return [tl_x, tl_y, br_x, br_y]
    

# print(per_bbox_iou(np.array([395.89, 236.79, 403.69, 244.99]), np.array([395.77, 239.97, 403.75, 248.49])))

def combine_boxes(pred_boxes, threshold=0.2):
#     print("pred_boxes",pred_boxes)
    tmp = np.array([[1, 2, 3, 5]])
    times = 0
#     print('pred_boxes[0].shape[0]', pred_boxes.shape[0])
    record = np.ones((pred_boxes.shape[0]))
    for i in range(pred_boxes.shape[0]):
        for j in range(i+1, pred_boxes.shape[0]):
#             print('ij', i, j, times)
            iou =  per_bbox_iou(pred_boxes[i], pred_boxes[j])
            if iou > threshold :
                unfold = []
                unfold.append(combine_two_boxes(pred_boxes[i], pred_boxes[j]))
#                 print("unfold",np.array(unfold)[0], tmp)
#                 print('sss', np.any(np.all(np.array(unfold)[0] == tmp, axis = 1)))
                if np.any(np.all(np.array(unfold)[0] == tmp, axis = 1)) == False:
                    tmp = np.append(tmp, np.array(unfold), axis=0)
                times += 1
                record[i] = 0
                record[j] = 0
#                 print("tmp",tmp)
    for i in range(record.shape[0]):
        if record[i] == 1:
            tmp = np.append(tmp, pred_boxes[i][np.newaxis,:], axis=0)
#     print('times', times, tmp[1:])      
    if times != 0 :
        result = combine_boxes(tmp[1:])
#         print('result',result)
        return result
    else :
#         print('tmp', tmp[1:])
        return tmp[1:]

# print(combine_boxes(np.array([[395.89865, 236.78769, 403.68912, 244.99106]
#  ,[391.71582, 233.57025, 406.8557,  249.33136]
#  ,[395.77515, 239.97034, 403.75067, 248.48788]])))
# print(combine_boxes(np.array([[4, 2, 6,  4]
#  ,[5, 3, 8, 6]
#  ,[6, 2, 8, 6]])))

In [10]:
def recall_coverage(bbox_a, bbox_b):
    if bbox_a.shape[1]!=4 or bbox_b.shape[1]!=4:
        raise IndexError
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])

    area_i = np.prod(br-tl, axis=2) * (tl<br).all(axis=2)
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)

    return area_i / area_b

In [11]:
import numpy as np

# Create the original array
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

# Create the array to append
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

# Append arr2 to arr1 along axis 0 (rows)
result = np.append(arr1, arr2, axis=0)

print(result)


[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [14]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models.detection.transform import GeneralizedRCNNTransform

model = models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
model.transform = GeneralizedRCNNTransform(512,512,[0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
in_feature = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feature, 2)
model.backbone.body.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
aspect_ratios = [(0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0)]
sizes = ((4,), (8,), (16,), (32,), (64,)) 
model.rpn.anchor_generator = AnchorGenerator(sizes=sizes, aspect_ratios=aspect_ratios)

model.load_state_dict(torch.load('/kaggle/input/weight/weight.pt'))
model.cuda()
model.eval()
true_pos_gt = 0
true_neg = 0
total_boxes = 0
scans = 0
false_pos = 0
true_pos_anchor = 0
total_anchor_num = 0
iou_array = np.array([])
ratio_array = np.array([])
for step, (imgs, bboxes, labels) in enumerate(test_dataloader):
    if torch.cuda.is_available():
      imgs = imgs.cuda()
      bboxes = bboxes.cuda()
      labels = labels.cuda()
    bbox, label = 0, 0
    for i in range(imgs.shape[0]):
      bbox = bboxes[i]
      label = labels[i]
      try:
        label_num = torch.nonzero(label == 0)[0][0]
      except:
        label_num = label.shape[0]
      label = label[:label_num]
      bbox = bbox[:label_num]
    output = model(imgs)
    total_boxes += bbox.shape[0]
    scans += 1
    indices = torch.where(output[0]['scores'] > 0.5)[0]
    if indices.shape[0] > 0:
        total_anchor_num += indices.shape[0]
        if label.shape[0] > 0:
            pred_boxes = output[0]['boxes'][indices].cpu().detach().numpy()
            pred_boxes = combine_boxes(pred_boxes)
            ious = bbox_iou(pred_boxes, bbox.cpu().numpy())
            coverage_ratio = recall_coverage(pred_boxes, bbox.cpu().numpy())
            
            print('pred', pred_boxes)

            print(' bbox',  bbox.cpu().numpy())
            print('ious', ious)
            max_iou_for_gt = np.max(ious, axis=0)
            max_ratio_for_gt = np.max(coverage_ratio, axis=0)
            print('max_ratio_for_gt', max_ratio_for_gt)
        
            max_ious_per_anchor = np.max(ious, axis=1)
            print('max_ious', max_ious_per_anchor)

            true_positive_index_gt = np.where(max_ratio_for_gt > 0.7)[0]
            true_positive_index_anchor = np.where(max_ious_per_anchor > 0.3)[0]
            print('true_positive_index_gt', true_positive_index_gt)
            iou_array = np.append(iou_array, max_iou_for_gt)
            ratio_array = np.append(ratio_array, max_ratio_for_gt[true_positive_index_gt])
            true_pos_gt += true_positive_index_gt.shape[0]
            true_pos_anchor += true_positive_index_anchor.shape[0]
#         else :
#             false_pos += indices.shape[0]

print('Sensitivity : ', true_pos_gt / total_boxes, true_pos_gt, total_boxes)
print('precision : ', true_pos_anchor / total_anchor_num, true_pos_anchor, total_anchor_num)
print('IoU average : ', np.mean(iou_array))
print('Ratio average : ', np.mean(ratio_array), ratio_array.shape)
print('scans', scans)

  image = torch.tensor(image)


pred [[179.23141479 266.36688232 201.97653198 303.63049316]]
 bbox [[191 355 205 369]
 [358 171 369 185]]
ious [[-0.  0.]]
max_ratio_for_gt [-0.  0.]
max_ious [0.]
true_positive_index_gt []
pred [[391.45657349 232.90138245 406.49771118 248.20970154]]
 bbox [[395 237 404 245]]
ious [[0.31269742]]
max_ratio_for_gt [1.]
max_ious [0.31269742]
true_positive_index_gt [0]
pred [[391.49093628 232.93859863 406.49588013 248.22964478]]
 bbox [[394 237 405 245]]
ious [[0.38354038]]
max_ratio_for_gt [1.]
max_ious [0.38354038]
true_positive_index_gt [0]
pred [[391.5562439  232.86235046 406.63827515 248.48039246]]
 bbox [[394 237 406 246]]
ious [[0.45849788]]
max_ratio_for_gt [1.]
max_ious [0.45849788]
true_positive_index_gt [0]
pred [[391.51309204 233.15960693 406.97250366 248.57284546]]
 bbox [[395 237 405 246]]
ious [[0.37770754]]
max_ratio_for_gt [1.]
max_ious [0.37770754]
true_positive_index_gt [0]
pred [[391.56170654 233.30981445 406.93145752 248.9703064 ]]
 bbox [[395 238 405 246]]
ious [[0.33