In [3]:
from ultralytics import YOLO
from torch.utils.data import Dataset ,DataLoader
import os
import numpy as np
import tqdm
import cv2
import matplotlib.pyplot as plt
import mplcursors


# Load the model
# Weights are read from a local checkpoint file in the working directory.
model = YOLO('./best_0731.pt')  # load a pretrained model
# Show the container type of the class-name mapping and how many classes it holds.
print(type(model.names),len(model.names))

<class 'dict'> 12


In [4]:
image_path = './test/images'
label_path = './test/labels'


def _index_by_stem(directory):
    """Map each file's basename (extension stripped) to its path inside ``directory``."""
    return {
        os.path.splitext(name)[0]: os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
    }


# Pair images and labels by basename instead of relying on two independently
# sorted listings: a single missing or extra file in either directory would
# otherwise shift every following (image, label) pair without any error.
_images_by_stem = _index_by_stem(image_path)
_labels_by_stem = _index_by_stem(label_path)
_paired_stems = sorted(_images_by_stem.keys() & _labels_by_stem.keys())

_unpaired = len(_images_by_stem) + len(_labels_by_stem) - 2 * len(_paired_stems)
if _unpaired:
    print(f'Skipping {_unpaired} file(s) without a matching image/label counterpart')

# Index i of both lists refers to the same sample.
image_filepaths = [_images_by_stem[stem] for stem in _paired_stems]
label_filepaths = [_labels_by_stem[stem] for stem in _paired_stems]

class TestDataset(Dataset):
    """Dataset yielding ``(image path, label array)`` pairs.

    Each label file is read as whitespace-separated numeric rows whose first
    column is kept as-is and whose next four columns are treated as a
    centre-format box ``(cx, cy, w, h)``. Those four columns are converted to
    corner format and multiplied by ``img_size``.

    Args:
        image_list: Image file paths; item ``i`` is returned untouched.
        label_list: Label file paths, aligned index-by-index with ``image_list``.
        img_size: Scale factor applied to the converted box columns
            (defaults to 640, the value that was previously hard-coded).
    """

    def __init__(self, image_list, label_list, img_size=640):
        self.file_list = image_list
        self.label_list = label_list
        self.img_size = img_size

    def __len__(self):
        """Number of samples, taken from the image list."""
        return len(self.file_list)

    def xywh2xyxy(self, x):
        """Convert ``(cx, cy, w, h)`` boxes in the last axis to ``(x1, y1, x2, y2)``.

        Returns a new array; ``x`` is not modified.
        """
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
        y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
        y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
        y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
        return y

    def __getitem__(self, idx):
        """Return ``(image path, labels)`` for sample ``idx``.

        ``labels`` has one row per non-blank line of the label file, with the
        box columns already converted and scaled. A label file without any
        rows yields an array of shape ``(0, 5)`` instead of raising.
        """
        img_path = self.file_list[idx]
        label_file = self.label_list[idx]

        rows = []
        with open(label_file, 'r') as file:
            for line in file:
                # split() tolerates tabs / repeated spaces, and blank lines
                # produce no fields so they are skipped instead of hitting float('').
                fields = line.split()
                if not fields:
                    continue
                rows.append([float(num) for num in fields])

        # Convert the row list into a NumPy array; keep it 2-D even when empty
        # so the column slicing below stays valid.
        if rows:
            numpy_array = np.array(rows, dtype=np.float64)
        else:
            numpy_array = np.empty((0, 5), dtype=np.float64)

        numpy_array[:, 1:] = self.xywh2xyxy(numpy_array[:, 1:]) * self.img_size
        return img_path, numpy_array
    
test_dataset = TestDataset(image_filepaths,label_filepaths)
# batch_size stays at 1 because the number of objects differs per image, so the
# label arrays cannot be stacked into one batch tensor.
# shuffle is off: this is an evaluation pass, and a random order would change
# the numbering of the saved crops and the row order of the saved arrays on
# every run.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [5]:
def IoU(box1, box2):
    """Intersection-over-union of two boxes given as ``(x1, y1, x2, y2)``.

    Side lengths are measured inclusively (``max - min + 1``), both for the
    individual boxes and for their intersection.
    """
    def _inclusive_area(b):
        return (b[2] - b[0] + 1) * (b[3] - b[1] + 1)

    area_a = _inclusive_area(box1)
    area_b = _inclusive_area(box2)

    # Corners of the overlapping rectangle.
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Clamp to zero when the boxes do not overlap along an axis.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    overlap = overlap_w * overlap_h

    return overlap / (area_a + area_b - overlap)

In [4]:
save_img_path = './crop_img_save'
# cv2.imwrite does not create missing directories; without this the crops
# would not be written and no exception would point at the cause.
os.makedirs(save_img_path, exist_ok=True)

iou_thres = 0.7
cnt = 0

num_classes = len(model.names)
# class id -> list of crop ids (the <cnt> part of "<cnt>.jpg") in insertion order
croped_img_path_dict = {key: [] for key in range(num_classes)}
# class id -> list of embedding vectors, stacked once after the loop
_img_vec_lists = {key: [] for key in range(num_classes)}


def _greedy_match(pred_boxes, gt_labels, thres):
    """Greedily pair predicted boxes with ground-truth rows by IoU.

    The shorter side is matched against the longer one, exactly as before:
    with more labels than predictions each prediction picks its best still-free
    label, otherwise each label picks its best still-free prediction. A pair is
    kept only when its IoU reaches ``thres``.

    Args:
        pred_boxes: array of predicted ``(x1, y1, x2, y2)`` rows.
        gt_labels: array of ``(class, x1, y1, x2, y2)`` rows.
        thres: minimum IoU for a pair to be accepted.

    Returns:
        List of ``(pred_index, label_index)`` tuples.
    """
    gt_boxes = gt_labels[:, 1:]
    iterate_preds = gt_labels.shape[0] > pred_boxes.shape[0]
    if iterate_preds:
        queries, candidates = pred_boxes, gt_boxes
    else:
        queries, candidates = gt_boxes, pred_boxes

    free = [True] * len(candidates)
    pairs = []
    for q_idx, query in enumerate(queries):
        # IoU is symmetric, so the argument order does not matter here.
        ious = np.array([
            IoU(query, cand) if is_free else 0.0
            for cand, is_free in zip(candidates, free)
        ])
        if ious.size == 0 or np.max(ious) < thres:
            continue
        best = int(np.argmax(ious))
        free[best] = False
        pairs.append((q_idx, best) if iterate_preds else (best, q_idx))
    return pairs


for img_path, label in tqdm.tqdm(test_dataloader):
    origin_img = cv2.imread(img_path[0])
    results = model(img_path[0])
    # Per the original notes: boxes are (num detections, 4) and the embedding
    # matrix is (num detections, 128) -- row i of both refers to detection i.
    box = results[0][0].boxes.xyxy.cpu().numpy()
    img_vec = results[1][0].cpu().numpy()
    # Drop the batch dimension added by the DataLoader: (num ground-truth objects, 5)
    label = label.squeeze(0).cpu().numpy()

    for box_idx, gt_idx in _greedy_match(box, label, iou_thres):
        label_idx = int(label[gt_idx, 0])

        x1, y1, x2, y2 = (int(v) for v in box[box_idx])
        croped_img = origin_img[y1:y2, x1:x2]
        if croped_img.size == 0:
            # A box thinner than one pixel after truncation has nothing to save.
            continue

        cv2.imwrite(os.path.join(save_img_path, f"{str(cnt)}.jpg"), croped_img)
        croped_img_path_dict[label_idx].append(cnt)
        _img_vec_lists[label_idx].append(img_vec[box_idx])
        cnt += 1

# Stack once per class. Every class ends up 2-D (n_samples, dim) -- including
# classes with exactly one sample, which previously stayed a flat (dim,) vector
# and broke the per-class row counts used further down.
_sample_vec = next((vecs[0] for vecs in _img_vec_lists.values() if vecs), None)
img_vec_dict = {}
for key, vecs in _img_vec_lists.items():
    if vecs:
        img_vec_dict[key] = np.vstack(vecs)
    elif _sample_vec is not None:
        img_vec_dict[key] = np.empty((0, _sample_vec.shape[-1]), dtype=_sample_vec.dtype)
    else:
        img_vec_dict[key] = np.array([])


  0%|          | 0/3282 [00:00<?, ?it/s]
  0%|          | 1/3282 [00:03<3:07:15,  3.42s/it]
  0%|          | 2/3282 [00:03<1:26:40,  1.59s/it]
  0%|          | 3/3282 [00:04<54:38,  1.00it/s]  
  0%|          | 4/3282 [00:04<39:24,  1.39it/s]
  0%|          | 5/3282 [00:04<31:27,  1.74it/s]
  0%|          | 6/3282 [00:04<26:52,  2.03it/s]
  0%|          | 7/3282 [00:05<23:29,  2.32it/s]
  0%|          | 8/3282 [00:05<21:10,  2.58it/s]
  0%|          | 9/3282 [00:05<19:41,  2.77it/s]
  0%|          | 10/3282 [00:06<18:35,  2.93it/s]
  0%|          | 11/3282 [00:06<17:39,  3.09it/s]
  0%|          | 12/3282 [00:06<16:56,  3.22it/s]
  0%|          | 13/3282 [00:07<17:04,  3.19it/s]
  0%|          | 14/3282 [00:07<16:43,  3.26it/s]
  0%|          | 15/3282 [00:07<16:18,  3.34it/s]
  0%|          | 16/3282 [00:07<16:03,  3.39it/s]
  1%|          | 17/3282 [00:08<15:54,  3.42it/s]
  1%|          | 18/3282 [00:08<16:22,  3.32it/s]
  1%|          | 19/3282 [00:08<16:12,  3.35it/s]
  1%|       

In [None]:
# Show how many embedding rows were collected for each class.
for v in img_vec_dict.values():
    print(v.shape)

# Bring every non-empty class to a 2-D (n_samples, dim) block so that a class
# holding a single flat vector counts as one row, and empty classes do not
# break the concatenation.
class_blocks = {
    k: v.reshape(-1, v.shape[-1])
    for k, v in img_vec_dict.items()
    if v.size
}

# One label row per embedding row, in the same class order as the embeddings.
x_label = np.concatenate(
    [np.full((block.shape[0], 1), k) for k, block in class_blocks.items()],
    axis=0,
)
x = np.concatenate(list(class_blocks.values()), axis=0)

# Flatten the values of croped_img_path_dict into a single column of crop ids.
image_paths_list = [image_path for image_list in croped_img_path_dict.values() for image_path in image_list]
image_paths_list = np.array(image_paths_list).reshape((-1, 1))

# The three arrays are row-aligned; fail here rather than after saving.
assert x.shape[0] == x_label.shape[0] == image_paths_list.shape[0], \
    'embeddings, labels and crop ids are out of sync'

np.save('img_vec.npy', x)
x = np.load('img_vec.npy')

# Save the label column itself (the previous code passed an undefined name `y`).
np.save('label_sequence.npy', x_label)
x_label = np.load('label_sequence.npy')

np.save('image_paths_list.npy', image_paths_list)
image_paths_list = np.load('image_paths_list.npy')

x_label.shape , x.shape , image_paths_list.shape

In [3]:
from sklearn.manifold import TSNE

# t-SNE is stochastic; pin the seed so the saved 2-D embedding can be
# regenerated identically on a fresh kernel.
train_x = TSNE(n_components=2, verbose=2, random_state=42).fit_transform(x)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 5907 samples in 0.001s...
[t-SNE] Computed neighbors for 5907 samples in 0.369s...
[t-SNE] Computed conditional probabilities for sample 1000 / 5907
[t-SNE] Computed conditional probabilities for sample 2000 / 5907
[t-SNE] Computed conditional probabilities for sample 3000 / 5907
[t-SNE] Computed conditional probabilities for sample 4000 / 5907
[t-SNE] Computed conditional probabilities for sample 5000 / 5907
[t-SNE] Computed conditional probabilities for sample 5907 / 5907
[t-SNE] Mean sigma: 2.461026
[t-SNE] Computed conditional probabilities in 0.126s
[t-SNE] Iteration 50: error = 76.1857529, gradient norm = 0.0403317 (50 iterations in 0.696s)
[t-SNE] Iteration 100: error = 66.8251953, gradient norm = 0.0141498 (50 iterations in 0.627s)
[t-SNE] Iteration 150: error = 64.1211090, gradient norm = 0.0091609 (50 iterations in 0.610s)
[t-SNE] Iteration 200: error = 62.7639351, gradient norm = 0.0080469 (50 iterations in 0.599s)
[t

In [16]:
# Sanity check: display the shape of the t-SNE output array.
train_x.shape

(5907, 2)

In [None]:
# Persist the t-SNE output to 'tsne_vec.npy' in the working directory.
np.save('tsne_vec.npy',train_x)

# PCA

In [19]:
# Report the shape of the embedding array stored for each class.
for class_vecs in img_vec_dict.values():
    print(class_vecs.shape)

# Build the PCA inputs: one label row per embedding row (the label is the
# position of the class in the dict), followed by all embeddings stacked in
# that same class order.
label_columns = []
feature_blocks = []
for position, class_vecs in enumerate(img_vec_dict.values()):
    label_columns.append(np.full((class_vecs.shape[0], 1), position))
    feature_blocks.append(class_vecs)

yy = np.concatenate(label_columns, axis=0)
xx = np.concatenate(feature_blocks, axis=0)
yy.shape, xx.shape

(492, 128)
(494, 128)
(496, 128)
(498, 128)
(499, 128)
(500, 128)
(484, 128)
(478, 128)
(495, 128)
(496, 128)
(496, 128)
(479, 128)


((5907, 1), (5907, 128))

In [8]:
from sklearn.decomposition import PCA

# Project the stacked embeddings onto their first two principal components.
pca = PCA(n_components=2)
pca_x = pca.fit_transform(xx)
# Print the fraction of variance explained by each kept component
# (the Korean label reads "principal component importance").
print(f'주성분 중요도 : {pca.explained_variance_ratio_}')
# Display the shape of the projected array.
pca_x.shape

주성분 중요도 : [    0.14306     0.11723]


(5907, 2)

In [9]:
# Persist the PCA projection to 'pca_vec.npy' in the working directory.
np.save('pca_vec.npy',pca_x)

In [None]:
## For testing: run one image through the detector and save an annotated copy.
# NOTE: this rebinds `model` to a different checkpoint than the one loaded at
# the top of the notebook; re-run the first cell before re-running the
# crop/embedding loop.
model = YOLO('./best.pt')  # load a pretrained model
print(type(model.names),len(model.names))

test_image_file = 'test_orignal.png'
image = cv2.imread(test_image_file)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError(f"cv2.imread could not read '{test_image_file}'")

results = model(image)
boxes = results[0][0].boxes
for det in boxes:
    # Each attribute is read as a one-element sequence for the current detection.
    cls = int(det.cls.tolist()[0])
    conf = det.conf.tolist()[0]
    x1, y1, x2, y2 = map(int, det.xyxy.tolist()[0])

    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    caption = 'class(' + str(cls) + ') : ' + str(int(conf*100)) + '%'
    print(caption)
    cv2.putText(image, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 10)
cv2.imwrite("test_test.jpg", image)