In [1]:
from selenium.webdriver.common.by import By
from urllib.request import urlretrieve
from selenium import webdriver
import pandas as pd
import datetime
import json
import cv2
import os

In [None]:
# Create the working directory tree used by the rest of the notebook.
# Each leaf is created with makedirs(exist_ok=True) so that a partially
# existing tree (e.g. 'data' is present but 'data/json' is not) is completed
# instead of being skipped entirely, and re-running the cell is harmless.
for sub_dir in (
    'data/car/crop',
    'data/car/msk',
    'data/carNumber/crop',
    'data/carNumber/msk',
    'data/crawling',
    'data/json',
):
    os.makedirs(sub_dir, exist_ok=True)

In [2]:
# Browser configuration: run Chrome without a visible window, with the
# sandbox and /dev/shm usage switched off.
chrome_options = webdriver.ChromeOptions()
for chrome_flag in ("--headless", '--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(chrome_flag)

# Base listing URL; a page number is concatenated to it when crawling.
url_main = "https://auto.danawa.com/usedcar/?Work=list&Tab=classify&Page="

driver = webdriver.Chrome(options=chrome_options)

# Column-oriented accumulator: one list per scraped field.
dicts = {column: [] for column in ('car_name', 'car_number', 'car_img')}

In [3]:
# Crawl listing pages 1 and 2, download one image per car entry and record
# name / number / local image path in `dicts`.
# The browser is closed in `finally` so that a scraping or download error
# does not leave a headless Chrome process running.
try:
    # The implicit wait is a driver-wide setting that applies to every later
    # find_element(s) call, so it only has to be configured once.
    driver.implicitly_wait(5)

    for page_num in range(1, 3):
        driver.get(url_main + str(page_num))
        # The first matching block is skipped — presumably it is not a car
        # entry; TODO confirm against the page markup.
        cars_info = driver.find_elements(By.CSS_SELECTOR, 'div.col.info')[1:]

        for car in cars_info:
            car_name = car.find_element(By.CSS_SELECTOR, 'a.name').text
            car_number = car.find_elements(By.CSS_SELECTOR, 'span')[-1].text

            img_src = car.find_element(By.CSS_SELECTOR, 'img').get_attribute('src')
            # Microsecond timestamp keeps file names unique and sortable.
            img_path = f"data/crawling/img_crawling_{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}.jpg"
            urlretrieve(img_src, img_path)

            # Append only after the download succeeded so the three columns
            # always have the same length.
            dicts['car_name'].append(car_name)
            dicts['car_number'].append(car_number)
            dicts['car_img'].append(img_path)
finally:
    driver.quit()

In [4]:
# Make sure every crawled image has a companion JSON file in data/json/.
img_path = os.listdir('data/crawling/')
img_path.sort()
json_path = os.listdir('data/json/')
json_path.sort()

for path in img_path:
    # Text after the last '_' and before the first '.' becomes the JSON stem.
    stem = path.rsplit('_', 1)[-1].split('.', 1)[0]
    json_file_name = stem + ".json"
    if json_file_name in json_path:
        continue
    # New images start out flagged as not yet annotated.
    with open("data/json/" + json_file_name, 'w') as f:
        json.dump({"result":False}, f)


# Image processing

In [19]:
# Module-level state shared by the mouse callback and the display loop.
is_drawing = False                    # True while a rectangle is being dragged

task_type = ''                        # name of the pending keyboard task ('' = none)
item_type = ['car', 'carNumber']      # label names; an object's 'type' is an index into this list

# Index of the selected image and of the selected label.
file_point, item_point = 0, 0

In [20]:
def get_task_type(key):
    """Translate a key code into a task name.

    Parameters
    ----------
    key : int
        Character code of the pressed key (converted with ``chr``).

    Returns
    -------
    str
        'item_left', 'item_right', 'left', 'right' or 'exit' for the bound
        letters q / e / a / d / x (either case), and '' for any other key.
    """
    bindings = (
        ('q', 'item_left'),
        ('e', 'item_right'),
        ('a', 'left'),
        ('d', 'right'),
        ('x', 'exit'),
    )
    key2task = {}
    for letter, task in bindings:
        # Register both the lower- and the upper-case form of each letter.
        key2task[letter] = task
        key2task[letter.upper()] = task
    return key2task.get(chr(key), '')

In [21]:
def load_json(file_point):
    """Read and return the parsed JSON document of one entry.

    `file_point` is an index into the module-level `json_path` list; the
    file is looked up inside the data/json/ folder.
    """
    target = "data/json/" + json_path[file_point]
    with open(target, 'r') as handle:
        parsed = json.load(handle)
    return parsed

In [22]:
def save_json(dict_total, file_point):
    """Serialise `dict_total` as JSON, overwriting the file of one entry.

    `file_point` is an index into the module-level `json_path` list; the
    file is written inside the data/json/ folder. Returns None.
    """
    target = "data/json/" + json_path[file_point]
    with open(target, 'w') as handle:
        json.dump(dict_total, handle)
    return None

In [23]:
# Re-read the JSON folder listing (sorted by name) and load the document
# that belongs to the currently selected index.
json_path = os.listdir('data/json/')
json_path.sort()
dict_total = load_json(file_point)

In [24]:
def draw_object(img, objs):
    """Draw one rectangle on `img` for every entry of `objs`.

    Each entry is a dict providing 'type', 'x1', 'y1', 'x2' and 'y2'.
    The rectangle spans (x1, y1)-(x2, y2); its colour is
    ``(type * 255, 0, 0)``, so the first channel depends on the label index.
    """
    for box in objs:
        label = box['type']
        corner_a = [box['x1'], box['y1']]
        corner_b = [box['x2'], box['y2']]
        colour = (label * 255, 0, 0)

        cv2.rectangle(img, corner_a, corner_b, colour)

In [25]:
def add_object(dict_total, obj):
    """Append `obj` to ``dict_total['data']`` and give it a sequential id.

    The 'data' list is created when missing. ``obj['id']`` is set to the
    number of entries that were in the list before the append. Both
    arguments are modified in place; nothing is returned.
    """
    records = dict_total.setdefault('data', [])
    obj['id'] = len(records)
    records.append(obj)

In [26]:
import numpy as np
def crop_object(obj):
    """Write the image region described by `obj` to the crop and msk folders.

    Reads the module-level globals `img` (current image array), `json_path`,
    `file_point` and `item_type`; none of them is modified.

    Parameters
    ----------
    obj : dict
        Must provide 'x1', 'x2', 'y1', 'y2' (slice bounds into `img`),
        'id' (used in the file name) and 'type' (index into `item_type`).

    Notes
    -----
    Boxes with zero (or negative) width or height are skipped: slicing them
    yields an empty array, which cv2.imwrite refuses to encode.
    """
    x1, x2, y1, y2 = obj['x1'], obj['x2'], obj['y1'], obj['y2']
    if x2 <= x1 or y2 <= y1:
        return

    item_name = item_type[obj['type']]
    # Output name: <json file stem>_<object id>.jpg
    out_name = f"{json_path[file_point].split('.')[0]}_{obj['id']}.jpg"

    crop = img[y1:y2, x1:x2, :]
    cv2.imwrite(f"data/{item_name}/crop/{out_name}", crop)

    # NOTE(review): as in the original code, the "mask" is an all-zero image
    # with the same shape as the crop. If a full-size mask with the region
    # marked was intended, this needs to change — confirm with the consumer
    # of data/*/msk.
    mask = np.zeros_like(crop)
    cv2.imwrite(f"data/{item_name}/msk/{out_name}", mask)

In [27]:
def mouse_crop(event, x, y, flags, param):
    """Mouse callback: drag with the left button to annotate a rectangle.

    * button down  - start a drag at (x, y)
    * mouse move   - update the moving corner while dragging
    * button up    - finish the drag, store the box in `dict_total`, write the
                     crop files and save the JSON document

    Rebinds the globals `is_drawing`, `pt1` and `pt2`; reads `img`,
    `dict_total`, `file_point` and `item_point`. `flags` and `param` are
    part of the callback signature and are unused.
    """
    global is_drawing, pt1, pt2

    if event == cv2.EVENT_LBUTTONDOWN:
        is_drawing = True
        pt1, pt2 = (x, y), (x, y)
    elif event == cv2.EVENT_MOUSEMOVE:
        if is_drawing:
            pt2 = (x, y)
    elif event == cv2.EVENT_LBUTTONUP:
        # A release without a matching press (e.g. the press happened outside
        # the window) has no start point; ignore it.
        if not is_drawing:
            return
        is_drawing = False
        # Use the release position itself, not the last reported move.
        pt2 = (x, y)

        # Clamp to the image so that coordinates reported outside of it
        # cannot turn into negative (wrap-around) slice indices.
        height, width = img.shape[:2]
        xs = sorted(min(max(px, 0), width) for px in (pt1[0], pt2[0]))
        ys = sorted(min(max(py, 0), height) for py in (pt1[1], pt2[1]))

        # A plain click (or a drag fully outside the image) has no area:
        # do not record an empty object.
        if xs[0] == xs[1] or ys[0] == ys[1]:
            return

        obj = {
            'x1': xs[0], 'x2': xs[1],
            'y1': ys[0], 'y2': ys[1],
            'type': item_point,
        }
        add_object(dict_total, obj)

        dict_total['result'] = True

        crop_object(obj)

        save_json(dict_total, file_point)
        

In [28]:
# Interactive annotation loop: runs until the exit key ('x') is pressed.
windows_name = 'img'
cv2.namedWindow(windows_name, flags=cv2.WINDOW_NORMAL)
cv2.resizeWindow(windows_name, 800, 600)
cv2.createTrackbar("img_num", windows_name, 0, len(img_path) - 1, lambda x:x)
cv2.setTrackbarPos("img_num", windows_name, 0)
cv2.createTrackbar("item_type", windows_name, 0, len(item_type) - 1, lambda x:x)
cv2.setTrackbarPos("item_type", windows_name, 0)
cv2.setMouseCallback(windows_name, mouse_crop)

dict_total = load_json(file_point)
try:
    while True:
        # Synchronise with the trackbars first. The image trackbar can be
        # moved with the mouse as well as with the keys, so the annotation
        # document must be swapped whenever the selected index changes;
        # otherwise one image's boxes would be shown on, and saved into,
        # another image's JSON file.
        item_point = cv2.getTrackbarPos("item_type", windows_name)
        selected = cv2.getTrackbarPos("img_num", windows_name)
        if selected != file_point:
            save_json(dict_total, file_point)
            file_point = selected
            dict_total = load_json(file_point)

        # Reload the image. `img` stays untouched because crop_object reads
        # it; all overlays go onto a separate copy so that rectangles never
        # end up inside the saved crops.
        img = cv2.imread('data/crawling/' + img_path[file_point])
        if img is None:
            raise FileNotFoundError(
                'cannot read image: data/crawling/' + img_path[file_point]
            )
        canvas = img.copy()

        # Draw the boxes that are already stored for this image.
        if 'data' in dict_total:
            draw_object(canvas, dict_total['data'])

        # Draw the box that is currently being dragged.
        if is_drawing is True:
            cv2.rectangle(canvas, pt1, pt2, 255)

        cv2.imshow(windows_name, canvas)

        key = cv2.waitKey(1) & 0xFF

        # Keyboard tasks are only accepted while no drag is in progress.
        if is_drawing is False:
            task_type = get_task_type(key)

        if task_type == 'exit':
            break
        elif task_type == 'left':
            # Moving the trackbar is enough: the save / load of the JSON
            # document happens in the synchronisation step above.
            if file_point != 0:
                cv2.setTrackbarPos("img_num", windows_name, file_point - 1)
        elif task_type == 'right':
            if file_point != len(img_path) - 1:
                cv2.setTrackbarPos("img_num", windows_name, file_point + 1)

        elif task_type == 'item_left':
            if item_point != 0:
                cv2.setTrackbarPos("item_type", windows_name, item_point - 1)

        elif task_type == 'item_right':
            if item_point != len(item_type) - 1:
                cv2.setTrackbarPos("item_type", windows_name, item_point + 1)

        task_type = ''
finally:
    # Close the window even when the loop stops because of an error.
    cv2.destroyAllWindows()