In [1]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

In [2]:
folder_path = './recaptcha-dataset/QC'
class_D = {}
files_and_folders = os.listdir(folder_path)

# 해당 경로의 모든 파일 및 폴더 이름 리스트 반환
files_and_folders = os.listdir(folder_path)

# 폴더 이름만 필터링하여 리스트에 저장
folder_names = [f for f in files_and_folders if os.path.isdir(os.path.join(folder_path, f))]
folder_dic = {}
print(folder_names)
for a in folder_names:
    folder_dic[a] = os.listdir(folder_path+"/"+a)
    print(a, len(folder_dic[a]))

['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']
Bicycle 28
Bridge 19
Bus 17
Car 21
Chimney 20
Crosswalk 20
Hydrant 20
Motorcycle 21
Palm 19
Traffic Light 24


In [3]:
def norm_hist(hist):
   #정규화 => 이미지 크기에 상관없이 히스토그램 그려줘 - 모든 값 0-1 매핑핑
    # Normalize the histogram
    hist = hist.astype('float')
    hist /= hist.sum()
    return hist


def hist_gram(image):
    hist_b, bins_b = np.histogram(image[0], bins=128, range=(0, 256)) #bin 만큼의 차원의 feature 뽑을 수 있다.
    hist_g, bins_g = np.histogram(image[1], bins=128, range=(0, 256))
    hist_r, bins_r = np.histogram(image[2], bins=128, range=(0, 256))
    hist_b = norm_hist(hist_b)    # 256-d
    hist_g = norm_hist(hist_g)    # 256-d
    hist_r = norm_hist(hist_r)    # 256-d
    # gray histogram 입력을 grat -> bin을 128 -> 사실은 띄워져 있다.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    hist_gray, bins_gray = np.histogram(gray, bins=128, range=(0, 256))
    hist_gray = norm_hist(hist_gray)    # 128-d

    return hist_b, hist_g, hist_r, hist_gray

In [4]:
# Law's texture
from scipy import signal as sg

def laws_texture(gray):
    (rows, cols) = gray.shape[:2] #행과 열의 개수 - 크기기
    
    #gray scale에 대해 smoothing한 이미지를 원본에서 빼준다. 5*5짜리의 mean 필터를 적용해줬다.
    smooth_kernel = (1/25)*np.ones((5,5))
    gray_smooth = sg.convolve(gray, smooth_kernel,"same")
    gray_processed = np.abs(gray - gray_smooth)
    

    filter_vectors = np.array([[ 1,  4,  6,  4, 1],    # L5
                               [-1, -2,  0,  2, 1],    # E5
                               [-1,  0,  2,  0, 1],    # S5
                               [ 1, -4,  6, -4, 1]])   # R5

    # 0:L5L5, 1:L5E5, 2:L5S5, 3:L5R5, 
    # 4:E5L5, 5:E5E5, 6:E5S5, 7:E5R5,
    # 8:S5L5, 9:S5E5, 10:S5S5, 11:S5R5, 
    # 12:R5L5, 13:R5E5, 14:R5S5, 15:R5R5
    filters = list()
    for i in range(4):
        for j in range(4):
            filters.append(np.matmul(filter_vectors[i][:].reshape(5,1),
                                     filter_vectors[j][:].reshape(1,5)))

    conv_maps = np.zeros((rows, cols,16))
    for i in range(len(filters)):
        conv_maps[:, :, i] = sg.convolve(gray_processed,
                                         filters[i],'same')
    #16개의 값 중 가장 중요한 9개 선정
    texture_maps = list()
    texture_maps.append((conv_maps[:, :, 1]+conv_maps[:, :, 4])//2)     # L5E5 / E5L5
    texture_maps.append((conv_maps[:, :, 2]+conv_maps[:, :, 8])//2)     # L5S5 / S5L5
    texture_maps.append((conv_maps[:, :, 3]+conv_maps[:, :, 12])//2)    # L5R5 / R5L5
    texture_maps.append((conv_maps[:, :, 7]+conv_maps[:, :, 13])//2)    # E5R5 / R5E5
    texture_maps.append((conv_maps[:, :, 6]+conv_maps[:, :, 9])//2)     # E5S5 / S5E5 
    texture_maps.append((conv_maps[:, :, 11]+conv_maps[:, :, 14])//2)   # S5R5 / R5S5
    texture_maps.append(conv_maps[:, :, 10])                            # S5S5
    texture_maps.append(conv_maps[:, :, 5])                             # E5E5
    texture_maps.append(conv_maps[:, :, 15])                            # R5R5
    texture_maps.append(conv_maps[:, :, 0])                             # L5L5 (use to norm TEM)하나의 feature 값으로 저장하길 원하기에 normalization => 9개의 숫자가 나온다.
#숫자 9개로 나오게 하려고고


    TEM = list()
    for i in range(9):
        TEM.append(np.abs(texture_maps[i]).sum() / np.abs(texture_maps[9]).sum())
        
    return TEM

# laws = laws_texture(gray)    # 9-d laws_texture 직접 함수로 구현하기! - 특징 벡터 제일 많이 쓸겨!
# print(laws)

In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
recaptcha = './recaptcha-dataset/QC'
query_path = './query'
# labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 
#           'Crosswalk', 'Motorcycle']
labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 
           'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']

sbt_train_features = []
sbt_train_labels = []
q_sbt_test_features = []

for label in labels:
    image_dir = os.path.join(recaptcha, label)
    image_list = os.listdir(image_dir)
    for i, image_name in enumerate(image_list):

        image_path = os.path.join(image_dir, image_name)
        img = cv2.imread(image_path)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        sbt_train_features.append(laws_texture(gray_img))
        sbt_train_labels.append(label)
        

query_list = os.listdir(query_path)
for i, q_name in enumerate(query_list):
        
        q_path = os.path.join(query_path, q_name)
        image = cv2.resize(cv2.imread(q_path),(120,120))
        
        gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
        if i < 100:
            q_sbt_test_features.append(laws_texture(gray_img))
        else:
            break


In [6]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from skimage.feature import local_binary_pattern

recaptcha = './recaptcha-dataset/QC'
# labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 
#           'Crosswalk', 'Motorcycle']
labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 
           'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']

lbp_train_features = []
lbp_train_labels = []
q_lbp_test_features = []

for label in labels:
    image_dir = os.path.join(recaptcha, label)
    image_list = os.listdir(image_dir)
    for i, image_name in enumerate(image_list):

        image_path = os.path.join(image_dir, image_name)
        img = cv2.imread(image_path)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray_img, P=8, R=1) #R=1 -> 바로 인접 8개의 픽셀값 고려하겠다. 더 멀리 보려면 R을 늘려려

        hist_lbp, bin_lbp = np.histogram(lbp.ravel(), bins=64, range=(0, 256)) #256차원을 bin값으로 축소할수도!
        hist_lbp = norm_hist(hist_lbp)    # 64-d
        lbp_train_features.append(hist_lbp)
        lbp_train_labels.append(label)

query_list = os.listdir(query_path)
for i, q_name in enumerate(query_list):
        
        q_path = os.path.join(query_path, q_name)
        image = cv2.resize(cv2.imread(q_path),(120,120))
        gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray_img, P=8, R=1) #R=1 -> 바로 인접 8개의 픽셀값 고려하겠다. 더 멀리 보려면 R을 늘려려

        hist_lbp, bin_lbp = np.histogram(lbp.ravel(), bins=64, range=(0, 256)) #256차원을 bin값으로 축소할수도!
        hist_lbp = norm_hist(hist_lbp)    # 64-d

        if i < 100:
            q_lbp_test_features.append(hist_lbp)
        else:
            break



In [7]:
lst = []
for i in range(len(sbt_train_features)):
    npa = np.array(sbt_train_features[i])
    lst.append(npa)
final_train_features = np.concatenate([lbp_train_features, lst], axis = 1)
Lst = []
for i in range(len(q_sbt_test_features)):
    npa = np.array(q_sbt_test_features[i])
    Lst.append(npa)
final_test_features = np.concatenate([q_lbp_test_features, Lst], axis = 1)

final_classifier = KNeighborsClassifier(n_neighbors = 5)

final_classifier.fit(final_train_features, lbp_train_labels)
final_predict_labels = final_classifier.predict(final_test_features)
print(final_predict_labels)
print(query_list)

['Bus' 'Palm' 'Bus' 'Motorcycle' 'Chimney' 'Traffic Light' 'Motorcycle'
 'Crosswalk' 'Crosswalk' 'Bridge' 'Traffic Light' 'Palm' 'Bridge' 'Bus'
 'Crosswalk' 'Motorcycle' 'Bridge' 'Palm' 'Chimney' 'Hydrant' 'Palm' 'Bus'
 'Bridge' 'Crosswalk' 'Bicycle' 'Bridge' 'Crosswalk' 'Chimney' 'Chimney'
 'Car' 'Palm' 'Bicycle' 'Palm' 'Chimney' 'Motorcycle' 'Crosswalk'
 'Motorcycle' 'Chimney' 'Chimney' 'Palm' 'Car' 'Palm' 'Chimney' 'Bus'
 'Bus' 'Traffic Light' 'Hydrant' 'Crosswalk' 'Bus' 'Hydrant' 'Palm'
 'Bicycle' 'Bicycle' 'Chimney' 'Hydrant' 'Crosswalk' 'Traffic Light'
 'Motorcycle' 'Hydrant' 'Bridge' 'Hydrant' 'Crosswalk' 'Bridge' 'Car'
 'Bridge' 'Bicycle' 'Chimney' 'Bus' 'Hydrant' 'Crosswalk' 'Bridge' 'Bus'
 'Hydrant' 'Bus' 'Chimney' 'Bridge' 'Car' 'Bicycle' 'Hydrant' 'Car'
 'Hydrant' 'Bicycle' 'Bridge' 'Palm' 'Bridge' 'Chimney' 'Crosswalk'
 'Bridge' 'Bus' 'Chimney' 'Hydrant' 'Hydrant' 'Hydrant' 'Palm'
 'Motorcycle' 'Hydrant' 'Palm' 'Bus' 'Motorcycle' 'Chimney']
['query1.png', 'query10.png', 'q

In [8]:
neigh_ind = final_classifier.kneighbors(X = final_test_features, n_neighbors=10, return_distance=False) # Top-10 results
neigh_labels = np.array(lbp_train_labels)[neigh_ind]    

In [9]:
print(neigh_labels)

[['Hydrant' 'Bus' 'Chimney' 'Hydrant' 'Bus' 'Hydrant' 'Car' 'Motorcycle'
  'Bus' 'Bridge']
 ['Chimney' 'Traffic Light' 'Traffic Light' 'Palm' 'Palm' 'Palm'
  'Motorcycle' 'Chimney' 'Chimney' 'Bridge']
 ['Bus' 'Bus' 'Bicycle' 'Bus' 'Hydrant' 'Car' 'Bus' 'Car' 'Bus'
  'Motorcycle']
 ['Palm' 'Motorcycle' 'Chimney' 'Hydrant' 'Motorcycle' 'Traffic Light'
  'Motorcycle' 'Hydrant' 'Hydrant' 'Traffic Light']
 ['Palm' 'Bus' 'Chimney' 'Chimney' 'Crosswalk' 'Hydrant' 'Traffic Light'
  'Traffic Light' 'Bridge' 'Bicycle']
 ['Traffic Light' 'Bridge' 'Crosswalk' 'Traffic Light' 'Bicycle'
  'Crosswalk' 'Crosswalk' 'Chimney' 'Crosswalk' 'Bridge']
 ['Car' 'Motorcycle' 'Motorcycle' 'Bicycle' 'Bus' 'Bicycle' 'Bus' 'Bus'
  'Traffic Light' 'Car']
 ['Crosswalk' 'Palm' 'Traffic Light' 'Crosswalk' 'Chimney' 'Crosswalk'
  'Traffic Light' 'Chimney' 'Chimney' 'Palm']
 ['Bicycle' 'Crosswalk' 'Car' 'Bus' 'Crosswalk' 'Motorcycle' 'Crosswalk'
  'Crosswalk' 'Motorcycle' 'Chimney']
 ['Car' 'Bridge' 'Motorcycle' 'Chimne

In [11]:
import csv

with open('c1_t2_a1.csv','w') as file :
    write = csv.writer(file)
    for i in range(len(final_predict_labels)):
        write.writerow([query_list[i]] + list(neigh_labels[i]))
