In [2]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

In [3]:
folder_path = './recaptcha-dataset/Large'
class_D = {}
files_and_folders = os.listdir(folder_path)

# 해당 경로의 모든 파일 및 폴더 이름 리스트 반환
files_and_folders = os.listdir(folder_path)

# 폴더 이름만 필터링하여 리스트에 저장
folder_names = [f for f in files_and_folders if os.path.isdir(os.path.join(folder_path, f))]
folder_dic = {}
print(folder_names)
for a in folder_names:
    folder_dic[a] = os.listdir(folder_path+"/"+a)
    print(a, len(folder_dic[a]))

['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']
Bicycle 468
Bridge 765
Bus 1170
Car 453
Chimney 198
Crosswalk 462
Hydrant 714
Motorcycle 156
Palm 339
Traffic Light 288


In [4]:
def norm_hist(hist):
   #정규화 => 이미지 크기에 상관없이 히스토그램 그려줘 - 모든 값 0-1 매핑핑
    # Normalize the histogram
    hist = hist.astype('float')
    hist /= hist.sum()
    return hist


def hist_gram(image):
    hist_b, bins_b = np.histogram(image[0], bins=128, range=(0, 256)) #bin 만큼의 차원의 feature 뽑을 수 있다.
    hist_g, bins_g = np.histogram(image[1], bins=128, range=(0, 256))
    hist_r, bins_r = np.histogram(image[2], bins=128, range=(0, 256))
    hist_b = norm_hist(hist_b)    # 256-d
    hist_g = norm_hist(hist_g)    # 256-d
    hist_r = norm_hist(hist_r)    # 256-d
    # gray histogram 입력을 grat -> bin을 128 -> 사실은 띄워져 있다.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    hist_gray, bins_gray = np.histogram(gray, bins=128, range=(0, 256))
    hist_gray = norm_hist(hist_gray)    # 128-d

    return hist_b, hist_g, hist_r, hist_gray

In [5]:
# Law's texture
from scipy import signal as sg

def laws_texture(gray):
    (rows, cols) = gray.shape[:2] #행과 열의 개수 - 크기기
    
    #gray scale에 대해 smoothing한 이미지를 원본에서 빼준다. 5*5짜리의 mean 필터를 적용해줬다.
    smooth_kernel = (1/25)*np.ones((5,5))
    gray_smooth = sg.convolve(gray, smooth_kernel,"same")
    gray_processed = np.abs(gray - gray_smooth)
    

    filter_vectors = np.array([[ 1,  4,  6,  4, 1],    # L5
                               [-1, -2,  0,  2, 1],    # E5
                               [-1,  0,  2,  0, 1],    # S5
                               [ 1, -4,  6, -4, 1]])   # R5

    # 0:L5L5, 1:L5E5, 2:L5S5, 3:L5R5, 
    # 4:E5L5, 5:E5E5, 6:E5S5, 7:E5R5,
    # 8:S5L5, 9:S5E5, 10:S5S5, 11:S5R5, 
    # 12:R5L5, 13:R5E5, 14:R5S5, 15:R5R5
    filters = list()
    for i in range(4):
        for j in range(4):
            filters.append(np.matmul(filter_vectors[i][:].reshape(5,1),
                                     filter_vectors[j][:].reshape(1,5)))

    conv_maps = np.zeros((rows, cols,16))
    for i in range(len(filters)):
        conv_maps[:, :, i] = sg.convolve(gray_processed,
                                         filters[i],'same')
    #16개의 값 중 가장 중요한 9개 선정
    texture_maps = list()
    texture_maps.append((conv_maps[:, :, 1]+conv_maps[:, :, 4])//2)     # L5E5 / E5L5
    texture_maps.append((conv_maps[:, :, 2]+conv_maps[:, :, 8])//2)     # L5S5 / S5L5
    texture_maps.append((conv_maps[:, :, 3]+conv_maps[:, :, 12])//2)    # L5R5 / R5L5
    texture_maps.append((conv_maps[:, :, 7]+conv_maps[:, :, 13])//2)    # E5R5 / R5E5
    texture_maps.append((conv_maps[:, :, 6]+conv_maps[:, :, 9])//2)     # E5S5 / S5E5 
    texture_maps.append((conv_maps[:, :, 11]+conv_maps[:, :, 14])//2)   # S5R5 / R5S5
    texture_maps.append(conv_maps[:, :, 10])                            # S5S5
    texture_maps.append(conv_maps[:, :, 5])                             # E5E5
    texture_maps.append(conv_maps[:, :, 15])                            # R5R5
    texture_maps.append(conv_maps[:, :, 0])                             # L5L5 (use to norm TEM)하나의 feature 값으로 저장하길 원하기에 normalization => 9개의 숫자가 나온다.
#숫자 9개로 나오게 하려고고


    TEM = list()
    for i in range(9):
        TEM.append(np.abs(texture_maps[i]).sum() / np.abs(texture_maps[9]).sum())
        
    return TEM

# laws = laws_texture(gray)    # 9-d laws_texture 직접 함수로 구현하기! - 특징 벡터 제일 많이 쓸겨!
# print(laws)

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
recaptcha = './recaptcha-dataset/Large'
query_path = './query'
# labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 
#           'Crosswalk', 'Motorcycle']
labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 
           'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']

sbt_train_features = []
sbt_train_labels = []
q_sbt_test_features = []

for label in labels:
    image_dir = os.path.join(recaptcha, label)
    image_list = os.listdir(image_dir)
    for i, image_name in enumerate(image_list):

        image_path = os.path.join(image_dir, image_name)
        img = cv2.imread(image_path)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
        if i < 150:
            sbt_train_features.append(laws_texture(gray_img))
            sbt_train_labels.append(label)
        else:
            break

query_list = os.listdir(query_path)
for i, q_name in enumerate(query_list):
        
        q_path = os.path.join(query_path, q_name)
        image = cv2.imread(q_path)
        image = cv2.resize(cv2.imread(q_path),(120,120))
        
        gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
        if i < 100:
            q_sbt_test_features.append(laws_texture(gray_img))
        else:
            break


In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from skimage.feature import local_binary_pattern

recaptcha = './recaptcha-dataset/Large'
# labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 
#           'Crosswalk', 'Motorcycle']
labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 
           'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']

lbp_train_features = []
lbp_train_labels = []
q_lbp_test_features = []

for label in labels:
    image_dir = os.path.join(recaptcha, label)
    image_list = os.listdir(image_dir)
    for i, image_name in enumerate(image_list):

        image_path = os.path.join(image_dir, image_name)
        img = cv2.imread(image_path)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray_img, P=8, R=1) #R=1 -> 바로 인접 8개의 픽셀값 고려하겠다. 더 멀리 보려면 R을 늘려려

        hist_lbp, bin_lbp = np.histogram(lbp.ravel(), bins=64, range=(0, 256)) #256차원을 bin값으로 축소할수도!
        hist_lbp = norm_hist(hist_lbp)    # 64-d


        if i < 150:
            lbp_train_features.append(hist_lbp)
            lbp_train_labels.append(label)

        else:
            break

query_list = os.listdir(query_path)
for i, q_name in enumerate(query_list):
        
        q_path = os.path.join(query_path, q_name)
        image = cv2.resize(cv2.imread(q_path),(120,120))
        gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray_img, P=8, R=1) #R=1 -> 바로 인접 8개의 픽셀값 고려하겠다. 더 멀리 보려면 R을 늘려려

        hist_lbp, bin_lbp = np.histogram(lbp.ravel(), bins=64, range=(0, 256)) #256차원을 bin값으로 축소할수도!
        hist_lbp = norm_hist(hist_lbp)    # 64-d

        if i < 100:
            q_lbp_test_features.append(hist_lbp)
        else:
            break



In [11]:
lst = []
for i in range(len(sbt_train_features)):
    npa = np.array(sbt_train_features[i])
    lst.append(npa)
final_train_features = np.concatenate([lbp_train_features, lst], axis = 1)
Lst = []
for i in range(len(q_sbt_test_features)):
    npa = np.array(q_sbt_test_features[i])
    Lst.append(npa)
final_test_features = np.concatenate([q_lbp_test_features, Lst], axis = 1)

final_classifier = KNeighborsClassifier(n_neighbors = 10)

final_classifier.fit(final_train_features, lbp_train_labels)
final_predict_labels = final_classifier.predict(final_test_features)
print(final_predict_labels

['Bus' 'Palm' 'Hydrant' 'Hydrant' 'Chimney' 'Bridge' 'Bus' 'Palm'
 'Motorcycle' 'Bus' 'Palm' 'Bicycle' 'Crosswalk' 'Car' 'Crosswalk'
 'Hydrant' 'Bicycle' 'Chimney' 'Hydrant' 'Bicycle' 'Palm' 'Car' 'Bicycle'
 'Bridge' 'Palm' 'Hydrant' 'Palm' 'Chimney' 'Chimney' 'Bus' 'Palm'
 'Chimney' 'Bridge' 'Chimney' 'Hydrant' 'Car' 'Car' 'Chimney' 'Chimney'
 'Palm' 'Bicycle' 'Chimney' 'Chimney' 'Bicycle' 'Bus' 'Hydrant' 'Hydrant'
 'Crosswalk' 'Bus' 'Hydrant' 'Crosswalk' 'Bicycle' 'Chimney' 'Bridge'
 'Hydrant' 'Bridge' 'Hydrant' 'Traffic Light' 'Bicycle' 'Bus' 'Palm'
 'Crosswalk' 'Crosswalk' 'Bus' 'Crosswalk' 'Hydrant' 'Palm' 'Bus'
 'Bicycle' 'Crosswalk' 'Bicycle' 'Hydrant' 'Motorcycle' 'Car' 'Chimney'
 'Crosswalk' 'Car' 'Hydrant' 'Bicycle' 'Crosswalk' 'Hydrant' 'Hydrant'
 'Crosswalk' 'Chimney' 'Bicycle' 'Bicycle' 'Chimney' 'Hydrant' 'Bus'
 'Chimney' 'Bicycle' 'Bicycle' 'Hydrant' 'Palm' 'Car' 'Bicycle' 'Palm'
 'Palm' 'Hydrant' 'Hydrant']


In [22]:
print(query_list)

['query1.png', 'query10.png', 'query100.png', 'query11.png', 'query12.png', 'query13.png', 'query14.png', 'query15.png', 'query16.png', 'query17.png', 'query18.png', 'query19.png', 'query2.png', 'query20.png', 'query21.png', 'query22.png', 'query23.png', 'query24.png', 'query25.png', 'query26.png', 'query27.png', 'query28.png', 'query29.png', 'query3.png', 'query30.png', 'query31.png', 'query32.png', 'query33.png', 'query34.png', 'query35.png', 'query36.png', 'query37.png', 'query38.png', 'query39.png', 'query4.png', 'query40.png', 'query41.png', 'query42.png', 'query43.png', 'query44.png', 'query45.png', 'query46.png', 'query47.png', 'query48.png', 'query49.png', 'query5.png', 'query50.png', 'query51.png', 'query52.png', 'query53.png', 'query54.png', 'query55.png', 'query56.png', 'query57.png', 'query58.png', 'query59.png', 'query6.png', 'query60.png', 'query61.png', 'query62.png', 'query63.png', 'query64.png', 'query65.png', 'query66.png', 'query67.png', 'query68.png', 'query69.png',

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

#지금까지 구한 특징들에서 n차원 특징을 뽑고, KNN 사용해서 classification 할겨!
recaptcha = './recaptcha-dataset/Large'
labels = ['Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney', 
          'Crosswalk', 'Hydrant', 'Motorcycle', 'Palm', 'Traffic Light']

c_train_features = []
c_train_labels = []
c_test_features = []
c_test_labels = []
g_train_features = []
g_test_features = []

#Texture - color image + SVD?
for label in labels:
    image_dir = os.path.join(recaptcha, label)
    image_list = os.listdir(image_dir)
    for i, image_name in enumerate(image_list):
        image_path = os.path.join(image_dir, image_name)
        img = cv2.resize(cv2.imread(image_path),(120,120))
        
        hist_b, hist_g, hist_r, hist_gray = hist_gram(img)
        
        if i <150:#train 이거 숫자 바꿔주면 돼돼
            c_train_features.append(hist_b+hist_g+hist_r) #feature
            g_train_features.append(hist_gray)
            c_train_labels.append(label) #label

        else:
            break
query_list = os.listdir(query_path)
for i, q_name in enumerate(query_list):
        q_path = os.path.join(query_path, q_name)
        image = cv2.imread(q_path)
        
        hist_b, hist_g, hist_r, hist_gray = hist_gram(image)
        
        if i < 100:
            c_test_features.append(hist_r+hist_g+hist_b)
        else:
            break


In [18]:

c_final_classifier = KNeighborsClassifier(n_neighbors = 15)

c_final_classifier.fit(lbp_train_features, lbp_train_labels)
c_final_predict_labels = c_final_classifier.predict(q_lbp_test_features)
print(c_final_predict_labels)
print(query_list)

['Bus' 'Palm' 'Hydrant' 'Hydrant' 'Chimney' 'Bridge' 'Bus' 'Palm'
 'Crosswalk' 'Bus' 'Palm' 'Chimney' 'Crosswalk' 'Car' 'Crosswalk'
 'Hydrant' 'Crosswalk' 'Chimney' 'Hydrant' 'Hydrant' 'Palm' 'Bus'
 'Bicycle' 'Crosswalk' 'Palm' 'Chimney' 'Chimney' 'Chimney' 'Chimney'
 'Bus' 'Palm' 'Motorcycle' 'Bridge' 'Chimney' 'Car' 'Car' 'Car' 'Chimney'
 'Chimney' 'Palm' 'Bicycle' 'Chimney' 'Chimney' 'Hydrant' 'Bus'
 'Motorcycle' 'Bicycle' 'Crosswalk' 'Hydrant' 'Hydrant' 'Bridge' 'Palm'
 'Chimney' 'Bridge' 'Hydrant' 'Crosswalk' 'Hydrant' 'Motorcycle' 'Bicycle'
 'Bus' 'Palm' 'Crosswalk' 'Crosswalk' 'Bus' 'Crosswalk' 'Bicycle' 'Palm'
 'Bus' 'Crosswalk' 'Bicycle' 'Hydrant' 'Hydrant' 'Car' 'Car' 'Bus'
 'Crosswalk' 'Crosswalk' 'Hydrant' 'Crosswalk' 'Crosswalk' 'Hydrant' 'Bus'
 'Crosswalk' 'Chimney' 'Crosswalk' 'Chimney' 'Chimney' 'Hydrant' 'Bus'
 'Chimney' 'Crosswalk' 'Bicycle' 'Hydrant' 'Chimney' 'Car' 'Chimney'
 'Palm' 'Chimney' 'Hydrant' 'Hydrant']
['query1.png', 'query10.png', 'query100.png', 'query1

### Task1 : Classification

In [19]:
import csv

with open('c1_t1_a1.csv','w') as file :
    write = csv.writer(file)
    for i in range(len(c_final_predict_labels)):
        write.writerow([query_list[i]] + [final_predict_labels[i]])
        