In [3]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import re
from PIL import ImageFont, ImageDraw, Image
from numpy.linalg import norm
from sklearn.cluster import KMeans

In [4]:
#############################color feature parameter
color_slicing = 3
bgr_bins = [8, 8, 8]
hsv_bins = [18, 3, 3]
image_size = [256, 256]
#############################edge feature parameter
edge_slicing = 4
angel_slicing = 36
edge_dominate_thres = 12
#############################local feature parameter
cluster_nums = 80
nfeatures_less = 130
nfeatures_more = 280

In [58]:
def read_file(filename):
    categories = os.listdir(filename)
    categories.sort()
    namelist = []
    for category in categories:
        temp = []
        if category != ".DS_Store":
            temp.append(category)
            img_names = os.listdir(os.path.join(filename, category))
            img_names.sort(key=lambda x: int(re.split('[_.]', x)[-2]))
            for img_name in img_names:
                if img_name != ".DS_Store":
                    temp.append(img_name)
            namelist.append(temp)
    return namelist

########### color feature ###########
def color_hist(image):
#     plt.imshow(image)
    b, g, r = cv2.split(image)
    hsvimg = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    grayimg = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    h, s, v = cv2.split(hsvimg)
    
    rows, cols = h.shape[0], h.shape[1]
    r_gsize = int(rows / color_slicing)
    c_gsize = int(cols / color_slicing)
    
    bgr_hist = []
    hsv_hist = []
    b_hist = []
    g_hist = []
    r_hist = []
    h_hist = []
    s_hist = []
    v_hist = []
    
    for i in range(0, rows, r_gsize):
        for j in range(0, cols, c_gsize):
            row_weight = min(abs(i+(r_gsize//2)), abs(rows-(i+(r_gsize//2)))) // (r_gsize//3)
            col_weight = min(abs(j+(c_gsize//2)), abs(cols-(j+(c_gsize//2)))) // (c_gsize//3)
            total_weight = row_weight+col_weight
            bgr_hist.append((cv2.calcHist([image[i : i+r_gsize, j : j+c_gsize]], [0, 1, 2], None, bgr_bins, [0, 256, 0, 256, 0, 256]).flatten()) * total_weight)
            hsv_hist.append((cv2.calcHist([hsvimg[i : i+r_gsize, j : j+c_gsize]], [0, 1, 2], None, hsv_bins, [0, 256, 30, 256, 0, 256]).flatten()) * total_weight)
            h_hist.append((cv2.calcHist([h[i : i+r_gsize, j : j+c_gsize]], [0], None, [256], [0, 256]).flatten()) * total_weight)
            s_hist.append((cv2.calcHist([s[i : i+r_gsize, j : j+c_gsize]], [0], None, [256], [0, 256]).flatten()) * total_weight)
            v_hist.append((cv2.calcHist([v[i : i+r_gsize, j : j+c_gsize]], [0], None, [256], [0, 256]).flatten()) * total_weight) 
            
    bgr_array = np.asarray(bgr_hist).flatten()
    hsv_array = np.asarray(hsv_hist).flatten()
    h_array = np.asarray(h_hist).flatten()
    s_array = np.asarray(s_hist).flatten()
    v_array = np.asarray(v_hist).flatten()
    color_feature = np.concatenate([bgr_array, hsv_array, h_array, s_array, v_array])
    return color_feature

########### edge feature ###########
def hog(pyramid):
    edge_feature = []
    dominate_edge = 0
    for i in range(len(pyramid)):
        angle_list = []
        angle_array = np.zeros([angel_slicing])
        image = pyramid[i]
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        rows = np.shape(image)[0]
        cols = np.shape(image)[1]
        r_gsize = int(rows/(edge_slicing-i))
        c_gsize = int(cols/(edge_slicing-i))
        for r in range(0, rows, r_gsize):
            for c in range(0, cols, c_gsize):
                gx = cv2.Sobel(image[r : r+r_gsize, c : c+c_gsize], cv2.CV_32F, 1, 0, ksize=1)
                gy = cv2.Sobel(image[r : r+r_gsize, c : c+c_gsize], cv2.CV_32F, 0, 1, ksize=1)
                mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)
                height = np.shape(mag)[0]
                width = np.shape(mag)[1]
                for h in range(height):
                    for w in range(width):
                        angle_array[int(angle[h][w]/(360/angel_slicing))] += mag[h][w]
                if(max(angle_array) > ((r_gsize**2*20))):
                    dominate_edge = 1
                for a in range(len(angle_array)):
                    edge_feature.append(angle_array[a])
    return edge_feature, dominate_edge
def pyramid(image, scale=2, minSize=(25, 25)):
    pyramid = []
    pyramid.append(image)
    while True:
        w = int(image.shape[1] / scale)
        if w< minSize[0]:
            break
        image = cv2.resize(image, (w, w))
        pyramid.append(image)
    return pyramid

########### local feature ###########
def HOG_sift(nfeatures, image):
    ##取得 keypoint descriptor
    sift = cv2.xfeatures2d.SIFT_create(nfeatures)
    key , des = sift.detectAndCompute(image, None)
    return des


########### evaluate ###########
def Bhattacharyya_distance(vec_a, vec_b):
    BC=np.sum(np.sqrt(vec_a*vec_b))
    return np.log(BC)
def coosine_similarity(vec_a, vec_b):
    return np.dot(vec_a, vec_b)/(norm(vec_a)*norm(vec_b))
def evaluate(feature, distance_flag):
    AP = np.zeros([700])
    for i in range(700):
        print(i, end='\r')
        score = {}
        for j in range(700):
            if i!=j:
                if distance_flag==1:
                    score[j] = coosine_similarity(feature[i], feature[j])
                elif distance_flag==2:
                    score[j] = Bhattacharyya_distance(feature[i], feature[j])
        num_of_retrive = 0
        num_of_correct = 0
        Precision = 0
        sortlist =  sorted(score, key=score.get, reverse=True)
        for j in sortlist:
            num_of_retrive += 1
            if (i//20)==(j//20): ##代表同一個category
                num_of_correct += 1
                Precision += (num_of_correct/num_of_retrive)
        AP[i] = Precision/19
    return AP
def evaluate_fusion(color_feature, edge_feature, local_feature):
    AP = np.zeros([700])
    for i in range(700):
        print(i, end='\r')
        i_category = i//20
        score_c = {}
        score_e = {}
        score_l = {}
        for j in range(700):
            if i!=j:
                score_c[j] = Bhattacharyya_distance(color_feature[i], color_feature[j])          
                score_e[j] = coosine_similarity(edge_feature[i], edge_feature[j])*3 + Bhattacharyya_distance(color_feature[i], color_feature[j])
                score_l[j] = coosine_similarity(local_feature[i], local_feature[j]) + Bhattacharyya_distance(color_feature[i], color_feature[j])
                    
        total_rank_array = np.zeros([700])
        total_rank = {}
        sortlist_c =  sorted(score_c, key=score_c.get, reverse=True)
        sortlist_e =  sorted(score_e, key=score_e.get, reverse=True)
        sortlist_l =  sorted(score_l, key=score_l.get, reverse=True)
        for j in range(699):
            total_rank_array[sortlist_c[j]] += j
            total_rank_array[sortlist_e[j]] += j
            total_rank_array[sortlist_l[j]] += j
        for j in range(699):
            total_rank[j] = total_rank_array[j]
        sortlist = sorted(total_rank, key=total_rank.get)
        num_of_retrive = 0
        num_of_correct = 0
        Precision = 0
        for j in sortlist:
            if(i!=j):
                num_of_retrive += 1
                if (i//20)==(j//20): ##代表同一個category
                    num_of_correct += 1
                    Precision += (num_of_correct/num_of_retrive)
        AP[i] = Precision/19
    return AP
def MAP(namelist, AP):
    MAP_dict = {}
    MAP_array = np.zeros([35])
    for i in range(700):
        MAP_array[i//20] += AP[i]
        if i%20==19:
            MAP_array[i//20] /= 20
    for i in range(35):
        MAP_dict[i] = MAP_array[i]
    for i in  sorted(MAP_dict, key=MAP_dict.get, reverse=True):
        print(namelist[i][0], " : ", MAP_array[i])
    print("mean: ", np.mean(MAP_array))
    print("max: ", max(MAP_array))
    return MAP_array

In [29]:
############讀檔############
Folder = "HW2-database-20f"
filename = os.path.join(Folder, "database")
##namelist 第一個 column 存 category 的資料夾名稱，其餘 20 個 column 為圖片檔名
##namelist 共有 35 個 row
namelist = read_file(filename)
namelist_700 = []
for i in range(len(namelist)):
    category_path = os.path.join(filename, namelist[i][0])
    for j in range(1, len(namelist[i])):
        namelist_700.append(os.path.join(category_path, namelist[i][j]))

In [102]:
############color feature############
color_featurelist = []
for i in range(len(namelist)):
    category_path = os.path.join(filename, namelist[i][0])
    for j in range(1, len(namelist[i])):
        image = cv2.imread(os.path.join(category_path, namelist[i][j]))
        res_img = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)
        crop_img = res_img[28:28+200, 28:28+200]
        color_feature = color_hist(crop_img)
        color_featurelist.append(color_feature)
print("color_featuresize :", np.shape(color_featurelist))
color_featurearray = np.asarray(color_featurelist)
np.save('color_feature', color_featurearray)

color_featuresize : (700, 23072)


In [84]:
############edge feature############
edge_featurelist = []
is_edge_category = np.zeros([35])
for i in range(len(namelist)):
    print(i, end = '\r')
    category_path = os.path.join(filename, namelist[i][0])
    for j in range(1, len(namelist[i])):
        image = cv2.imread(os.path.join(category_path, namelist[i][j]))
        res_img = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)
        crop_img = res_img[28:28+200, 28:28+200]
        image_pyramid = pyramid(crop_img)
        edge_feature, dominate_edge = hog(image_pyramid)
        edge_featurelist.append(edge_feature)
        if(dominate_edge==1):
            is_edge_category[i] += 1
print("edge_featuresize :", np.shape(edge_featurelist))
edge_featurearray = np.asarray(edge_featurelist)
np.save('edge_feature', edge_featurearray)
for i in range(35):
    if is_edge_category[i]<=edge_dominate_thres:
        is_edge_category[i] = 0
np.save('is_edge_category', is_edge_category)

edge_featuresize : (700, 1332)


In [93]:
############local feature############
sift_featurelist = [] ##較少的 descriptor for kmeans
for i in range(len(namelist)):
    category_path = os.path.join(filename, namelist[i][0])
    print(i, end = '\r')
    for j in range(1, len(namelist[i])):
        image = cv2.imread(os.path.join(category_path, namelist[i][j]))
        res_img = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)
        sift_feature = HOG_sift(nfeatures_less, res_img)
        for f in sift_feature:
            sift_featurelist.append(f)
print("sift_featuresize :", np.shape(sift_featurelist))

sift_featuresize : (82406, 128)


In [94]:
sift_featurearray = np.array(sift_featurelist)
kmeans = KMeans(n_clusters = cluster_nums, random_state = 0).fit(sift_featurearray)

In [95]:
local_featurelist = []
for i in range(len(namelist)):
    category_path = os.path.join(filename, namelist[i][0])
    print(i, end = '\r')
    for j in range(1, len(namelist[i])):
        image = cv2.imread(os.path.join(category_path, namelist[i][j]))
        res_img = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)
        sift_feature2 = HOG_sift(nfeatures_more, res_img)
        ##算圖片中的每個 descriptor 屬於哪個 cluster，local_feature 統計每個 cluster 有幾個 descriptor
        des_clusters = kmeans.predict(sift_feature2)
        local_feature = np.zeros([cluster_nums])
        for k in range(len(des_clusters)):
            local_feature[des_clusters[k]] += 1
        local_featurelist.append(local_feature)
print("local_featuresize :", np.shape(local_featurelist))
local_featurearray = np.asarray(local_featurelist)
np.save("local_feature", local_featurearray)

local_featuresize : (700, 80)


In [103]:
############evaluate color############
color_feature = np.load('color_feature.npy')
print("color_featuresize :", np.shape(color_feature))
AP_color = evaluate(color_feature, 2)
print("MAP from high to low")
MAP_color = MAP(namelist, AP_color)

color_featuresize : (700, 23072)
MAP from high to low
korean_snack  :  0.8682248986294485
gge_snack  :  0.7222368774298321
minnie_dress  :  0.6772891072500516
aloe_vera_gel  :  0.6561327788721282
sprite  :  0.6158781571365526
skirt  :  0.5150003090425773
garment  :  0.5120307205259846
hand_cream  :  0.5027433614604135
orange  :  0.46800239345810163
tennis_ball  :  0.45027150107741826
cartoon_purse  :  0.4480310180595735
women_clothes  :  0.41221595841266157
goggles  :  0.4097120194813074
lollipop  :  0.3786350571634674
cup  :  0.3257382082785237
minnie_shoes  :  0.3168750031075753
baby_shoes  :  0.31407963068376843
bottle  :  0.2979916388028838
bracelet  :  0.27260152184179076
men_clothes  :  0.26121496388062093
drum  :  0.24841748197713615
blue_pillow  :  0.24362288466402626
children_dress  :  0.2004939473802166
bicycle  :  0.1880784448892786
overalls  :  0.14698967838310134
ice_cream  :  0.13935600581695823
leather_purse  :  0.12249661467470256
glasses  :  0.11499672987801939
mouse  

In [86]:
############evaluate edge############
edge_feature = np.load('edge_feature.npy')
print("edge_featuresize :", np.shape(edge_feature))
AP_edge = evaluate(edge_feature, 1)
print("MAP from high to low")
MAP_edge = MAP(namelist, AP_edge)

edge_featuresize : (700, 1332)
MAP from high to low
gge_snack  :  0.7825110807317721
children_dress  :  0.413936643522786
bottle  :  0.36124446668736604
korean_snack  :  0.3406014138758565
aloe_vera_gel  :  0.2889467183636509
goggles  :  0.28864654072661927
cup  :  0.2622591628438726
overalls  :  0.2570930171869716
suitcase  :  0.24297745777638072
trousers  :  0.21310402595821273
minnie_shoes  :  0.2125052032232606
garment  :  0.2050339216689173
clock  :  0.18270004105462592
minnie_dress  :  0.17789476187748127
baby_shoes  :  0.17784974311215798
leather_purse  :  0.17206706590315798
nba_jersey  :  0.16365125764141694
women_clothes  :  0.15688831834077926
sprite  :  0.15613329804733908
skirt  :  0.15026332976704992
lollipop  :  0.134883599348679
bicycle  :  0.1294499523527522
chair  :  0.12440952924797397
orange  :  0.12146463426822351
cartoon_purse  :  0.11785929347691315
sweeping_robot  :  0.1142590487589346
glasses  :  0.10696914046316171
men_clothes  :  0.10352162265718172
hand_crea

In [96]:
############evaluate local############
local_feature = np.load('local_feature.npy')
print("local_featuresize :", np.shape(local_feature))
AP_local = evaluate(local_feature, 1)
print("MAP from high to low")
MAP_local = MAP(namelist, AP_local)

local_featuresize : (700, 80)
MAP from high to low
korean_snack  :  0.9584305721243529
gge_snack  :  0.8708842515015565
children_dress  :  0.45523089279042395
garment  :  0.38632170880872113
minnie_dress  :  0.3713020323074443
bracelet  :  0.37000757734504075
women_clothes  :  0.35143193772725156
blue_pillow  :  0.3370126317928464
aloe_vera_gel  :  0.30243291078442497
chair  :  0.2515697716528412
cartoon_purse  :  0.22752369556311786
baby_shoes  :  0.2230799788820073
men_clothes  :  0.21668617274696161
skirt  :  0.2121279460173938
hand_cream  :  0.14945442220015615
ice_cream  :  0.14636464118218942
minnie_shoes  :  0.14340356045136227
cup  :  0.14171274276072326
overalls  :  0.13731866043761243
clock  :  0.13088938972655256
bicycle  :  0.13011010729652334
drum  :  0.11508407426527026
orange  :  0.10044434602983196
goggles  :  0.09844854236070592
nba_jersey  :  0.09712970809762525
bottle  :  0.09233063301863663
sprite  :  0.09041020956670506
glasses  :  0.0798174838081114
trousers  :  0

In [59]:
############evaluate fusion############
color_feature = np.load('color_feature.npy')
edge_feature = np.load('edge_feature.npy')
local_feature = np.load('local_feature.npy')
AP_fusion = evaluate_fusion(color_feature, edge_feature, local_feature)
print("MAP from high to low")
MAP_fusion = MAP(namelist, AP_fusion)

MAP from high to low
gge_snack  :  0.9210122286974374
korean_snack  :  0.8834600461019504
aloe_vera_gel  :  0.7736146449301539
minnie_dress  :  0.7063430440528314
sprite  :  0.6880011199555545
garment  :  0.552174055902865
goggles  :  0.5511899078638929
skirt  :  0.5300704605734842
hand_cream  :  0.5282885691928636
orange  :  0.5264227716741668
cartoon_purse  :  0.49908864685995136
women_clothes  :  0.4754836335425475
tennis_ball  :  0.40591862400070144
cup  :  0.4002771051472126
minnie_shoes  :  0.39658473257478793
bottle  :  0.3940792066031946
lollipop  :  0.3789224411881751
baby_shoes  :  0.376916699006529
drum  :  0.30212021419284446
children_dress  :  0.2969447617146387
bicycle  :  0.2822316337598112
men_clothes  :  0.281478386847035
bracelet  :  0.26516085439156883
blue_pillow  :  0.20774364119120978
overalls  :  0.1870375452303134
ice_cream  :  0.15002332836078142
leather_purse  :  0.13801681748415537
suitcase  :  0.12294482842777246
glasses  :  0.11258458482980775
trousers  :  