In [1]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import os
from sklearn import metrics

PATH_MELANOMA = '../DermMel/test/Melanoma'
PATH_NOTMELANOMA = '../DermMel/test/NotMelanoma'
PATH_GENERATE_DIC_MEL = '../DermMel/geracao_dicionario/melanomas'
PATH_GENERATE_DIC_NOT_MEL = '../DermMel/geracao_dicionario/nao_melanomas'


files_path_dic_melanoma = os.listdir(PATH_GENERATE_DIC_MEL) 
files_path_dic_melanoma = [PATH_GENERATE_DIC_MEL + '/' + path for path in files_path_dic_melanoma]

files_path_dic_nao_melanoma = os.listdir(PATH_GENERATE_DIC_NOT_MEL) 
files_path_dic_nao_melanoma = [PATH_GENERATE_DIC_NOT_MEL + '/' + path for path in files_path_dic_nao_melanoma]

dic_des = []
dic_nao_melanoma = []
palavras = []
palavras_nao_melanoma = []
array_desc = []

for path_img in files_path_dic_melanoma:
    
    img = cv2.imread(path_img,0) # Leitura da imagem com filtro em escada de cinza
    
    sift = cv2.xfeatures2d.SIFT_create()

    # Encontrando os pontos-de-interesse
    kp = sift.detect(img,None)

    # computando o vetor-de-característica para cada ponto detectado
    kp, des = sift.compute(img, kp)
                    
    dic_des.extend(des)       


# Clusterização por meio de Kmeans
K = 3

kmeans_model = KMeans(n_clusters=K).fit(dic_des) #dic_des são os vetores-de-características concatenados

# criando o dicionário de palavras
palavras = np.array(kmeans_model.cluster_centers_)

# print("Dicionario 1")
# print(len(palavras))

for path_img in files_path_dic_nao_melanoma:
    img = cv2.imread(path_img,0) # Leitura da imagem com filtro em escada de cinza
        
    sift = cv2.xfeatures2d.SIFT_create()
    
    kp = sift.detect(img,None)
    
    kp, des = sift.compute(img, kp)
    
    dic_nao_melanoma.extend(des)
    

kmeans_model = KMeans(n_clusters=K).fit(dic_nao_melanoma) # dic_nao_melanoma são os vetores-de-características concatenados

# criando o dicionário de palavras
palavras_nao_melanomas = np.array(kmeans_model.cluster_centers_)

# print("Palavras não melanomas")
# print(len(palavras_nao_melanomas))

array_desc.extend(palavras)
array_desc.extend(palavras_nao_melanomas)

# print("Concatenação")
# print(len(array_desc))

In [2]:
'''
Método de extração de arquivo .arff. Autor: Glauco Vitor.
'''

def gravar_arquivo_arff(base_teste, classes): # 0 para NAO_MELANOMA e 1 para MELANOMA
    tam = len(base_teste[0][0])
    file = open('dic2_lbp_sift_k3.arff','w') 
 
    file.write('@relation cancer\n') 
    for i in range(tam):
        file.write('@attribute '+ str(i) +' NUMERIC\n') 
    
    file.write('@attribute classes {')
    
    a = set(classes)
    
    for i in a:
        file.write(str(i)+',')
    
    file.write('}')    
    
    for i in range(tam):
         len(set(classes))
    
    file.write('\n@data\n') 

    for item in base_teste:
        for i in range(len(item[0])):
            file.write("%s," % str(item[0][i])) 
        file.write("%s\n" % item[1])    
 
    file.close() 
    print('arquivo gravado')


In [3]:
'''
Distância euclidiana para a atribuição dos centróides a cada elemento da imagem.
'''

def distancia(a, b):
    M = len(a)
    soma = 0
    for i in range(M):
        soma = soma + ((a[i]-b[i])**2)
    return np.sqrt(soma)


In [4]:
from skimage import feature
import numpy as np
 
'''
Descritor LBP
'''
    

def extrair_caracteristica_lbp(image):
    # compute the Local Binary Pattern representation
    # of the image, and then use the LBP representation
    # to build the histogram of patterns
    
    numPoints = 2*K
    radius = 8
    eps=1e-7
    
    lbp = feature.local_binary_pattern(image, numPoints, radius, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(),
        bins = np.arange(0, numPoints + 3),
        range = (0, numPoints + 2))

    # normalize the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + eps)
    
    # return the histogram of Local Binary Patterns
    return hist  # hist.lenght = K + 2

In [5]:
'''
Descritor SIFT
Método de extração para todas as imagens da base de treinamento. 
O vetor de descrição tomado por base é o que foi gerado no início.
'''

def extrair_caracteristica_sift(img):
    sift = cv2.xfeatures2d.SIFT_create()
    # encontrando os pontos-de-interesse
    kp = sift.detect(img,None)
#     print('Qtde Pontos Detectados: ', len(kp))
    
    kp, des = sift.compute(img, kp)
    pontos_rotulados = []

    for ponto in des:
        min = 9999
        label = -1
        for i in range(len(palavras)):
            dist = distancia(ponto, palavras[i])
            if(dist < min):
                min = dist
                label = i
        pontos_rotulados.append(label)
    
    hist = np.zeros(K)

    for i in pontos_rotulados:
        hist[i] += 1
    
    return hist # Frequência em que cada palavra centroide aparece

In [14]:
def extrair_caracteristica(imagem):
    hist = []
    hist_lbp = extrair_caracteristica_lbp(imagem)
    hist_sift = extrair_caracteristica_sift(imagem)
    
    hist.extend(hist_lbp)
    hist.extend(hist_sift)
    
    print("hist_lbp size:")
    print(len(hist_lbp))
    print("Hist_sift size:")
    print(len(hist_sift))
    print("Histograma completo size:")
    print(len(hist))
    print("Histograma concatenado: ")
    print(hist)
    print("-------------------------------------------------------------------------------------------------")
    
    return hist
    

In [15]:
import cv2 as cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.cluster import KMeans
from sklearn import metrics

files_path_nao_melanoma = os.listdir(PATH_NOTMELANOMA) 
files_path_nao_melanoma = [PATH_NOTMELANOMA+'/'+path for path in files_path_nao_melanoma]
files_path_melanoma = os.listdir(PATH_MELANOMA)
files_path_melanoma = [PATH_MELANOMA+'/'+path for path in files_path_melanoma]


all_image_paths = [files_path_nao_melanoma, files_path_melanoma]

base = []
classes = []

#extraindo as características das imagens na base de dados
for i in range(len(all_image_paths)):
    for img_path in all_image_paths[i][:500]:
        imagem = cv2.imread(img_path, 0)
        caracteristica = extrair_caracteristica(imagem)
        classes.append(i)
        base.append((caracteristica, i))
    print('Classe final: '+str(i))
    print('Size')
    print(len(caracteristica))

gravar_arquivo_arff(base, classes)

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08279629629626563, 0.1286259259258783, 0.12917407407402623, 0.14131481481476246, 0.12299259259254704, 0.1313407407406921, 0.0932333333332988, 0.17052222222215907, 60.0, 4.0, 3.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.1040222222221837, 0.12741481481476763, 0.1011592592592218, 0.12354814814810239, 0.11204074074069924, 0.13091481481476633, 0.10700740740736778, 0.19389259259252078, 2.0, 2.0, 9.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.0872037037036714, 0.12337407407402838, 0.1275333333332861, 0.1456555555555016, 0.12480370370365748, 0.1254888888888424, 0.09494814814811298, 0.17099259259252927, 71.0, 53

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08629629629626434, 0.13477037037032044, 0.1284888888888413, 0.13271851851846936, 0.11867407407403012, 0.12744074074069353, 0.09483333333329821, 0.1767777777777123, 246.0, 94.0, 96.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09647777777774204, 0.13143703703698836, 0.10982962962958895, 0.1238814814814356, 0.11112592592588477, 0.1311999999999514, 0.10674074074070121, 0.18930740740733729, 6.0, 9.0, 13.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08824814814811546, 0.12153703703699202, 0.12136296296291801, 0.15218518518512883, 0.1211851851851403, 0.1232592592592136, 0.09837777777774134, 0.17384444444438005, 7

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09092222222218854, 0.12669999999995307, 0.10830370370366359, 0.12088888888884411, 0.11992592592588151, 0.13981851851846674, 0.109592592592552, 0.18384814814808007, 7.0, 0.0, 1.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08634444444441247, 0.12425555555550953, 0.11944074074069651, 0.1527703703703138, 0.1286370370369894, 0.12637037037032356, 0.09292962962959521, 0.16925185185178918, 8.0, 14.0, 8.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09671111111107529, 0.12061481481477014, 0.1029148148147767, 0.13800370370365259, 0.12485555555550931, 0.12984444444439636, 0.10137777777774024, 0.185677777777709, 9.0, 7

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.10326296296292471, 0.13286296296291375, 0.10424074074070214, 0.11567777777773493, 0.1100555555555148, 0.13430740740735767, 0.10312222222218403, 0.1964703703702976, 13.0, 20.0, 32.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09235185185181764, 0.1259592592592126, 0.11235925925921765, 0.13223703703698805, 0.12615555555550884, 0.13348148148143205, 0.09921111111107436, 0.17824444444437842, 4.0, 7.0, 0.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.11031481481477395, 0.13285185185180265, 0.09520370370366844, 0.10374814814810972, 0.10479629629625749, 0.13572222222217195, 0.11269259259255085, 0.20467037037029456, 

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08568518518515345, 0.1283666666666191, 0.11204814814810665, 0.11125185185181065, 0.10865555555551532, 0.14034074074068875, 0.1149814814814389, 0.19867037037029678, 11.0, 2.0, 3.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09506666666663145, 0.13938518518513357, 0.12191481481476966, 0.11769999999995641, 0.10915925925921884, 0.1310074074073589, 0.09794444444440817, 0.18782222222215267, 292.0, 141.0, 85.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08594074074070891, 0.12476296296291675, 0.12104074074069592, 0.1483333333332784, 0.12808888888884146, 0.1303777777777295, 0.09145925925922538, 0.16999629629623333,

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.0968703703703345, 0.13266296296291383, 0.10351851851848018, 0.1111888888888477, 0.10396296296292445, 0.13492592592587596, 0.11444444444440206, 0.20242592592585096, 5.0, 7.0, 14.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.07764814814811939, 0.11801481481477111, 0.12297037037032482, 0.1626185185184583, 0.13794074074068965, 0.12606296296291628, 0.09366296296292827, 0.16108148148142182, 1.0, 0.0, 7.0]
-------------------------------------------------------------------------------------------------
hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.09965555555551865, 0.12476296296291675, 0.10909259259255219, 0.12424814814810213, 0.12331481481476914, 0.13538888888883874, 0.09843333333329687, 0.18510370370363516, 22

hist_lbp size:
8
Hist_sift size:
3
Histograma completo size:
11
Histograma concatenado: 
[0.08115555555552549, 0.120218518518474, 0.1243703703703243, 0.14570370370364974, 0.1318185185184697, 0.13589629629624597, 0.09762592592588977, 0.16321111111105066, 8.0, 2.0, 2.0]
-------------------------------------------------------------------------------------------------


KeyboardInterrupt: 