ktn_tips_2b

In [1]:
import time 
start_alltime = time.time()## точка отсчета времени
import cv2
import numpy as np
import os as os
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import *
from sklearn import preprocessing
from scipy import ndimage as ndi
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def build_filters(): #построение фильтров
        filters = []
        ksize = 11
        #sigma = 8.0
        lambd = 10.0
        gamma = 0.5
        psi = 0
        for theta in range(4):
            theta = theta / 4. * np.pi
            for sigma in (4, 8):
                kern = cv2.getGaborKernel((ksize, ksize), sigma, theta, lambd, gamma, psi, ktype=cv2.CV_32F)
                kern /= 1.5*kern.sum()
                filters.append(kern)
        return filters

In [3]:
def process(img, filters): #применение фильтров к фото
    accum = np.zeros_like(img)
    for kern in filters:
        fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
        np.maximum(accum, fimg, accum)
        return accum
    
def compute_feats(image, kernels): #Применение для фичей
    feats = np.zeros((len(kernels), 2), dtype=np.double)
    for k, kernel in enumerate(kernels):
        filtered = ndi.convolve(image, kernel, mode='wrap')
        feats[k, 0] = filtered.mean() #среднее
        feats[k, 1] = filtered.var() #дисперсия
    return feats

def extract_gab(images): #Это применение к фичи
    gabs = []
    for image in images:
        gab = compute_feats(image,filters)
        gabs.append(gab)
    
    return np.array(gabs)

## Загрузка изображений и перевод их в массивы ##

In [4]:
def LaP_images(path):
    images = []
    filenames = os.listdir(path)
    for filename in filenames:    
        image = cv2.imread(os.path.join(path, filename),0)
        image = cv2.resize(image, dsize=(150,150))
        images.append(image)
    return np.array(images)

In [5]:
start_LaP_time = time.time()## точка отсчета времени
foil_train_dir = os.getcwd() + "\\Desert"
corduroy_train_dir = os.getcwd() + "\\DenseResidential"
cork_train_dir = os.getcwd() + "\\Forest"
catton_train_dir = os.getcwd() + "\\Meadow"
linen_train_dir = os.getcwd() + "\\River"

In [6]:
foil_train_list = LaP_images(foil_train_dir)
corduroy_train_list = LaP_images(corduroy_train_dir)
cork_train_list = LaP_images(cork_train_dir)
catton_train_list = LaP_images(catton_train_dir)
linen_train_list = LaP_images(linen_train_dir)

In [7]:
print(len(foil_train_list[1]))
print(len(foil_train_list))
print(len(corduroy_train_list))
print(len(cork_train_list))
print(len(catton_train_list))
print(len(linen_train_list))

150
217
217
217
217
217



## Формируем общий датафрейм и вытаскиваем признаки ##

In [8]:
start_EX_time = time.time()## точка отсчета времени
all_images = np.vstack((foil_train_list,
                        corduroy_train_list,
                        cork_train_list,
                        catton_train_list,
                        linen_train_list
                        ))
print(all_images.shape,"Shape before extract")

(1085, 150, 150) Shape before extract


In [9]:
labels = []
for i in range(len(foil_train_list)):
    labels.append(0)
for i in range(len(corduroy_train_list)):
    labels.append(1)
for i in range(len(cork_train_list)):
    labels.append(2)
for i in range(len(catton_train_list)):
    labels.append(3)
for i in range(len(linen_train_list)):
    labels.append(4)
    
labels = np.array(labels)
print("--- %s seconds Load and preprocessing ---" % (time.time() - start_LaP_time)) ## вывод время работы программы

--- 3.293992042541504 seconds Load and preprocessing ---


> cv2.getGaborKernel(ksize, sigma, theta, lambda, gamma, psi, ktype) <br>
> ksize - size of gabor filter (n, n) <br>
> sigma - standard deviation of the gaussian function <br>
> theta - orientation of the normal to the parallel stripes <br>
> lambda - wavelength of the sunusoidal factor <br>
> gamma - spatial aspect ratio <br>
> psi - phase offset <br>
> ktype - type and range of values that each pixel in the gabor kernel can hold <br>

In [10]:
filters = build_filters()

In [11]:
len(filters)

8

In [12]:
X_all_gab = extract_gab(all_images)


In [13]:
print("--- %s seconds EX ---" % (time.time() - start_EX_time)) ## вывод время работы программы

--- 18.22455644607544 seconds EX ---


In [14]:
X_all_gab.shape

(1085, 8, 2)

## Объединение признаков одного фото в один массив и преобразования в датасет для классификации ##

In [15]:
def create_histograms(images):
    all_histograms = []
    for i in images:
        histogram = np.ravel(i)
        all_histograms.append(histogram)
    return np.array(all_histograms)

In [16]:
start_HIST_time = time.time()## точка отсчета времени
X_all_hist = create_histograms(X_all_gab)


print('X_all_hist\t', X_all_hist.shape)



X_all_hist	 (1085, 16)


In [17]:
col = ['GT_th_0.0_sig_4_mean',
  'GT_th_0.0_sig_4_var',
  'GT_th_0.0_sig_8_mean',
  'GT_th_0.0_sig_8_var',
  'GT_th_1.0_sig_4_mean',
  'GT_th_1.0_sig_4_var',
  'GT_th_1.0_sig_8_mean',
  'GT_th_1.0_sig_8_var',
  'GT_th_2.0_sig_4_mean',
  'GT_th_2.0_sig_4_var',
  'GT_th_2.0_sig_8_mean',
  'GT_th_2.0_sig_8_var',
  'GT_th_3.0_sig_4_mean',
  'GT_th_3.0_sig_4_var',
  'GT_th_3.0_sig_8_mean',
  'GT_th_3.0_sig_8_var']

In [18]:
result = pd.DataFrame(data = X_all_hist,columns = col)

In [19]:
#result['target'] = labels

In [20]:
print("--- %s seconds Features tabl ---" % (time.time() - start_HIST_time)) ## вывод время работы программы
result

--- 0.06405830383300781 seconds Features tabl ---


Unnamed: 0,GT_th_0.0_sig_4_mean,GT_th_0.0_sig_4_var,GT_th_0.0_sig_8_mean,GT_th_0.0_sig_8_var,GT_th_1.0_sig_4_mean,GT_th_1.0_sig_4_var,GT_th_1.0_sig_8_mean,GT_th_1.0_sig_8_var,GT_th_2.0_sig_4_mean,GT_th_2.0_sig_4_var,GT_th_2.0_sig_8_mean,GT_th_2.0_sig_8_var,GT_th_3.0_sig_4_mean,GT_th_3.0_sig_4_var,GT_th_3.0_sig_8_mean,GT_th_3.0_sig_8_var
0,108.564044,48.378432,108.567867,175.342994,108.564000,12.489726,108.561689,28.549217,108.567422,38.109365,108.564756,115.878696,108.569556,9.531206,108.566756,17.799321
1,105.714667,50.376274,105.718356,177.177876,105.719467,15.896057,105.718889,50.762621,105.718756,145.830857,105.714800,537.357639,105.715289,19.907295,105.717822,65.188420
2,120.786489,83.166146,120.786356,145.666312,120.782222,41.565373,120.785822,47.927328,120.786267,71.439163,120.785378,119.977360,120.780933,47.698632,120.786444,68.093994
3,97.922667,199.529931,98.105556,427.751214,97.774578,91.388118,97.772222,159.512073,98.106756,337.040692,99.476844,768.118619,97.773244,97.514715,97.776311,175.345119
4,121.163244,228.232418,121.038756,310.204009,120.761911,113.972069,120.767022,155.791232,120.877867,260.982328,120.923200,464.084324,120.764578,121.107465,120.759956,176.973534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1080,62.634311,936.396494,70.259822,2094.745115,60.606267,297.216130,61.920978,698.467089,63.607067,947.091248,71.081511,2176.747934,60.602444,227.293905,61.131111,404.232765
1081,78.740978,1153.198152,88.990756,2630.356003,75.130311,373.992175,76.990489,799.806843,75.496622,548.197100,78.614356,1227.120123,75.125822,304.737013,75.128978,412.438476
1082,75.802622,915.320375,82.504044,1927.772917,73.508444,374.337395,74.034800,720.073500,76.116044,1057.557334,84.701467,2166.092967,73.508667,300.435880,74.099556,494.970622
1083,75.333644,1901.610504,86.701378,3032.460291,71.366978,527.286172,72.021378,999.645187,74.512311,1432.183982,81.017111,2084.214329,71.369733,449.471075,71.639600,710.105356


## Обучение классификатора ##

In [21]:
X = result

scaler = preprocessing.StandardScaler().fit(X)
X_scaled = scaler.transform(X)

Y = labels

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size = 0.30)

In [22]:
start_SVM_time = time.time()## точка отсчета времени
clf_svm = svm.SVC()
param_grid = {'C':[350,400,450],'gamma':[0.1,0.5,1.0]} #param_grid = {'C':[50,100,150],'gamma':[0.5,1,1.5],'kernel':['rbf','poly']}
grid_search_svm_clf=GridSearchCV(clf_svm,param_grid, refit = True, verbose=1)
grid_search_svm_clf.fit(X_train, Y_train)
print("--- %s seconds SVM+GridSearch fit ---" % (time.time() - start_SVM_time)) ## вывод время работы программы

Fitting 5 folds for each of 9 candidates, totalling 45 fits
--- 0.6335756778717041 seconds SVM+GridSearch fit ---


In [23]:
grid_search_svm_clf.best_estimator_

In [24]:
start_PR_time = time.time()## точка отсчета времени
best_svm = grid_search_svm_clf.best_estimator_
Y_pred_svm = best_svm.predict(X_test)
print("Accuracy:",accuracy_score(Y_test, Y_pred_svm))
print(classification_report(Y_test, Y_pred_svm))
print("--- %s seconds Predict time ---" % (time.time() - start_PR_time)) ## вывод время работы программы

Accuracy: 0.8466257668711656
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        67
           1       0.89      0.89      0.89        63
           2       0.70      0.79      0.74        66
           3       0.86      0.86      0.86        57
           4       0.85      0.71      0.78        73

    accuracy                           0.85       326
   macro avg       0.85      0.85      0.85       326
weighted avg       0.85      0.85      0.85       326

--- 0.008007049560546875 seconds Predict time ---


In [25]:
print("--- %s seconds ALL ---" % (time.time() - start_alltime)) ## вывод время работы программы

--- 23.01590847969055 seconds ALL ---


In [26]:
start_KNN_time = time.time()## точка отсчета времени

model_knn = KNeighborsClassifier(n_neighbors=1)
model_knn.fit(X_train, Y_train)

print('KNN train acc\t:', model_knn.score(X_train, Y_train))
print('KNN test acc\t:', model_knn.score(X_test, Y_test))

print("--- %s seconds KNN ---" % (time.time() - start_KNN_time)) ## вывод время работы программы

KNN train acc	: 1.0
KNN test acc	: 0.8374233128834356
--- 0.09508609771728516 seconds KNN ---


In [27]:
start_TEST_time = time.time()## точка отсчета времени

foil_test_dir = os.getcwd() + "\\Desert_test"
corduroy_test_dir = os.getcwd() + "\\DenseResidential_test"
cork_test_dir = os.getcwd() + "\\Forest_test"
catton_test_dir = os.getcwd() + "\\Meadow_test"
linen_test_dir = os.getcwd() + "\\River_test"


foil_test_list = LaP_images(foil_test_dir)
corduroy_test_list = LaP_images(corduroy_test_dir)
cork_test_list = LaP_images(cork_test_dir)
catton_test_list = LaP_images(catton_test_dir)
linen_test_list = LaP_images(linen_test_dir)


all_test_images = np.vstack((foil_test_list,
                        corduroy_test_list,
                        cork_test_list,
                        catton_test_list,
                        linen_test_list
                        ))

labels_test = []
for i in range(len(foil_test_list)):
    labels_test.append(0)
for i in range(len(corduroy_test_list)):
    labels_test.append(1)
for i in range(len(cork_test_list)):
    labels_test.append(2)
for i in range(len(catton_test_list)):
    labels_test.append(3)
for i in range(len(linen_test_list)):
    labels_test.append(4)

    
labels_test = np.array(labels_test)

In [28]:
X_test_gab = extract_gab(all_test_images)
X_all_gab.shape

(1085, 8, 2)

In [29]:
X_test_hist = create_histograms(X_test_gab)

In [30]:
result_test = pd.DataFrame(data = X_test_hist,columns = col)

In [31]:
result_test

Unnamed: 0,GT_th_0.0_sig_4_mean,GT_th_0.0_sig_4_var,GT_th_0.0_sig_8_mean,GT_th_0.0_sig_8_var,GT_th_1.0_sig_4_mean,GT_th_1.0_sig_4_var,GT_th_1.0_sig_8_mean,GT_th_1.0_sig_8_var,GT_th_2.0_sig_4_mean,GT_th_2.0_sig_4_var,GT_th_2.0_sig_8_mean,GT_th_2.0_sig_8_var,GT_th_3.0_sig_4_mean,GT_th_3.0_sig_4_var,GT_th_3.0_sig_8_mean,GT_th_3.0_sig_8_var
0,115.088711,43.718708,115.090844,62.517614,115.090800,37.402644,115.093022,78.720991,115.094578,82.537544,115.090889,151.102095,115.092578,15.989874,115.092800,18.297699
1,93.955467,146.156861,94.185156,364.516917,93.954578,44.148515,93.955111,98.120296,93.951600,315.909435,94.904489,963.596078,93.958489,74.967432,93.960444,188.257369
2,102.915378,28.935328,102.910711,66.404161,102.908578,22.504575,102.912622,57.516987,102.912311,91.271111,102.912978,305.688427,102.913422,10.974015,102.914044,19.266656
3,111.509778,193.259593,111.509422,478.073022,111.507067,121.538039,111.508400,352.264063,111.504089,525.558694,112.376756,1726.584144,111.507156,64.016438,111.512444,145.124956
4,101.697556,143.830972,101.697556,475.109105,101.697067,20.985831,101.698133,56.299454,101.700489,55.070249,101.698222,150.595686,101.700133,27.961858,101.694489,79.571196
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,49.031511,773.724651,56.080489,1665.175788,47.091644,302.749912,50.228756,1058.133938,51.567067,1185.857680,55.237156,1546.824557,47.091467,187.049412,47.615200,333.855040
161,51.151511,1048.278244,61.225067,2392.708456,47.552400,224.373565,49.230667,658.449548,49.112178,696.928216,56.359689,1722.998846,47.551244,249.317418,51.894711,1112.210736
162,56.859244,147.289210,58.698044,560.846512,56.832311,55.631658,56.835200,109.260485,56.833911,125.628459,57.908667,394.297214,56.836222,40.868244,56.834489,68.590562
163,52.047022,107.753611,52.604267,331.087573,52.045911,55.632870,52.043600,84.141966,52.046133,114.443472,52.458000,294.199792,52.043956,50.603446,52.045689,70.524757


In [32]:
start_TEPR_time = time.time()## точка отсчета времени
Xtest = result_test

scaler = preprocessing.StandardScaler().fit(Xtest)
X_sc_test = scaler.transform(Xtest)

Ytest = labels_test

In [33]:
Y_pred_test = best_svm.predict(X_sc_test)

In [34]:
print(classification_report(Ytest, Y_pred_test))
print("--- %s seconds Test_predict ---" % (time.time() - start_TEPR_time))
print("--- %s seconds test ---" % (time.time() - start_TEST_time))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        33
           1       0.92      1.00      0.96        33
           2       0.82      0.85      0.84        33
           3       0.76      0.94      0.84        33
           4       1.00      0.64      0.78        33

    accuracy                           0.88       165
   macro avg       0.89      0.88      0.88       165
weighted avg       0.89      0.88      0.88       165

--- 0.03503227233886719 seconds Test_predict ---
--- 3.422109603881836 seconds test ---
