ktn_tips_2b

In [1]:
import time 
start_alltime = time.time()## точка отсчета времени
import cv2
import numpy as np
import os as os
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import *
from sklearn import preprocessing
from scipy import ndimage as ndi
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def build_filters():  # построение фильтров
    filters = []
    ksize = 11
    sigma = 8.0
    lambd = 10.0
    gamma = 0.5
    psi = 0
    for theta in np.arange(0, np.pi, np.pi / 16):
        kern = cv2.getGaborKernel((ksize, ksize), sigma, theta, lambd, gamma, psi, ktype=cv2.CV_32F)
        kern /= 1.5 * kern.sum()
        filters.append(kern)
    return filters


def process(img, filters):  # применение фильтров к фото
    accum = np.zeros_like(img)
    for kern in filters:
        fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
        np.maximum(accum, fimg, accum)
        return accum


def compute_feats(image, kernels):  # Применение для фичей
    feats = np.zeros((len(kernels), 1), dtype=np.double)
    for k, kernel in enumerate(kernels):
        filtered = ndi.convolve(image, kernel, mode='wrap')
        # feats[k, 0] = filtered.mean() #среднее
        feats[k, 0] = filtered.var()  # дисперсия
    return feats


def extract_gab(images):  # Это применение к фичи
    gabs = []
    for image in images:
        gab = compute_feats(image, filters)
        gabs.append(gab)

    return np.array(gabs)

## Загрузка изображений и перевод их в массивы ##

In [3]:
def LaP_images(path):
    images = []
    filenames = os.listdir(path)
    for filename in filenames:    
        image = cv2.imread(os.path.join(path, filename),0)
        image = cv2.resize(image, dsize=(150,150))
        images.append(image)
    return np.array(images)

In [4]:
start_LaP_time = time.time()## точка отсчета времени
foil_train_dir = os.getcwd() + "\\Desert"
corduroy_train_dir = os.getcwd() + "\\DenseResidential"
cork_train_dir = os.getcwd() + "\\Forest"
catton_train_dir = os.getcwd() + "\\Meadow"
linen_train_dir = os.getcwd() + "\\River"

In [5]:
foil_train_list = LaP_images(foil_train_dir)
corduroy_train_list = LaP_images(corduroy_train_dir)
cork_train_list = LaP_images(cork_train_dir)
catton_train_list = LaP_images(catton_train_dir)
linen_train_list = LaP_images(linen_train_dir)

In [6]:
print(len(foil_train_list[1]))
print(len(foil_train_list))
print(len(corduroy_train_list))
print(len(cork_train_list))
print(len(catton_train_list))
print(len(linen_train_list))

150
217
217
217
217
217



## Формируем общий датафрейм и вытаскиваем признаки ##

In [7]:
start_EX_time = time.time()## точка отсчета времени
all_images = np.vstack((foil_train_list,
                        corduroy_train_list,
                        cork_train_list,
                        catton_train_list,
                        linen_train_list
                        ))
print(all_images.shape,"Shape before extract")

(1085, 150, 150) Shape before extract


In [8]:
labels = []
for i in range(len(foil_train_list)):
    labels.append(0)
for i in range(len(corduroy_train_list)):
    labels.append(1)
for i in range(len(cork_train_list)):
    labels.append(2)
for i in range(len(catton_train_list)):
    labels.append(3)
for i in range(len(linen_train_list)):
    labels.append(4)
    
labels = np.array(labels)
print("--- %s seconds Load and preprocessing ---" % (time.time() - start_LaP_time)) ## вывод время работы программы

--- 3.2459490299224854 seconds Load and preprocessing ---


> cv2.getGaborKernel(ksize, sigma, theta, lambda, gamma, psi, ktype) <br>
> ksize - size of gabor filter (n, n) <br>
> sigma - standard deviation of the gaussian function <br>
> theta - orientation of the normal to the parallel stripes <br>
> lambda - wavelength of the sunusoidal factor <br>
> gamma - spatial aspect ratio <br>
> psi - phase offset <br>
> ktype - type and range of values that each pixel in the gabor kernel can hold <br>

In [9]:
filters = build_filters()

In [10]:
len(filters)

16

In [11]:
X_all_gab = extract_gab(all_images)


In [12]:
print("--- %s seconds EX ---" % (time.time() - start_EX_time)) ## вывод время работы программы

--- 36.465126037597656 seconds EX ---


In [13]:
X_all_gab.shape

(1085, 16, 1)

## Объединение признаков одного фото в один массив и преобразования в датасет для классификации ##

In [14]:
def create_histograms(images):
    all_histograms = []
    for i in images:
        histogram = np.ravel(i)
        all_histograms.append(histogram)
    return np.array(all_histograms)

In [15]:
start_HIST_time = time.time()## точка отсчета времени
X_all_hist = create_histograms(X_all_gab)


print('X_all_hist\t', X_all_hist.shape)



X_all_hist	 (1085, 16)


In [16]:
col = ['GT_th_0.0_sig_4_mean',
  'GT_th_0.0_sig_4_var',
  'GT_th_0.0_sig_8_mean',
  'GT_th_0.0_sig_8_var',
  'GT_th_1.0_sig_4_mean',
  'GT_th_1.0_sig_4_var',
  'GT_th_1.0_sig_8_mean',
  'GT_th_1.0_sig_8_var',
  'GT_th_2.0_sig_4_mean',
  'GT_th_2.0_sig_4_var',
  'GT_th_2.0_sig_8_mean',
  'GT_th_2.0_sig_8_var',
  'GT_th_3.0_sig_4_mean',
  'GT_th_3.0_sig_4_var',
  'GT_th_3.0_sig_8_mean',
  'GT_th_3.0_sig_8_var']

In [17]:
result = pd.DataFrame(data = X_all_hist,columns = col)

In [18]:
#result['target'] = labels

## Обучение классификатора ##

In [19]:
X = result

scaler = preprocessing.StandardScaler().fit(X)
X_scaled = scaler.transform(X)

Y = labels

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size = 0.30)

In [20]:
start_SVM_time = time.time()## точка отсчета времени
clf_svm = svm.SVC()
param_grid = {'C':[350,400,450],'gamma':[0.1,0.5,1.0]} #param_grid = {'C':[50,100,150],'gamma':[0.5,1,1.5],'kernel':['rbf','poly']}
grid_search_svm_clf=GridSearchCV(clf_svm,param_grid, refit = True, verbose=1)
grid_search_svm_clf.fit(X_train, Y_train)
print("--- %s seconds SVM+GridSearch fit ---" % (time.time() - start_SVM_time)) ## вывод время работы программы

Fitting 5 folds for each of 9 candidates, totalling 45 fits
--- 0.8177425861358643 seconds SVM+GridSearch fit ---


In [21]:
grid_search_svm_clf.best_estimator_

In [22]:
start_PR_time = time.time()## точка отсчета времени
best_svm = grid_search_svm_clf.best_estimator_
Y_pred_svm = best_svm.predict(X_test)
print("Accuracy:",accuracy_score(Y_test, Y_pred_svm))
print(classification_report(Y_test, Y_pred_svm))
print("--- %s seconds Predict time ---" % (time.time() - start_PR_time)) ## вывод время работы программы

Accuracy: 0.8159509202453987
              precision    recall  f1-score   support

           0       0.76      0.97      0.85        58
           1       0.85      0.86      0.85        64
           2       0.83      0.81      0.82        77
           3       0.80      0.63      0.71        65
           4       0.85      0.84      0.85        62

    accuracy                           0.82       326
   macro avg       0.82      0.82      0.81       326
weighted avg       0.82      0.82      0.81       326

--- 0.011010408401489258 seconds Predict time ---


In [23]:
print("--- %s seconds ALL ---" % (time.time() - start_alltime)) ## вывод время работы программы

--- 41.330546855926514 seconds ALL ---


In [24]:
start_KNN_time = time.time()## точка отсчета времени

model_knn = KNeighborsClassifier(n_neighbors=1)
model_knn.fit(X_train, Y_train)

print('KNN train acc\t:', model_knn.score(X_train, Y_train))
print('KNN test acc\t:', model_knn.score(X_test, Y_test))

print("--- %s seconds KNN ---" % (time.time() - start_KNN_time)) ## вывод время работы программы

KNN train acc	: 1.0
KNN test acc	: 0.7730061349693251
--- 0.09608697891235352 seconds KNN ---


In [25]:
start_TEST_time = time.time()## точка отсчета времени

foil_test_dir = os.getcwd() + "\\Desert_test"
corduroy_test_dir = os.getcwd() + "\\DenseResidential_test"
cork_test_dir = os.getcwd() + "\\Forest_test"
catton_test_dir = os.getcwd() + "\\Meadow_test"
linen_test_dir = os.getcwd() + "\\River_test"


foil_test_list = LaP_images(foil_test_dir)
corduroy_test_list = LaP_images(corduroy_test_dir)
cork_test_list = LaP_images(cork_test_dir)
catton_test_list = LaP_images(catton_test_dir)
linen_test_list = LaP_images(linen_test_dir)


all_test_images = np.vstack((foil_test_list,
                        corduroy_test_list,
                        cork_test_list,
                        catton_test_list,
                        linen_test_list
                        ))

labels_test = []
for i in range(len(foil_test_list)):
    labels_test.append(0)
for i in range(len(corduroy_test_list)):
    labels_test.append(1)
for i in range(len(cork_test_list)):
    labels_test.append(2)
for i in range(len(catton_test_list)):
    labels_test.append(3)
for i in range(len(linen_test_list)):
    labels_test.append(4)

    
labels_test = np.array(labels_test)

In [26]:
X_test_gab = extract_gab(all_test_images)
X_all_gab.shape

(1085, 16, 1)

In [27]:
X_test_hist = create_histograms(X_test_gab)

In [28]:
result_test = pd.DataFrame(data = X_test_hist,columns = col)

In [29]:
result_test

Unnamed: 0,GT_th_0.0_sig_4_mean,GT_th_0.0_sig_4_var,GT_th_0.0_sig_8_mean,GT_th_0.0_sig_8_var,GT_th_1.0_sig_4_mean,GT_th_1.0_sig_4_var,GT_th_1.0_sig_8_mean,GT_th_1.0_sig_8_var,GT_th_2.0_sig_4_mean,GT_th_2.0_sig_4_var,GT_th_2.0_sig_8_mean,GT_th_2.0_sig_8_var,GT_th_3.0_sig_4_mean,GT_th_3.0_sig_4_var,GT_th_3.0_sig_8_mean,GT_th_3.0_sig_8_var
0,62.517614,305.514206,301.636696,87.827362,78.720991,121.897097,657.458894,928.255688,151.102095,335.709199,105.315994,22.019745,18.297699,23.234923,102.277867,212.104132
1,364.516917,1192.308071,698.526823,125.456955,98.120296,162.978616,1291.866463,2895.510571,963.596078,3359.425523,2129.442562,304.645951,188.257369,235.761633,1137.700246,1502.566433
2,66.404161,306.202799,233.685265,60.532162,57.516987,109.289832,831.654380,1506.621874,305.688427,778.971981,208.961631,26.638395,19.266656,26.254364,129.615832,242.488571
3,478.073022,2159.645241,1668.201264,391.395853,352.264063,657.435075,3782.524056,4669.283715,1726.584144,3380.912374,1432.654672,205.853786,145.124956,203.526156,1056.967766,1706.005919
4,475.109105,1437.862613,722.345907,94.257551,56.299454,71.365387,360.193257,611.440982,150.595686,710.476027,485.007087,98.721635,79.571196,134.161038,886.158662,1552.159091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,1665.175788,3427.846992,3833.622406,1953.198588,1058.133938,1726.894052,3693.325889,3126.915462,1546.824557,2232.571784,1673.490656,663.229656,333.855040,415.530234,1504.431301,2636.775221
161,2392.708456,3906.278914,2926.146517,942.969171,658.449548,1061.431995,2726.663638,3359.830775,1722.998846,3870.135904,3568.553977,1430.977114,1112.210736,1780.796209,3967.272248,4397.066283
162,560.846512,2371.759132,1220.105834,148.438761,109.260485,146.366013,1060.146525,1939.746235,394.297214,1628.375573,746.136540,92.127303,68.590562,96.230284,916.813974,2123.169656
163,331.087573,1728.080788,681.422112,101.978453,84.141966,110.375521,681.525089,1826.142944,294.199792,1599.936701,499.583126,88.541710,70.524757,88.612275,655.051908,1753.031659


In [30]:
start_TEPR_time = time.time()## точка отсчета времени
Xtest = result_test

scaler = preprocessing.StandardScaler().fit(Xtest)
X_sc_test = scaler.transform(Xtest)

Ytest = labels_test

In [31]:
Y_pred_test = best_svm.predict(X_sc_test)

In [32]:
print(classification_report(Ytest, Y_pred_test))
print("--- %s seconds Test_predict ---" % (time.time() - start_TEPR_time))
print("--- %s seconds test ---" % (time.time() - start_TEST_time))

              precision    recall  f1-score   support

           0       0.75      0.18      0.29        33
           1       0.89      1.00      0.94        33
           2       0.89      0.94      0.91        33
           3       0.51      0.91      0.65        33
           4       0.73      0.58      0.64        33

    accuracy                           0.72       165
   macro avg       0.75      0.72      0.69       165
weighted avg       0.75      0.72      0.69       165

--- 0.036032915115356445 seconds Test_predict ---
--- 6.142581224441528 seconds test ---
