In [3]:
import os
import numpy as np
import cv2 
from sklearn.svm import SVC 
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import csv
import imutils

def load_image(path):
    """Read the image file at ``path`` and return it converted to grayscale.

    Args:
        path: Location of the image file on disk.

    Returns:
        The result of converting the decoded BGR image with
        ``cv2.COLOR_BGR2GRAY``.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (missing,
            unreadable, or not a decodable image).
    """
    image = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque cvtColor assertion error.
    if image is None:
        raise FileNotFoundError(
            "Could not read image (missing or unreadable): {}".format(path)
        )
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def display_image(image):
    """Draw ``image`` on the current matplotlib axes using the gray colormap."""
    plt.imshow(image, cmap='gray')

# Integer class id assigned to each flower name that appears in the CSV.
label_ids = {
    'lilyvalley': 1,
    'tigerlily': 2,
    'snowdrop': 3,
    'bluebell': 4,
    'fritillary': 5,
}

# Parallel lists: files[i] is the image file name, labels[i] its class id.
files = []
labels = []
# newline="" lets the csv module handle line endings itself.
with open("train/train_labels.csv", "r", newline="") as csv_file:
    csv_reader = csv.DictReader(csv_file, delimiter=',')
    for lines in csv_reader:
        label = lines['labels']
        if label not in label_ids:
            # Previously such a row still added its file name but no label,
            # leaving the two lists misaligned; fail early and say why.
            raise ValueError(
                "Unknown label {!r} for file {!r} in train/train_labels.csv"
                .format(label, lines['file'])
            )
        files.append(lines['file'])
        labels.append(label_ids[label])

def resize(region):
    """Return ``region`` rescaled to a fixed (40, 50) size, nearest-neighbour."""
    target_size = (40, 50)
    return cv2.resize(region, target_size, interpolation=cv2.INTER_NEAREST)

train_dir = 'train/'

# Grayscale, fixed-size version of every training image, in CSV order.
images = []

for img_name in files:
    # `img` is deliberately left bound after the loop: the HOG descriptor
    # set-up further down reads `img.shape` to derive its window size.
    img = resize(load_image(os.path.join(train_dir, img_name)))
    images.append(img)

images_hog = []

# HOG parameters: orientation bins, cell size in pixels, block size in cells.
nbins = 9
cell_size = (8, 8)
block_size = (2, 2)

# Index 0 of each pair goes with image rows (img.shape[0]) and index 1 with
# image columns (img.shape[1]); OpenCV sizes are passed columns-first.
cell_rows, cell_cols = cell_size
block_rows, block_cols = block_size
img_rows, img_cols = img.shape[0], img.shape[1]

# Window = image size rounded down to a whole number of cells.
win_size = (img_cols // cell_cols * cell_cols,
            img_rows // cell_rows * cell_rows)

hog = cv2.HOGDescriptor(_winSize=win_size,
                        _blockSize=(block_cols * cell_cols,
                                    block_rows * cell_rows),
                        _blockStride=(cell_cols, cell_rows),
                        _cellSize=(cell_cols, cell_rows),
                        _nbins=nbins)

# One HOG descriptor per training image, stacked into a single array.
images_hog = np.array([hog.compute(image) for image in images])
images_results = np.array(labels)

# Feature matrix and label vector used for training.
x = images_hog
y = np.array(images_results)


def reshape_data(input_data):
    """Flatten every sample of ``input_data`` into a single feature row.

    The first axis is kept as the sample axis and all remaining axes are
    merged, so a ``(nsamples, nx, ny)`` array becomes ``(nsamples, nx*ny)``
    exactly as before. Arrays with a different number of trailing axes are
    accepted as well (e.g. ``(nsamples, nfeatures)`` when each per-sample
    descriptor is already a flat vector), instead of failing on a fixed
    three-way shape unpack.

    Args:
        input_data: Array-like whose first axis indexes samples.

    Returns:
        A 2-D array of shape ``(nsamples, features_per_sample)``.
    """
    data = np.asarray(input_data)
    # -1 lets NumPy work out the per-sample feature count from the rest.
    return data.reshape((data.shape[0], -1))

# Flatten the descriptors into one feature row per image.
x_train, y_train = reshape_data(x), y

# Linear-kernel SVM, fitted on the full training set.
clf_svm = SVC(kernel='linear', probability=True)
clf_svm.fit(x_train, y_train)

# Accuracy on the same data the model was fitted on.
y_train_pred = clf_svm.predict(x_train)
accuracy_score_train = accuracy_score(y_train, y_train_pred)
print("Train accuracy: ", accuracy_score_train)

train_acccuracy_perc = "%s%%" % (accuracy_score_train * 100)
print("Train accuracy percentage: ", train_acccuracy_perc)

# Integer class id assigned to each flower name that appears in the CSV.
label_ids = {
    'lilyvalley': 1,
    'tigerlily': 2,
    'snowdrop': 3,
    'bluebell': 4,
    'fritillary': 5,
}

# Parallel lists: files_test[i] is the image file name, labels_test[i] its
# class id.
files_test = []
labels_test = []
# newline="" lets the csv module handle line endings itself.
with open("test/test_labels.csv", "r", newline="") as csv_file:
    csv_reader = csv.DictReader(csv_file, delimiter=',')
    for lines in csv_reader:
        label = lines['labels']
        if label not in label_ids:
            # Previously such a row still added its file name but no label,
            # leaving the two lists misaligned; fail early and say why.
            raise ValueError(
                "Unknown label {!r} for file {!r} in test/test_labels.csv"
                .format(label, lines['file'])
            )
        files_test.append(lines['file'])
        labels_test.append(label_ids[label])

test_dir = 'test/'
# Grayscale, fixed-size version of every test image, in CSV order.
images_test = []

for img_name in files_test:
    img = resize(load_image(os.path.join(test_dir, img_name)))
    images_test.append(img)

# One HOG descriptor per test image, computed with the same descriptor
# object that was used for the training images.
images_test_hog = np.array([hog.compute(image) for image in images_test])
images_test_results = np.array(labels_test)

xx = images_test_hog
yy = np.array(images_test_results)

# Flatten the descriptors into one feature row per image.
x_test, y_test = reshape_data(xx), yy

# Accuracy of the fitted classifier on the test set.
y_test_pred = clf_svm.predict(x_test)
accuracy_score_test = accuracy_score(y_test, y_test_pred)
print("Test accuracy: ", accuracy_score_test)

test_acccuracy_perc = "%s%%" % (accuracy_score_test * 100)
print("Test accuracy percentage: ", test_acccuracy_perc)


Train accuracy:  0.971875
Train accuracy percentage:  97.1875%
Test accuracy:  0.5625
Test accuracy percentage:  56.25%
