In [75]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.svm import LinearSVC
from os import listdir
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

In [76]:
def extract_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized 3-channel color histogram of ``image``.

    Parameters
    ----------
    image
        Image array as produced by ``cv2.imread``; channels 0, 1 and 2 are
        binned jointly over the value range [0, 256).
    bins : tuple of int, optional
        Number of bins per channel, passed to ``cv2.calcHist`` as the
        histogram size. Defaults to ``(8, 8, 8)``.

    Returns
    -------
    The histogram flattened to one dimension, after normalization with
    ``cv2.normalize``'s default settings.

    Raises
    ------
    ValueError
        If ``image`` is ``None``.
    """
    # cv2.imread reports a missing/undecodable file by returning None instead
    # of raising; fail early with a clear message rather than inside calcHist.
    if image is None:
        raise ValueError(
            "image is None; cv2.imread returns None when a file is missing "
            "or cannot be decoded"
        )
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    # Normalize in place (src and dst are the same array).
    cv2.normalize(hist, hist)
    return hist.flatten()

In [77]:
# Collect every regular, non-hidden file in ./train, in sorted order.
train_files = []
for name in listdir('./train'):
    if name.startswith('.'):
        continue  # skip hidden entries such as .DS_Store
    if not isfile(join('./train', name)):
        continue  # skip sub-directories
    train_files.append(f'./train/{name}')
train_files.sort()

In [78]:
# Build features and labels in a single pass so the two lists stay aligned.
# Label convention: 1 = cat, 0 = dog (a file name starting with 'c' is a cat).
X, Y = [], []
for path in train_files:
    image = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising, so
    # report the offending file here instead of failing inside calcHist.
    if image is None:
        raise ValueError(f"Could not read training image: {path}")
    X.append(extract_histogram(image))
    Y.append(1 if path.split('/')[-1][0] == 'c' else 0)

In [79]:
# Hold out 25% of the samples for evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=9,
)
# Alternative (disabled): train on the full dataset with
#   X_train, Y_train = X, Y

In [80]:
# Linear SVM with a fixed seed, fitted on the training split.
clf = LinearSVC(C=0.51, random_state=9)
clf.fit(X=X_train, y=Y_train)

In [86]:
# Show the learned weight for a few selected histogram features.
for feature_idx in (257, 370, 125):
    weight = clf.coef_[:, feature_idx]
    print(f"{feature_idx}: {weight}")

257: [-0.00763417]
370: [0.0437323]
125: [-0.00026433]


In [82]:
# Score the held-out split. precision_recall_fscore_support returns
# (precision, recall, f-score, support); index 2 is the F-score.
pred = clf.predict(X_test)
macro_scores = precision_recall_fscore_support(Y_test, pred, average='macro')
print(f"Macro-f-score: {macro_scores[2]}")

Macro-f-score: 0.6103896103896105


## Check: predictions for selected test images

In [83]:
# NOTE: sorted() reorders the names alphabetically, so anything computed from
# `final` follows this order: cat.1003, cat.1042, cat.1046, dog.1025.
final_files = sorted(f'./test/{name}.jpg' for name in ["cat.1046", "dog.1025", "cat.1042", "cat.1003"])
final = []
for path in final_files:
    image = cv2.imread(path)
    # cv2.imread returns None for a missing or undecodable file; name the
    # file explicitly instead of failing later inside calcHist.
    if image is None:
        raise ValueError(f"Could not read test image: {path}")
    final.append(extract_histogram(image))

In [84]:
# Predicted class per image, in the order of `final_files` (1 = cat, 0 = dog).
clf.predict(X=final)

array([1, 0, 0, 1])