In [161]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from IPython.display import Image
from imutils import paths
import numpy as np
import cv2
import os

In [162]:
def extract_histogram(image, bins=(8, 8, 8)):
    hist = cv2.calcHist([image], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [163]:
import glob

In [164]:
imagePaths = sorted(glob.glob('train/*'))
data = []
labels = []

for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath, 1)
    label = imagePath.split(os.path.sep)[-1].split(".")[0]
    hist = extract_histogram(image)
    data.append(hist)
    labels.append(label)

In [168]:
labels = list(map(lambda x: x == 'cat', labels))

In [169]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, random_state=42, test_size=0.25)

In [195]:
from sklearn.svm import SVC

In [259]:
model = SVC(C = 1.44, random_state = 42)
model.fit(X_train, y_train)

SVC(C=1.44, random_state=42)

In [260]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [198]:
dtc = DecisionTreeClassifier(criterion = 'entropy', min_samples_leaf = 10, max_leaf_nodes = 20, random_state=42)
bgc = BaggingClassifier(base_estimator=dtc, n_estimators = 18, random_state = 42)
bgc.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion='entropy',
                                                        max_leaf_nodes=20,
                                                        min_samples_leaf=10,
                                                        random_state=42),
                  n_estimators=18, random_state=42)

In [199]:
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(n_estimators = 18, criterion = 'entropy', min_samples_leaf = 10, max_leaf_nodes = 20, random_state = 42)
rfc.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', max_leaf_nodes=20,
                       min_samples_leaf=10, n_estimators=18, random_state=42)

In [248]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver='lbfgs', random_state = 42)
lr.fit(X_train, y_train)

LogisticRegression(random_state=42)

In [249]:
from sklearn.ensemble import StackingClassifier

In [250]:
sc = StackingClassifier(
    estimators=[
        ('svc', model),
        ('bgc', bgc),
        ('rfc', rfc),
        ('lr', lr)
    ], cv=2
)

sc.fit(X_train, y_train)

StackingClassifier(cv=2,
                   estimators=[('svc', SVC(C=1.44, random_state=42)),
                               ('bgc',
                                BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion='entropy',
                                                                                        max_leaf_nodes=20,
                                                                                        min_samples_leaf=10,
                                                                                        random_state=42),
                                                  n_estimators=18,
                                                  random_state=42)),
                               ('rfc',
                                RandomForestClassifier(criterion='entropy',
                                                       max_leaf_nodes=20,
                                                       min_samples_leaf=10,
                                  

In [251]:
from sklearn.model_selection import cross_val_score

In [252]:
cross_val_score(sc, X_test, y_test, scoring='accuracy', cv=2).mean()

0.492

In [253]:
imagePaths = sorted(glob.glob('test/*'))
data_test = []
labels_test = []

for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath, 1)
    label = imagePath.split(os.path.sep)[-1].split(".")[0]
    hist = extract_histogram(image)
    data_test.append(hist)
    labels_test.append(label)

In [254]:
labels_test = list(map(lambda x: x == 'cat', labels_test))

In [255]:
images_names = ['cat.1040.jpg', 'cat.1015.jpg', 'dog.1022.jpg', 'cat.1022.jpg']

In [256]:
def get_image_features(image_name):
    image = cv2.imread('test/' + image_name, 1)
    label = imagePath.split(os.path.sep)[-1].split(".")[0]
    return extract_histogram(image)

In [257]:
features = list(map(get_image_features, images_names))

In [264]:
sc.predict_proba(features)[:, 1]

array([0.74303911, 0.70818165, 0.45644331, 0.41103122])