In [62]:
import os
import cv2

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import MinMaxScaler

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier

from concurrent.futures import ThreadPoolExecutor
from skimage.feature import hog

import xgboost as xgb
from scipy.stats import uniform, randint

#### Persiapan data

In [3]:
# membaca, mengubah gambar menjadi grayscale, meresize, dan mengekstrak fitur.

path_datasets = '../dataset/training_set/'
categories = ['dogs', 'cats']


def process_img(file_path, img_size=(128, 64)):
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) # membaca gambar sebagai grayscale
    if img is not None:
        # img_gray = cv2.cvtColor(file_path, cv2.COLOR_RGB2GRAY)
        img_resized = cv2.resize(img, img_size) # meresize gambar

        features, hog_image = hog(img_resized, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', visualize=True)
        
        return features.flatten() # mengubah ke bentuk 1D
    return None

def load_gambar(images):
    file_paths = []
    labels = []
    
    for index, category in enumerate(images):
        folder_path = os.path.join(path_datasets, category)
        for file in os.listdir(folder_path):
            file_paths.append(os.path.join(folder_path, file))
            labels.append(index)

    # Memproses gambar menggunakan multithreading
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_img, file_paths))

     # Menghapus hasil yang None (gambar gagal diproses)
    processed_images = [img for img in results if img is not None]
    filtered_labels = [label for img, label in zip(results, labels) if img is not None]

    return np.array(processed_images), np.array(filtered_labels)

X, y = load_gambar(categories)

In [4]:
print(X)
print(y)

[[0.25460957 0.19111668 0.25460957 ... 0.23696665 0.23046075 0.        ]
 [0.15473158 0.05758438 0.13765091 ... 0.01546296 0.00724765 0.00284108]
 [0.18974503 0.1020711  0.16141495 ... 0.04618565 0.05853276 0.03512658]
 ...
 [0.20911772 0.21568796 0.11519594 ... 0.22265029 0.22199465 0.06372111]
 [0.23377066 0.         0.17587877 ... 0.01827367 0.00413835 0.01651245]
 [0.22771884 0.01572228 0.         ... 0.21422292 0.27972197 0.08873595]]
[0 0 0 ... 1 1 1]


In [27]:
# Normalisasi nilai piksel
minMaxScaler = MinMaxScaler()
minMax = minMaxScaler.fit_transform(X)

minMax

array([[0.36935661, 0.40664311, 0.41745559, ..., 0.45090689, 0.53130279,
        0.        ],
       [0.22446576, 0.12252353, 0.22569121, ..., 0.02942336, 0.01670868,
        0.00524179],
       [0.27525902, 0.21717889, 0.26465451, ..., 0.08788337, 0.13494107,
        0.06480844],
       ...,
       [0.30336256, 0.45892397, 0.18887424, ..., 0.42366529, 0.51178511,
        0.11756527],
       [0.33912605, 0.        , 0.28836926, ..., 0.03477165, 0.00954052,
        0.03046543],
       [0.33034681, 0.03345264, 0.        , ..., 0.40762946, 0.64486932,
        0.16371757]])

In [28]:
X_train, X_test, y_train, y_test = train_test_split(minMax, y, test_size=0.4, random_state=24)

In [63]:
param_dist = {
    'C': uniform(0.1, 100),  # Distribusi kontinu untuk C
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'] + list(np.logspace(-3, 1, 5)),
    'degree': randint(2, 5)  # Hanya untuk kernel poly
}

random_search = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist,
    n_iter=20,  # Jumlah kombinasi yang akan dicoba
    cv=3,  # Gunakan 3-fold CV untuk mempercepat
    scoring='accuracy',
    n_jobs=-1,  # Gunakan semua core CPU
    verbose=2
)
random_search.fit(X_train, y_train)

Fitting 3 folds for each of 20 candidates, totalling 60 fits


In [67]:
final_model = random_search.best_estimator_
y_pred_svc = final_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_svc)
print(f'Akurasi: {accuracy * 100:.2f}%')

Akurasi: 73.42%


In [35]:
model = SVC(kernel='rbf')
model.fit(X_train, y_train)

In [36]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Akurasi: {accuracy * 100:.2f}%')

Akurasi: 74.30%


In [56]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.4, random_state=24)

In [57]:
rf = RandomForestClassifier(n_estimators=80, random_state=42)
rf.fit(X_train2, y_train2)

In [58]:
y_pred_rf = rf.predict(X_test2)
accuracy = accuracy_score(y_test2, y_pred_rf)
print(f'Akurasi: {accuracy * 100:.2f}%')

Akurasi: 67.96%


In [59]:
xgboost = xgb.XGBClassifier()
xgboost.fit(X_train2, y_train2)

In [60]:
y_pred_xg = xgboost.predict(X_test2)
accuracy = accuracy_score(y_test2, y_pred_xg)
print(f'Akurasi: {accuracy * 100:.2f}%')

Akurasi: 72.36%
