In [None]:
import os
import warnings

import cv2
import numpy as np
from skimage.feature import hog
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder


In [138]:
# Directories that are scanned for the training and test images.
# Both paths are relative to the notebook's working directory.
train_folder = './Dataset/dogscats_small/train'
test_folder = './Dataset/dogscats_small/test'

# Hàm đọc dữ liệu

In [139]:
def load_dataset(directory, resize = (32,32)):
    """Load every readable image in ``directory`` as a resized grayscale array.

    The label of each image is the part of its file name that precedes the
    first dot (for example ``cat.12.jpg`` yields the label ``cat``).

    Args:
        directory: Folder whose entries are read with ``cv2.imread``.
        resize: Target size handed to ``cv2.resize`` as ``dsize``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. Entries follow the
        sorted order of the file names; entries that OpenCV cannot decode
        are skipped (with a warning) and appear in neither array.
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the cross-validation folds) stable between runs.
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising
            # (non-image files, sub-directories, corrupt images). Skip those
            # instead of crashing inside cv2.resize.
            warnings.warn("Skipping unreadable image: {}".format(filepath))
            continue

        images.append(cv2.resize(image, dsize = resize))
        labels.append(filename.strip().split(".")[0])

    return np.array(images), np.array(labels)

# Hàm rút trích đặc trưng HOG

In [140]:
def hog_features(images):
    """Compute one HOG descriptor per image and stack them into an array.

    Every image is passed to ``skimage.feature.hog`` with 9 orientation bins,
    8x8-pixel cells, 2x2-cell blocks and ``L2-Hys`` block normalisation; the
    descriptor is requested as a flat feature vector without the
    visualisation image.

    Args:
        images: Iterable of images accepted by ``hog``.

    Returns:
        ``np.ndarray`` built from the per-image descriptors, in input order.
    """
    hog_settings = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm="L2-Hys",
        visualize=False,
        feature_vector=True,
    )
    return np.array([hog(picture, **hog_settings) for picture in images])

# Logistic Regression

In [141]:
def logistic_regression(train_features, test_features, train_labels, test_labels, label_encoder, random_state=42):
    """Tune a logistic-regression classifier with grid search and report test metrics.

    A 5-fold ``GridSearchCV`` over ``C`` and ``solver`` is fitted on the
    training data; the refitted search object then predicts the test set and
    the accuracy plus a classification report are printed.

    Args:
        train_features: Feature matrix used to fit the grid search.
        test_features: Feature matrix used for the final evaluation.
        train_labels: Labels aligned with ``train_features``.
        test_labels: Labels aligned with ``test_features``.
        label_encoder: Object whose ``classes_`` supplies the class names
            shown in the classification report.
        random_state: Seed forwarded to ``LogisticRegression``. Defaults to 42.

    Returns:
        The ``best_params_`` dictionary selected by the grid search.
    """
    param_grid = {
        'C': [0.01, 0.1, 1, 10],
        'solver': ['liblinear', 'saga']
    }

    # Both candidate solvers shuffle the data internally; without a fixed seed
    # the selected parameters and the reported scores can change between runs.
    logreg = LogisticRegression(max_iter=500, random_state=random_state)

    grid_search = GridSearchCV(logreg, param_grid, cv=5, n_jobs=-1, verbose=1)
    grid_search.fit(train_features, train_labels)

    logreg_predictions = grid_search.predict(test_features)
    print("Logistic Regression - Accuracy: {:.2f}%".format(accuracy_score(test_labels, logreg_predictions) * 100))
    print("Logistic Regression - Classification Report:")
    print(classification_report(test_labels, logreg_predictions, target_names=label_encoder.classes_))

    return grid_search.best_params_

# KNN

In [142]:
def KNN(train_features, test_features, train_labels, test_labels, label_encoder):
    """Tune a k-nearest-neighbours classifier with grid search and report test metrics.

    A 5-fold ``GridSearchCV`` over the neighbour count and the weighting
    scheme (Euclidean metric only) is fitted on the training data. The
    search object then predicts the test set, and the accuracy and a
    classification report are printed.

    Args:
        train_features: Feature matrix used to fit the grid search.
        test_features: Feature matrix used for the final evaluation.
        train_labels: Labels aligned with ``train_features``.
        test_labels: Labels aligned with ``test_features``.
        label_encoder: Object whose ``classes_`` supplies the class names
            shown in the classification report.

    Returns:
        The ``best_params_`` dictionary selected by the grid search.
    """
    search_space = {
        'n_neighbors': [1, 3, 5, 7],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean'],
    }

    tuner = GridSearchCV(KNeighborsClassifier(), search_space, cv=5, n_jobs=-1, verbose=1)
    tuner.fit(train_features, train_labels)

    predicted = tuner.predict(test_features)

    accuracy_pct = accuracy_score(test_labels, predicted) * 100
    print("KNN - Accuracy: {:.2f}%".format(accuracy_pct))
    print("KNN - Classification Report:")
    report = classification_report(test_labels, predicted, target_names=label_encoder.classes_)
    print(report)

    return tuner.best_params_

# Run

In [143]:
# Load the data: read the train and test folders into image arrays and
# their labels (taken from the file names by load_dataset).
train_imgs, train_labels = load_dataset(train_folder)
test_imgs, test_labels = load_dataset(test_folder)

In [144]:
# Extract HOG features for the train and test images.
train_features = hog_features(train_imgs)
test_features = hog_features(test_imgs)

In [145]:
# Convert the categorical labels to numeric codes.
# The encoder is fitted on the training labels only; the test labels are
# transformed with that same fitted mapping.
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)

In [146]:
# Logistic Regression: grid-search on the training HOG features, print the
# test-set metrics, and display the selected hyper-parameters as the cell output.
logistic_regression(train_features, test_features, train_labels_encoded, test_labels_encoded, label_encoder)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Logistic Regression - Accuracy: 66.67%
Logistic Regression - Classification Report:
              precision    recall  f1-score   support

         cat       0.69      0.69      0.69        16
         dog       0.64      0.64      0.64        14

    accuracy                           0.67        30
   macro avg       0.67      0.67      0.67        30
weighted avg       0.67      0.67      0.67        30



{'C': 1, 'solver': 'liblinear'}

In [147]:
# KNN: grid-search on the training HOG features and print the test-set metrics.
# The integer-encoded labels are passed, matching the Logistic Regression
# call, so both models are trained and scored on the same label representation.
KNN(train_features, test_features, train_labels_encoded, test_labels_encoded, label_encoder)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
KNN - Accuracy: 60.00%
KNN - Classification Report:
              precision    recall  f1-score   support

         cat       0.67      0.50      0.57        16
         dog       0.56      0.71      0.63        14

    accuracy                           0.60        30
   macro avg       0.61      0.61      0.60        30
weighted avg       0.61      0.60      0.60        30



{'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}