In [82]:
import os
import numpy as np
import cv2
from skimage.feature import hog
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split


In [83]:
pedestrian_folder = './Dataset/Pedestrian/Pedestrian'
non_pedestrian_folder = './Dataset/Pedestrian/NonPedestrian'

# Hàm đọc dữ liệu

In [84]:
def load_dataset(dir1, label1, dir2, label2, resize = (32,64)):
    images = []
    labels = []
    
    for filename in os.listdir(dir1):
        filepath = os.path.join(dir1, filename)
        image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(image, dsize = resize)     
        images.append(img)
        labels.append(label1)

    for filename in os.listdir(dir2):
        filepath = os.path.join(dir2, filename)
        image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(image, dsize = resize)        
        images.append(img)
        labels.append(label2)

    return np.array(images), np.array(labels)

# Hàm rút trích đặc trưng HOG

In [85]:
def hog_features(images):
    features = []
    for img in images:
        hog_feature = hog(
            img,
            orientations = 9,
            pixels_per_cell = (8,8),
            cells_per_block = (2,2),
            block_norm="L2-Hys",
            visualize=False,
            feature_vector=True
        )

        features.append(hog_feature)
    return np.array(features)

# Logistic Regression

In [86]:
def logistic_regression(train_features, test_features, train_labels, test_labels, label_encoder):
    param_grid = {
        'C': [0.01, 0.1, 1, 10], 
        'solver': ['liblinear', 'saga']
    }
    
    logreg = LogisticRegression(max_iter=500)
    
    grid_search = GridSearchCV(logreg, param_grid, cv=5, n_jobs=-1, verbose=1)
    
    grid_search.fit(train_features, train_labels)
    
    logreg_predictions = grid_search.predict(test_features)
    print("Logistic Regression - Accuracy: {:.2f}%".format(accuracy_score(test_labels, logreg_predictions) * 100))
    print("Logistic Regression - Classification Report:")
    print(classification_report(test_labels, logreg_predictions, target_names=label_encoder.classes_))
    
    return grid_search.best_params_

# KNN

In [None]:
def KNN(train_features, test_features, train_labels, test_labels, label_encoder):

    param_grid = {
        'n_neighbors': [1, 3, 5, 7], 
        'weights': ['uniform', 'distance'],  
        'metric': ['euclidean']  
    }
    
    knn = KNeighborsClassifier()
    

    grid_search = GridSearchCV(knn, param_grid, cv=5, n_jobs=-1, verbose=1)
    

    grid_search.fit(train_features, train_labels)
    

    knn_predictions = grid_search.predict(test_features)
    
    print("KNN - Accuracy: {:.2f}%".format(accuracy_score(test_labels, knn_predictions) * 100))
    print("KNN - Classification Report:")
    print(classification_report(test_labels, knn_predictions, target_names=label_encoder.classes_))
    

    return grid_search.best_params_

# Run

In [88]:
# Đọc dữ liệu
imgs, labels = load_dataset(pedestrian_folder, 'Pedestrian', non_pedestrian_folder, 'NonPedestrian')
train_imgs, test_imgs, train_labels, test_labels = train_test_split(imgs, labels, test_size=0.2, random_state=42)

In [89]:
# Rút trích đặc trưng HOG
train_features = hog_features(train_imgs)
test_features = hog_features(test_imgs)

In [90]:
# # Chuyển nhãn từ category sang dạng số
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)

In [91]:
# Logistic Regression
logistic_regression(train_features, test_features, train_labels_encoded, test_labels_encoded, label_encoder)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Logistic Regression - Accuracy: 75.00%
Logistic Regression - Classification Report:
               precision    recall  f1-score   support

NonPedestrian       0.67      0.67      0.67         3
   Pedestrian       0.80      0.80      0.80         5

     accuracy                           0.75         8
    macro avg       0.73      0.73      0.73         8
 weighted avg       0.75      0.75      0.75         8



{'C': 1, 'solver': 'liblinear'}

In [92]:
#KNN
KNN(train_features, test_features, train_labels, test_labels, label_encoder)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
KNN - Accuracy: 87.50%
KNN - Classification Report:
               precision    recall  f1-score   support

NonPedestrian       0.75      1.00      0.86         3
   Pedestrian       1.00      0.80      0.89         5

     accuracy                           0.88         8
    macro avg       0.88      0.90      0.87         8
 weighted avg       0.91      0.88      0.88         8



{'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}