In [139]:
#https://www.geeksforgeeks.org/ml-face-recognition-using-pca-implementation/
# En iyi parametreler: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'auto'} ile elde edilen en yüksek doğruluk: 0.4377299745258987

In [124]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import matplotlib.pyplot as plt

import os

from tqdm import tqdm
from sklearn.decomposition import PCA
import cv2
from sklearn.ensemble import RandomForestClassifier

In [125]:
def load_images_from_folder_fer_2013(folder):
    """Load a FER-2013 style directory tree as flat grayscale vectors and labels.

    The expected layout is ``folder/<label>/<image file>``: every
    sub-directory name is used as the class label of the images inside it.
    Entries of ``folder`` that are not directories are ignored.

    Each image is read with OpenCV, converted to grayscale, resized to
    48x48 and flattened to a vector of 2304 values.

    Args:
        folder: Path of the dataset split (e.g. the train directory).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays; ``images[i]`` is the
        flattened image and ``labels[i]`` is the name of its class folder.

    Notes:
        Files that OpenCV cannot decode (``cv2.imread`` returns ``None``,
        e.g. for ``.DS_Store`` or a truncated download) are skipped and
        reported once through a warning instead of crashing in ``cvtColor``.
    """
    import warnings  # local import keeps the block self-contained

    images = []
    labels = []
    skipped = 0
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded model trained on it) reproducible.
    for label_folder in sorted(os.listdir(folder)):
        label_folder_path = os.path.join(folder, label_folder)
        if not os.path.isdir(label_folder_path):
            continue
        for image_file in sorted(os.listdir(label_folder_path)):
            image_path = os.path.join(label_folder_path, image_file)
            img = cv2.imread(image_path)
            if img is None:
                # imread signals "could not read/decode" by returning None.
                skipped += 1
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (48, 48))
            images.append(img.flatten())
            labels.append(label_folder)
    if skipped:
        warnings.warn(
            f"Skipped {skipped} file(s) under {folder!r} that OpenCV could not decode."
        )
    return np.array(images), np.array(labels)

In [126]:
# Directories of the FER-2013 train and validation splits (relative paths).
train_folder = '../../../../Data/fer2013/train'
test_folder = '../../../../Data/fer2013/validation'
# Load both splits as (flattened images, labels); the validation split is used as the test set.
X_train, y_train = load_images_from_folder_fer_2013(train_folder)
X_test, y_test = load_images_from_folder_fer_2013(test_folder)

In [127]:
import numpy as np

def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two equally shaped arrays.

    Args:
        a: Array-like of numbers.
        b: Array-like of numbers, broadcast-compatible with ``a``.

    Returns:
        The distance as a NumPy float64 scalar.

    Notes:
        Both inputs are converted to float64 before subtracting. Without
        this, unsigned integer inputs (such as raw uint8 pixel vectors) wrap
        around on subtraction and squaring and yield a wrong distance.
    """
    diff = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))


In [128]:
def predict_with_euclidean_distance(X_train, y_train, X_test):
    """Classify each test sample with the label of its nearest training sample.

    This is a 1-nearest-neighbour classifier using Euclidean distance.

    Args:
        X_train: Array-like of training samples, one sample per row.
        y_train: Labels, indexable by the row position in ``X_train``.
        X_test: Array-like of samples to classify, one sample per row.

    Returns:
        A list with one predicted label per row of ``X_test``. When several
        training samples are equally close, the first one wins.

    Raises:
        ValueError: If ``X_train`` is empty while ``X_test`` is not.
    """
    # Work in float64 so unsigned integer features (e.g. uint8 pixels) do not
    # wrap around when subtracted and squared.
    train = np.asarray(X_train, dtype=np.float64)
    test = np.asarray(X_test, dtype=np.float64)
    # Reduce over every per-sample axis, whatever the sample dimensionality.
    feature_axes = tuple(range(1, train.ndim))

    predictions = []
    for test_sample in test:
        # Distances to all training samples at once (vectorised over rows)
        # instead of one Python-level call per training sample.
        distances = np.sqrt(np.sum((train - test_sample) ** 2, axis=feature_axes))
        # Index of the closest training sample; its label is the prediction.
        predictions.append(y_train[np.argmin(distances)])
    return predictions


In [131]:
def pca_specific_component_euclidian_distance(X_train, y_train, X_test, y_test, n_components=15, pca=None):
    """Evaluate nearest-neighbour classification in a PCA-reduced space.

    The PCA is fitted on ``X_train`` only, then both sets are projected and
    every test sample receives the label of its closest training sample
    (Euclidean distance in the projected space).

    Args:
        X_train: Training samples, one per row.
        y_train: Training labels.
        X_test: Test samples, one per row.
        y_test: Test labels.
        n_components: Number of components; used to build the PCA when
            ``pca`` is not given and always recorded in the result.
        pca: Optional unfitted transformer exposing ``fit_transform`` and
            ``transform``. Defaults to ``PCA(n_components=n_components)``.

    Returns:
        A dict with keys ``'component'`` (the ``n_components`` argument) and
        ``'test_accuracy'``.
    """
    if pca is None:
        # The default argument used to crash on None.fit_transform; fall back
        # to a PCA built from n_components.
        pca = PCA(n_components=n_components)

    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Predict on the test set and compute the accuracy.
    y_pred_test = predict_with_euclidean_distance(X_train_pca, y_train, X_test_pca)
    test_accuracy = accuracy_score(y_test, y_pred_test)

    component_accuracy = {'component':  n_components, 'test_accuracy': test_accuracy}

    return component_accuracy

def pca_specific_component(X_train, y_train, X_test, y_test, n_components=15, pca=None):
    """Evaluate a random forest trained on PCA-reduced features.

    The PCA is fitted on ``X_train`` only, both sets are projected, and a
    ``RandomForestClassifier(n_estimators=100, random_state=42)`` is trained
    on the projected training data.

    Args:
        X_train: Training samples, one per row.
        y_train: Training labels.
        X_test: Test samples, one per row.
        y_test: Test labels.
        n_components: Number of components; used to build the PCA when
            ``pca`` is not given and always recorded in the result.
        pca: Optional unfitted transformer exposing ``fit_transform`` and
            ``transform``. Defaults to ``PCA(n_components=n_components)``.

    Returns:
        A dict with keys ``'component'`` (the ``n_components`` argument),
        ``'train_accuracy'`` and ``'test_accuracy'``.
    """
    if pca is None:
        # The default argument used to crash on None.fit_transform; fall back
        # to a PCA built from n_components.
        pca = PCA(n_components=n_components)

    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Define and train the model.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_pca, y_train)

    # Accuracy on the training set.
    y_pred_train = model.predict(X_train_pca)
    train_accuracy = accuracy_score(y_train, y_pred_train)

    # Accuracy on the test set.
    y_pred_test = model.predict(X_test_pca)
    test_accuracy = accuracy_score(y_test, y_pred_test)

    component_accuracy = {'component':  n_components, 'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy}

    return component_accuracy

def find_best_pca(X_train, y_train, X_test, y_test, max_components=99, threshold=0.01, random_state=42):
    """Sweep the number of PCA components and record random-forest accuracy.

    For ``n_components = 1, 2, ...`` a PCA is fitted on ``X_train``, a random
    forest is trained on the projected data (via ``pca_specific_component``)
    and train/test accuracy is recorded. Once more than five results exist,
    the sweep stops early when either

    * the test accuracy did not increase over the last three steps, or
    * the last five test accuracies differ by less than ``threshold``.

    Args:
        X_train: Training samples, one per row (2-D array).
        y_train: Training labels.
        X_test: Test samples, one per row.
        y_test: Test labels.
        max_components: Largest component count to try (default 99, the
            previously hard-coded upper bound). It is capped at
            ``min(n_samples, n_features)`` of ``X_train``, the most PCA can
            extract.
        threshold: Plateau width for the second early-stopping rule.
        random_state: Seed passed to every PCA so the sweep is reproducible
            when a randomized SVD solver is selected.

    Returns:
        A list of dicts with keys ``'component'``, ``'train_accuracy'`` and
        ``'test_accuracy'``, one per component count that was evaluated.

    Notes:
        Early stopping and later model selection both look at *test*
        accuracy, so the accuracy of the chosen configuration is an
        optimistic estimate.
    """
    upper = min(max_components, *np.shape(X_train))
    component_accuracy = []

    for n_components in tqdm(range(1, upper + 1)):
        pca = PCA(n_components=n_components, random_state=random_state)
        # Reuse the single-configuration evaluator instead of duplicating it.
        component_accuracy.append(
            pca_specific_component(X_train, y_train, X_test, y_test,
                                   n_components=n_components, pca=pca)
        )

        if len(component_accuracy) > 5:
            test_scores = [entry['test_accuracy'] for entry in component_accuracy]

            # Early stopping: each of the last three steps failed to improve
            # on the step before it (non-increasing over four results).
            last_four = test_scores[-4:]
            if all(cur <= prev for prev, cur in zip(last_four, last_four[1:])):
                print("Erken durdurma: Son 3 test doğruluğunda azalma trendi.")
                break

            # Early stopping: the last five results lie within `threshold`.
            recent_accuracies = test_scores[-5:]
            if max(recent_accuracies) - min(recent_accuracies) < threshold:
                print(f"Erken durdurma: Son 5 test doğruluğundaki fark {threshold}'den az.")
                break

    return component_accuracy

def best_accuracy_component(component_accuracy):
    """Return the result record with the highest test accuracy.

    Args:
        component_accuracy: Iterable of dicts that each contain a
            ``'test_accuracy'`` entry.

    Returns:
        The dict whose ``'test_accuracy'`` is largest; on ties the first
        such dict in iteration order.

    Raises:
        ValueError: If ``component_accuracy`` is empty.
    """
    def _test_accuracy(entry):
        return entry['test_accuracy']

    return max(component_accuracy, key=_test_accuracy)

In [137]:
from sklearn.model_selection import ParameterGrid

def pca_with_parameters(X_train, y_train, X_test, y_test, params):
    """Grid-search PCA settings, scoring each with a random forest.

    For every combination in ``ParameterGrid(params)`` a PCA is fitted on
    ``X_train``, a ``RandomForestClassifier(n_estimators=100,
    random_state=42)`` is trained on the projected data and its accuracy on
    the projected ``X_test`` is printed.

    Args:
        X_train: Training samples, one per row.
        y_train: Training labels.
        X_test: Test samples, one per row.
        y_test: Test labels.
        params: Grid definition; every combination must provide the keys
            ``'n_components'``, ``'svd_solver'``, ``'iterated_power'`` and
            ``'random_state'``.

    Returns:
        ``(best_params, best_accuracy)``: the first combination reaching the
        highest test accuracy and that accuracy. ``best_params`` stays
        ``None`` if no combination scores above 0.
    """
    # Materialise the grid up front so tqdm knows the total.
    candidates = list(ParameterGrid(params))
    best_accuracy, best_params = 0, None

    for param in tqdm(candidates):
        reducer = PCA(
            n_components=param['n_components'],
            svd_solver=param['svd_solver'],
            iterated_power=param['iterated_power'],
            random_state=param['random_state'],
        )
        train_features = reducer.fit_transform(X_train)
        test_features = reducer.transform(X_test)

        forest = RandomForestClassifier(n_estimators=100, random_state=42)
        forest.fit(train_features, y_train)
        accuracy = accuracy_score(y_test, forest.predict(test_features))

        # Strict comparison: the earliest best combination is kept on ties.
        if accuracy > best_accuracy:
            best_accuracy, best_params = accuracy, param

        print(f"Params: {param} -- Accuracy: {accuracy}")

    print(f"En iyi parametreler: {best_params} ile elde edilen en yüksek doğruluk: {best_accuracy}")
    return best_params, best_accuracy

# Try different combinations of PCA parameters.
# NOTE(review): per the scikit-learn PCA docs, iterated_power only applies to the
# 'randomized' solver, so the 'full' and 'arpack' rows repeat the same fit for every
# iterated_power value (the logged accuracies for those rows are identical) — confirm
# and consider trimming the grid.
params = {
    'n_components': [15, 20],
    'svd_solver': ['auto', 'full', 'arpack', 'randomized'],
    'iterated_power': ['auto', 3, 7],
    'random_state': [42]
}

# Run the grid search.
best_params, best_accuracy = pca_with_parameters(X_train, y_train, X_test, y_test, params)


  4%|▍         | 1/24 [00:15<05:57, 15.55s/it]

Params: {'iterated_power': 'auto', 'n_components': 15, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.43263515425983584


  8%|▊         | 2/24 [00:41<08:02, 21.92s/it]

Params: {'iterated_power': 'auto', 'n_components': 15, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.4336258137560147


 12%|█▎        | 3/24 [01:01<07:16, 20.77s/it]

Params: {'iterated_power': 'auto', 'n_components': 15, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.4336258137560147


 17%|█▋        | 4/24 [01:16<06:13, 18.68s/it]

Params: {'iterated_power': 'auto', 'n_components': 15, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.43263515425983584


 21%|██        | 5/24 [01:34<05:50, 18.46s/it]

Params: {'iterated_power': 'auto', 'n_components': 20, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.4354656099632041


 25%|██▌       | 6/24 [02:03<06:35, 21.96s/it]

Params: {'iterated_power': 'auto', 'n_components': 20, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.43631474667421455


 29%|██▉       | 7/24 [02:26<06:19, 22.35s/it]

Params: {'iterated_power': 'auto', 'n_components': 20, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.43631474667421455


 33%|███▎      | 8/24 [02:45<05:37, 21.11s/it]

Params: {'iterated_power': 'auto', 'n_components': 20, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.4354656099632041


 38%|███▊      | 9/24 [02:57<04:33, 18.22s/it]

Params: {'iterated_power': 3, 'n_components': 15, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.43164449476365696


 42%|████▏     | 10/24 [03:23<04:49, 20.66s/it]

Params: {'iterated_power': 3, 'n_components': 15, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.4336258137560147


 46%|████▌     | 11/24 [03:42<04:21, 20.09s/it]

Params: {'iterated_power': 3, 'n_components': 15, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.4336258137560147


 50%|█████     | 12/24 [03:53<03:29, 17.46s/it]

Params: {'iterated_power': 3, 'n_components': 15, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.43164449476365696


 54%|█████▍    | 13/24 [04:07<03:01, 16.52s/it]

Params: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.4377299745258987


 58%|█████▊    | 14/24 [04:33<03:14, 19.40s/it]

Params: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.43631474667421455


 62%|██████▎   | 15/24 [04:59<03:11, 21.26s/it]

Params: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.43631474667421455


 67%|██████▋   | 16/24 [05:13<02:33, 19.13s/it]

Params: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.4377299745258987


 71%|███████   | 17/24 [05:30<02:08, 18.30s/it]

Params: {'iterated_power': 7, 'n_components': 15, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.43263515425983584


 75%|███████▌  | 18/24 [05:57<02:07, 21.20s/it]

Params: {'iterated_power': 7, 'n_components': 15, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.4336258137560147


 79%|███████▉  | 19/24 [06:16<01:42, 20.54s/it]

Params: {'iterated_power': 7, 'n_components': 15, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.4336258137560147


 83%|████████▎ | 20/24 [06:33<01:17, 19.31s/it]

Params: {'iterated_power': 7, 'n_components': 15, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.43263515425983584


 88%|████████▊ | 21/24 [06:51<00:56, 18.83s/it]

Params: {'iterated_power': 7, 'n_components': 20, 'random_state': 42, 'svd_solver': 'auto'} -- Accuracy: 0.4354656099632041


 92%|█████████▏| 22/24 [07:20<00:43, 21.92s/it]

Params: {'iterated_power': 7, 'n_components': 20, 'random_state': 42, 'svd_solver': 'full'} -- Accuracy: 0.43631474667421455


 96%|█████████▌| 23/24 [07:43<00:22, 22.41s/it]

Params: {'iterated_power': 7, 'n_components': 20, 'random_state': 42, 'svd_solver': 'arpack'} -- Accuracy: 0.43631474667421455


100%|██████████| 24/24 [08:01<00:00, 20.07s/it]

Params: {'iterated_power': 7, 'n_components': 20, 'random_state': 42, 'svd_solver': 'randomized'} -- Accuracy: 0.4354656099632041
En iyi parametreler: {'iterated_power': 3, 'n_components': 20, 'random_state': 42, 'svd_solver': 'auto'} ile elde edilen en yüksek doğruluk: 0.4377299745258987





In [141]:
from sklearn.decomposition import KernelPCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

def kernel_pca_with_parameters(X_train, y_train, X_test, y_test, params):
    """Grid-search Kernel PCA settings, scoring each with a random forest.

    For every combination in ``ParameterGrid(params)`` a ``KernelPCA`` is
    fitted on ``X_train``, a ``RandomForestClassifier(n_estimators=100,
    random_state=42)`` is trained on the projected data and its accuracy on
    the projected ``X_test`` is printed.

    Args:
        X_train: Training samples, one per row.
        y_train: Training labels.
        X_test: Test samples, one per row.
        y_test: Test labels.
        params: Grid definition; every combination must provide the keys
            ``'kernel'``, ``'n_components'``, ``'gamma'``, ``'degree'``,
            ``'coef0'`` and ``'random_state'``.

    Returns:
        ``(best_params, best_accuracy)``: the first combination reaching the
        highest test accuracy and that accuracy. ``best_params`` stays
        ``None`` if no combination scores above 0.
    """
    # Materialise the grid up front so tqdm knows the total.
    candidates = list(ParameterGrid(params))
    best_accuracy, best_params = 0, None

    for param in tqdm(candidates):
        reducer = KernelPCA(
            kernel=param['kernel'],
            n_components=param['n_components'],
            gamma=param['gamma'],
            degree=param['degree'],
            coef0=param['coef0'],
            random_state=param['random_state'],
        )

        # Fit Kernel PCA on the training set, then project the test set.
        train_features = reducer.fit_transform(X_train)
        test_features = reducer.transform(X_test)

        # Train the classifier on the projected training data.
        forest = RandomForestClassifier(n_estimators=100, random_state=42)
        forest.fit(train_features, y_train)

        # Accuracy on the projected test set.
        accuracy = accuracy_score(y_test, forest.predict(test_features))

        # Strict comparison: the earliest best combination is kept on ties.
        if accuracy > best_accuracy:
            best_accuracy, best_params = accuracy, param

        print(f"Params: {param} -- Accuracy: {accuracy}")

    print(f"En iyi parametreler: {best_params} ile elde edilen en yüksek doğruluk: {best_accuracy}")
    return best_params, best_accuracy

# Try different combinations of Kernel PCA parameters.
# NOTE(review): degree is only used by the 'poly' kernel and coef0 only by 'poly' and
# 'sigmoid', so the 'rbf' entries of this grid repeat the same fit several times, and
# the one logged iteration took about 13 minutes — consider a per-kernel grid.
params = {
    'n_components': [20],  # Number of components
    'kernel': ['rbf', 'poly', 'sigmoid'],  # Kernel function to use
    'gamma': [0.1],  # Kernel coefficient (for rbf, poly and sigmoid)
    'degree': [3, 5],  # Degree for the 'poly' kernel
    'coef0': [0, 1],  # Independent term for the 'poly' and 'sigmoid' kernels
    'random_state': [42]  # Random seed
}

best_params, best_accuracy = kernel_pca_with_parameters(X_train, y_train, X_test, y_test, params)


  8%|▊         | 1/12 [13:08<2:24:35, 788.70s/it]

Params: {'coef0': 0, 'degree': 3, 'gamma': 0.1, 'kernel': 'rbf', 'n_components': 20, 'random_state': 42} -- Accuracy: 0.2676195867534673


In [None]:
def plot_accuracy_by_components(results):
    """Plot train and test accuracy against the number of PCA components.

    Args:
        results: Non-empty sequence of dicts with the keys ``'component'``,
            ``'train_accuracy'`` and ``'test_accuracy'``, ordered by
            component count (the first and last entries define the range of
            the x ticks, which are placed every 3 components).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Split the records into one series per plotted quantity.
    components, train_accuracies, test_accuracies = [], [], []
    for result in results:
        components.append(result['component'])
        train_accuracies.append(result['train_accuracy'])
        test_accuracies.append(result['test_accuracy'])

    # One line per accuracy series on a single axes.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(components, train_accuracies, label='Eğitim Doğruluğu', marker='o')
    ax.plot(components, test_accuracies, label='Test Doğruluğu', marker='x')

    ax.set_xticks(np.arange(components[0], components[-1]+1, 3))
    ax.set_title('PCA Bileşen Sayısına Göre Eğitim ve Test Doğruluğu')
    ax.set_xlabel('PCA Bileşen Sayısı')
    ax.set_ylabel('Doğruluk Oranı')
    ax.legend()

    # Show the figure.
    ax.grid(True)
    plt.show()

In [None]:
# NOTE(review): this cell reloads the same train/validation folders that an earlier
# cell of this notebook already loaded into X_train/y_train/X_test/y_test; it is only
# needed when starting from here on a fresh kernel.
train_folder = '../../../../Data/fer2013/train'
test_folder = '../../../../Data/fer2013/validation'
X_train, y_train = load_images_from_folder_fer_2013(train_folder)
X_test, y_test = load_images_from_folder_fer_2013(test_folder)

In [None]:
# Sweep the PCA component count; yields one accuracy record per count evaluated.
component_accuracy = find_best_pca(X_train, y_train, X_test, y_test)

In [None]:
# Visualise the sweep, then report the record with the highest test accuracy.
plot_accuracy_by_components(component_accuracy)
print(f"Best accuracy component: {best_accuracy_component(component_accuracy)}")

In [None]:
import joblib

def export_pca(pca, filename):
    """Persist a PCA object to ``filename`` with joblib and print a confirmation.

    Args:
        pca: The object to serialise (a fitted PCA in this notebook).
        filename: Destination path handed to ``joblib.dump``.
    """
    joblib.dump(pca, filename)
    confirmation = f"PCA modeli '{filename}' olarak kaydedildi."
    print(confirmation)
def export_classifier(classifier, filename):
    """Persist a classifier to ``filename`` with joblib and print a confirmation.

    Args:
        classifier: The object to serialise (a fitted model in this notebook).
        filename: Destination path handed to ``joblib.dump``.
    """
    joblib.dump(classifier, filename)
    confirmation = f"Model '{filename}' olarak kaydedildi."
    print(confirmation)

In [None]:
# Refit PCA with the component count that scored the highest test accuracy in the sweep.
# NOTE(review): this PCA is created without random_state, so with a randomized solver
# the exported projection may differ slightly from the one evaluated in the sweep.
component_count = best_accuracy_component(component_accuracy)['component']
pca = PCA(n_components=component_count)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train the classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_pca, y_train)

# Save both artefacts; the component count is embedded in the file names.
export_pca(pca, f'pca_model{component_count}.pkl')
export_classifier(model, f'classifier_model{component_count}.pkl')
