In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import matplotlib.pyplot as plt

import os

from tqdm import tqdm
from sklearn.decomposition import PCA
import cv2
from sklearn.ensemble import RandomForestClassifier

In [None]:
def load_images_from_folder_fer_2013(folder):
    """Load a folder-per-label image dataset as flattened 48x48 grayscale rows.

    Every sub-directory of ``folder`` is treated as one class: its name is used
    as the label of each image it contains. Directory entries are visited in
    sorted order so the returned sample order is the same on every run
    (``os.listdir`` returns entries in arbitrary order).

    Parameters
    ----------
    folder : str
        Root directory holding one sub-directory per label.

    Returns
    -------
    images : numpy.ndarray
        One row per readable image; each row is the image converted to
        grayscale, resized to 48x48 and flattened.
    labels : numpy.ndarray
        Sub-directory name for each row of ``images``.

    Notes
    -----
    ``cv2.imread`` returns ``None`` for entries it cannot decode. Those entries
    are skipped instead of crashing the colour conversion, and one warning
    reports how many were skipped.
    """
    import warnings

    images = []
    labels = []
    skipped = 0
    for label_folder in sorted(os.listdir(folder)):
        label_folder_path = os.path.join(folder, label_folder)
        if not os.path.isdir(label_folder_path):
            continue
        for image_file in sorted(os.listdir(label_folder_path)):
            image_path = os.path.join(label_folder_path, image_file)
            img = cv2.imread(image_path)
            if img is None:
                # Not a decodable image (stray file, nested folder, corrupt data).
                skipped += 1
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (48, 48))
            images.append(img.flatten())
            labels.append(label_folder)
    if skipped:
        warnings.warn(f"Skipped {skipped} unreadable file(s) under '{folder}'.")
    return np.array(images), np.array(labels)

In [None]:
# Locations of the FER-2013 training and validation splits, relative to this notebook.
train_folder = '../../../../Data/fer2013/train'
test_folder = '../../../../Data/fer2013/validation'

# Load the training split first, then the validation split; each call returns
# (flattened pixel rows, folder-name labels).
(X_train, y_train), (X_test, y_test) = (
    load_images_from_folder_fer_2013(split_dir)
    for split_dir in (train_folder, test_folder)
)

In [None]:
# Path (relative to this notebook) of the training folder of the separate "images" dataset.
# NOTE(review): no visible cell references this variable — confirm it is still needed.
images_directory = '../../../../Data/images/train'

In [None]:
def prepare_data_and_apply_pca(directory, n_components=10):
    """Load a folder-per-label image dataset and project it with PCA.

    Every sub-directory of ``directory`` is one class. Each readable image is
    resized to 48x48, converted to grayscale and flattened; the stacked rows
    are then reduced with a freshly fitted ``PCA``. Directory entries are
    visited in sorted order so the row order is the same on every run.

    Parameters
    ----------
    directory : str
        Root directory holding one sub-directory per label.
    n_components : int, default 10
        Number of principal components passed to ``PCA``.

    Returns
    -------
    features_pca : numpy.ndarray
        Output of ``PCA.fit_transform`` on the flattened images.
    labels : list of str
        Sub-directory name for each row of ``features_pca``.

    Raises
    ------
    ValueError
        If no readable image is found under ``directory``.
    """
    import warnings

    all_images = []
    labels = []
    skipped = 0
    for emotion_folder in sorted(os.listdir(directory)):
        emotion_folder_path = os.path.join(directory, emotion_folder)
        if not os.path.isdir(emotion_folder_path):
            continue
        for image_file in sorted(os.listdir(emotion_folder_path)):
            image_path = os.path.join(emotion_folder_path, image_file)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals an undecodable entry with None; skip it
                # rather than let cv2.resize fail on it.
                skipped += 1
                continue
            img_gray = cv2.cvtColor(cv2.resize(img, (48, 48)), cv2.COLOR_BGR2GRAY).flatten()
            all_images.append(img_gray)
            labels.append(emotion_folder)

    if skipped:
        warnings.warn(f"Skipped {skipped} unreadable file(s) under '{directory}'.")
    if not all_images:
        raise ValueError(f"No readable images found under '{directory}'.")

    all_images = np.array(all_images)

    # Fit the PCA model and transform the dataset in one step.
    pca = PCA(n_components=n_components)
    features_pca = pca.fit_transform(all_images)

    return features_pca, labels


In [None]:
import numpy as np

def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    Both inputs are converted to float64 before subtracting. Without that,
    unsigned integer arrays such as uint8 pixel rows wrap around modulo 256 in
    both the subtraction and the squaring, which yields wrong distances.

    Parameters
    ----------
    a, b : array_like
        Broadcast-compatible numeric inputs.

    Returns
    -------
    numpy.float64
        Square root of the summed squared differences.
    """
    diff = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))


In [None]:
def predict_with_euclidean_distance(X_train, y_train, X_test):
    """Predict labels with a 1-nearest-neighbour rule under Euclidean distance.

    For every test sample the distances to all training samples are computed
    in one vectorised NumPy expression, and the label of the closest training
    sample is returned. Ties go to the earliest training sample, because
    ``np.argmin`` returns the first minimum.

    Arithmetic is done in float64 so that unsigned integer inputs (for example
    uint8 pixel rows) do not wrap around when subtracted and squared.

    Parameters
    ----------
    X_train : array_like
        Training samples, one per row.
    y_train : sequence
        Label of each training sample, indexable by integer position.
    X_test : iterable of array_like
        Samples to classify, each shaped like one training sample.

    Returns
    -------
    list
        One element of ``y_train`` per test sample, in test order.

    Raises
    ------
    ValueError
        If there is at least one test sample but ``X_train`` is empty.
    """
    train = np.asarray(X_train, dtype=np.float64)
    # Sum over every axis except the first, so samples of any rank work.
    feature_axes = tuple(range(1, train.ndim))

    predictions = []
    for test_sample in X_test:
        if train.shape[0] == 0:
            raise ValueError("X_train is empty; cannot find a nearest neighbour.")
        # Broadcasting subtracts this test sample from every training row at once.
        diff = train - np.asarray(test_sample, dtype=np.float64)
        distances = np.sqrt(np.sum(diff ** 2, axis=feature_axes))
        # Use the label of the training sample with the smallest distance.
        predictions.append(y_train[np.argmin(distances)])
    return predictions


In [None]:
def find_best_pca(X_train, y_train, X_test, y_test, max_components=99, threshold=0.01, random_state=42):
    """Sweep PCA component counts and score a random forest for each one.

    For ``n_components = 1, 2, ...`` a PCA is fitted on ``X_train``, both sets
    are projected, a 100-tree ``RandomForestClassifier`` is trained on the
    projected training data, and train/test accuracy are recorded.

    The sweep stops early once more than five results exist and either

    * each of the last three results has a test accuracy less than or equal to
      the result just before it, or
    * the spread (max - min) of the last five test accuracies is below
      ``threshold``.

    Early stopping is driven by accuracy on ``X_test``.

    Parameters
    ----------
    X_train, X_test : array_like
        Feature matrices, one sample per row.
    y_train, y_test : array_like
        Labels for the corresponding rows.
    max_components : int, default 99
        Largest component count to try. The sweep is additionally capped at
        the smallest dimension of ``X_train``, since PCA cannot extract more
        components than that.
    threshold : float, default 0.01
        Minimum spread of the last five test accuracies needed to continue.
    random_state : int, default 42
        Seed passed to both ``PCA`` and ``RandomForestClassifier`` so repeated
        runs give the same results.

    Returns
    -------
    list of dict
        One dict per evaluated component count, with the keys ``'component'``,
        ``'train_accuracy'`` and ``'test_accuracy'``.
    """
    # Never ask PCA for more components than the data can provide.
    upper = min([max_components, *np.shape(X_train)])
    component_accuracy = []

    for n_components in tqdm(range(1, upper + 1)):
        pca = PCA(n_components=n_components, random_state=random_state)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)

        # Define and train the model on the projected training data.
        model = RandomForestClassifier(n_estimators=100, random_state=random_state)
        model.fit(X_train_pca, y_train)

        # Accuracy on the training set.
        y_pred_train = model.predict(X_train_pca)
        train_accuracy = accuracy_score(y_train, y_pred_train)

        # Accuracy on the test set.
        y_pred_test = model.predict(X_test_pca)
        test_accuracy = accuracy_score(y_test, y_pred_test)

        component_accuracy.append({'component':  n_components, 'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy})

        if len(component_accuracy) > 5:
            # Early stopping: three consecutive steps without an improvement.
            if all(current['test_accuracy'] <= prev['test_accuracy'] for current, prev in zip(component_accuracy[-3:], component_accuracy[-4:-1])):
                print("Erken durdurma: Son 3 test doğruluğunda azalma trendi.")
                break
            # Early stopping: the last five test accuracies have plateaued.
            recent_accuracies = [acc['test_accuracy'] for acc in component_accuracy[-5:]]
            if max(recent_accuracies) - min(recent_accuracies) < threshold:
                print(f"Erken durdurma: Son 5 test doğruluğundaki fark {threshold}'den az.")
                break

    return component_accuracy

def best_accuracy_component(component_accuracy):
    """Return the result dict with the highest ``'test_accuracy'``.

    ``component_accuracy`` is an iterable of dicts that each carry a
    ``'test_accuracy'`` key. When several entries share the top score the
    earliest one is returned; an empty iterable raises ``ValueError``.
    """
    def test_score(entry):
        return entry['test_accuracy']

    return max(component_accuracy, key=test_score)

In [None]:
def plot_accuracy_by_components(results):
    """Plot train and test accuracy against the number of PCA components.

    ``results`` is a sequence of dicts carrying the keys ``'component'``,
    ``'train_accuracy'`` and ``'test_accuracy'``. Both accuracy series are
    drawn on one 10x6 figure with an x tick every third component count, and
    the figure is displayed with ``plt.show()``. Nothing is returned.
    """
    # Pull each field out of the result dicts into its own list.
    series = {
        key: [entry[key] for entry in results]
        for key in ('component', 'train_accuracy', 'test_accuracy')
    }

    plt.figure(figsize=(10, 6))
    # One line per accuracy series, training first.
    for key, legend_label, marker_symbol in (
        ('train_accuracy', 'Eğitim Doğruluğu', 'o'),
        ('test_accuracy', 'Test Doğruluğu', 'x'),
    ):
        plt.plot(series['component'], series[key], label=legend_label, marker=marker_symbol)

    # Ticks run from the first to the last evaluated component count in steps of 3.
    first_count = series['component'][0]
    last_count = series['component'][-1]
    plt.xticks(np.arange(first_count, last_count + 1, 3))

    plt.title('PCA Bileşen Sayısına Göre Eğitim ve Test Doğruluğu')
    plt.xlabel('PCA Bileşen Sayısı')
    plt.ylabel('Doğruluk Oranı')
    plt.legend()

    # Show the finished figure with a background grid.
    plt.grid(True)
    plt.show()

In [None]:
# Load the FER-2013 training and validation splits as (flattened pixel rows, folder-name labels).
# NOTE(review): these four statements are identical to the loading cell that directly follows the
# definition of load_images_from_folder_fer_2013, so a top-to-bottom run reads the dataset from
# disk twice — confirm whether this cell is still needed.
train_folder = '../../../../Data/fer2013/train'
test_folder = '../../../../Data/fer2013/validation'
X_train, y_train = load_images_from_folder_fer_2013(train_folder)
X_test, y_test = load_images_from_folder_fer_2013(test_folder)

In [None]:
# Sweep PCA component counts on the loaded splits; the result is a list of dicts holding the
# component count with its train and test accuracy.
component_accuracy = find_best_pca(X_train, y_train, X_test, y_test)

In [None]:
# Plot both accuracy curves, then print the sweep entry with the highest test accuracy.
plot_accuracy_by_components(component_accuracy)
print(f"Best accuracy component: {best_accuracy_component(component_accuracy)}")

In [None]:
import joblib

def export_pca(pca, filename):
    """Write ``pca`` to ``filename`` with ``joblib.dump`` and print a confirmation."""
    joblib.dump(pca, filename)
    confirmation = f"PCA modeli '{filename}' olarak kaydedildi."
    print(confirmation)
def export_classifier(classifier, filename):
    """Write ``classifier`` to ``filename`` with ``joblib.dump`` and print a confirmation."""
    joblib.dump(classifier, filename)
    confirmation = f"Model '{filename}' olarak kaydedildi."
    print(confirmation)

In [None]:
# Refit PCA with the component count that scored the highest test accuracy in the sweep.
component_count = best_accuracy_component(component_accuracy)['component']
# A fixed random_state keeps the fitted projection (and so the exported files) reproducible
# when scikit-learn selects a randomized SVD solver; it matches the classifier's seed below.
pca = PCA(n_components=component_count, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train the classifier on the projected training data.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_pca, y_train)

# Persist both fitted objects; the filenames carry the chosen component count.
export_pca(pca, f'pca_model{component_count}.pkl')
export_classifier(model, f'classifier_model{component_count}.pkl')
