In [None]:
import numpy as np
from data_loading import load_data
from KNN import KNN, cross_validate_knn
from skimage.feature import hog
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, KFold

# Dataset locations, given relative to the notebook's working directory.
# Adjust both paths if your copy of the dataset lives somewhere else.
csv_file = r"./archive/data.csv"  # CSV handed to load_data (presumably the image/label index — confirm in data_loading)
base_folder = r"./archive/dataset"  # folder handed to load_data (presumably the image root — confirm in data_loading)

# Load images and labels via the project-local data_loading.load_data helper.
# NOTE(review): X is later sliced as images[..., :3], so it is assumed to be a
# NumPy array with the colour channels on the last axis — confirm.
X, y = load_data(csv_file, base_folder)

### Feature Engineering ###

# Step 1: Convert RGB images to grayscale
def rgb_to_grayscale(images):
    """Collapse the colour channels of `images` into one luminance channel.

    The first three entries along the last axis are weighted by
    (0.2989, 0.5870, 0.1140) and summed; any further entries on that axis
    (for example an alpha channel) are ignored.

    Parameters
    ----------
    images : numpy.ndarray
        Array whose last axis holds at least the three colour channels.

    Returns
    -------
    numpy.ndarray
        Weighted channel sum; same shape as `images` minus the last axis.
    """
    luma_weights = [0.2989, 0.5870, 0.1140]
    colour_channels = images[..., :3]
    return np.dot(colour_channels, luma_weights)

# Grayscale version of every loaded image (colour axis removed); this is the
# array that HOG extraction is run on.
X_gray = rgb_to_grayscale(X)

# Step 2: Extract HOG features
def extract_hog_features(images):
    """Compute one HOG descriptor per image and stack them into an array.

    Every image is passed to `skimage.feature.hog` with 8x8-pixel cells,
    2x2-cell blocks and `feature_vector=True`.

    Parameters
    ----------
    images : iterable
        Images to describe, visited in order.

    Returns
    -------
    numpy.ndarray
        `np.array` built from the per-image descriptors, in input order.
        An empty input yields an empty array.
    """
    descriptors = [
        hog(
            image,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            feature_vector=True,
        )
        for image in images
    ]
    return np.array(descriptors)

# One HOG descriptor per grayscale image, stacked into a single array.
X_hog = extract_hog_features(X_gray)

# Step 3: Apply PCA for dimensionality reduction
def apply_pca(features, n_components=100, fit_on=None, random_state=None):
    """Project `features` onto their leading principal components.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Rows to transform.
    n_components : int, default 100
        Number of principal components to keep.
    fit_on : array-like or None, default None
        Rows used to learn the projection. Pass the training rows here so
        that held-out rows do not influence the components. When None, the
        projection is learned from `features` itself (the original behaviour).
    random_state : int or None, default None
        Forwarded to `PCA`; set it to make the result repeatable when PCA
        picks a randomized solver.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_components)
        The transformed `features`.
    """
    pca = PCA(n_components=n_components, random_state=random_state)
    if fit_on is None:
        return pca.fit_transform(features)
    pca.fit(fit_on)
    return pca.transform(features)


# Step 4: Normalize the features
def normalize_features(features, fit_on=None):
    """Standardise every column of `features` with a `StandardScaler`.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Rows to scale.
    fit_on : array-like or None, default None
        Rows used to estimate the per-column mean and standard deviation.
        Pass the training rows here so that held-out rows do not influence
        the statistics. When None, they are estimated from `features` itself
        (the original behaviour).

    Returns
    -------
    numpy.ndarray
        The scaled `features`.
    """
    scaler = StandardScaler()
    if fit_on is None:
        return scaler.fit_transform(features)
    scaler.fit(fit_on)
    return scaler.transform(features)


### Prepare labels ###
# Encode labels to numerical values
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# The splits below are index-based, so features and labels must line up 1:1.
assert len(X_hog) == len(y_encoded), "features and labels differ in length"

# Decide the train / validation / test membership BEFORE fitting PCA and the
# scaler. Fitting them on every row would let test samples shape the features
# the model is trained and evaluated on (data leakage) and inflate the score.
# The split parameters (test_size=0.2, random_state=42) are unchanged.
sample_idx = np.arange(len(y_encoded))
train_full_idx, test_idx = train_test_split(sample_idx, test_size=0.2, random_state=42)
train_idx, val_idx = train_test_split(train_full_idx, test_size=0.2, random_state=42)

# Learn the PCA projection and the scaling statistics from the training
# portion only, then apply them to every row. random_state=42 keeps the PCA
# output repeatable across runs.
X_pca = apply_pca(X_hog, n_components=100, fit_on=X_hog[train_full_idx], random_state=42)
X_normalized = normalize_features(X_pca, fit_on=X_pca[train_full_idx])

# Split into training, validation, and test sets
# NOTE: the validation rows are part of train_full, so they did contribute to
# the PCA/scaler fit. If X_val is used for model selection, refit both
# transformers on X_hog[train_idx] only.
X_train_full, X_test = X_normalized[train_full_idx], X_normalized[test_idx]
y_train_full, y_test = y_encoded[train_full_idx], y_encoded[test_idx]
X_train, X_val = X_normalized[train_idx], X_normalized[val_idx]
y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]




In [None]:
# Optional sanity check (disabled): 5-fold cross-validation with k=5
#cross_validation_accuracy = cross_validate_knn(X_normalized, y_encoded, k=5, num_folds=5)

### Final Model Training and Testing ###

# Fit the final k-NN classifier (k=5) on the combined train + validation rows
final_knn = KNN(k=5)
final_knn.fit(X_train_full, y_train_full)

# Score on the held-out test rows: accuracy is the fraction of exact label matches
y_test_pred = final_knn.predict(X_test)
is_correct = y_test == y_test_pred
final_accuracy = np.mean(is_correct)
print(f"Final Test Accuracy: {final_accuracy}")

Final Test Accuracy: 0.5371929824561403
