In [1]:
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,precision_score


train_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images/train_ds'
test_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images/test_ds'
val_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images/val_ds'

In [2]:
def load_images_from_folder(folder):
    images = []
    labels = []
    for person_folder in os.listdir(folder):
        person_path = os.path.join(folder, person_folder)
        if not os.path.isdir(person_path):  # Skip non-directory files like .DS_Store
            continue
        for filename in os.listdir(person_path):
            img_path = os.path.join(person_path, filename)
            if filename.endswith('.jpg') or filename.endswith('.png'):
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Read image as grayscale
                if img is not None:
                    images.append(img)  # Flatten image to 1D array
                    labels.append(person_folder)  # Use folder name as label
    return np.array(images), np.array(labels)


In [3]:
# Load images and labels from respective folders

train_images, train_labels = load_images_from_folder(train_folder)
val_images, val_labels = load_images_from_folder(val_folder)
test_images, test_labels = load_images_from_folder(test_folder)

print(f"Loaded {len(train_images)} training images and {len(val_images)} validation images.")


Loaded 17260 training images and 2580 validation images.


In [4]:
from tqdm import tqdm
import pickle    

class TwoDPCA:
    def __init__(self,num_components):
        self.num_components=num_components
        self.eigenvectors=None
        
    def fit(self,X):
        mean_image=np.mean(X,axis=0)
        
        covariance_matrix=np.zeros((X.shape[1],X.shape[1]))
        
        for img in tqdm(X):
            diff=img-mean_image
            covariance_matrix+=np.dot(diff.T,diff)
            
            eigenvalues,eigenvectors=np.linalg.eigh(covariance_matrix)
            
            idx=np.argsort(-eigenvalues)
            
            self.eigenvectors=eigenvectors[:,idx[:self.num_components]]
            
            
    def transform(self,X):
        projected_images=[]
        
        for img in X:
            projected_img=np.dot(img,self.eigenvectors)
            projected_images.append(projected_img.flatten())
            
        return np.array(projected_images)
    
    
num_components = 50
two_d_pca = TwoDPCA(num_components)

two_d_pca.fit(train_images)

train_images_2dpca = two_d_pca.transform(train_images)
val_images_2dpca = two_d_pca.transform(val_images)
test_images_2dpca = two_d_pca.transform(test_images)

with open('two_d_pca_model.pkl', 'wb') as f:
        pickle.dump(two_d_pca, f)
with open('transformed_data.pkl', 'wb') as f:
        pickle.dump((train_images_2dpca, val_images_2dpca, test_images_2dpca), f)
print("Saved PCA model and transformed data.")

print(f"train_images_2dpca shape: {train_images_2dpca.shape}")
print(f"val_images_2dpca shape: {val_images_2dpca.shape}")
print(f"test_images_2dpca shape: {test_images_2dpca.shape}")

    

100%|████████████████████████████████████| 17260/17260 [00:27<00:00, 625.20it/s]


Saved PCA model and transformed data.
train_images_2dpca shape: (17260, 3200)
val_images_2dpca shape: (2580, 3200)
test_images_2dpca shape: (4086, 3200)


In [8]:
from sklearn.ensemble import RandomForestClassifier



In [9]:
# Instantiate and train KNN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_images_2dpca, train_labels)



# Predict on validation set
val_predictions = knn.predict(val_images_2dpca)
val_accuracy = accuracy_score(val_labels, val_predictions)
val_precision = precision_score(val_labels, val_predictions, average='macro',zero_division=1)

print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation Precision: {val_precision}")

# Optional: Print more details for debugging
#print("Validation labels:", val_labels[:10])
#print("Validation predictions:", val_predictions[:10])

test_images_2dpca = two_d_pca.transform(test_images)
test_predictions = knn.predict(test_images_2dpca)

test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average='macro',zero_division=1)

print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")





Validation Accuracy: 0.9670542635658915
Validation Precision: 0.431583981158723
Test Accuracy: 0.9725893294175233
Test Precision: 0.8084737360278212


In [1]:
rf_classifier = RandomForestClassifier(n_estimators=100)
n_estimators = rf_classifier.n_estimators

rf_classifier.fit(train_images_2dpca, train_labels)   

batch_size = 10  # Adjust as needed
progress_bar = tqdm(total=n_estimators, desc="Training Random Forest", position=0, leave=True)

for i in range(0, n_estimators, batch_size):
    # Ensure all samples in the batch are used for training
    end_idx = min(i + batch_size, len(train_labels))
    rf_classifier.fit(train_images_2dpca[i:end_idx], train_labels[i:end_idx])
    progress_bar.update(end_idx - i)

# Close tqdm progress bar after training completes
progress_bar.close()

val_predictions = rf_classifier.predict(val_images_2dpca)
accuracy = accuracy_score(val_labels, val_predictions)
    
print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation Precision: {val_precision}")

test_images_2dpca = two_d_pca.transform(test_images)
test_predictions = rf_classifier.predict(test_images_2dpca)
test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average='macro',zero_division=1)

print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")


NameError: name 'RandomForestClassifier' is not defined