In [1]:
import cv2
import os
import zipfile
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np


In [2]:
!pip3 install scikit-learn



In [3]:
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,precision_score


train_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images_noesrgan/train_ds'
test_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images_noesrgan/test_ds'
val_folder='/Users/apoorvagayatrik/PerspectAIProj/processed_images_noesrgan/val_ds'

In [4]:
import os
import cv2
import numpy as np

def load_images_from_folder(folder):
    final_labels = []
    final_images = []

    for person_folder in os.listdir(folder):
        person_path = os.path.join(folder, person_folder)
        if not os.path.isdir(person_path):
            continue

        images = []

        # Load images from the person's folder
        for filename in os.listdir(person_path):
            img_path = os.path.join(person_path, filename)
            if filename.endswith('.jpg') or filename.endswith('.png'):
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    images.append(img)

        # Create image pairs
        for i in range(len(images)):
            for j in range(i + 1, len(images)):
                # Append image pair and corresponding label
                final_images.append((images[i], images[j]))
                final_labels.append(person_folder)  # Use folder name as label

    return np.array(final_images), np.array(final_labels)


In [5]:
# Load images and labels from respective folders

train_images, train_labels = load_images_from_folder(train_folder)
val_images, val_labels = load_images_from_folder(val_folder)
test_images, test_labels = load_images_from_folder(test_folder)

print(f"Loaded {len(train_images)} training images and {len(val_images)} validation images.")


Loaded 54917 training images and 100411 validation images.


In [6]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import pickle

class TwoDPCA:
    def __init__(self, num_components, batch_size=32):
        self.num_components = num_components
        self.eigenvectors_rows = None
        self.eigenvectors_cols = None
        self.batch_size = batch_size

    def fit(self, X):
        mean_image = np.mean(X, axis=0)
        rows, cols = X[0].shape

        covariance_matrix_rows = np.zeros((rows, rows))
        covariance_matrix_cols = np.zeros((cols, cols))

        for start in tqdm(range(0, len(X), self.batch_size)):
            end = min(start + self.batch_size, len(X))
            batch_images = X[start:end]

            for img in batch_images:
                diff_rows = img - mean_image
                diff_cols = (img - mean_image).T
                covariance_matrix_rows += np.dot(diff_rows.T, diff_rows)
                covariance_matrix_cols += np.dot(diff_cols.T, diff_cols)

        eigenvalues_rows, eigenvectors_rows = np.linalg.eigh(covariance_matrix_rows)
        eigenvalues_cols, eigenvectors_cols = np.linalg.eigh(covariance_matrix_cols)

        idx_rows = np.argsort(-eigenvalues_rows)
        idx_cols = np.argsort(-eigenvalues_cols)

        self.eigenvectors_rows = eigenvectors_rows[:, idx_rows[:self.num_components]]
        self.eigenvectors_cols = eigenvectors_cols[:, idx_cols[:self.num_components]]

    def transform(self, X):
        projected_images = []

        for start in tqdm(range(0, len(X), self.batch_size)):
            end = min(start + self.batch_size, len(X))
            batch_images = X[start:end]

            for img in batch_images:
                projected_img = np.dot(np.dot(self.eigenvectors_rows.T, img), self.eigenvectors_cols)
                projected_images.append(projected_img.flatten())

        return np.array(projected_images)


# Example usage with batch processing
num_components = 50
batch_size = 32

# Define your train_images, val_images, and test_images here
train_images_flat = np.array([pair[0] for pair in train_images])
val_images_flat = np.array([pair[0] for pair in val_images])
test_images_flat = np.array([pair[0] for pair in test_images])

# Instantiate and fit the 2DPCA model
two_d_pca = TwoDPCA(num_components, batch_size=batch_size)
two_d_pca.fit(train_images_flat)

# Transform images using the trained 2DPCA model
train_images_2dpca = two_d_pca.transform(train_images_flat)
val_images_2dpca = two_d_pca.transform(val_images_flat)
test_images_2dpca = two_d_pca.transform(test_images_flat)

# Save the 2DPCA model and transformed data
with open('two_d_pca_model.pkl', 'wb') as f:
    pickle.dump(two_d_pca, f)

with open('transformed_data.pkl', 'wb') as f:
    pickle.dump((train_images_2dpca, val_images_2dpca, test_images_2dpca), f)

print("Saved 2DPCA model and transformed data.")

# Print shapes of transformed data
print(f"train_images_2dpca shape: {train_images_2dpca.shape}")
print(f"val_images_2dpca shape: {val_images_2dpca.shape}")
print(f"test_images_2dpca shape: {test_images_2dpca.shape}")

print(f"train_labels shape: {train_labels.shape}")


100%|██████████████████████████████████████| 1717/1717 [00:05<00:00, 338.58it/s]
100%|██████████████████████████████████████| 1717/1717 [00:03<00:00, 512.75it/s]
100%|██████████████████████████████████████| 3138/3138 [00:07<00:00, 430.65it/s]
100%|██████████████████████████████████████| 1784/1784 [00:04<00:00, 399.64it/s]


Saved 2DPCA model and transformed data.
train_images_2dpca shape: (54917, 2500)
val_images_2dpca shape: (100411, 2500)
test_images_2dpca shape: (57067, 2500)
train_labels shape: (54917,)


In [7]:
from sklearn.ensemble import RandomForestClassifier



In [8]:
# Instantiate and train KNN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_images_2dpca, train_labels)



# Predict on validation set
val_predictions = knn.predict(val_images_2dpca)
val_accuracy = accuracy_score(val_labels, val_predictions)
val_precision = precision_score(val_labels, val_predictions, average='macro',zero_division=1)

print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation Precision: {val_precision}")



test_predictions = knn.predict(test_images_2dpca)

test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average='macro',zero_division=1)

print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")







Validation Accuracy: 0.0
Validation Precision: 0.1335149863760218
Test Accuracy: 0.0
Test Precision: 0.2737642585551331


In [None]:
rf_classifier = RandomForestClassifier(n_estimators=100,verbose=1,n_jobs=-1)
n_estimators = rf_classifier.n_estimators

rf_classifier.fit(train_images_2dpca, train_labels)


val_predictions = rf_classifier.predict(val_images_2dpca)
accuracy = accuracy_score(val_labels, val_predictions)

print(f"Validation Accuracy: {val_accuracy}")
print(f"Validation Precision: {val_precision}")

test_images_2dpca = two_d_pca.transform(test_images)
test_predictions = rf_classifier.predict(test_images_2dpca)
test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average='macro',zero_division=1)

print(f"Test Accuracy: {test_accuracy}")
print(f"Test Precision: {test_precision}")



[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 28.6min


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score

# Instantiate SVM classifier
svm = SVC(kernel='linear', C=1.0, random_state=42,n_jobs=-1,verbose=1)  # You can adjust kernel and C parameter as needed

# Train SVM classifier
svm.fit(train_images_2dpca, train_labels)

# Predict on validation set
val_predictions = svm.predict(val_images_2dpca)

# Evaluate on validation set
val_accuracy = accuracy_score(val_labels, val_predictions)
val_precision = precision_score(val_labels, val_predictions, average='macro', zero_division=1)

print(f"Validation Accuracy (SVM): {val_accuracy}")
print(f"Validation Precision (SVM): {val_precision}")

# Predict on test set
test_predictions = svm.predict(test_images_2dpca)

# Evaluate on test set
test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average='macro', zero_division=1)

print(f"Test Accuracy (SVM): {test_accuracy}")
print(f"Test Precision (SVM): {test_precision}")
