In [28]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_images_from_folder(folder, label, image_size=(400, 400)):
    """Load the images in ``folder`` as flattened, scaled grayscale vectors.

    Args:
        folder: Directory whose image files are read (not recursive).
        label: Class label recorded once for every image that is loaded.
        image_size: Passed to ``load_img`` as ``target_size`` so that every
            image is resized to the same shape before flattening.

    Returns:
        A tuple ``(images, labels)`` of two equal-length lists: ``images``
        holds one flattened array per image with pixel values divided by
        255.0, and ``labels`` holds ``label`` repeated once per image. Both
        lists are empty when the folder contains no image files.
    """
    # Only files with one of these extensions are handed to load_img; stray
    # entries such as ``.DS_Store``, ``Thumbs.db`` or nested directories are
    # skipped instead of aborting the whole load.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff')

    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done by the caller) reproducible.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        if not filename.lower().endswith(image_extensions):
            continue
        img = load_img(path, target_size=image_size, color_mode='grayscale')
        img_array = img_to_array(img).flatten() / 255.0  # Normalize and flatten
        images.append(img_array)
        labels.append(label)
    return images, labels

# Read each class from its own folder: label 0 for NORMAL, 1 for PNEUMONIA.
normal_images, normal_labels = load_images_from_folder(
    folder='../chest_Xray/test/NORMAL',
    label=0,
)
pneumonia_images, pneumonia_labels = load_images_from_folder(
    folder='../chest_Xray/test/PNEUMONIA',
    label=1,
)

# Combine both classes (NORMAL samples first) into the feature matrix X,
# one flattened image per row, and the matching label vector y.
X = np.array([*normal_images, *pneumonia_images])
y = np.array([*normal_labels, *pneumonia_labels])

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# shuffle repeatable for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Dimensionality reduction: a float n_components in (0, 1) asks scikit-learn's
# PCA to keep as many components as needed to explain that share of variance.
# The projection is fitted on the training split only and then reused
# unchanged for the test split.
n_components = 0.95
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Fit the two-class classifier on the PCA-reduced training data
# (fit() returns the estimator itself, so the call can be chained).
classifier = LogisticRegression(max_iter=1000).fit(X_train_pca, y_train)

# Score the held-out split and report plain accuracy.
y_pred = classifier.predict(X_test_pca)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy with PCA + Logistic Regression: {accuracy:.2f}")

Accuracy with PCA + Logistic Regression: 0.89
