In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report
import cv2
import os
import glob

# Define the path to the dataset
dataset_path = r'C:\Users\ahmad\OneDrive\Desktop\Optimizaiton project\chest_xray\train'

# Load the dataset
def load_data(dataset_path):
    x = []
    y = []
    
    # Load normal images
    for img_path in glob.glob(os.path.join(dataset_path, 'Normal', '*.jpeg')):
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (64, 64)).flatten()
        x.append(img)
        y.append(0)

    # Load pneumonia images
    for img_path in glob.glob(os.path.join(dataset_path, 'Pneumonia', '*.jpeg')):
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (64, 64)).flatten()
        x.append(img)
        y.append(1)

    return np.array(x), np.array(y)

X, y = load_data(dataset_path)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



##    PCA using Singular Value Decomposition (PCA-SVD):

In [2]:
from sklearn.decomposition import PCA

def run_PCA_SVD(X, n_components):
    """
    Pseudo Code:
    1. Initialize the PCA model with the desired number of components.
    2. Fit the PCA model to the data.
    3. Transform the data to its principal components.
    """
    # Initialize the PCA model
    pca = PCA(n_components=n_components)

    # Fit the model to the data
    pca.fit(X)

    # Transform the data to its principal components
    X_pca = pca.transform(X)

    return X_pca


In [3]:
# Define the number of components
n_components = 150  # replace this with the desired number of components

# Run PCA-SVD
X_train_pca_svd = run_PCA_SVD(X_train, n_components)
X_test_pca_svd = run_PCA_SVD(X_test, n_components)

# Print the transformed data
print("Transformed Training Data:", X_train_pca_svd)
print("Transformed Testing Data:", X_test_pca_svd)


Transformed Training Data: [[  670.59589973   741.80218269   662.40792802 ...    64.75575335
    -13.03167937   -37.08802755]
 [ 2362.35588337 -1411.5192587    416.45878133 ...    20.46099301
     20.42818613     4.21137117]
 [-1379.56964269 -1126.46758467  -485.43079751 ...   -53.90114477
     50.05548714    13.37762131]
 ...
 [ -365.99615655  -639.41523654  -528.86824717 ...   -19.56579578
     39.45965118   -30.37173481]
 [ -571.31744331  -825.36476222   139.50910308 ...   -85.05121623
    -55.91009747    69.74468444]
 [ 1242.43454943  -589.02265583   635.21497127 ...   -63.17914106
     56.02788043   -37.84246142]]
Transformed Testing Data: [[-4.66160116e+02 -2.11522016e+03  3.57177181e+02 ...  1.51936679e+01
   2.54595912e+00  8.56660161e+01]
 [-5.57276654e+02  5.63673926e+02 -1.51493392e+03 ...  1.17966670e+02
  -2.23113506e+00 -4.74101102e+01]
 [-4.78826694e+02  4.19721499e+02  2.85799769e+02 ...  1.17921727e+01
  -1.40702549e-01 -6.11438616e-01]
 ...
 [ 1.40123444e+03 -4.910984

##     Incremental PCA using Stochastic Gradient Descent (IPCA-SGD):

In [4]:
from sklearn.decomposition import IncrementalPCA

def run_IPCA_SGD(X, n_components, batch_size):
    """
    Pseudo Code:
    1. Initialize the IncrementalPCA model with the desired number of components.
    2. Fit the model to the data in batches.
    3. Transform the data to its principal components.
    """
    # Initialize the IncrementalPCA model
    ipca = IncrementalPCA(n_components=n_components)

    # Fit the model to the data in batches
    for i in range(0, X.shape[0], batch_size):
        ipca.partial_fit(X[i:i+batch_size])

    # Transform the data to its principal components
    X_ipca = ipca.transform(X)

    return X_ipca


In [5]:
# Define the batch size
batch_size = 200  
n_components = 44 # this is the maximum number of component the SGD will accept 
# Run IPCA-SGD
X_train_ipca_sgd = run_IPCA_SGD(X_train, n_components, batch_size)
X_test_ipca_sgd = run_IPCA_SGD(X_test, n_components, batch_size)

# Print the transformed data
print("Transformed Training Data:", X_train_ipca_sgd)
print("Transformed Testing Data:", X_test_ipca_sgd)


Transformed Training Data: [[ 6.70597077e+02  7.41814169e+02  6.62430754e+02 ...  1.26191817e+02
  -4.91051624e+01  1.12363371e+02]
 [ 2.36235675e+03 -1.41153194e+03  4.16428020e+02 ...  7.27884196e+00
  -1.34037817e+01  1.01911803e+02]
 [-1.37957117e+03 -1.12646722e+03 -4.85467674e+02 ...  5.84392353e+01
   1.18043803e+02 -2.97946024e+01]
 ...
 [-3.65998125e+02 -6.39403020e+02 -5.28900404e+02 ... -2.03731298e+00
   8.63158486e+01 -5.87695138e+01]
 [-5.71316263e+02 -8.25371336e+02  1.39488244e+02 ... -1.78949814e+01
  -4.10320204e+01 -7.83044510e+01]
 [ 1.24243184e+03 -5.89019326e+02  6.35221318e+02 ... -5.02663239e+01
   9.71800906e+01 -1.40131025e+02]]
Transformed Testing Data: [[ -466.15704302 -2115.22657768  -357.18495015 ...   108.8926501
     22.36605014   -10.80149685]
 [ -557.26922712   563.69254469  1514.97953047 ...  -221.10992806
    100.04764702    48.29789557]
 [ -478.8274571    419.74858325  -285.76488528 ...    46.86618086
    -18.70076215    14.63181687]
 ...
 [ 1401.24

##     Candid Covariance-free IPCA (CCIPCA):

In [6]:
from CCIPCA import CCIPCA
n_components = 150

def run_CCIPCA(X, n_components):
    """
    Pseudo Code:
    1. Initialize the PCA model with the desired number of components.
    2. Fit the PCA model to the data.
    3. Transform the data to its principal components.
    """
    # Initialize the PCA model
    ccipca = CCIPCA(n_components=n_components)

    # Fit the model to the data
    ccipca.fit(X)

    # Transform the data to its principal components
    X_pca = ccipca.transform(X)

    return X_pca

In [7]:
# Run CCIPCA

X_train_ccipca = run_CCIPCA(X_train, n_components)
X_test_ccipca = run_CCIPCA(X_test, n_components)

# Print the transformed data
print("Transformed Training Data:", X_train_ccipca)
print("Transformed Testing Data:", X_test_ccipca)


Transformed Training Data: [[-3883.51908789 -1021.33950493  -633.96594591 ...    50.57361129
     67.03316394    50.88076379]
 [-1719.06952582   261.80296463  -735.61559619 ...   -55.65324865
    -63.63180508    12.68567751]
 [-5135.79005862  1496.63663692   415.78030959 ...  -132.20002887
     20.44455957   -47.32830659]
 ...
 [-4253.55310056   746.39330806   477.1358679  ...    68.44690784
    -88.35249164    99.87281054]
 [-4500.44685926   908.26298944  -200.63006689 ...    14.25919139
     12.42871429     5.45072423]
 [-2913.07518212    -6.25891355  -760.47898652 ...    79.78341308
     36.43473272    99.94163922]]
Transformed Testing Data: [[-3.65348983e+03  2.16904723e+03 -4.04420756e+02 ... -3.60421420e+01
   1.35335317e+02 -6.92891125e+01]
 [-5.13861578e+03 -4.72837704e+02  1.42292689e+03 ...  3.30234079e+01
  -1.84222088e+02  1.45469159e+02]
 [-4.71649953e+03 -2.72485939e+02 -3.44036590e+02 ... -6.08397053e+01
  -1.26419815e+02 -1.36748796e+01]
 ...
 [-2.89482255e+03 -1.735011

## Principal Component Analysis (PCA) using Recursive Regularized Least Squares Algorithm (RRLSA). 

In [8]:
from IPCA_RLS import IPCA_RLS

n_components = 150

def run_IPCA_RLS(X, n_components):
    """
    Pseudo Code:
    1. Initialize the PCA model with the desired number of components.
    2. Fit the PCA model to the data.
    3. Transform the data to its principal components.
    """
    # Initialize the PCA model
    pcca_rls = IPCA_RLS(n_components=n_components)

    # Fit the model to the data
    pcca_rls.fit(X)

    # Transform the data to its principal components
    X_pca = pcca_rls.transform(X)

    return X_pca


In [9]:
X_train_ccipca = run_IPCA_RLS(X_train, n_components)
X_test_ccipca = run_IPCA_RLS(X_test, n_components)

# Print the transformed data
print("Transformed Training Data:", X_train_ccipca)
print("Transformed Testing Data:", X_test_ccipca)
