In [None]:
import os
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler

# Paths
# input_folder  -> directory holding the original feature/label .npy arrays
# output_folder -> directory that receives the reduced feature files
input_folder = r"D:\project work 2 phase 2\fingerprint\code"
output_folder = r"D:\project work 2 phase 2\fingerprint\reduced_features"
os.makedirs(output_folder, exist_ok=True)

# Feature & Label Files
# One feature file per (augmentation variant, network) pair; the order is
# variant-major, i.e. both "inverted" files come before the "multi_augmented" ones.
feature_files = [
    f"{variant}_{network}_features.npy"
    for variant in ("inverted", "multi_augmented")
    for network in ("alexnet", "googlenet")
]

# Function to load labels (User ID & Class Label)
def load_labels(feature_file, label_dir=None):
    """Load the User ID and Class Label columns that accompany a feature file.

    The label file name is derived from ``feature_file`` by replacing
    ``_features.npy`` with ``_labels.npy``.

    Args:
        feature_file: File name of the feature array (e.g. ``"x_features.npy"``).
        label_dir: Directory that contains the label file. When omitted, the
            notebook-level ``input_folder`` is used.

    Returns:
        A ``(user_ids, class_labels)`` tuple: column 0 and column 1 of the
        label array, each cast to ``int``.

    Raises:
        ValueError: If ``feature_file`` does not contain ``_features.npy``
            (no label file name can be derived), or if the label array is not
            2-D with at least two columns.
        FileNotFoundError: If the derived label file does not exist.
    """
    label_file = feature_file.replace("_features.npy", "_labels.npy")  # Generate label file name

    # If nothing was replaced, the "label" path would point at the feature file
    # itself and feature values would silently be read as labels.
    if label_file == feature_file:
        raise ValueError(
            f"Feature file name {feature_file} does not contain '_features.npy'; "
            "cannot derive its label file."
        )

    folder = input_folder if label_dir is None else label_dir
    label_path = os.path.join(folder, label_file)

    if not os.path.exists(label_path):
        raise FileNotFoundError(f"Label file not found: {label_file}")

    labels = np.load(label_path)

    # Check ndim first: indexing shape[1] on a 1-D array raises IndexError
    # rather than the intended, descriptive ValueError.
    if labels.ndim != 2 or labels.shape[1] < 2:
        raise ValueError(f"Labels file {label_file} does not have both User ID and Class Label.")

    user_ids = labels[:, 0].astype(int)  # First column -> User ID
    class_labels = labels[:, 1].astype(int)  # Second column -> Class Label
    return user_ids, class_labels

# Apply PCA & LDA
def _reduced_path(feature_file, new_suffix):
    """Return the path in ``output_folder`` for ``feature_file`` with
    ``_features.npy`` replaced by ``new_suffix``."""
    return os.path.join(output_folder, feature_file.replace("_features.npy", new_suffix))


for file in feature_files:
    # --- Load -------------------------------------------------------------
    feature_path = os.path.join(input_folder, file)
    features = np.load(feature_path)
    user_ids, class_labels = load_labels(file)

    # Every feature row needs exactly one label row.
    if features.shape[0] != class_labels.shape[0]:
        raise ValueError(f"Mismatch in feature and label samples for {file}")

    # --- Standardize (zero mean, unit variance per column) -----------------
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # --- PCA: a float n_components keeps enough components for 95% variance -
    pca = PCA(n_components=0.95, svd_solver='full')
    features_pca = pca.fit_transform(features_scaled)
    pca_filename = _reduced_path(file, "_pca.npy")
    np.save(pca_filename, features_pca)

    # Labels written next to the PCA output as (User ID, Class Label) rows.
    pca_labels = np.column_stack((user_ids, class_labels))
    pca_label_filename = _reduced_path(file, "_pca_labels.npy")
    np.save(pca_label_filename, pca_labels)

    # --- LDA: supervised, fitted on the scaled features (not the PCA output) -
    num_classes = len(np.unique(class_labels))
    # Component count is capped at (num_classes - 1) and at the feature count.
    lda_components = min(num_classes - 1, features.shape[1])

    lda = LinearDiscriminantAnalysis(n_components=lda_components)
    features_lda = lda.fit_transform(features_scaled, class_labels)
    lda_filename = _reduced_path(file, "_lda.npy")
    np.save(lda_filename, features_lda)

    # Same (User ID, Class Label) rows, written next to the LDA output.
    lda_labels = np.column_stack((user_ids, class_labels))
    lda_label_filename = _reduced_path(file, "_lda_labels.npy")
    np.save(lda_label_filename, lda_labels)

print("PCA and LDA transformations complete. Files saved in 'reduced_features' folder.")

PCA and LDA transformations complete. Files saved in 'reduced_features' folder.


In [None]:
# Folder where reduced features and labels are stored
output_folder = r"D:\project work 2 phase 2\fingerprint\reduced_features"

# List of reduced feature and label files
# (both feature arrays first, then their matching label arrays)
reduced_files = [
    f"inverted_alexnet_{suffix}.npy"
    for suffix in ("pca", "lda", "pca_labels", "lda_labels")
]

# Function to display `.npy` file contents
def display_npy(file_name):
    """Print the shape and the first five rows of a ``.npy`` file.

    ``file_name`` is resolved relative to ``output_folder``. A missing file is
    reported on stdout and the function returns without raising. Files whose
    name contains "labels" (case-insensitive) are treated as label files and
    get a warning caption unless they have shape ``(N, 2)``.
    """
    file_path = os.path.join(output_folder, file_name)
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_name}")
        return

    data = np.load(file_path)

    print(f"\n### Displaying {file_name} ###")
    print(f"📏 Shape: {data.shape}\n")

    # Pick the caption for the preview; the preview itself is always data[:5].
    if "labels" not in file_name.lower():
        caption = "🔹 First 5 feature vectors:\n"
    elif data.ndim == 2 and data.shape[1] == 2:
        # Expected label layout: one (User ID, Class Label) pair per row.
        caption = "🔹 First 5 samples (User ID, Class Label):\n"
    else:
        caption = "⚠ Warning: Label file format unexpected!\n"

    print(caption, data[:5])
    print("=" * 50)

# Display features and labels after reduction
# Prints the shape and first five rows of every entry in `reduced_files`.
# A missing file is reported by display_npy on stdout instead of raising,
# so the loop always runs through the whole list.
# NOTE: `file` is the same cell-level name used by the reduction loop in the
# previous cell, so its value is overwritten here.
for file in reduced_files:
    display_npy(file)


### Displaying inverted_alexnet_pca.npy ###
📏 Shape: (1600, 760)

🔹 First 5 feature vectors:
 [[ 13.746619     7.6956472  -13.257836   ...  -0.89637095  -0.39373615
    0.795328  ]
 [ 13.905582    16.57034     -1.9058607  ...   0.5971702   -1.0074791
    0.3893588 ]
 [ 11.100484    -2.1537268   -1.5542495  ...  -0.14830323  -0.33420244
   -0.34953043]
 [  9.5971775  -10.78886      2.5744007  ...   0.5123791   -0.950725
   -0.37664774]
 [  9.627034    10.320029     0.6665535  ...  -0.28425255   0.97310394
   -0.13844754]]

### Displaying inverted_alexnet_lda.npy ###
📏 Shape: (1600, 79)

🔹 First 5 feature vectors:
 [[-7.89488733e-01  6.71535969e-01 -1.27668214e+00 -1.88672960e-01
   1.53564513e+00  8.79255593e-01  3.54835224e+00 -3.00050330e+00
   2.80794907e+00 -6.84235191e+00  1.89293230e+00 -8.99302363e-01
   3.57269078e-01 -1.09685087e+00  4.43714190e+00 -4.68501949e+00
   1.58614337e+00  1.15398788e+00 -2.41469479e+00  2.80030632e+00
  -1.16932774e+00 -1.28439784e+00  2.87140071e-0