Using Principal Component Analysis (PCA)

In [None]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading category_encoders-2.6.3-py2.py3-none-any.whl (81 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.9/81.9 kB 5.4 MB/s eta 0:00:00
Installing collected packages: category_encoders
Successfully installed category_encoders-2.6.3


Data Preprocessing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import time

# Function to load and preprocess image data using ImageDataGenerator
def load_image_data(image_dir, target_variable, batch_size=32, img_size=(224, 224)):
    """Load one full pass of images and sparse class labels into memory.

    Images are read with ``ImageDataGenerator.flow_from_directory``; pixel
    values are rescaled by 1/255 and every image is resized to ``img_size``.

    Parameters
    ----------
    image_dir : str
        Directory handed to ``flow_from_directory``.
    target_variable : Any
        Not used by this function; kept so existing callers keep working.
    batch_size : int, default 32
        Number of images pulled from the iterator per step.
    img_size : tuple of int, default (224, 224)
        Target size passed to ``flow_from_directory``.

    Returns
    -------
    images : numpy.ndarray
        All image batches stacked along the first axis.
    labels : numpy.ndarray
        The matching label batches stacked along the first axis.
    """
    datagen = ImageDataGenerator(rescale=1./255)
    data_flow = datagen.flow_from_directory(
        image_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='sparse'  # For sparse categorical labels
    )

    image_batches, label_batches = [], []
    # len(data_flow) is the number of batches in one pass over the data.
    for _ in range(len(data_flow)):
        # Built-in next() instead of data_flow.next(): newer Keras iterators
        # only implement __next__, so the method call raises AttributeError.
        img_batch, label_batch = next(data_flow)
        image_batches.append(img_batch)
        label_batches.append(label_batch)

    if not image_batches:
        # Nothing was yielded: return empty arrays, as the list-based version did.
        return np.array([]), np.array([])

    # Concatenate once instead of growing a Python list image by image.
    images = np.concatenate(image_batches)
    labels = np.concatenate(label_batches)
    return images, labels

# Function to perform PCA on image data
def perform_pca(df):
    """Fit a default-configured PCA on the flattened samples.

    Each sample in ``df`` is flattened, so ``df`` is reshaped to
    ``(df.shape[0], -1)`` before fitting.

    Returns
    -------
    tuple
        ``(explained_variance_ratio_, fitted_pca)``.
    """
    n_samples = df.shape[0]
    flattened = df.reshape(n_samples, -1)  # one row per sample
    pca = PCA().fit(flattened)
    return pca.explained_variance_ratio_, pca

# Function to plot Scree plot
def plot_scree(explained_variance_ratio):
    """Draw a bar-chart scree plot of the given explained-variance ratios.

    Bars are numbered from 1 along the x axis, one per entry of
    ``explained_variance_ratio``. The figure is shown immediately and
    nothing is returned.
    """
    n_components = len(explained_variance_ratio)
    component_numbers = range(1, n_components + 1)

    plt.figure(figsize=(10, 6))
    plt.bar(component_numbers, explained_variance_ratio, alpha=0.7)
    plt.xlabel('Principal Component')
    plt.ylabel('Explained Variance Ratio')
    plt.title('Scree Plot')
    plt.grid()
    plt.show()

Model Development and Evaluation

In [None]:
# Function to evaluate image classification model
def evaluate_image_models(X_train, X_test, y_train, y_test, epochs=10, batch_size=32):
    """Train a frozen-VGG16 classifier and report training time and accuracy.

    A VGG16 base (ImageNet weights, no top) is frozen and extended with
    Flatten -> Dense(1024, relu) -> Dense(n_classes, softmax). The model is
    compiled with Adam and sparse categorical cross-entropy, fitted on the
    training data with the test data as validation data, then evaluated on
    the test data.

    Parameters
    ----------
    X_train, X_test : numpy.ndarray
        Image arrays; the network input shape is taken from
        ``X_train.shape[1:]``.
    y_train, y_test : numpy.ndarray
        Class-index labels. They must be 0-based integers (possibly stored
        as floats), as the sparse cross-entropy loss requires.
    epochs : int, default 10
        Number of training epochs.
    batch_size : int, default 32
        Mini-batch size used by ``model.fit``.

    Returns
    -------
    dict
        ``{'Training time': seconds, 'Accuracy': test accuracy}``.
    """
    # Derive the input shape from the data rather than assuming 224x224x3,
    # so images loaded with a different size still fit the network.
    input_shape = tuple(X_train.shape[1:])

    # Size the output layer from the largest label index in either split.
    # Counting unique training labels gives too few units when the training
    # split misses a class, and the loss then sees out-of-range labels.
    all_labels = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
    n_classes = int(all_labels.max()) + 1

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    # Freeze the convolutional base so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    start_time = time.time()
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_test, y_test),
        batch_size=batch_size,
    )
    training_time = time.time() - start_time

    # evaluate() returns [loss, accuracy] given the single metric compiled above.
    evaluation = model.evaluate(X_test, y_test, verbose=0)
    accuracy = evaluation[1]

    results = {
        'Training time': training_time,
        'Accuracy': accuracy
    }

    return results

# Example usage: load the images, inspect PCA variance, then train and score the classifier.

# --- Data loading ---
image_dir = 'path_to_image_directory'  # Directory containing image subfolders
target_variable = 'label'
X, y = load_image_data(image_dir, target_variable)

# --- PCA analysis ---
explained_variance_ratio, pca = perform_pca(X)
plot_scree(explained_variance_ratio)
# NOTE(review): perform_pca fits a default PCA(), which presumably keeps every
# component, so this total is expected to be ~1.0 — confirm it is the intended metric.
total_variance = explained_variance_ratio.cumsum()[-1]
print(f"\nTotal Variance Explained by PCA: {total_variance:.4f}")

# --- Train/test split (80/20, fixed seed) and model evaluation ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
evaluation_results = evaluate_image_models(X_train, X_test, y_train, y_test)

# --- Report ---
print("\nModel Evaluation Results for Computer Vision:")
for metric_name in evaluation_results:
    value = evaluation_results[metric_name]
    print(f"  {metric_name}: {value:.4f}")