In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
import plotly.graph_objs as go
import pandas as pd

In [2]:
def load_images_from_folder(folder, label_filter, image_size=(64, 64)):
    """Load the ``.jpg`` images in ``folder`` whose filename label is wanted.

    The label is taken from the third underscore-separated field of the
    filename (``filename.split('_')[2]``) and must parse as an integer.
    Files whose name does not follow that pattern are skipped instead of
    aborting the whole load.

    Args:
        folder: Directory to scan (not recursive).
        label_filter: Container of integer labels to keep.
        image_size: ``(width, height)`` each image is resized to before it is
            flattened. Defaults to ``(64, 64)``.

    Returns:
        A tuple ``(images, labels, filenames)``: an array holding one
        flattened grayscale image per kept file, an array of the matching
        integer labels, and the list of kept file names. All three follow
        sorted filename order.
    """
    images = []
    labels = []
    filenames = []
    # os.listdir returns entries in arbitrary order; sort so the row order
    # of the returned arrays is reproducible between runs and machines.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".jpg"):
            continue
        parts = filename.split('_')
        try:
            label = int(parts[2])
        except (IndexError, ValueError):
            # Name does not carry an integer label in its third field.
            continue
        if label not in label_filter:
            continue
        img_path = os.path.join(folder, filename)
        # Context manager releases the underlying file handle promptly.
        with Image.open(img_path) as img:
            gray = img.convert('L')  # Convert to grayscale
            gray = gray.resize(image_size)  # Resize for consistency
            img_array = np.array(gray).flatten()
        images.append(img_array)
        labels.append(label)
        filenames.append(filename)
    return np.array(images), np.array(labels), filenames

In [3]:
def create_pca_plot_3d(df, title):
    """Build a 3D scatter figure with one marker trace per label.

    Marker colour and legend name for each label are looked up in the
    module-level ``colors`` and ``labels`` mappings.

    Args:
        df: DataFrame with the columns ``PC1``, ``PC2``, ``PC3``, ``label``
            and ``info`` (``info`` is used as the hover text).
        title: Title shown on the figure.

    Returns:
        The assembled ``go.Figure``.
    """
    figure = go.Figure()

    # One trace per distinct label, in order of first appearance in df.
    for lbl in df['label'].unique():
        subset = df.loc[df['label'] == lbl]
        trace = go.Scatter3d(
            x=subset['PC1'],
            y=subset['PC2'],
            z=subset['PC3'],
            mode='markers',
            marker={'color': colors[lbl], 'size': 3},
            name=labels[lbl],
            text=subset['info'],
            hoverinfo='text',
        )
        figure.add_trace(trace)

    axis_titles = {
        'xaxis_title': 'Principal Component 1',
        'yaxis_title': 'Principal Component 2',
        'zaxis_title': 'Principal Component 3',
    }
    # Legend anchored at (1, 1) with fully transparent background and border.
    legend_style = {
        'x': 1,
        'y': 1,
        'traceorder': 'normal',
        'bgcolor': 'rgba(0,0,0,0)',
        'bordercolor': 'rgba(0,0,0,0)',
    }
    figure.update_layout(
        title=title,
        scene=axis_titles,
        showlegend=True,
        legend=legend_style,
        coloraxis_showscale=False,
    )
    return figure

In [4]:
# Paths
# Root folder that holds the train/test image directories. Set the
# PCA_WORK_DIR environment variable to point at another location without
# editing the notebook; the default is the original hard-coded folder.
DATA_ROOT = os.environ.get('PCA_WORK_DIR', r'C:\personal file\work\TKU\PCA_WORK')
train_path = os.path.join(DATA_ROOT, 'train-original cut')
test_path = os.path.join(DATA_ROOT, 'test-original cut')

In [5]:
# Each garment category owns two consecutive labels, (2k, 2k + 1), in the
# order the categories are listed here.
pair_names = ['Hat', 'Clothes', 'Shoes', 'Mask']
label_pairs = [(2 * k, 2 * k + 1) for k in range(len(pair_names))]

In [6]:
# Marker colour for each integer label (index in the list == label).
colors = dict(enumerate([
    'red', 'yellow', 'blue', 'green',
    'purple', 'orange', 'pink', 'brown',
]))
# Legend name for each integer label (index in the list == label).
labels = dict(enumerate([
    'hat_on', 'hat_off',
    'clothes_on', 'clothes_off',
    'shoes_on', 'shoes_off',
    'mask_on', 'mask_off',
]))

In [8]:
def _build_pca_frame(components, image_labels, image_filenames):
    """Assemble the plotting DataFrame for one set of PCA-projected images.

    Args:
        components: Array with three columns (the PCA projection).
        image_labels: Label for each row of ``components``.
        image_filenames: File name for each row of ``components``.

    Returns:
        DataFrame with columns ``PC1``, ``PC2``, ``PC3``, ``label``,
        ``filename`` and ``info`` (the HTML hover text for each point).
    """
    frame = pd.DataFrame(components, columns=['PC1', 'PC2', 'PC3'])
    frame['label'] = image_labels
    frame['filename'] = image_filenames
    # Build all hover strings in one pass instead of a row-wise apply.
    frame['info'] = [
        f"Filename: {name}<br>PC1: {pc1:.2f}<br>PC2: {pc2:.2f}<br>PC3: {pc3:.2f}"
        for name, pc1, pc2, pc3 in zip(
            frame['filename'], frame['PC1'], frame['PC2'], frame['PC3']
        )
    ]
    return frame


# For every label pair: fit a 3-component PCA on the train images, project
# the test images with the same fit, and show one 3D scatter plot for each.
for (label1, label2), pair_name in zip(label_pairs, pair_names):
    # Load and filter images
    train_images, train_labels, train_filenames = load_images_from_folder(train_path, label_filter=[label1, label2])
    test_images, test_labels, test_filenames = load_images_from_folder(test_path, label_filter=[label1, label2])

    # PCA with 3 components needs at least 3 training samples, and transform
    # needs at least one test sample. Skip the pair rather than letting one
    # empty category abort the remaining plots.
    if len(train_images) < 3 or len(test_images) == 0:
        print(
            f"Skipping {pair_name}: need at least 3 train images and 1 test image "
            f"(found {len(train_images)} train, {len(test_images)} test)"
        )
        continue

    # Perform PCA for 3D. random_state pins the result when scikit-learn
    # selects its randomized SVD solver for large inputs.
    pca_3d = PCA(n_components=3, random_state=0)
    train_pca_3d = pca_3d.fit_transform(train_images)
    test_pca_3d = pca_3d.transform(test_images)

    # Create DataFrames for Plotly
    train_df_3d = _build_pca_frame(train_pca_3d, train_labels, train_filenames)
    test_df_3d = _build_pca_frame(test_pca_3d, test_labels, test_filenames)

    # Create and show plots
    fig_train_3d = create_pca_plot_3d(train_df_3d, f'3D PCA of Train Images ({pair_name})')
    fig_test_3d = create_pca_plot_3d(test_df_3d, f'3D PCA of Test Images ({pair_name})')

    fig_train_3d.show()
    fig_test_3d.show()