In [13]:
import os
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd

In [14]:
def load_images_from_folder(folder, label_filter, size=(64, 64)):
    """Load the ``.jpg`` images in ``folder`` whose class label is in ``label_filter``.

    The class label is read from the file name: the name is split on ``'_'``
    and the third field (index 2) is parsed as an integer. Each selected image
    is converted to grayscale (PIL mode ``'L'``), resized to ``size`` and
    flattened into a 1-D pixel vector.

    Args:
        folder: Directory to scan (sub-directories are not visited).
        label_filter: Container of integer labels to keep.
        size: ``(width, height)`` every image is resized to. Defaults to
            ``(64, 64)``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a 2-D array with one row
        of ``width * height`` pixel values per image and ``labels`` is the
        matching 1-D array of integer labels. Rows follow sorted file-name
        order. When nothing matches, ``images`` has shape
        ``(0, width * height)`` so it can still be stacked with the result of
        another call.

    Raises:
        IndexError: A ``.jpg`` file name has fewer than three
            ``'_'``-separated fields.
        ValueError: The third field of a ``.jpg`` file name is not an integer.
    """
    width, height = size
    images = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".jpg"):
            continue
        # Extract class label from filename
        label = int(filename.split('_')[2])
        if label not in label_filter:
            continue
        # The context manager closes the underlying file as soon as the pixels
        # have been copied out, instead of leaving one open handle per image.
        with Image.open(os.path.join(folder, filename)) as img:
            pixels = np.array(img.convert('L').resize((width, height)))
        images.append(pixels.flatten())
        labels.append(label)
    if not images:
        # np.array([]) would be 1-D with shape (0,), which np.vstack cannot
        # combine with a non-empty split; keep the column count explicit.
        return (np.empty((0, width * height), dtype=np.uint8),
                np.empty((0,), dtype=int))
    return np.array(images), np.array(labels)

In [15]:
# Paths
# Dataset root. Defaults to the original local folder; set the PCA_WORK_DIR
# environment variable to point the notebook at a copy of the data elsewhere
# without editing this cell.
DATA_DIR = os.environ.get('PCA_WORK_DIR', r'C:\personal file\work\TKU\PCA_WORK')
train_path = os.path.join(DATA_DIR, 'train-original cut')
test_path = os.path.join(DATA_DIR, 'test-original cut')

In [16]:
# Read both splits, keeping only the images whose file-name label is 0 or 1
kept_labels = [0, 1]
(train_images, train_labels), (test_images, test_labels) = [
    load_images_from_folder(split_dir, label_filter=kept_labels)
    for split_dir in (train_path, test_path)
]

In [17]:
# Pool the two splits into one dataset: image rows are stacked vertically and
# the label vectors are joined end to end, train first, then test.
image_parts, label_parts = zip(
    (train_images, train_labels),
    (test_images, test_labels),
)
all_images = np.vstack(image_parts)
all_labels = np.hstack(label_parts)

In [18]:
# Perform PCA
# Project every flattened image onto its first two principal components.
# random_state is fixed because scikit-learn's default svd_solver='auto' may
# select the randomized solver for large inputs; a seed keeps the projection
# (and therefore the plot) reproducible from run to run.
pca = PCA(n_components=2, random_state=0)
pca_result = pca.fit_transform(all_images)
# Fraction of the total variance captured by each of the two components.
explained_variance = pca.explained_variance_ratio_

In [19]:
# Report the share of total variance captured by each of the two components
pc1_ratio, pc2_ratio = explained_variance[0], explained_variance[1]
print(f"Explained variance by component 1: {pc1_ratio:.2f}")
print(f"Explained variance by component 2: {pc2_ratio:.2f}")

Explained variance by component 1: 0.31
Explained variance by component 2: 0.08


In [20]:
# Tabulate the 2-D projection next to its class label so Plotly can consume it
df = pd.DataFrame(pca_result, columns=['PC1', 'PC2']).assign(label=all_labels)

In [21]:
# Per-class presentation: marker color and legend name, keyed by integer label
class_styles = {
    0: ('red', 'hat_on'),
    1: ('yellow', 'hat_off'),
}
colors = {class_id: color for class_id, (color, _) in class_styles.items()}
labels = {class_id: name for class_id, (_, name) in class_styles.items()}
# Attach the color of each row's class as an extra column
df['color'] = df['label'].map(colors)

In [22]:
# Create the Plotly figure
# It starts with no traces; the scatter traces and the layout are attached to
# this same object by the statements that follow.
fig = go.Figure()

In [23]:
# Add one scatter trace per class label
# Drop any traces already on the figure so that re-running this cell rebuilds
# the plot instead of stacking duplicate traces (and legend entries) on it.
fig.data = []
# groupby yields the labels in sorted order, so the trace/legend order does
# not depend on the order in which the image files happened to be read.
for label, label_df in df.groupby('label'):
    fig.add_trace(go.Scatter(
        x=label_df['PC1'],
        y=label_df['PC2'],
        mode='markers',
        marker=dict(color=colors[label], size=10),
        name=labels[label]
    ))

In [24]:
# Style the figure: title, axis captions, a legend placed at x=1, y=1 with a
# fully transparent background and border, and no color-axis scale.
transparent = 'rgba(0,0,0,0)'
legend_style = dict(
    x=1,
    y=1,
    traceorder='normal',
    bgcolor=transparent,
    bordercolor=transparent,
)
layout_options = dict(
    title='PCA of Images',
    xaxis_title='Principal Component 1',
    yaxis_title='Principal Component 2',
    showlegend=True,
    legend=legend_style,
    coloraxis_showscale=False,
)
fig.update_layout(**layout_options)

# Render the finished figure
fig.show()