In [None]:
# ===============================
# 1. Import Libraries
# ===============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from PIL import Image
import cv2
%matplotlib inline

In [None]:
# ===============================
# 2. Set Paths
# ===============================
# All dataset files live under a single directory. Set the CELEBA_DIR
# environment variable to point the notebook at another copy of the data;
# when it is unset the original location is used, so on the original Windows
# machine the three paths below resolve exactly as before.
DATA_DIR = os.environ.get('CELEBA_DIR', r'D:\Shiva\Shiva New\Shiva New Project')

# CSV with one row per image and one column per attribute.
ATTR_CSV = os.path.join(DATA_DIR, 'list_attr_celeba.csv')
# The image files sit in a doubly nested folder of the same name.
IMG_FOLDER = os.path.join(DATA_DIR, 'img_align_celeba', 'img_align_celeba')
# CSV that assigns every image to a partition.
PARTITION_CSV = os.path.join(DATA_DIR, 'list_eval_partition.csv')

In [None]:
# ===============================
# 3. Load Attribute Data
# ===============================
# Read the attribute table and key it by image file name so rows can be
# looked up (and later joined) by 'image_id'.
df_attr = pd.read_csv(ATTR_CSV).set_index('image_id')

In [None]:
# Map the -1/+1 labels onto 0/1 for binary classification:
# (-1 + 1) // 2 == 0 and (1 + 1) // 2 == 1.
df_attr = df_attr.add(1).floordiv(2)

In [None]:
# Overview of the attribute table: dimensions, attribute names, first rows
print("Shape:", df_attr.shape)
print("Columns:", list(df_attr.columns))
display(df_attr.head(n=5))

In [None]:
# ===============================
# 4. Load Partition Information
# ===============================
# Force the two column names to a known spelling, then index by image file
# name so the table lines up with df_attr.
df_partition = (
    pd.read_csv(PARTITION_CSV)
    .set_axis(['image_id', 'partition'], axis=1)
    .set_index('image_id')
)

# Number of images assigned to each partition value
partition_sizes = df_partition['partition'].value_counts()
display(partition_sizes)

In [None]:
# ===============================
# 5. Merge for Easier Management
# ===============================
# Index-on-index inner join: every remaining row carries its attributes
# followed by its 'partition' value.
df = pd.merge(
    df_attr,
    df_partition,
    how='inner',
    left_index=True,
    right_index=True,
)
display(df.head())

In [None]:
# ===============================
# 6. Class Distribution for Each Attribute
# ===============================
# Exclude the 'partition' column by name rather than by position, so the
# counts stay correct even if the column order of df ever changes.
# Attributes are 0/1 after the earlier conversion, so a column sum is the
# number of positive samples for that attribute.
attr_counts = df.drop(columns='partition').sum().sort_values(ascending=False)

# Plot distribution (horizontal bars, most frequent attribute first)
plt.figure(figsize=(12, 10))
sns.barplot(x=attr_counts.values, y=attr_counts.index)
plt.title('Positive Sample Count per Attribute')
plt.xlabel('Count')
plt.ylabel('Attribute')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# ===============================
# 7. Attribute Correlation Matrix
# ===============================
# Pairwise correlation between the attribute columns only; 'partition' is
# excluded by name rather than by position so a change in column order
# cannot leak it into the matrix. Computed before the figure is created so a
# failure here does not leave an empty figure behind.
corr = df.drop(columns='partition').corr()

plt.figure(figsize=(14, 12))
# Diverging colormap centred on 0: opposite signs get opposite colours.
sns.heatmap(corr, cmap='coolwarm', center=0, square=True)
plt.title("Correlation Between Facial Attributes")
plt.show()

In [None]:
# ===============================
# 8. Example Images with Attributes
# ===============================
def show_images_with_attributes(attr, count=5, random_state=None):
    """Plot a random row of images whose attribute ``attr`` equals 1.

    Reads the module-level ``df`` (attribute table indexed by image file
    name) and ``IMG_FOLDER`` (directory holding the image files).

    Parameters
    ----------
    attr : str
        Name of an attribute column in ``df``.
    count : int, default 5
        Maximum number of images to show. If fewer images carry the
        attribute, all of them are shown instead of failing.
    random_state : int or None, default None
        Passed to ``DataFrame.sample``; supply an int to get the same
        images on every run. ``None`` keeps the selection random.

    Raises
    ------
    KeyError
        If ``attr`` is not a column of ``df``.
    ValueError
        If no image has the attribute set.
    """
    matches = df[df[attr] == 1]
    if matches.empty:
        raise ValueError(f"No images found with attribute {attr!r}")

    # Never request more rows than exist; sample() would raise otherwise.
    n_show = min(count, len(matches))
    sample = matches.sample(n_show, random_state=random_state)

    plt.figure(figsize=(15, 5))
    for i, img_id in enumerate(sample.index):
        img_path = os.path.join(IMG_FOLDER, img_id)
        # Close the file handle as soon as the image has been drawn;
        # imshow converts the image to an array when it is called.
        with Image.open(img_path) as img:
            plt.subplot(1, n_show, i + 1)
            plt.imshow(img)
        plt.axis('off')
        plt.title(img_id)
    plt.suptitle(f"Sample Images with Attribute: {attr}")
    plt.show()

In [None]:
# Demo: plot a random handful of images labelled 'Smiling'
show_images_with_attributes(attr='Smiling', count=5)

In [None]:
# ===============================
# 9. Preprocessing: Image Resizing & Normalization
# ===============================
def preprocess_image(image_path, size=(128, 128)):
    """Load an image, resize it and scale its pixel values by 1/255.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image file; passed to ``PIL.Image.open``.
    size : tuple of int, default (128, 128)
        Target size passed to ``Image.resize``. PIL interprets it as
        (width, height), while the returned array is laid out rows first.

    Returns
    -------
    numpy.ndarray
        Floating-point array of the resized pixels divided by 255.0
        (values fall in [0, 1] for 8-bit images).
    """
    # The context manager closes the underlying file handle; resize() returns
    # an independent image, and the array is built before the file is closed.
    with Image.open(image_path) as img:
        resized = img.resize(size)
        pixels = np.array(resized)
    return pixels / 255.0  # Normalize to [0, 1]

In [None]:
# Example usage: run the preprocessing on the first image listed in df
first_image_id = df.index[0]
sample_image_path = os.path.join(IMG_FOLDER, first_image_id)
img_array = preprocess_image(sample_image_path)
print("Preprocessed image shape:", img_array.shape)