# MLP Autoencoder
This model was generated using Claude 3.5 Sonnet, with the following prompt:
"write an autoencoder in python that takes in a dataset of images of eyes,
along with a csv containing the names of each image and the corresponding
label for the color of the iris. The Autoencoder should take the images and
vectorize them. Then, somehow, create vectors containing important data
that represents the color of the iris. For example, take all of the vectors
representing brown eyes, and average them out, such that the 'brownness' is
extracted. The classes are gray, brown, blue, green, and hazel."


This concept is very complex, and we cannot expect an LLM to generate the perfect solution.

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dense, Flatten, Reshape
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder

In [None]:
# GPU configuration: turn on on-demand memory growth for every visible GPU so
# TensorFlow does not reserve all device memory up front.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is available and configured")
    except RuntimeError as e:
        # Memory growth has to be set before the GPUs are initialized;
        # report the problem and carry on with the default allocation.
        print(f"GPU configuration error: {e}")

    # Enable mixed precision training only when a GPU is present. float16
    # compute gives no speed-up on CPU and typically makes training much
    # slower there, so the CPU path keeps the default float32 policy.
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
else:
    print("No GPU found, using CPU")

No GPU found, using CPU


In [None]:
# Side length, in pixels, of the square images fed to the network; every
# image is resized to IMG_SIZE x IMG_SIZE when the dataset is loaded.
IMG_SIZE = 128
# Number of units in the Dense bottleneck layer that the encoder outputs.
LATENT_DIM = 256
BATCH_SIZE = 32  # Increase batch size for GPU
# Number of training epochs passed to autoencoder.fit.
EPOCHS = 50

# --- Dataset loading ---
def load_dataset(image_folder, csv_path):
    """Load the images listed in a CSV file together with their labels.

    Args:
        image_folder: Directory that contains the image files.
        csv_path: Path to a CSV file with a 'filename' column (image file
            name, joined onto image_folder) and a 'label' column.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in CSV row order.
        Each image is resized to IMG_SIZE x IMG_SIZE and scaled to
        float32 values in [0, 1]. Channels are in OpenCV's default BGR
        order, since no colour conversion is applied after cv2.imread.

    Raises:
        OSError: If an image listed in the CSV is missing or cannot be
            decoded.
    """
    df = pd.read_csv(csv_path)

    images = []
    labels = []

    # Iterate over the two columns directly; the row index is not needed.
    for filename, label in zip(df['filename'], df['label']):
        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of
            # raising; without this check cv2.resize would fail with an
            # opaque assertion error that does not name the file.
            raise OSError(
                f"Could not read image (missing or unreadable): {img_path}"
            )
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        images.append(img.astype('float32') / 255.0)
        labels.append(label)

    return np.array(images), np.array(labels)

In [None]:






def build_autoencoder():
    """Build a convolutional autoencoder and its encoder sub-model.

    The encoder applies three Conv2D + MaxPooling2D stages (32, 64 and 128
    filters), flattens the result and projects it to a LATENT_DIM-wide
    Dense bottleneck. The decoder mirrors this with a Dense layer, a
    Reshape and three Conv2D + UpSampling2D stages, ending in a 3-channel
    sigmoid output with the same spatial size as the input.

    Returns:
        A tuple ``(autoencoder, encoder)``. Both models share the same
        input and encoder layers, so training the autoencoder also trains
        the encoder.

    Raises:
        ValueError: If IMG_SIZE is not divisible by 8, in which case the
            decoder could not reproduce the input resolution.
    """
    # Each of the three 2x2 poolings halves the spatial size, and each of
    # the three 2x2 upsamplings doubles it again. The round trip is exact
    # only when IMG_SIZE is a multiple of 8.
    if IMG_SIZE % 8 != 0:
        raise ValueError(
            f"IMG_SIZE must be divisible by 8, got {IMG_SIZE}"
        )
    bottleneck_size = IMG_SIZE // 8  # 16 for the default IMG_SIZE of 128

    # Encoder
    input_img = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Flatten and encode
    x = Flatten()(x)
    encoded = Dense(LATENT_DIM, activation='relu')(x)

    # Decoder: expand the latent vector back to the encoder's final
    # feature-map shape before upsampling.
    x = Dense(bottleneck_size * bottleneck_size * 128, activation='relu')(encoded)
    x = Reshape((bottleneck_size, bottleneck_size, 128))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    # Keep the output layer in float32 even if a mixed-precision policy is
    # active, so the reconstruction and the loss are computed in full
    # precision. Under the default float32 policy this changes nothing.
    decoded = Conv2D(
        3, (3, 3), activation='sigmoid', padding='same', dtype='float32'
    )(x)

    # Create models
    autoencoder = Model(input_img, decoded)
    encoder = Model(input_img, encoded)

    return autoencoder, encoder

def extract_color_features(encoder, images, labels):
    """Summarise the encoder's latent vectors per colour label.

    Args:
        encoder: Object with a ``predict(images)`` method returning one
            latent vector per image as a NumPy array.
        images: Batch of images passed unchanged to ``encoder.predict``.
        labels: One label per image, in the same order as ``images``.
            Any sequence is accepted (list, tuple, NumPy array).

    Returns:
        A dict keyed by each distinct label. Every value is a dict with:
        'mean_vector' (element-wise mean of that label's latent vectors),
        'std_vector' (element-wise standard deviation) and 'samples'
        (the latent vectors themselves).
    """
    # Get encoded features
    encoded_features = encoder.predict(images)

    # Coerce to an array so that `labels == color` is an element-wise mask.
    # With a plain Python list the comparison collapses to a single False
    # and every colour would silently end up with no samples.
    labels = np.asarray(labels)

    color_features = {}
    for color in np.unique(labels):
        # Select this colour's vectors once and reuse them for all stats.
        samples = encoded_features[labels == color]
        color_features[color] = {
            'mean_vector': np.mean(samples, axis=0),
            'std_vector': np.std(samples, axis=0),
            'samples': samples,
        }

    return color_features

def main():
    """Train the autoencoder on the iris dataset and print per-colour stats.

    Loads the images and labels, trains the autoencoder to reconstruct its
    input, then encodes every image and prints, for each colour label, the
    shape of the mean latent vector and the number of samples.
    """
    # Per-op device placement logging (tf.debugging.set_log_device_placement)
    # is deliberately not enabled: it prints a line for every executed op,
    # which buries the training progress output.

    # Paths
    image_folder = '/content/drive/MyDrive/ML final project/datasets/CLASSES_400_300_Part2/'
    csv_path = '/content/drive/MyDrive/ML final project/datasets/iris_labelsShort.csv'

    # Pin work to the first GPU only when one is actually present;
    # otherwise place it explicitly on the CPU.
    device = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'

    with tf.device(device):
        # Load dataset
        images, labels = load_dataset(image_folder, csv_path)

        # Build and compile autoencoder
        autoencoder, encoder = build_autoencoder()
        autoencoder.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='mse',
            jit_compile=True  # XLA compilation for faster training
        )

        # Train the autoencoder to reproduce its own input; 20% of the
        # data is held out for validation.
        autoencoder.fit(
            images,
            images,
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
            shuffle=True,
            validation_split=0.2,
        )

        # Extract color features
        color_features = extract_color_features(encoder, images, labels)

        # Print results
        for color, features in color_features.items():
            print(f"\nColor: {color}")
            print(f"Mean vector shape: {features['mean_vector'].shape}")
            print(f"Number of samples: {len(features['samples'])}")

# Run the training pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Range in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mean in device /job:localhost/replica:0/task:0/device:CPU:0
[1m23/74[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m4:02[0m 5s/step - loss: 0.0138Executing op __inference_one_step_on_iterator_4905 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Range in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mean in device /job:localhost/replica:0/task:0/device:CPU:0
[1m24/74[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m3:56[0m 5s/step - loss: 0.0137Executing op __inference_one_step_on_iterator_4905 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op 

KeyboardInterrupt: 