# In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# === Configuration ===
# Location of the images and of the CSV that maps each file name to its labels.
image_dir = 'image folder'
label_csv = 'labels.csv'
# Target (height, width) every image is resized to before entering the network.
img_height = 400
img_width = 400
batch_size = 1
epochs = 100
learning_rate = 1e-4

# === Load CSV and one-hot encode labels ===
label_df = pd.read_csv(label_csv)
filenames = label_df['filename'].values

# CSV label columns, and the number of classes used to one-hot encode each one.
# The two lists are parallel: label_splits[k] belongs to label_names[k].
label_names = ['concentration', 'composition', 'UV_treatment', 'NOM']
label_splits = [5, 5, 4, 2]  # Class counts

label_tensors = [
    tf.keras.utils.to_categorical(label_df[column], num_classes=n_classes)
    for column, n_classes in zip(label_names, label_splits)
]

# Lay the per-task one-hot blocks side by side: one row per image,
# sum(label_splits) columns.
label_matrix = tf.concat(label_tensors, axis=1)

# === Load image and labels ===
def load_image_and_labels(filename, label):
    """Read one image from disk and split its label vector into per-task targets.

    Args:
        filename: Scalar string tensor; file name relative to ``image_dir``.
        label: 1-D tensor with the concatenated one-hot labels, laid out in
            the order of ``label_names`` with widths ``label_splits``.

    Returns:
        A ``(image, targets)`` pair. ``image`` is the decoded 3-channel image
        resized to ``(img_height, img_width)`` and divided by 255. ``targets``
        maps each entry of ``label_names`` to its one-hot slice of ``label``.
    """
    image_path = tf.strings.join([image_dir, filename], separator='/')
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [img_height, img_width]) / 255.0

    # Derive the slices from label_splits instead of hard-coding offsets, so
    # the targets cannot drift out of sync with the one-hot encoding.
    # tf.split also raises if the vector length differs from sum(label_splits).
    targets = dict(zip(label_names, tf.split(label, label_splits)))
    return image, targets

# === Dataset creation ===
total_samples = len(filenames)
train_size = int(0.8 * total_samples)

# Permute the samples once, eagerly, so that the train/validation membership
# is fixed for the whole run. Splitting a dataset *after* a reshuffling
# `shuffle()` would draw a different split on every epoch and leak validation
# images into training.
order = tf.random.shuffle(tf.range(total_samples))
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.gather(filenames, order), tf.gather(label_matrix, order))
)
dataset = dataset.map(load_image_and_labels, num_parallel_calls=tf.data.AUTOTUNE)

# === Split dataset ===
# Split while the dataset is still unbatched so that `train_size` counts
# samples (not batches) for any batch_size. Only the training split is
# reshuffled between epochs.
train_dataset = (
    dataset.take(train_size)
    .shuffle(buffer_size=100)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = (
    dataset.skip(train_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# === Model ===
# ImageNet-pretrained VGG16 convolutional base (no classifier head), followed
# by global average pooling and a small shared dense layer.
base_model = VGG16(include_top=False, input_shape=(img_height, img_width, 3), weights='imagenet')
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.2)(x)

# One softmax head per task. Head widths come from label_splits so they always
# match the one-hot targets (the 'concentration' head was previously
# hard-coded to 4 units while its labels are encoded with 5 classes).
outputs = {
    task: Dense(n_classes, activation='softmax', name=task)(x)
    for task, n_classes in zip(label_names, label_splits)
}

model = Model(inputs=base_model.input, outputs=outputs)

# === Compile ===
# Loss and metrics are given per output name. A single-entry metrics list is
# rejected by Keras 3 for multi-output models (the list must have one entry
# per output); the dict form is accepted by both Keras 2 and Keras 3.
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss={task: 'categorical_crossentropy' for task in label_names},
    metrics={task: ['accuracy'] for task in label_names},
)

# === Checkpoint ===
# Write the model to disk only when the monitored validation loss improves.
# NOTE(review): this file name is spelled 'Micoplastic...' while the final
# model below uses 'Microplastic...' — confirm whether that is intentional.
checkpoint = ModelCheckpoint(
    filepath='MicoplasticMultiLabelModel.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# === Train ===
history = model.fit(
    x=train_dataset,
    epochs=epochs,
    validation_data=val_dataset,
    callbacks=[checkpoint],
)

# === Save Final Model ===
# Saved unconditionally after the last epoch, separately from the best-so-far
# checkpoint written during training.
model.save('MicroplasticMultiLabelModel_Final.keras')
