In [12]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ProgbarLogger
import pandas as pd
import os

# Load and prepare the labels
labels_df = pd.read_csv("labels.csv")
print(labels_df.head())
labels_df.columns = labels_df.columns.str.strip()
print(labels_df.columns)


# Set up data generators
data_generator = ImageDataGenerator(rescale=1.0 / 255)  # Add other augmentations here for training


# Function to create data generators
def create_data_generator(directory, labels_df, batch_size=32):
    image_files = os.listdir(directory)

    # Adjust these column names to match your CSV structure
    image_column = 'Pothole number'  # The column with the image filenames
    label_column = "Bags used "  # The column with the labels

    # Convert image filenames to match the format in the CSV (e.g., remove 'p' prefix and '.jpg' suffix)
    image_files = [f.replace("p", "").replace(".jpg", "") for f in image_files]

    # Filter the dataframe to only include images present in the directory
    valid_labels = labels_df[labels_df[image_column].isin(image_files)]

    # Restore image file names (add back the 'p' prefix and '.jpg' suffix)
    valid_labels[image_column] = "p" + valid_labels[image_column].astype(str) + ".jpg"

    return data_generator.flow_from_dataframe(
        dataframe=valid_labels, directory=directory, x_col=image_column, y_col=label_column, target_size=(224, 224), batch_size=batch_size, class_mode="raw", shuffle=False  # For regression tasks
    )


# Create generators for each set
train_generator = create_data_generator("./training", labels_df)
val_generator = create_data_generator("./validation", labels_df)
test_generator = create_data_generator("./testing", labels_df)

# Define the model
def create_model(input_shape):
    model = models.Sequential(
        [
            # Data augmentation layer
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(0.1),
            layers.RandomZoom(0.1),
            # Convolutional layers
            layers.Conv2D(32, (3, 3), activation="relu", input_shape=input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation="relu"),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation="relu"),
            # Flatten and fully connected layers
            layers.Flatten(),
            layers.Dense(64, activation="relu"),
            layers.Dropout(0.5),
            layers.Dense(32, activation="relu", name="last_hidden"),
            layers.Dense(1),  # Regression output
        ]
    )
    return model

# Define and compile the model (using the create_model function from before)
model = create_model((224, 224, 3))  # Adjust input shape as needed
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Train the model
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
progbar = ProgbarLogger(count_mode="steps", stateful_metrics=None)

history = model.fit(train_generator, validation_data=val_generator, epochs=100, callbacks=[early_stopping, progbar])

# Evaluate on test set
test_loss, test_mae = model.evaluate(test_generator)
print(f"Test MAE: {test_mae}")

# Extract features from the last hidden layer
feature_extractor = tf.keras.Model(inputs=model.inputs, outputs=model.get_layer("last_hidden").output)


# Function to extract features
def extract_features(generator, feature_extractor):
    features = feature_extractor.predict(generator)
    return features


# Extract features for each set
train_features = extract_features(train_generator, feature_extractor)
val_features = extract_features(val_generator, feature_extractor)
test_features = extract_features(test_generator, feature_extractor)

   Pothole number  Bags used 
0             101         0.5
1             102         1.0
2             106         0.5
3             107         0.5
4             109         0.5
Index(['Pothole number', 'Bags used'], dtype='object')


KeyError: 'Pothole number'