In [1]:
import pandas as pd
df = pd.read_csv('train_labels_m.csv')
print(df.head())

  IMAGE_ID     X1     Y1     X2     Y2     label
0    1.tif  148.0   40.0  183.0  125.0  rockfall
1   10.tif  214.0  159.0  287.0  209.0  rockfall
2   10.tif  553.0  192.0  596.0  229.0  rockfall
3   11.tif  418.0   58.0  452.0   88.0  rockfall
4   11.tif  421.0  141.0  468.0  182.0  rockfall


In [11]:
df['X1'].isna().sum()+df['label'].isna().sum()

600

In [25]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input, Reshape
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from collections import defaultdict
import cv2
import os

# Function to load and resize the images
def load_tif_image(file_path):
    img = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
    
    if img is None:
        raise FileNotFoundError(f"Image {file_path} not found.")

    # Convert grayscale to RGB if necessary
    if len(img.shape) == 2:
        img = np.stack((img,) * 3, axis=-1)
    
    return img

# Function to pad image to square
def pad_image_to_square(img, size):
    height, width = img.shape[:2]
    # Compute padding amounts
    delta_w = max(size - width, 0)
    delta_h = max(size - height, 0)
    top, bottom = delta_h // 2, delta_h - delta_h // 2
    left, right = delta_w // 2, delta_w - delta_w // 2

    # Apply padding
    padded_img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

    # Resize image to the target size
    padded_img = cv2.resize(padded_img, (size, size))
    return padded_img

# Normalize bounding boxes
def normalize_bbox(bbox, original_size):
    old_w, old_h = original_size
    x1, y1, x2, y2 = bbox
    return [x1 / old_w, y1 / old_h, x2 / old_w, y2 / old_h]

# Denormalize bounding boxes after prediction
def denormalize_bbox(bbox, original_size):
    old_w, old_h = original_size
    x1, y1, x2, y2 = bbox
    return [x1 * old_w, y1 * old_h, x2 * old_w, y2 * old_h]

# Adjust bounding boxes after padding/resizing
def adjust_bbox_after_padding(bbox, original_size, padded_size=256):
    old_w, old_h = original_size
    scale_w = padded_size / old_w
    scale_h = padded_size / old_h
    x1, y1, x2, y2 = bbox
    return [x1 * scale_w, y1 * scale_h, x2 * scale_w, y2 * scale_h]

# Load data and handle multiple bounding boxes
def load_data(image_paths, labels_df, size=256, max_boxes=10):
    images = []
    class_labels = []
    bbox_labels = []

    # Convert labels to numeric format
    labels_df['label'] = labels_df['label'].apply(lambda x: 1 if x == 'rockfall' else 0)

    # Create a dictionary to store multiple bounding boxes per image
    image_dict = defaultdict(list)

    for _, row in labels_df.iterrows():
        img_path = row['image_id']
        bbox = [row['X1'], row['Y1'], row['X2'], row['Y2']]
        image_dict[img_path].append((row['label'], bbox))

    for img_path in image_paths:
        img = load_tif_image(img_path)
        original_size = img.shape[1], img.shape[0]  # (width, height)
        img = pad_image_to_square(img, size)
        img = img / 255.0  # Normalize

        # Get all bounding boxes for this image
        if img_path in image_dict:
            bboxes = []
            labels = []
            for label, bbox in image_dict[img_path]:
                # Normalize the bounding box
                normalized_bbox = normalize_bbox(bbox, original_size)
                print(f"Normalized Bounding Box for {img_path}: {normalized_bbox}")
                adjusted_bbox = adjust_bbox_after_padding(normalized_bbox, original_size)
                bboxes.append(adjusted_bbox)
                labels.append(label)
        else:
            # No bounding boxes for this image
            bboxes = [[0, 0, 0, 0]]
            labels = [0]

        # Pad bounding boxes and labels to max_boxes length
        bboxes = pad_bboxes(bboxes, max_boxes)
        labels = pad_labels(labels, max_boxes)

        images.append(img)
        class_labels.append(labels)
        bbox_labels.append(bboxes)

    return np.array(images), np.array(class_labels), np.array(bbox_labels)

# Pad bounding boxes to max_boxes length
def pad_bboxes(bboxes, max_boxes=10):
    padded_bboxes = bboxes[:max_boxes]
    while len(padded_bboxes) < max_boxes:
        padded_bboxes.append([0, 0, 0, 0])  # Add dummy boxes
    return padded_bboxes

# Pad labels to max_boxes length
def pad_labels(labels, max_boxes=10):
    padded_labels = labels[:max_boxes]
    while len(padded_labels) < max_boxes:
        padded_labels.append(0)  # Add dummy labels
    return padded_labels

# Define the CNN model
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense, Reshape, Dropout, BatchNormalization, Add
)
from tensorflow.keras.models import Model

def residual_block(x, filters):
    """Residual block for improved learning."""
    shortcut = x
    x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Add()([x, shortcut])  # Add shortcut connection
    return x

from tensorflow.keras.layers import Conv2D, BatchNormalization, Add, Activation

def residual_block(x, filters):
    # Shortcut connection (identity mapping)
    shortcut = x

    # First convolutional layer
    x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)

    # Second convolutional layer
    x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)

    # Adjust the shortcut if the number of filters has changed
    if shortcut.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), padding='same')(shortcut)

    # Add shortcut connection and apply ReLU activation
    x = Add()([x, shortcut])
    x = Activation('relu')(x)

    return x

def create_complex_model(input_shape=(256, 256, 3), max_boxes=10):
    inputs = Input(shape=input_shape)

    # Initial convolutional layers
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Residual blocks
    x = residual_block(x, 32)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = residual_block(x, 64)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = residual_block(x, 128)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Flatten and Dense layers
    x = Flatten()(x)

    # Bounding box output
    bbox_output = Dense(max_boxes * 4, name='bbox_output')(x)
    bbox_output = Reshape((max_boxes, 4), name='bbox_reshape')(bbox_output)

    # Class output
    class_output = Dense(max_boxes, activation='sigmoid', name='class_output')(x)
    class_output = Reshape((max_boxes, 1), name='class_reshape')(class_output)

    # Model
    model = Model(inputs=inputs, outputs=[class_output, bbox_output])

    # Compile the model
    model.compile(
        optimizer='adam',
        loss=['binary_crossentropy', 'mean_squared_error'],
        metrics=[['accuracy'], ['mean_squared_error']]
    )

    return model




# Load the labels DataFrame from your CSV file
labels_df = pd.read_csv('train_labels_m.csv')
labels_df = labels_df.rename(columns={'IMAGE_ID': 'image_id'})

# Get unique image paths
image_paths = labels_df['image_id'].unique().tolist()

# Ensure all image paths are correct
image_paths = [os.path.join('train_images', img) for img in image_paths]

# Load and preprocess the data
X, y_class, y_bbox = load_data(image_paths, labels_df)

# Verify shapes
print("X shape:", X.shape)
print("y_class shape:", y_class.shape)
print("y_bbox shape:", y_bbox.shape)

# Split into training and test sets
X_train, X_test, y_train_class, y_test_class, y_train_bbox, y_test_bbox = train_test_split(
    X, y_class, y_bbox, test_size=0.2, random_state=42)

# Create and compile the model
model = create_complex_model(input_shape=(256, 256, 3), max_boxes=10)

# Train the model
model.fit(X_train, [y_train_class, y_train_bbox], epochs=8, batch_size=4,
          validation_data=(X_test, [y_test_class, y_test_bbox]))



X shape: (649, 256, 256, 3)
y_class shape: (649, 10)
y_bbox shape: (649, 10, 4)
Epoch 1/8
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 440ms/step - bbox_reshape_loss: 76.2591 - bbox_reshape_mean_squared_error: 76.2597 - class_reshape_accuracy: 0.9792 - class_reshape_loss: 0.0723 - loss: 76.3320 - val_bbox_reshape_loss: 0.0878 - val_bbox_reshape_mean_squared_error: 0.0880 - val_class_reshape_accuracy: 1.0000 - val_class_reshape_loss: 1.1921e-07 - val_loss: 0.0880
Epoch 2/8
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 353ms/step - bbox_reshape_loss: 0.3928 - bbox_reshape_mean_squared_error: 0.3928 - class_reshape_accuracy: 1.0000 - class_reshape_loss: 1.1921e-07 - loss: 0.3928 - val_bbox_reshape_loss: 0.0851 - val_bbox_reshape_mean_squared_error: 0.0858 - val_class_reshape_accuracy: 1.0000 - val_class_reshape_loss: 1.2400e-07 - val_loss: 0.0858
Epoch 3/8
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 494ms/step - bbox_reshap

<keras.src.callbacks.history.History at 0x1a65164c090>

In [26]:
# Evaluate the model
results = model.evaluate(X_test, [y_test_class, y_test_bbox])

# Unpack total loss, classification loss, and bounding box loss
total_loss = results[0]
class_loss = results[1]
bbox_loss = results[2]

print(f"Total Loss: {total_loss}, Classification Loss: {class_loss}, Bounding Box Loss: {bbox_loss}")


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 473ms/step - bbox_reshape_loss: 0.1255 - bbox_reshape_mean_squared_error: 0.1295 - class_reshape_accuracy: 1.0000 - class_reshape_loss: 1.1921e-07 - loss: 0.1295
Total Loss: 0.1280205398797989, Classification Loss: 1.1920930376163597e-07, Bounding Box Loss: 0.11608558893203735


In [27]:
def visualize_predictions(image, bboxes):
    img_copy = image.copy()  # Make a copy of the original image for drawing
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = map(int, bbox)  # Convert coordinates to integers

        # Draw the rectangle on the image
        img_copy = cv2.rectangle(img_copy, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2)

    # Display the image with the bounding boxes
    cv2.imshow('Predicted Bounding Boxes', img_copy)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Assuming `test_image_paths` contains the paths of test images
def denormalize_bboxes(bboxes, original_size, bias=0.1):
    """Convert normalized bounding boxes to pixel coordinates with bias for negatives."""
    height, width = original_size
    denorm_bboxes = []
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = bbox

        # Apply bias if coordinates are negative
        if x_min < 0:
            x_min = 0 + bias  # Ensure x_min is at least 'bias' pixels inside the image
        if y_min < 0:
            y_min = 0 + bias  # Ensure y_min is at least 'bias' pixels inside the image
        if x_max < 0:
            x_max = 0 + bias  # Ensure x_max is at least 'bias' pixels inside the image
        if y_max < 0:
            y_max = 0 + bias  # Ensure y_max is at least 'bias' pixels inside the image

        # Convert normalized to pixel coordinates
        denorm_bboxes.append([
            int(max(0, x_min * width)),   # Ensuring x_min is not negative
            int(max(0, y_min * height)),  # Ensuring y_min is not negative
            int(min(width, x_max * width)),  # Ensuring x_max does not exceed image width
            int(min(height, y_max * height))  # Ensuring y_max does not exceed image height
        ])
    return denorm_bboxes






def predict_and_visualize(model, image_paths, size=256):
    for img_path in image_paths:
        img = load_tif_image(img_path)  # Load image from path
        if img is None:
            print(f"Image {img_path} could not be loaded!")
            continue

        original_size = img.shape[1], img.shape[0]  # (width, height)
        padded_img = pad_image_to_square(img, size)
        padded_img = np.expand_dims(padded_img, axis=0)  # Add batch dimension

        # Prediction
        class_preds, bbox_preds = model.predict(padded_img)
        print(f"Raw bounding box predictions: {bbox_preds[0]}")  # Print raw bounding box predictions

        # Check predictions
        class_preds = (class_preds[0] > 0.7).astype(int).flatten()

        # Denormalize bounding boxes
        denorm_bboxes = denormalize_bboxes(bbox_preds[0], original_size)
        print(f"Denormalized bounding boxes: {denorm_bboxes}")  # Print denormalized bounding boxes

        # Visualize the image with bounding boxes
        visualize_predictions(img, denorm_bboxes)



test_image_paths = [
    "test_images/test10.tif",  # Ensure it's in a list
    "test_images/test11.tif",
    "test_images/neg1.tif"
]
# Pass the correct list of test image paths instead of X_test (which contains image data)
predict_and_visualize(model, test_image_paths)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403ms/step
Raw bounding box predictions: [[-340.55286    29.840252  863.5465   -346.9479  ]
 [-256.66415   749.1929    644.6697    -90.256744]
 [-396.12045   728.2723    166.54254   498.96442 ]
 [-748.1896   2254.8372    756.2518   -638.94885 ]
 [-246.75055  -886.1847    -42.48365  -238.64795 ]
 [-359.89493   597.51855   523.4731    259.78082 ]
 [-501.61465  -358.0847    448.00113   115.80741 ]
 [-484.94843   629.37054   134.2784   -656.4552  ]
 [ 514.46045   270.27786   810.80505  -320.6215  ]
 [-794.00684  -416.68307   649.8046    152.37881 ]]
Denormalized bounding boxes: [[77, 42432, 770, 142], [77, 1065352, 770, 142], [77, 1035603, 770, 1422], [77, 3206378, 770, 142], [77, 142, 77, 142], [77, 849671, 770, 1422], [77, 142, 770, 1422], [77, 894964, 770, 142], [396134, 384335, 770, 142], [77, 142, 770, 1422]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Raw bounding box predictions: [[ -618.9631  