### Imports

In [10]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

First we have to transform our labels data set into a csv file

In [7]:
# Iterate over each label file in the labels folder
def create_csv_annotations(images_folder, labels_folder, name):
    annotations = []
    image_width = 300
    image_height = 300

    for label_file in os.listdir(labels_folder):
        if label_file.endswith('.txt'):
            with open(os.path.join(labels_folder, label_file), 'r') as f:
                lines = f.readlines()
            
            image_name = os.path.splitext(label_file)[0] + '.jpg'
            image_path = os.path.join(images_folder, image_name)
            
            for line in lines:
                class_label, x_center, y_center, width, height = map(float, line.split())
                x_min = (x_center - width / 2)
                y_min = (y_center - height / 2)
                x_max = (x_center + width / 2)
                y_max = (y_center + height / 2)
                
                annotations.append([image_path, x_min, y_min, x_max, y_max, image_width, image_height, class_label])

        # Here we create a DataFrame from annotations list and then we convert the df into a csv file
        df = pd.DataFrame(annotations, columns=['img_path', 'xmin', 'ymin', 'xmax', 'ymax', 'width', 'height', 'label'])
        df.to_csv(name, index=False)

In [8]:
create_csv_annotations('images/train', 'labels/train', 'annotations_train.csv')

In [9]:
create_csv_annotations('images/val', 'labels/val', 'annotations_val.csv')

[Tensor flow intro -> Why sequential](https://towardsdatascience.com/a-comprehensive-introduction-to-tensorflows-sequential-api-and-model-for-deep-learning-c5e31aee49fa#:~:text=The%20sequential%20model%20allows%20us,for%20building%20deep%20learning%20models.)

[Input and output shapes for CNN](https://towardsdatascience.com/understanding-input-and-output-shapes-in-convolution-network-keras-f143923d56ca)

[Basics of the R-CNN model](https://towardsdatascience.com/object-detection-explained-r-cnn-a6c813937a76)

In [11]:
# Load annotations from CSV
train_annotations = pd.read_csv('annotations_train.csv')
val_annotations = pd.read_csv('annotations_val.csv')

#train_annotations, val_annotations = train_test_split(annotations, test_size=0.2, random_state=42)


input_shape = (300, 300, 3)  # height, width, depth (this is the # of color channels RGB = 3)

# Function to preprocess image and annotations -> this is because 
# the annotations are still not in the format required for TF
def preprocess_data(annotation):
    image = load_img(annotation['img_path'], target_size=(input_shape[0], input_shape[1]))
    image_array = img_to_array(image)
    image_array /= 255.0
    bbox = [annotation['xmin'], annotation['ymin'], annotation['xmax'], annotation['ymax']]
    label = annotation['label']
    return image_array, bbox, label

train_data = train_annotations.apply(preprocess_data, axis=1)
val_data = val_annotations.apply(preprocess_data, axis=1)

# Convert preprocessed data into arrays -> this is the format needed for TF
X_train, y_train_bbox, y_train_label = zip(*train_data)
X_val, y_val_bbox, y_val_label = zip(*val_data)

# Convert lists to numpy arrays
X_train = tf.convert_to_tensor(X_train)
y_train_bbox = tf.convert_to_tensor(y_train_bbox)
y_train_label = tf.convert_to_tensor(y_train_label)
X_val = tf.convert_to_tensor(X_val)
y_val_bbox = tf.convert_to_tensor(y_val_bbox)
y_val_label = tf.convert_to_tensor(y_val_label)

# This is the RCNN model, this is just base model for testing
def create_rcnn_model(input_shape, num_classes):
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),#batch_input_shape (if you wanted to give the batch_size)
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(), # Here we are basically changing the 4D output of the CNN to 2D so that we can use Dense
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax', name='classifier_output')
    ])
    return model

# Number of classes
num_classes = train_annotations['label'].nunique()

# Create an instance of the R-CNN model
rcnn_model = create_rcnn_model(input_shape, num_classes)

# Compile the model with appropriate losses and metrics
rcnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
rcnn_model.fit(X_train, y_train_label, validation_data=(X_val, y_val_label), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1f233511c70>

In [18]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

y_pred = np.argmax(rcnn_model.predict(X_val), axis=1)

y_val_label = np.array(y_val_label, dtype=int)
y_pred = np.array(y_pred, dtype=int)

target_names = ['Vehiculos', 'Construcciones', 'Vias', 'Rios', 'Mineria']  # Get unique class labels
print(classification_report(y_val_label, y_pred, target_names=target_names))

conf_matrix = confusion_matrix(y_val_label, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

                precision    recall  f1-score   support

     Vehiculos       0.00      0.00      0.00         5
Construcciones       0.42      0.38      0.40        29
          Vias       0.60      0.62      0.61        24
          Rios       0.91      0.89      0.90        76
       Mineria       0.17      0.25      0.20        16

      accuracy                           0.65       150
     macro avg       0.42      0.43      0.42       150
  weighted avg       0.65      0.65      0.65       150

Confusion Matrix:
[[ 0  4  0  0  1]
 [ 0 11  3  0 15]
 [ 0  2 15  6  1]
 [ 0  0  5 68  3]
 [ 0  9  2  1  4]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
print(target_names)

['3.0' '2.0' '1.0' '4.0' '0.0']
