## <center> MLP Classifier applied on RGBD images  </center>

In [12]:
#import necessary libraries
import os
import torch
import numpy as np
from PIL import Image
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [13]:
data_dir = r'C:\Users\Siwar\Downloads\data'  
indoor_dir = os.path.join(data_dir, 'indoors')  # Path to the indoor images and depth maps folder
outdoor_dir = os.path.join(data_dir, 'outdoors')  # Path to the outdoor images and depth maps folder

# Function to read and process the images and depth maps
def read_data(directory):
    images = []
    depth_maps = []
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if filename.endswith('.png'):
            img = Image.open(file_path)  
            images.append(img)
        elif filename.endswith('.npy'):
            depth_map = np.load(file_path)  
            depth_maps.append(depth_map)
    return images, depth_maps

# Read indoor images and indoor depth maps
indoor_images, indoor_depth_maps = read_data(indoor_dir)
print(f"Total indoor images: {len(indoor_images)}")
print(f"Total indoor depth maps: {len(indoor_depth_maps)}")

# Read outdoor images and outdoor depth maps
outdoor_images, outdoor_depth_maps = read_data(outdoor_dir)
print(f"Total outdoor images: {len(outdoor_images)}")
print(f"Total outdoor depth maps: {len(outdoor_depth_maps)}")


Total indoor images: 829
Total indoor depth maps: 829
Total outdoor images: 834
Total outdoor depth maps: 834


In [14]:
#Function to divide an image into smaller sub-images
def divide_image(image):
    sub_images = [image]
    for _ in range(2):
        new_sub_images = []
        for sub_image in sub_images:
            width, height = sub_image.size
            sub_width = width // 2
            sub_height = height // 2
            top_left = sub_image.crop((0, 0, sub_width, sub_height))
            top_right = sub_image.crop((sub_width, 0, width, sub_height))
            bottom_left = sub_image.crop((0, sub_height, sub_width, height))
            bottom_right = sub_image.crop((sub_width, sub_height, width, height))
            new_sub_images.extend([top_left, top_right, bottom_left, bottom_right])
        sub_images = new_sub_images
    return sub_images

#Function to calculate the average of an RGB sub-image
def calculate_average_rgb(sub_images):
    averages = []
    for sub_image in sub_images:
        rgb_average = np.array(sub_image).mean(axis=(0, 1))
        averages.append(rgb_average)
    return np.array(averages)

#Function to calculate the average of a depth map sub-image
def calculate_average_depth(depth_map):
    sub_height, sub_width = depth_map.shape[0] // 4, depth_map.shape[1] // 4
    average_depth_values = []
    for i in range(4):
        for j in range(4):
            sub_image = depth_map[i * sub_height:(i + 1) * sub_height, j * sub_width:(j + 1) * sub_width]
            average_depth = np.mean(sub_image)
            average_depth_values.append(average_depth)
    return np.array(average_depth_values)

# Function to create an RGBD tensor from RGB image and its corresponding depth map
def create_rgbd_tensor(rgb_image_path, depth_map_path):
    rgb_image = Image.open(rgb_image_path)
    sub_images = divide_image_recursive(rgb_image)
    average_rgb_values = calculate_average_rgb(sub_images)
    depth_map = np.load(depth_map_path, allow_pickle=True)
    average_depth_values = calculate_average_depth(depth_map)
    matrix = np.column_stack((average_rgb_values, average_depth_values))
    rgbd_tensor = torch.tensor(matrix)
    return rgbd_tensor

In [15]:
# Creating a list of indoor RGBD tensors
indoor_tensors = []
for filename in os.listdir(indoor_dir):
    if filename.endswith('.png'):
        image_path = os.path.join(indoor_dir, filename)
        depth_map_filename = filename.split('.')[0] + '_depth.npy'
        depth_map_path = os.path.join(indoor_dir, depth_map_filename)
        image = Image.open(image_path)
        sub_images = divide_image(image)
        average_rgb_values = calculate_average_rgb(sub_images)
        depth_map = np.load(depth_map_path)
        average_depth_values = calculate_average_depth(depth_map)
        matrix = np.column_stack((average_rgb_values, average_depth_values))
        tensor = torch.tensor(matrix)
        indoor_tensors.append(torch.flatten(tensor))

# Creating a list of outdoor RGBD tensors
outdoor_tensors = []
for filename in os.listdir(outdoor_dir):
    if filename.endswith('.png'):
        image_path = os.path.join(outdoor_dir, filename)
        depth_map_filename = filename.split('.')[0] + '_depth.npy'
        depth_map_path = os.path.join(outdoor_dir, depth_map_filename)
        image = Image.open(image_path)
        sub_images = divide_image(image)
        average_rgb_values = calculate_average_rgb(sub_images)
        depth_map = np.load(depth_map_path)
        average_depth_values = calculate_average_depth(depth_map)
        matrix = np.column_stack((average_rgb_values, average_depth_values))
        tensor = torch.tensor(matrix)
        outdoor_tensors.append(torch.flatten(tensor))

In [16]:
# shape of every indoor or outdoor tensor
indoor_tensors[0].shape, outdoor_tensors[0].shape

(torch.Size([64]), torch.Size([64]))

In [17]:
# Split into training and testing sets
indoor_tensors_train, indoor_tensors_test = train_test_split(indoor_tensors, test_size=0.2, random_state=42)
outdoor_tensors_train, outdoor_tensors_test = train_test_split(outdoor_tensors, test_size=0.2, random_state=42)

# Print the shapes of the train and test sets
print("Indoor tensors - Train:", len(indoor_tensors_train))
print("Indoor tensors - Test:", len(indoor_tensors_test))
print("Outdoor tensors - Train:", len(outdoor_tensors_train))
print("Outdoor tensors - Test:", len(outdoor_tensors_test))

Indoor tensors - Train: 663
Indoor tensors - Test: 166
Outdoor tensors - Train: 667
Outdoor tensors - Test: 167


In [18]:
#train data and labels
indoor_train_data = torch.stack(indoor_tensors_train)
outdoor_train_data = torch.stack(outdoor_tensors_train)
train_data = torch.cat([indoor_train_data, outdoor_train_data])
train_labels = [0] * len(indoor_tensors_train) + [1] * len(outdoor_tensors_train)

#test data and labels
indoor_test_data = torch.stack(indoor_tensors_test)
outdoor_test_data = torch.stack(outdoor_tensors_test)
test_data = torch.cat([indoor_test_data, outdoor_test_data])
test_labels = [0] * len(indoor_tensors_test) + [1] * len(outdoor_tensors_test)

In [19]:
len(train_data), len(test_data)

(1330, 333)

In [20]:
#mlp classifier
mlp = MLPClassifier(hidden_layer_sizes=(100,), batch_size=200, solver='adam', learning_rate_init=0.001, activation='relu', alpha= 0.001,  random_state=42)
mlp.fit(train_data, train_labels)

In [21]:
predictions = mlp.predict(test_data)

In [22]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:", round(accuracy,2))

# Calculate recall
recall = recall_score(test_labels, predictions)
print("Recall:", round(recall,2))

# Calculate precision
precision = precision_score(test_labels, predictions)
print("Precision:", round(precision,2))

# Calculate F1 score
f1 = f1_score(test_labels, predictions)
print("F1 Score:", round(f1,2))

# Calculate AUC-ROC score
auc = roc_auc_score(test_labels, predictions)
print("AUC-ROC:", round(auc,2))


Accuracy: 0.8
Recall: 0.9
Precision: 0.75
F1 Score: 0.82
AUC-ROC: 0.8
