In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
import os
import cv2
import struct
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array, load_img

In [7]:
dataset_dir = 'dataset/'

img_size = (224, 224)

In [19]:

def load_pose_txt(file_path):
    """Load pose from text file."""
    with open(file_path, 'r') as f:
        pose = [float(x) for x in f.readline().split()]
    return np.array(pose)

def load_depth_bin(file_path):
    """Load depth data from binary file, attempt to reshape, and resize to (224, 224)."""
    try:
        with open(file_path, 'rb') as f:
            buffer = f.read()
            print(f"File: {file_path}, Buffer size: {len(buffer)} bytes")

            # Try interpreting the buffer as float32 data
            try:
                depth_data = np.frombuffer(buffer, dtype=np.float32)
                print(f"Loaded data as float32 with length: {len(depth_data)}")
            except ValueError:
                print(f"Could not interpret {file_path} as float32.")
                return None

            # Attempt to reshape the data to its likely original dimensions
            possible_shapes = [(224, 224), (640, 480)]  # Add possible dimensions here
            for shape in possible_shapes:
                if depth_data.size == np.prod(shape):
                    depth_img = depth_data.reshape(shape)
                    print(f"Successfully reshaped to {shape}")
                    # Resize the depth image to the target shape (224, 224)
                    depth_img_resized = cv2.resize(depth_img, (224, 224))
                    return depth_img_resized

            print(f"Could not reshape depth data: Could not reshape array of size {depth_data.size} into known shapes")
            return None  # Skip this file

    except Exception as e:
        print(f"Error loading depth data from {file_path}: {e}")
        return None

def preprocess_biwi_dataset(dataset_dir):
    """Traverse the BIWI dataset directory and preprocess the images and depth maps."""
    rgb_images = []
    depth_images = []
    poses = []
    masks = []
    
    sub_dirs = ['faces_0', 'db_annotations', 'head_pose_marks']

    for sub_dir in sub_dirs:
        sub_dir_path = os.path.join(dataset_dir, sub_dir)

        if not os.path.isdir(sub_dir_path):
            print(f"Skipping non-directory {sub_dir_path}")
            continue

        for folder in os.listdir(sub_dir_path):
            folder_path = os.path.join(sub_dir_path, folder)
            
            if not os.path.isdir(folder_path):
                print(f"Skipping non-directory {folder_path}")
                continue

            for file_name in os.listdir(folder_path):
                if sub_dir == 'faces_0' and file_name.endswith('_rgb.png'):
                    rgb_img_path = os.path.join(folder_path, file_name)
                    rgb_img = cv2.imread(rgb_img_path)
                    rgb_img = cv2.resize(rgb_img, (224, 224))
                    rgb_images.append(rgb_img)

                    # Load corresponding Depth data
                    depth_bin_path = rgb_img_path.replace('_rgb.png', '_depth.bin')
                    depth_img = load_depth_bin(depth_bin_path)
                    if depth_img is not None:
                        depth_images.append(depth_img)

                elif sub_dir == 'db_annotations' and file_name.endswith('_pose.bin'):
                    pose_bin_path = os.path.join(folder_path, file_name)
                    pose_data = np.fromfile(pose_bin_path, dtype=np.float32)
                    poses.append(pose_data)

                elif sub_dir == 'head_pose_marks' and file_name.endswith('.png'):
                    mask_img_path = os.path.join(folder_path, file_name)
                    mask_img = cv2.imread(mask_img_path, cv2.IMREAD_GRAYSCALE)
                    mask_img = cv2.resize(mask_img, (224, 224))
                    masks.append(mask_img)
                    
    return rgb_images, depth_images, poses, masks

In [20]:
# Use the function to preprocess the dataset
dataset_dir = 'dataset/'
rgb_images, depth_images, poses, masks = preprocess_biwi_dataset(dataset_dir)

# Normalize images and masks
rgb_images = rgb_images / 255.0
depth_images = depth_images / np.max(depth_images)
masks = masks / 255.0

# Prepare TensorFlow dataset
train_dataset = tf.data.Dataset.from_tensor_slices((rgb_images, depth_images, masks))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)

File: dataset/faces_0\01\frame_00003_depth.bin, Buffer size: 116280 bytes
Loaded data as float32 with length: 29070
Could not reshape depth data: Could not reshape array of size 29070 into known shapes
File: dataset/faces_0\01\frame_00004_depth.bin, Buffer size: 115952 bytes
Loaded data as float32 with length: 28988
Could not reshape depth data: Could not reshape array of size 28988 into known shapes
File: dataset/faces_0\01\frame_00005_depth.bin, Buffer size: 115992 bytes
Loaded data as float32 with length: 28998
Could not reshape depth data: Could not reshape array of size 28998 into known shapes
File: dataset/faces_0\01\frame_00006_depth.bin, Buffer size: 116056 bytes
Loaded data as float32 with length: 29014
Could not reshape depth data: Could not reshape array of size 29014 into known shapes
File: dataset/faces_0\01\frame_00007_depth.bin, Buffer size: 115896 bytes
Loaded data as float32 with length: 28974
Could not reshape depth data: Could not reshape array of size 28974 into kno

KeyboardInterrupt: 

In [None]:

# Updated function to handle your dataset structure
def preprocess_biwi_image(rgb_img_path, depth_img_path, img_size=(224, 224)):
    # Load RGB image
    rgb_img = load_img(rgb_img_path, target_size=img_size)
    rgb_img = img_to_array(rgb_img)

    # Load Depth image
    depth_img = cv2.imread(depth_img_path, cv2.IMREAD_UNCHANGED)
    depth_img = cv2.resize(depth_img, img_size)
    
    # Normalize depth image (optional)
    depth_img = depth_img.astype('float32')
    depth_img = depth_img / np.max(depth_img)  # Normalize to range [0, 1]

    return rgb_img, depth_img

def save_preprocessed_data(output_dir, rgb_img, depth_img, file_id):
    # Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Save RGB and Depth images
    np.save(os.path.join(output_dir, f'{file_id}_rgb.npy'), rgb_img)
    np.save(os.path.join(output_dir, f'{file_id}_depth.npy'), depth_img)

def preprocess_biwi_dataset(dataset_dir, output_dir, img_size=(224, 224)):
    faces_dir = os.path.join(dataset_dir, 'faces_0')
    depth_dir = os.path.join(dataset_dir, 'head_pose_marks')
    annotations_dir = os.path.join(dataset_dir, 'db_annotations')

    subjects = [d for d in os.listdir(faces_dir) if os.path.isdir(os.path.join(faces_dir, d))]
    
    for subject in subjects:
        subject_face_path = os.path.join(faces_dir, subject)
        subject_depth_path = os.path.join(depth_dir, subject)
        subject_annotations_path = os.path.join(annotations_dir, subject)

        frames = sorted(os.listdir(subject_face_path))
        
        for frame in frames:
            frame_id = frame.split('.')[0]  # Assuming frames are named like 001.png
            rgb_img_path = os.path.join(subject_face_path, frame)
            depth_img_path = os.path.join(subject_depth_path, f'{frame_id}.png')

            # Check if the necessary files exist
            if not os.path.exists(rgb_img_path) or not os.path.exists(depth_img_path):
                continue

            # Preprocess the images
            rgb_img, depth_img = preprocess_biwi_image(rgb_img_path, depth_img_path, img_size)

            # Save the preprocessed data
            file_id = f'{subject}_{frame_id}'
            save_preprocessed_data(output_dir, rgb_img, depth_img, file_id)

            print(f'Processed and saved {file_id}')


In [3]:

def build_transfer_learning_model(input_shape=(224, 224, 3)):
    # Load the pre-trained ResNet50 model
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Optionally unfreeze the last few layers of ResNet50 for fine-tuning
    for layer in base_model.layers[:-10]:
        layer.trainable = False
    for layer in base_model.layers[-10:]:
        layer.trainable = True

    # Add skip connections and multi-scale feature fusion
    skip1 = base_model.get_layer('conv3_block4_out').output
    skip2 = base_model.get_layer('conv4_block6_out').output

    # Custom layers for depth estimation
    x = base_model.output
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Concatenate()([x, skip2])
    
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Concatenate()([x, skip1])
    
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    
    x = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
    # Create the model
    model = models.Model(inputs=base_model.input, outputs=x)
    return model


# Build and compile the model
model = build_transfer_learning_model()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='mse')
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_2[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            