In [1]:
# type: ignore
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import img_to_array, load_img 

In [2]:
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array

def preprocess_image(image_path):
    # Load the grayscale image
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Check if the image is loaded successfully
    if img is None:
        print(f"Failed to load image at {image_path}")
        return None
    
    img = cv2.resize(img, (224, 224))
    
    # Convert grayscale to RGB by replicating the grayscale channel
    img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    
    # Convert image to array and expand dimensions to match model input
    img_array = img_to_array(img_rgb)
    img_array = np.expand_dims(img_array, axis=0)
    
    return img_array

# Example usage with a specific image file
image_path = r"C:\AFLW2000-HeadPose\testimg\image03537.jpg"  # Replace with an actual image filename
processed_image = preprocess_image(image_path)


In [3]:
import pandas as pd

# Load the angular data from the CSV file
csv_path = r"C:\AFLW2000-HeadPose\angle_data.csv"
angular_data = pd.read_csv(csv_path)

# Debugging step: Print the first few rows of the DataFrame
print("Loaded DataFrame:")
print(angular_data.head())

# Identify non-numeric columns
non_numeric_cols = angular_data.select_dtypes(exclude=[float, int]).columns.tolist()

if non_numeric_cols:
    print(f"Non-numeric columns found: {non_numeric_cols}")
    print("Attempting to convert non-numeric columns to numeric, if possible...")

    # Convert non-numeric columns to numeric, if possible
    for col in non_numeric_cols:
        angular_data[col] = pd.to_numeric(angular_data[col], errors='coerce')
        
    print("Converted non-numeric columns:")
    print(angular_data[non_numeric_cols].head())

# Drop rows with NaN values resulting from coercion, if any
angular_data = angular_data.dropna()

# Ensure the DataFrame now contains only numeric data
if not angular_data.select_dtypes(include=[float, int]).columns.tolist():
    print("Error: The DataFrame still does not contain numeric columns after conversion.")
else:
    # Normalize or preprocess the angular data using Z-score normalization
    angular_data_normalized = (angular_data - angular_data.mean()) / angular_data.std()

    # Debugging step: Print the normalized data
    print("Normalized DataFrame:")
    print(angular_data_normalized.head())


Loaded DataFrame:
   Unnamed: 0        img_name      pitch        yaw       roll
0           0  image00002.jpg -22.874239   1.044306   4.908885
1           1  image00004.jpg  26.932741  68.155235  17.243670
2           2  image00006.jpg -10.579652  50.485413 -13.570644
3           3  image00008.jpg -10.048456  17.143373 -21.392782
4           4  image00010.jpg -50.544579  68.640549 -59.207973
Non-numeric columns found: ['img_name']
Attempting to convert non-numeric columns to numeric, if possible...
Converted non-numeric columns:
   img_name
0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
Normalized DataFrame:
Empty DataFrame
Columns: [Unnamed: 0, img_name, pitch, yaw, roll]
Index: []


In [4]:

def build_transfer_learning_model(input_shape=(224, 224, 3)):
    # Load the pre-trained ResNet50 model
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Optionally unfreeze the last few layers of ResNet50 for fine-tuning
    for layer in base_model.layers[:-10]:
        layer.trainable = False
    for layer in base_model.layers[-10:]:
        layer.trainable = True

    # Add skip connections and multi-scale feature fusion
    skip1 = base_model.get_layer('conv3_block4_out').output
    skip2 = base_model.get_layer('conv4_block6_out').output

    # Custom layers for depth estimation
    x = base_model.output
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Concatenate()([x, skip2])
    
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Concatenate()([x, skip1])
    
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    
    x = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
    # Create the model
    model = models.Model(inputs=base_model.input, outputs=x)
    return model


# Build and compile the model
model = build_transfer_learning_model()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='mse')
model.summary()

In [6]:
from sklearn.model_selection import train_test_split

def load_and_preprocess_image(image_path, target_size=(224, 224)):
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image)
    image = image / 255.0  # Normalize the image
    return image

def load_dataset(image_folder, csv_file, target_size=(224, 224)):
    df = pd.read_csv(csv_file)
    
    images = []
    angles = []
    
    for index, row in df.iterrows():
        image_path = os.path.join(image_folder, row['image_name'])
        if os.path.exists(image_path):
            image = load_and_preprocess_image(image_path, target_size)
            images.append(image)
            angles.append(row['angle'])
    
    return np.array(images), np.array(angles)

# Paths to your data
train_image_folder = r'C:\AFLW2000-HeadPose\trainimg'
test_image_folder = r'C:\AFLW2000-HeadPose\testimg'
csv_file_path = r'C:\AFLW2000-HeadPose\angle_data.csv'

# Load and preprocess the training data
X_train, y_train = load_dataset(train_image_folder, csv_file_path)
X_test, y_test = load_dataset(test_image_folder, csv_file_path)

# Train/test split (optional if you don't have a separate test set)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Build and compile the model
model = build_transfer_learning_model()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='mse')

# Fit the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=16)

# Evaluate on the test set
test_loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')

KeyError: 'image_name'

In [None]:
import time
# Assuming your model is already built and loaded as `model`
input_shape = (224, 224)  # The input shape your model expects

# Function to preprocess the frame
def preprocess_frame(frame):
    frame = cv2.resize(frame, input_shape)  # Resize frame to match the input shape
    frame = frame / 255.0  # Normalize the frame
    frame = np.expand_dims(frame, axis=0)  # Add batch dimension
    return frame

# Initialize video capture from webcam
cap = cv2.VideoCapture(0)

# Check if the webcam is opened correctly
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

last_capture_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    current_time = time.time()
    
    # Capture a frame every 1 second
    if current_time - last_capture_time >= 1:
        last_capture_time = current_time
        
        # Preprocess the frame
        preprocessed_frame = preprocess_frame(frame)
        
        # Get prediction from the model
        prediction = model.predict(preprocessed_frame)
        
        # Assuming the output is a single depth map, we visualize it
        depth_map = prediction[0, :, :, 0]  # Remove batch dimension and channel dimension
        depth_map = cv2.resize(depth_map, (frame.shape[1], frame.shape[0]))  # Resize depth map to original frame size
        depth_map = (depth_map * 255).astype(np.uint8)  # Convert to 8-bit image

        # Combine original frame with depth map for visualization
        combined_image = np.hstack((frame, cv2.cvtColor(depth_map, cv2.COLOR_GRAY2BGR)))

        # Show the output
        cv2.imshow('Original Frame and Depth Map', combined_image)
    
    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()