<a href="https://www.kaggle.com/code/anirudhrangu/cs6350?scriptVersionId=271827926" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# KITTI Dataset Visualization

In this step, we:
1. **Load** stereo image pairs (left and right camera) from the KITTI dataset.
2. **Read** the corresponding calibration text file to extract:
   - The **rotation matrix (R)**
   - The **translation vector (t)**
3. **Display** a few sample images with their R and t values to confirm that data loading is correct.

This helps us ensure that the image data and ground-truth poses are aligned before moving into keypoint detection and pose estimation.


In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
from mpl_toolkits.mplot3d import Axes3D

In [None]:
base_dir = "/kaggle/input/kitti-dataset"  
left_dir = os.path.join(base_dir, "data_object_image_2/training/image_2/000000.png")  # left camera images
pose_file = os.path.join(base_dir, "data_object_label_2/training/label_2/000000.txt")  # pose file

In [None]:
import pandas as pd

def parse_kitti_label_file(label_path):

    columns = [
        "type", "truncated", "occluded", "alpha",
        "bbox_xmin", "bbox_ymin", "bbox_xmax", "bbox_ymax",
        "height", "width", "length",
        "pos_x", "pos_y", "pos_z",
        "rotation_y"
    ]
    
    data = []

    try:
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if parts[0] == "DontCare":
                    continue  # skip non-labeled regions
                values = parts[:15]  # ensure only 15 fields are taken
                data.append(values)
        
        # Convert to DataFrame
        df = pd.DataFrame(data, columns=columns)
        
        # Convert numeric columns to float
        for col in columns[1:]:
            df[col] = df[col].astype(float)
        
        return df
    except:
        with exception as e:
            print("error:", e)

# Example usage:
label_file = "/kaggle/input/kitti-dataset/data_object_label_2/training/label_2/000001.txt"
df = parse_kitti_label_file(label_file)
print(df.head())


## Loading KITTI Stereo Images, Labels, and Calibration Data

The KITTI dataset includes:
- Left image: for stereo, used as reference image
- Right image: corresponding stereo pair
- Calibration file: contains camera projection matrices for the stereo cameras
- Label file: includes ground-truth 2D/3D bounding boxes, object dimensions, and location

This code dynamically reads all these files for the first image and prints relevant information for visualization and understanding.


In [2]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Base directory for KITTI dataset on Kaggle
KITTI_BASE = '/kaggle/input/kitti-dataset'

# Define relative paths (dynamic)
RIGHT_IMG_DIR = os.path.join(KITTI_BASE, 'data_object_image_2', 'training', 'image_2')
LEFT_IMG_DIR = os.path.join(KITTI_BASE, 'data_object_image_3', 'training', 'image_3')
CALIB_DIR = os.path.join(KITTI_BASE, 'data_object_calib', 'training', 'calib')
LABEL_DIR = os.path.join(KITTI_BASE, 'data_object_label_2', 'training', 'label_2')

# Function to load calibration projection matrices from calib file
def load_calibration(file_path):
    calib_data = {}
    with open(file_path, 'r') as f:
        lines = f.readlines()
        for line in lines:
            key, value = line.split(':', 1)
            values = np.array([float(v) for v in value.split()])
            if key.startswith('R') and values.size == 9:  # Rectification matrix 3x3
                calib_data[key] = values.reshape(3, 3)
            elif values.size == 12:  # Projection matrix 3x4
                calib_data[key] = values.reshape(3, 4)
            else:
                calib_data[key] = values  # Any other numeric data remains raw for now
    return calib_data


# Load first frame index
img_id = '000000'

# Load stereo images
right_img_path = os.path.join(RIGHT_IMG_DIR, f'{img_id}.png')
left_img_path = os.path.join(LEFT_IMG_DIR, f'{img_id}.png')

right_img = cv2.cvtColor(cv2.imread(right_img_path), cv2.COLOR_BGR2RGB)
left_img = cv2.cvtColor(cv2.imread(left_img_path), cv2.COLOR_BGR2RGB)

# Load calibration data
calib_path = os.path.join(CALIB_DIR, f'{img_id}.txt')
calib = load_calibration(calib_path)

# Load labels
label_path = os.path.join(LABEL_DIR, f'{img_id}.txt')
with open(label_path, 'r') as f:
    labels = f.readlines()

# Display left and right stereo images
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(left_img)
plt.title('Left Image (image_3)')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(right_img)
plt.title('Right Image (image_2)')
plt.axis('off')
plt.show()

# Print Projection matrices from calibration
print("Camera Projection Matrices (P2 - right camera, P3 - left camera):")
for key in ['P2', 'P3']:
    if key in calib:
        print(f"\n{key} matrix:\n{calib[key]}")

# Parse and print label info for visualization
print("\nDetected Objects and their Labels:")
for line in labels:
    parts = line.strip().split(' ')
    class_name = parts[0]
    truncation = float(parts[1])
    occlusion = int(parts[2])
    alpha = float(parts[3])
    bbox = list(map(float, parts[4:8]))
    dimensions = list(map(float, parts[8:11]))
    location = list(map(float, parts[11:14]))
    rotation_y = float(parts[14])

    print(f"\nClass: {class_name}")
    print(f"Truncation: {truncation}, Occlusion: {occlusion}, Alpha: {alpha}")
    print(f"2D Bounding Box: {bbox}")
    print(f"3D Dimensions (h,w,l): {dimensions}")
    print(f"3D Location (x,y,z): {location}")
    print(f"Rotation Y (radians): {rotation_y}")

    # Draw bounding box on left image for visualization
    cv2.rectangle(left_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
    cv2.putText(left_img, class_name, (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

# Show left image with bounding boxes drawn
plt.figure(figsize=(10,6))
plt.imshow(left_img)
plt.title("Left Image with Ground Truth Bounding Boxes")
plt.axis('off')
plt.show()


ValueError: not enough values to unpack (expected 2, got 1)