In [2]:
# -----------------------------------
# Step 1: Imports and Utility Functions
# -----------------------------------
import rosbag
import open3d as o3d
import numpy as np
import sensor_msgs.point_cloud2 as pc2
import os
import glob
import csv

def ros_point_cloud2_to_o3d(point_cloud_msg):
    """
    Convert a sensor_msgs/PointCloud2 message to an Open3D PointCloud.

    Only the "x", "y" and "z" fields are read and points containing NaNs are
    skipped. A message without any valid point yields an empty point cloud
    instead of failing on a mis-shaped array.
    """
    xyz = np.asarray(
        list(pc2.read_points(point_cloud_msg, skip_nans=True, field_names=("x", "y", "z"))),
        dtype=np.float64,
    ).reshape(-1, 3)  # an empty list would otherwise have shape (0,), not (0, 3)
    o3d_pcd = o3d.geometry.PointCloud()
    o3d_pcd.points = o3d.utility.Vector3dVector(xyz)
    return o3d_pcd

def save_processed_point_cloud(o3d_pcd, output_directory, bag_file_name, msg_index):
    """
    Save the processed Open3D point cloud to a file with a unique index.

    The file is written to ``output_directory`` (created if missing) as
    ``<bag_file_name without its extension>_<msg_index>.pcd``. A message is
    printed saying whether Open3D reported the write as successful.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(output_directory, exist_ok=True)
    file_stem = os.path.splitext(bag_file_name)[0]
    output_file_path = os.path.join(output_directory, f"{file_stem}_{msg_index}.pcd")
    # write_point_cloud signals failure through its boolean return value
    if o3d.io.write_point_cloud(output_file_path, o3d_pcd):
        print(f"Saved {output_file_path}")
    else:
        print(f"Failed to save {output_file_path}")


Jupyter environment detected. Enabling Open3D WebVisualizer.


AttributeError: partially initialized module 'open3d' has no attribute 'visualization' (most likely due to a circular import)

In [None]:
# -----------------------------------
# Step 2: Define Processing Function
# -----------------------------------
def process_rosbag(bag_file, topic_name, base_output_directory, error_log_file):
    """
    Process a ROS bag file to convert all messages on a topic to PCD files and aggregate them.

    Each message on ``topic_name`` is written as an individual PCD file under
    ``<base_output_directory>/<bag name>/`` and all of them are merged into
    ``<bag name>_grand.pcd`` in the same folder. A bag whose grand PCD already
    exists is skipped. If any error arises, the bag file name and the error
    text are appended as a row to the CSV file ``error_log_file``.
    """
    bag_file_name = os.path.basename(bag_file)
    bag_name = os.path.splitext(bag_file_name)[0]
    output_directory = os.path.join(base_output_directory, bag_name)

    # Skip processing if the file has already been processed
    grand_pcd_path = os.path.join(output_directory, f"{bag_name}_grand.pcd")
    if os.path.exists(grand_pcd_path):
        print(f"{bag_file} already processed. Skipping...")
        return

    try:
        os.makedirs(output_directory, exist_ok=True)

        # Merge frames as they are read instead of keeping every cloud in a list
        grand_pcd = o3d.geometry.PointCloud()
        # The context manager closes the bag even when a message fails to convert
        with rosbag.Bag(bag_file, "r") as bag:
            for index, (_topic, msg, _stamp) in enumerate(bag.read_messages(topics=[topic_name])):
                o3d_pcd = ros_point_cloud2_to_o3d(msg)
                # Pass the file name with its extension: the helper strips one
                # extension itself, so passing bag_name would strip twice and
                # truncate names that contain dots.
                save_processed_point_cloud(o3d_pcd, output_directory, bag_file_name, index)
                grand_pcd += o3d_pcd

        # A failed write (reported via the return value) must not look like success
        if not o3d.io.write_point_cloud(grand_pcd_path, grand_pcd):
            raise RuntimeError(f"Failed to write aggregated PCD {grand_pcd_path}")
        print(f"Aggregated PCD saved to {grand_pcd_path}")
    except Exception as e:
        print(f"Error processing {bag_file}: {e}")
        # newline='' is what the csv module expects for files it writes
        with open(error_log_file, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([bag_file, str(e)])


In [None]:
# -----------------------------------
# Step 3: Set Directory Paths and Topic Name
# -----------------------------------
source_directory = os.path.expanduser("/home/nrelab-titan/Desktop/shovon/data/rosbags_cyglidar/rosbags_for_training")
base_output_directory = os.path.join(source_directory, "processed_pointclouds_combined")
point_cloud_topic = "/scan_3D"
error_log_file = os.path.join(base_output_directory, "error_log.csv")

# Create the output folder if it is missing (exist_ok avoids a check-then-create race)
os.makedirs(base_output_directory, exist_ok=True)

# Ensure the error log file exists and starts with a header row
if not os.path.exists(error_log_file):
    # newline='' is what the csv module expects for files it writes
    with open(error_log_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Bag File", "Error"])


In [None]:
# ---------------------------------------
# Step 4: Process Each Bag File in Subdirectories
# ---------------------------------------
folders = ["stairs", "non_stairs"]

for folder in folders:
    # Bags are read from <source>/<folder>; results go to <output>/<folder>
    bag_files = glob.glob(os.path.join(source_directory, folder, "*.bag"))
    output_folder_path = os.path.join(base_output_directory, folder)
    print(f"Found {len(bag_files)} rosbag(s) in {folder} to process.")

    for bag_file in bag_files:
        print(f"Processing {bag_file} in {folder}...")
        process_rosbag(
            bag_file,
            point_cloud_topic,
            output_folder_path,
            error_log_file,
        )


In [None]:
# ---------------------------------------
# Step 5: Define Downsampling Functions
# ---------------------------------------
def downsample_point_cloud(pcd, voxel_size):
    """
    Apply a voxel grid filter to a point cloud.

    Delegates to ``pcd.voxel_down_sample(voxel_size)`` and returns its result.
    """
    return pcd.voxel_down_sample(voxel_size)

def downsample_pcd_files(input_directory, output_directory, voxel_size=0.05, error_log_file="downsample_error_log.csv"):
    """
    Downsample all PCD files in the input directory and save them to the output directory.

    Every ``*.pcd`` file directly inside ``input_directory`` is read, voxel
    downsampled with ``voxel_size`` and written under the same file name to
    ``output_directory`` (created if missing). Any error, including a write
    that Open3D reports as failed, is appended to the CSV file
    ``error_log_file``; that file is created with a header row when absent.
    """
    os.makedirs(output_directory, exist_ok=True)

    if not os.path.exists(error_log_file):
        # Make sure the log's folder exists; dirname is '' for a bare file name
        log_directory = os.path.dirname(error_log_file)
        if log_directory:
            os.makedirs(log_directory, exist_ok=True)
        # newline='' is what the csv module expects for files it writes
        with open(error_log_file, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["PCD File", "Error"])

    # Sorted so the processing order (and the log) is the same on every run
    pcd_files = sorted(glob.glob(os.path.join(input_directory, "*.pcd")))
    for pcd_file in pcd_files:
        try:
            print(f"Reading {pcd_file}")
            pcd = o3d.io.read_point_cloud(pcd_file)
            downsampled_pcd = downsample_point_cloud(pcd, voxel_size)
            output_file_path = os.path.join(output_directory, os.path.basename(pcd_file))
            # write_point_cloud reports failure through its return value;
            # turn that into an error so it lands in the log.
            if not o3d.io.write_point_cloud(output_file_path, downsampled_pcd):
                raise IOError(f"Failed to write {output_file_path}")
            print(f"Downsampled and saved {output_file_path}")
        except Exception as e:
            print(f"Error processing {pcd_file}: {e}")
            with open(error_log_file, mode='a', newline='') as file:
                writer = csv.writer(file)
                writer.writerow([pcd_file, str(e)])


In [None]:
# ---------------------------------------
# Step 6: Set Downsampled Output Directory
# ---------------------------------------
# Derived from source_directory (Step 3) rather than repeating the absolute
# path, so the data root only has to be changed in one place.
downsampled_output_directory = os.path.join(source_directory, "downsampled")

# Ensure that the downsampled output directory exists
os.makedirs(downsampled_output_directory, exist_ok=True)


In [None]:
# ----------------------------------------------
# Step 7: Downsample PCD Files in Subdirectories
# ----------------------------------------------
folders = ["stairs", "non_stairs"]
# Use a dedicated name so the bag-conversion `error_log_file` from Step 3 is
# not overwritten (re-running Step 4 would otherwise log to the wrong file).
downsample_error_log_file = os.path.join(downsampled_output_directory, "downsample_error_log.csv")

for folder in folders:
    processed_root = os.path.join(base_output_directory, folder)
    # Sorted for a reproducible processing order
    for subfolder in sorted(os.listdir(processed_root)):
        processed_folder_path = os.path.join(processed_root, subfolder)
        # os.listdir also returns plain files; only per-bag folders hold PCDs
        if not os.path.isdir(processed_folder_path):
            continue
        downsampled_folder_path = os.path.join(downsampled_output_directory, folder, subfolder)

        print(f"Downsampling PCD files in {folder}/{subfolder}...")
        downsample_pcd_files(processed_folder_path, downsampled_folder_path, voxel_size=0.05, error_log_file=downsample_error_log_file)


In [None]:
# ----------------------------------------------
# Step 8: Define PCA Function
# ----------------------------------------------
def perform_pca(pcd, min_points=50):
    """
    Perform PCA on the given point cloud.

    The covariance matrix of ``pcd.points`` (one point per row) is
    eigen-decomposed and the result is ordered by decreasing eigenvalue.

    Parameters:
        pcd: object whose ``points`` attribute can be converted with
            ``np.asarray`` (e.g. an Open3D PointCloud).
        min_points: minimum number of points required; defaults to 50.

    Returns:
        (eigenvalues, eigenvectors): eigenvalues sorted in descending order,
        and a matrix whose column ``k`` is the eigenvector belonging to
        ``eigenvalues[k]``.

    Raises:
        ValueError: if the cloud has fewer than ``min_points`` points.
    """
    points = np.asarray(pcd.points)
    if points.shape[0] < min_points:  # Check if there are enough points
        raise ValueError("Point cloud does not have enough points for PCA")

    # rowvar=False: rows are observations (points), columns are coordinates
    cov_matrix = np.cov(points, rowvar=False)
    # eigh is meant for symmetric matrices and returns ascending eigenvalues
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    # Reorder so the largest eigenvalue (and its eigenvector) comes first
    sort_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sort_indices]
    eigenvectors = eigenvectors[:, sort_indices]
    return eigenvalues, eigenvectors



In [None]:
# ----------------------------------------------
# Step 9: Define PCA Processing Function
# ----------------------------------------------
def process_pca_for_downsampled_pcds(input_directory, output_csv):
    """
    Perform PCA on all downsampled PCD files in the input directory and store results in a CSV file.

    ``input_directory`` is expected to contain the folders "stairs"
    (label 1) and "non_stairs" (label 0); every ``*.pcd`` file found one
    level below them is processed. Each result row holds the file name, the
    label, the three eigenvalues and then the eigenvectors one after another
    (X, Y, Z of eigenvector 1, then eigenvector 2, then eigenvector 3).

    Files with fewer than 50 points are listed in
    ``pca_insufficient_points_log.csv`` and any other failure in
    ``pca_error_log.csv``; both are written next to ``output_csv``.

    Returns:
        (error_log_file, insufficient_points_log_file): paths of the two logs.
    """
    output_dir = os.path.dirname(output_csv)
    # dirname is '' when output_csv is a bare file name; makedirs('') would fail
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    error_log_file = os.path.join(output_dir, "pca_error_log.csv")
    insufficient_points_log_file = os.path.join(output_dir, "pca_insufficient_points_log.csv")

    min_points = 50  # same threshold perform_pca applies by default
    folders = {"stairs": 1, "non_stairs": 0}

    # newline='' is what the csv module expects for files it writes
    with open(output_csv, mode='w', newline='') as file, \
            open(error_log_file, mode='w', newline='') as err_file, \
            open(insufficient_points_log_file, mode='w', newline='') as ins_file:
        writer = csv.writer(file)
        err_writer = csv.writer(err_file)
        ins_writer = csv.writer(ins_file)

        # Column order must match the row layout below: all three components
        # of eigenvector 1, then eigenvector 2, then eigenvector 3.
        header = (
            ["File Name", "Label"]
            + [f"Eigenvalue_{i+1}" for i in range(3)]
            + [f"Eigenvector_{i+1}_{axis}" for i in range(3) for axis in "XYZ"]
        )
        writer.writerow(header)
        err_writer.writerow(["PCD File", "Error"])
        ins_writer.writerow(["PCD File", "Number of Points"])

        for folder, label in folders.items():
            folder_path = os.path.join(input_directory, folder)
            for subfolder in os.listdir(folder_path):
                pcd_files = glob.glob(os.path.join(folder_path, subfolder, "*.pcd"))
                for pcd_file in pcd_files:
                    try:
                        print(f"Performing PCA on {pcd_file}")
                        pcd = o3d.io.read_point_cloud(pcd_file)
                        num_points = np.asarray(pcd.points).shape[0]
                        # Handled without an exception so the logged count
                        # always belongs to this file.
                        if num_points < min_points:
                            print(f"Insufficient points for {pcd_file}: {num_points}")
                            ins_writer.writerow([pcd_file, num_points])
                            continue
                        eigenvalues, eigenvectors = perform_pca(pcd)
                        # Eigenvectors are the columns; transpose before
                        # flattening to emit them one vector at a time.
                        row = (
                            [os.path.basename(pcd_file), label]
                            + eigenvalues.tolist()
                            + eigenvectors.T.flatten().tolist()
                        )
                        writer.writerow(row)
                        print(f"PCA results saved for {pcd_file}")
                    except Exception as e:
                        print(f"Error performing PCA on {pcd_file}: {e}")
                        err_writer.writerow([pcd_file, str(e)])

    return error_log_file, insufficient_points_log_file



In [None]:
# ----------------------------------------------
# Step 10: Set Output CSV Path and Perform PCA
# ----------------------------------------------
# pandas is imported here because the ML import cell (Step 11) only runs after
# this one; without it a fresh "Restart & Run All" fails with a NameError.
import pandas as pd

output_csv = os.path.join(source_directory, "pca_results.csv")
# Dedicated name so the bag-conversion `error_log_file` from Step 3 is not overwritten
pca_error_log_file, insufficient_points_log_file = process_pca_for_downsampled_pcds(downsampled_output_directory, output_csv)

# Print the number of rows in the error and insufficient points CSV files
error_log = pd.read_csv(pca_error_log_file)
insufficient_points_log = pd.read_csv(insufficient_points_log_file)
print(f"Number of rows in error log: {len(error_log)}")
print(f"Number of rows in insufficient points log: {len(insufficient_points_log)}")

In [None]:
# ----------------------------------------------
# Step 11: Import Necessary Libraries for Machine Learning
# ----------------------------------------------
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# ----------------------------------------------
# Step 12: Load and Preprocess the PCA Results
# ----------------------------------------------
# Read the PCA feature table back from disk
pca_results_csv = os.path.join(source_directory, "pca_results.csv")
pca_data = pd.read_csv(pca_results_csv)

# Features are every column except the file identifier and the target;
# the target is the "Label" column.
non_feature_columns = ["File Name", "Label"]
X = pca_data.drop(non_feature_columns, axis=1)
y = pca_data.loc[:, "Label"]

In [None]:
# ----------------------------------------------
# Step 13: Perform 10-Fold Cross-Validation
# ----------------------------------------------

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Baseline classifier: SVM with a linear kernel
svm_classifier = SVC(kernel='linear')

# Shuffled 10-fold splitter; the fixed seed keeps the folds reproducible
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# One accuracy value per held-out fold
cv_scores = cross_val_score(
    svm_classifier,
    X,
    y,
    scoring='accuracy',
    cv=kf,
)

# Report the per-fold scores and their mean
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation accuracy: {cv_scores.mean()}")

In [None]:
# ----------------------------------------------
# Step 14: Train and Evaluate the Classifier with Detailed Metrics
# ----------------------------------------------
# Per-fold results, appended in fold order
accuracy_list, conf_matrix_list, class_report_list = [], [], []

# Walk through the folds by hand so each fold's predictions can be scored
for train_index, test_index in kf.split(X):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]

    # Fit on the training part, then predict the held-out part
    y_pred = svm_classifier.fit(X_train, y_train).predict(X_test)

    # Score this fold
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred, output_dict=True)

    accuracy_list.append(accuracy)
    conf_matrix_list.append(conf_matrix)
    class_report_list.append(class_report)

# Summarize: mean accuracy first, then the per-fold details
print(f"Mean cross-validation accuracy: {np.mean(accuracy_list)}")
print("Confusion Matrices for each fold:")
for fold_number, cm in enumerate(conf_matrix_list, start=1):
    print(f"Fold {fold_number}:\n{cm}")
print("Classification Reports for each fold:")
for fold_number, cr in enumerate(class_report_list, start=1):
    print(f"Fold {fold_number}:\n{pd.DataFrame(cr).transpose()}")

In [None]:
# ------------------------------------------------------------
# Step 15: Perform PCA with 3 Components and Evaluate Accuracy
# ------------------------------------------------------------
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
# import plotly.express as px
# import plotly.graph_objects as go

# Reduce the data to 3D using PCA (fitted on all samples)
pca_3d = PCA(n_components=3)
X_3d = pca_3d.fit_transform(X)

# Train the SVM model on the 3D PCA-reduced data
svm_classifier_3d = SVC(kernel='linear')
svm_classifier_3d.fit(X_3d, y)

# Evaluate with 5-fold cross-validation. PCA and SVM are wrapped in a pipeline
# so the projection is refitted on each training split only; scoring on X_3d
# would let the held-out samples influence the PCA fit. The splitter gets its
# own name so the 10-fold `kf` used by Steps 13-14 is not replaced.
kf_3d = KFold(n_splits=5, shuffle=True, random_state=42)
pca_svm_pipeline = make_pipeline(PCA(n_components=3), SVC(kernel='linear'))
cv_scores_3d = cross_val_score(pca_svm_pipeline, X, y, cv=kf_3d, scoring='accuracy')

# Print cross-validation scores and mean accuracy
print(f"Cross-validation scores (3D PCA): {cv_scores_3d}")
print(f"Mean cross-validation accuracy (3D PCA): {cv_scores_3d.mean()}")

In [None]:
# ----------------------------------------------
# Step 15b: Visualizing the SVM Decision Boundary
# ----------------------------------------------
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np

# Project the features onto two principal components
pca_2d = PCA(n_components=2)
X_2d = pca_2d.fit_transform(X)

# Linear-kernel SVM (as in Step 13) fitted on the 2D projection
svm_classifier_initialized = SVC(kernel='linear')
svm_classifier_initialized.fit(X_2d, y)

# Build a regular grid that extends one unit beyond the data on every side
h = .02  # Step size in the mesh
first_component, second_component = X_2d[:, 0], X_2d[:, 1]
x_min, x_max = first_component.min() - 1, first_component.max() + 1
y_min, y_max = second_component.min() - 1, second_component.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Classify every grid node and fold the predictions back into the grid shape
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = svm_classifier_initialized.predict(grid_points).reshape(xx.shape)

# Predicted regions as filled contours, samples drawn on top
fig, ax = plt.subplots()
ax.contourf(xx, yy, Z, alpha=0.8)
ax.scatter(first_component, second_component, c=y, edgecolors='k', marker='o')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.set_title('SVM Decision Boundary with PCA Reduced Data (Initialized Parameters)')
plt.show()

In [None]:
# ----------------------------------------------
# Step 16: Hyperparameter Tuning with RandomizedSearchCV (CPU-based)
# ----------------------------------------------
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd

# Candidate values the randomized search samples from
param_grid_cpu = dict(
    kernel=['linear', 'rbf'],
    C=[0.01, 0.1, 1, 10, 100],
    gamma=['scale', 'auto', 0.001, 0.01, 0.1, 1],
)

# Estimator to tune
svm_classifier_cpu = SVC()

# 20 sampled configurations, 5-fold CV each; n_jobs=-1 uses all available
# cores and verbose=2 prints detailed progress
random_search_cpu = RandomizedSearchCV(
    svm_classifier_cpu,
    param_distributions=param_grid_cpu,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
    verbose=2,
)

try:
    # Perform the random search
    random_search_cpu.fit(X, y)
except KeyboardInterrupt:
    print("Process interrupted. Printing current results...")

# The result attributes only exist after a completed fit; fall back to None
best_params_cpu = getattr(random_search_cpu, 'best_params_', None)
best_score_cpu = getattr(random_search_cpu, 'best_score_', None)

print(f"Best parameters (CPU): {best_params_cpu}")
print(f"Best cross-validation accuracy (CPU): {best_score_cpu}")

# Save the search results
if hasattr(random_search_cpu, 'cv_results_'):
    cv_results_cpu_df = pd.DataFrame(random_search_cpu.cv_results_)
    cv_results_cpu_df.to_csv('hyperparameter_tuning_results_cpu.csv', index=False)

    # Keep only the configurations that reach the minimum expected accuracy
    min_expected_accuracy = 0.80
    meets_threshold = cv_results_cpu_df['mean_test_score'] >= min_expected_accuracy
    filtered_results = cv_results_cpu_df.loc[meets_threshold]

    print(f"Filtered results with accuracy >= {min_expected_accuracy * 100}%:")
    print(filtered_results)


In [None]:
# # ----------------------------------------------
# # Step 17: Evaluate the Best Model with Detailed Metrics
# # ----------------------------------------------
# # Train the best model
# best_svm_classifier = random_search_cpu.best_estimator_
# kf = KFold(n_splits=5, shuffle=True, random_state=42)

# # Initialize lists to hold the results
# accuracy_list = []
# conf_matrix_list = []
# class_report_list = []

# # Perform manual k-fold cross-validation
# for train_index, test_index in kf.split(X):
#     X_train, X_test = X.iloc[train_index], X.iloc[test_index]
#     y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
#     # Train the SVM classifier
#     best_svm_classifier.fit(X_train, y_train)
    
#     # Predict on the test data
#     y_pred = best_svm_classifier.predict(X_test)
    
#     # Calculate accuracy
#     accuracy = accuracy_score(y_test, y_pred)
#     accuracy_list.append(accuracy)
    
#     # Calculate confusion matrix
#     conf_matrix = confusion_matrix(y_test, y_pred)
#     conf_matrix_list.append(conf_matrix)
    
#     # Calculate classification report
#     class_report = classification_report(y_test, y_pred, output_dict=True)
#     class_report_list.append(class_report)

# # Print the results
# print(f"Mean cross-validation accuracy: {np.mean(accuracy_list)}")
# print("Confusion Matrices for each fold:")
# for i, cm in enumerate(conf_matrix_list):
#     print(f"Fold {i+1}:\n{cm}")
# print("Classification Reports for each fold:")
# for i, cr in enumerate(class_report_list):
#     print(f"Fold {i+1}:\n{pd.DataFrame(cr).transpose()}")
