<a href="https://colab.research.google.com/github/A00785001/TC5035/blob/main/00_ROS_Bag_Data_Extractor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ROS Bag Data Extractor
Extract data from ROS bags

## <font color='#2E86AB'>▼ Initialization and Setup</font>

In [1]:
# Install required packages
!pip install --quiet rosbags pandas tqdm

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/137.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.3/1.3 MB[0m [31m51.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.9/119.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m753.1/753.1 kB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Import libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import defaultdict

print("Libraries loaded successfully!")

Libraries loaded successfully!


In [3]:
from rosbags.rosbag1 import Reader
from rosbags.typesys import Stores, get_typestore
import os

print("Libraries loaded successfully!")

Libraries loaded successfully!


In [4]:
# Initialize typestore for ROS1 message deserialization
typestore = get_typestore(Stores.ROS1_NOETIC)
print("Typestore initialized for ROS1")

Typestore initialized for ROS1


## <font color='#2E86AB'>▼ Storage Mounting</font>

In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## <font color='#2E86AB'>▼ ROS Bag Load</font>

In [6]:
# Specify the path to the sessions folder
data_path = "/content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/"

In [7]:
# Specify the session
session = '20251016_133216'
print(f"Using session: {session}")

Using session: 20251016_133216


In [8]:
working_folder = data_path + 'session_' + session
bag_name = 'session_data.bag'

# Change to the specified subfolder
os.chdir(working_folder)
print(f"Changed directory to: {os.getcwd()}")

Changed directory to: /content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/session_20251016_133216


In [9]:
bag_file = working_folder + '/' + bag_name
print(f"Bag file name: {bag_name}")

# Set bag_path for the rest of the notebook
bag_path = bag_file

Bag file name: session_data.bag


## <font color='#2E86AB'>▼ ROS Bag Basic EDA</font>

### 1. Basic Bag Information

In [10]:
# Open the bag and get basic information
print("=" * 60)
print("BASIC BAG INFORMATION")
print("=" * 60)

with Reader(bag_path) as reader:
    # Get basic statistics
    duration = reader.duration * 1e-9  # Convert to seconds
    duration_sec = duration
    total_messages = reader.message_count

    # Calculate average message rate
    avg_message_rate = total_messages / duration if duration > 0 else 0

    print(f"\n📁 File: {bag_name}")
    print(f"⏱️  Duration: {duration:.2f} seconds ({duration/60:.2f} minutes)")
    print(f"📬 Total Messages: {total_messages:,}")
    print(f"📊 Average Rate: {avg_message_rate:.2f} msg/sec")

BASIC BAG INFORMATION

📁 File: session_data.bag
⏱️  Duration: 463.17 seconds (7.72 minutes)
📬 Total Messages: 98,205
📊 Average Rate: 212.03 msg/sec


### 2. Topic Analysis

In [11]:
# Analyze topics
print("=" * 60)
print("TOPIC ANALYSIS")
print("=" * 60)

with Reader(bag_path) as reader:
    # Get topics and their details
    topics_data = []
    for connection in reader.connections:
        topics_data.append({
            'Topic': connection.topic,
            'Message Type': connection.msgtype,
            'Count': connection.msgcount
        })

    df_topics = pd.DataFrame(topics_data)
    df_topics = df_topics.sort_values('Count', ascending=False)

    # Calculate frequency
    df_topics['Frequency (Hz)'] = df_topics['Count'] / duration_sec

    print(f"\n📊 Total Topics: {len(df_topics)}")
    print(f"\n{df_topics.to_string(index=False)}")

    # Save to CSV
    df_topics.to_csv('topic_statistics.csv', index=False)
    print(f"\n✅ Topic statistics saved to: topic_statistics.csv")

    unique_topics = df_topics['Topic'].tolist()

TOPIC ANALYSIS

📊 Total Topics: 13

                          Topic                         Message Type  Count  Frequency (Hz)
                            /tf               tf2_msgs/msg/TFMessage  20459       44.172132
                            /tf               tf2_msgs/msg/TFMessage  17614       38.029617
                            /tf               tf2_msgs/msg/TFMessage  16294       35.179663
                       /cmd_vel              geometry_msgs/msg/Twist  11418       24.652104
                           /imu                  sensor_msgs/msg/Imu   9553       20.625464
                            /tf               tf2_msgs/msg/TFMessage   8928       19.276054
         /csi_cam_0/camera_info           sensor_msgs/msg/CameraInfo   7785       16.808253
                          /odom                nav_msgs/msg/Odometry   4013        8.664293
                   /submap_list cartographer_ros_msgs/msg/SubmapList   1207        2.605981
          /trajectory_node_list   visualizat

### 3. Message Frequency Distribution

In [12]:
# Analyze message frequency distribution
print("=" * 60)
print("MESSAGE FREQUENCY DISTRIBUTION")
print("=" * 60)

with Reader(bag_path) as reader:
    topic_timestamps = defaultdict(list)

    print("\nCollecting timestamps...")
    for connection, timestamp, rawdata in tqdm(reader.messages(), total=reader.message_count):
        topic = connection.topic
        topic_timestamps[topic].append(timestamp * 1e-9)  # Convert to seconds

    # Calculate inter-message intervals
    print("\n📈 Inter-Message Interval Statistics:")
    print(f"{'Topic':<40} {'Mean (ms)':<12} {'Std (ms)':<12} {'Min (ms)':<12} {'Max (ms)':<12}")
    print("-" * 88)

    for topic, timestamps in sorted(topic_timestamps.items()):
        if len(timestamps) > 1:
            intervals = np.diff(timestamps) * 1000  # Convert to milliseconds
            print(f"{topic:<40} {np.mean(intervals):<12.2f} {np.std(intervals):<12.2f} {np.min(intervals):<12.2f} {np.max(intervals):<12.2f}")

MESSAGE FREQUENCY DISTRIBUTION

Collecting timestamps...


100%|██████████| 98205/98205 [00:01<00:00, 72125.76it/s]


📈 Inter-Message Interval Statistics:
Topic                                    Mean (ms)    Std (ms)     Min (ms)     Max (ms)    
----------------------------------------------------------------------------------------
/cmd_vel                                 36.54        288.22       0.00         25980.30    
/constraint_list                         2766.79      10945.50     0.02         108550.76   
/csi_cam_0/camera_info                   59.50        500.15       0.00         38755.33    
/csi_cam_0/image_raw/compressed          4480.40      3405.35      0.23         30054.78    
/imu                                     46.95        447.40       0.00         40874.90    
/odom                                    111.79       1323.80      0.00         82738.43    
/scan                                    1427.87      3374.64      0.02         53541.24    
/submap_list                             371.55       1258.17      0.00         29235.69    
/tf                                 




### 4. Summary Report

In [13]:
# Generate summary report
print("=" * 60)
print("ROS BAG EDA - SUMMARY REPORT")
print("=" * 60)

summary_report = f"""
📊 SUMMARY STATISTICS
{'='*60}

FILE INFORMATION:
  • File: {bag_name}
  • Duration: {duration_sec:.2f} seconds ({duration_sec/60:.2f} minutes)

MESSAGE STATISTICS:
  • Total Messages: {total_messages:,}
  • Unique Topics: {len(unique_topics)}
  • Message Types: {df_topics['Message Type'].nunique()}
  • Average Rate: {avg_message_rate:.2f} msg/sec
"""

print(summary_report)

# Save report
with open('rosbag_eda_summary.txt', 'w') as f:
    f.write(summary_report)

ROS BAG EDA - SUMMARY REPORT

📊 SUMMARY STATISTICS

FILE INFORMATION:
  • File: session_data.bag
  • Duration: 463.17 seconds (7.72 minutes)

MESSAGE STATISTICS:
  • Total Messages: 98,205
  • Unique Topics: 13
  • Message Types: 9
  • Average Rate: 212.03 msg/sec

