<a href="https://colab.research.google.com/github/A00785001/TC5035/blob/main/00_ROS_Bag_Data_Extractor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ROS Bag Data Extractor
Extract data from ROS bags

## Extract Images from ROS Bag


In [1]:
# Install required packages
!pip install --quiet rosbags opencv-python pillow numpy matplotlib tqdm seaborn pandas


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/137.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.3/1.3 MB[0m [31m58.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.9/119.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m753.1/753.1 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Import libraries

import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import json
import csv
from datetime import datetime
from tqdm import tqdm
from collections import defaultdict

print("Libraries loaded successfully!")


Libraries loaded successfully!


In [3]:
from rosbags.rosbag1 import Reader
from rosbags.typesys import Stores, get_typestore
import os
from pathlib import Path
print("Libraries loaded successfully!")

Libraries loaded successfully!


In [5]:
# Initialize typestore for ROS1 message deserialization
typestore = get_typestore(Stores.ROS1_NOETIC)
print("Typestore initialized for ROS1")


Typestore initialized for ROS1


In [6]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [7]:
# Specify the path to the sessions folder
data_path = "/content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/"

In [10]:
# Specify the session
session = '20251016_133216'
print(f"Using session: {session}")

Using session: 20251016_133216


In [11]:

working_folder = data_path + 'session_' + session
bag_name = 'session_data.bag'

# Change to the specified subfolder
os.chdir(working_folder)
print(f"Changed directory to: {os.getcwd()}")


Changed directory to: /content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/session_20251016_133216


In [12]:
bag_file = working_folder + '/' + bag_name
print(f"Bag file name: {bag_name}")

# Set bag_path for the rest of the notebook
bag_path = bag_file

Bag file name: session_data.bag


In [13]:
# Read the bag file and list topics
print(f"Opening bag file: {bag_file}")
bag_path = Path(bag_file)

with Reader(bag_path) as reader:
    # Get bag info
    print(f"\nBag duration: {reader.duration / 1e9:.2f} seconds")
    print(f"Start time: {reader.start_time / 1e9:.2f}")
    print(f"Message count: {reader.message_count}")

    # List all topics
    print(f"\nTopics in bag:")
    for connection in reader.connections:
        print(f"  {connection.topic} ({connection.msgtype})")

print(f"\n✓ Bag file opened successfully")

Opening bag file: /content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/session_20251016_133216/session_data.bag

Bag duration: 463.17 seconds
Start time: 1760649872.66
Message count: 98205

Topics in bag:
  /csi_cam_0/camera_info (sensor_msgs/msg/CameraInfo)
  /constraint_list (visualization_msgs/msg/MarkerArray)
  /csi_cam_0/image_raw/compressed (sensor_msgs/msg/CompressedImage)
  /imu (sensor_msgs/msg/Imu)
  /odom (nav_msgs/msg/Odometry)
  /scan (sensor_msgs/msg/LaserScan)
  /tf (tf2_msgs/msg/TFMessage)
  /tf (tf2_msgs/msg/TFMessage)
  /tf (tf2_msgs/msg/TFMessage)
  /tf (tf2_msgs/msg/TFMessage)
  /submap_list (cartographer_ros_msgs/msg/SubmapList)
  /trajectory_node_list (visualization_msgs/msg/MarkerArray)
  /cmd_vel (geometry_msgs/msg/Twist)

✓ Bag file opened successfully


### 1. Bag Metadata & General Information

In [14]:
# Collect comprehensive bag metadata
print("=" * 60)
print("ROS BAG METADATA")
print("=" * 60)

with Reader(bag_path) as reader:
    # File information
    file_size_mb = os.path.getsize(bag_path) / (1024 * 1024)
    print(f"\n📁 File Information:")
    print(f"   File path: {bag_path}")
    print(f"   File size: {file_size_mb:.2f} MB")

    # Temporal information
    duration_sec = reader.duration / 1e9
    start_time = reader.start_time / 1e9
    end_time = reader.end_time / 1e9
    start_dt = datetime.fromtimestamp(start_time)
    end_dt = datetime.fromtimestamp(end_time)

    print(f"\n⏱️  Temporal Information:")
    print(f"   Duration: {duration_sec:.2f} seconds ({duration_sec/60:.2f} minutes)")
    print(f"   Start time: {start_dt.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]}")
    print(f"   End time: {end_dt.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]}")
    print(f"   Start timestamp: {start_time:.2f}")
    print(f"   End timestamp: {end_time:.2f}")

    # Message statistics
    total_messages = reader.message_count
    avg_message_rate = total_messages / duration_sec if duration_sec > 0 else 0

    print(f"\n📊 Message Statistics:")
    print(f"   Total messages: {total_messages:,}")
    print(f"   Average message rate: {avg_message_rate:.2f} msg/sec")
    print(f"   Average data rate: {file_size_mb / (duration_sec/60):.2f} MB/min")

    # Topic count
    unique_topics = set(conn.topic for conn in reader.connections)
    print(f"   Unique topics: {len(unique_topics)}")
    print(f"   Connections: {len(list(reader.connections))}")

ROS BAG METADATA

📁 File Information:
   File path: /content/drive/MyDrive/DATA/Artificial_Intelligence/MNA-V/Subjects/TC5035-Proyecto_Integrador/TC5035.data/jetbot/session_20251016_133216/session_data.bag
   File size: 34.35 MB

⏱️  Temporal Information:
   Duration: 463.17 seconds (7.72 minutes)
   Start time: 2025-10-16 21:24:32.661
   End time: 2025-10-16 21:32:15.826
   Start timestamp: 1760649872.66
   End timestamp: 1760650335.83

📊 Message Statistics:
   Total messages: 98,205
   Average message rate: 212.03 msg/sec
   Average data rate: 4.45 MB/min
   Unique topics: 10
   Connections: 13


### 2. Topic Analysis with Message Counts & Frequencies

In [15]:
# Analyze topics with detailed statistics
print("=" * 60)
print("TOPIC ANALYSIS")
print("=" * 60)

with Reader(bag_path) as reader:
    duration_sec = reader.duration / 1e9

    # Collect topic information
    topic_stats = defaultdict(lambda: {'count': 0, 'type': '', 'first_ts': None, 'last_ts': None})

    print("\nCounting messages per topic...")
    for connection, timestamp, rawdata in tqdm(reader.messages(), total=reader.message_count):
        topic = connection.topic
        topic_stats[topic]['count'] += 1
        topic_stats[topic]['type'] = connection.msgtype

        if topic_stats[topic]['first_ts'] is None:
            topic_stats[topic]['first_ts'] = timestamp
        topic_stats[topic]['last_ts'] = timestamp

    # Create DataFrame for better visualization
    topic_data = []
    for topic, stats in topic_stats.items():
        topic_duration = (stats['last_ts'] - stats['first_ts']) / 1e9
        frequency = stats['count'] / topic_duration if topic_duration > 0 else 0
        percentage = (stats['count'] / reader.message_count) * 100

        topic_data.append({
            'Topic': topic,
            'Message Type': stats['type'],
            'Count': stats['count'],
            'Frequency (Hz)': frequency,
            'Percentage (%)': percentage,
            'Duration (s)': topic_duration
        })

    df_topics = pd.DataFrame(topic_data)
    df_topics = df_topics.sort_values('Count', ascending=False)

    print("\n📋 Topic Summary:")
    print(df_topics.to_string(index=False))

    # Save to CSV for later use
    df_topics.to_csv('topic_statistics.csv', index=False)


TOPIC ANALYSIS

Counting messages per topic...


100%|██████████| 98205/98205 [00:01<00:00, 64781.96it/s]



📋 Topic Summary:
                          Topic                         Message Type  Count  Frequency (Hz)  Percentage (%)  Duration (s)
                            /tf               tf2_msgs/msg/TFMessage  63295      136.694223       64.451912    463.040784
                       /cmd_vel              geometry_msgs/msg/Twist  11418       27.370702       11.626699    417.161394
                           /imu                  sensor_msgs/msg/Imu   9553       21.299759        9.727611    448.502734
         /csi_cam_0/camera_info           sensor_msgs/msg/CameraInfo   7785       16.808253        7.927295    463.165330
                          /odom                nav_msgs/msg/Odometry   4013        8.947509        4.086350    448.504693
                   /submap_list cartographer_ros_msgs/msg/SubmapList   1207        2.693680        1.229062    448.085825
          /trajectory_node_list   visualization_msgs/msg/MarkerArray    410        0.923788        0.417494    443.825011
      