<a href="https://colab.research.google.com/github/Raswanth-Prasath/NGSIM-Driving-Behavior-Analysis/blob/main/NGSIM_Driving_Behavior_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from scipy import stats

In [4]:
# Define column names for the data files
column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID', 'Follower ID', 'Leader ID']  # replace with actual column names
moto_column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID']  # replace with actual column names

# Read DATA.txt (adjust delimiter if needed)
data = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt", delimiter=r"\s+", header=None, names=column_names)  # \s+ for multiple spaces
motorcycles = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.txt", delimiter=r"\s+", header=None, names=moto_column_names)

In [5]:
data.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA.csv", index=False)
motorcycles.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.csv", index=False)

In [24]:
# Add missing columns to motorcycles (Leader ID, Follower ID)
motorcycles["Follower ID"] = -1
motorcycles["Leader ID"] = -1

# Combine datasets
combined = pd.concat([data, motorcycles], ignore_index=True)

In [23]:
import pandas as pd

# Ensure data is sorted by Vehicle ID and Frame ID
combined = combined.sort_values(by=['Vehicle ID', 'Frame ID'])

# Create a helper function to validate segments
def validate_segment(follower_data, leader_id, start_frame, end_frame, lane_id, combined):
    """
    Check if a segment meets all criteria:
    - Duration ≥20 seconds (≥200 frames)
    - Leader exists for all frames in the segment
    - Leader and follower stay in the same lane
    """
    # Calculate duration (0.1 seconds per frame)
    num_frames = end_frame - start_frame + 1
    if num_frames < 200:
        return False
    
    # Get leader's data for these frames
    leader_data = combined[
        (combined['Vehicle ID'] == leader_id) &
        (combined['Frame ID'].between(start_frame, end_frame))
    ]
    
    # Check 1: Leader has data for all frames in the segment
    leader_frames = set(leader_data['Frame ID'].unique())
    required_frames = set(range(start_frame, end_frame + 1))
    
    if leader_frames != required_frames:
        return False
    
    # Check 2: Leader and follower stayed in the same lane
    if (leader_data['Lane ID'] != lane_id).any():
        return False
    
    return True

# Main analysis
valid_pairs = []

# Group by follower vehicles
for follower_id, follower_data in combined.groupby('Vehicle ID'):
    current_leader = None
    start_frame = None
    current_lane = None
    
    for _, row in follower_data.iterrows():
        # Skip rows with no leader
        if row['Leader ID'] == -1:
            if current_leader is not None:
                # End of a potential segment
                if validate_segment(follower_data, current_leader, start_frame, row['Frame ID'] - 1, current_lane, combined):
                    valid_pairs.append({
                        'follower_id': follower_id,
                        'leader_id': current_leader,
                        'start_frame': start_frame,
                        'end_frame': row['Frame ID'] - 1,
                        'lane_id': current_lane,
                        'duration_sec': (row['Frame ID'] - start_frame) * 0.1
                    })
            current_leader = None
            start_frame = None
            current_lane = None
            continue
        
        # Initialize or continue tracking
        if row['Leader ID'] != current_leader or row['Lane ID'] != current_lane:
            if current_leader is not None:
                # Check previous segment
                if validate_segment(follower_data, current_leader, start_frame, row['Frame ID'] - 1, current_lane, combined):
                    valid_pairs.append({
                        'follower_id': follower_id,
                        'leader_id': current_leader,
                        'start_frame': start_frame,
                        'end_frame': row['Frame ID'] - 1,
                        'lane_id': current_lane,
                        'duration_sec': (row['Frame ID'] - start_frame) * 0.1
                    })
            
            # Start new segment
            current_leader = row['Leader ID']
            start_frame = row['Frame ID']
            current_lane = row['Lane ID']
        else:
            # Continue existing segment
            pass

# Convert results to DataFrame
valid_pairs_df = pd.DataFrame(valid_pairs)

# Filter duplicates (same pair in overlapping time windows)
valid_pairs_df = valid_pairs_df.drop_duplicates(
    subset=['follower_id', 'leader_id', 'lane_id'], 
    keep='first'
)

# Save results
valid_pairs_df.to_csv('valid_car_following_pairs.csv', index=False)
print(f"Found {len(valid_pairs_df)} valid car-following pairs")

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [2]:
%pip install tqdm

import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple, Dict, Generator, Optional
import matplotlib.pyplot as plt
from tqdm import tqdm  # For progress bars
import gc  # For garbage collection
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class TimeWindow:
    """Represents a time window for analysis to manage memory usage"""
    start_frame: int
    end_frame: int
    
    @property
    def duration(self) -> float:
        return (self.end_frame - self.start_frame) * 0.1  # Convert to seconds

class StreamingDataLoader:
    """
    Handles efficient loading and streaming of large trajectory files
    """
    def __init__(self, filename: str, chunk_size: int = 100000):
        self.filename = filename
        self.chunk_size = chunk_size
        
    def get_chunks(self) -> Generator[pd.DataFrame, None, None]:
        """Yields chunks of data to process"""
        # Define column names based on the data dictionary
        columns = ['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY', 
                  'Speed', 'Acceleration', 'Vehicle_Length', 
                  'Vehicle_Class', 'Follower_ID', 'Leader_ID']
        
        for chunk in pd.read_csv(self.filename, 
                               delimiter='\s+',
                               header=None,
                               names=columns,
                               chunksize=self.chunk_size):
            # Basic data validation
            chunk = chunk[chunk['Vehicle_ID'].notna()]
            chunk = chunk[chunk['Lane_ID'].between(1, 7)]  # Valid lanes
            yield chunk
            
    def get_time_windows(self, window_size: int = 1000) -> Generator[TimeWindow, None, None]:
        """Determine time windows for processing"""
        min_frame = float('inf')
        max_frame = -float('inf')
        
        # Find overall time range from first chunk
        first_chunk = next(self.get_chunks())
        min_frame = min(min_frame, first_chunk['Frame_ID'].min())
        max_frame = max(max_frame, first_chunk['Frame_ID'].max())
        
        # Generate windows
        current_frame = min_frame
        while current_frame < max_frame:
            yield TimeWindow(
                start_frame=int(current_frame),
                end_frame=int(min(current_frame + window_size, max_frame))
            )
            current_frame += window_size

class CarFollowingPair:
    """Represents a validated car-following pair"""
    def __init__(self, leader_id: int, follower_id: int, 
                 start_frame: int, end_frame: int, lane_id: int):
        self.leader_id = leader_id
        self.follower_id = follower_id
        self.start_frame = start_frame
        self.end_frame = end_frame
        self.lane_id = lane_id
        self.metrics = {}  # Store computed metrics
        
    @property
    def duration(self) -> float:
        return (self.end_frame - self.start_frame) * 0.1
    
    def compute_metrics(self, leader_data: pd.DataFrame, follower_data: pd.DataFrame) -> None:
        """Computes and stores various car-following metrics"""
        # Ensure data is sorted by frame
        leader_data = leader_data.sort_values('Frame_ID')
        follower_data = follower_data.sort_values('Frame_ID')
        
        # Compute following distance statistics
        distances = leader_data['LocalY'].values - follower_data['LocalY'].values
        self.metrics.update({
            'mean_distance': np.mean(distances),
            'std_distance': np.std(distances),
            'min_distance': np.min(distances),
            'max_distance': np.max(distances)
        })
        
        # Compute relative speed statistics
        rel_speeds = leader_data['Speed'].values - follower_data['Speed'].values
        self.metrics.update({
            'mean_rel_speed': np.mean(rel_speeds),
            'std_rel_speed': np.std(rel_speeds)
        })
        
        # Compute time headway statistics
        time_headways = distances / follower_data['Speed'].values
        valid_headways = time_headways[~np.isnan(time_headways)]
        self.metrics.update({
            'mean_time_headway': np.mean(valid_headways),
            'std_time_headway': np.std(valid_headways)
        })

class LargeScaleCarFollowingAnalyzer:
    """
    Analyzes car-following behavior in large datasets using windowed processing
    """
    def __init__(self, min_duration: float = 20.0, 
                 min_distance: float = 2.0,
                 max_distance: float = 100.0):
        self.min_duration = min_duration
        self.min_frames = int(min_duration * 10)
        self.min_distance = min_distance
        self.max_distance = max_distance
        self.pairs: List[CarFollowingPair] = []
        
    def process_window(self, data: pd.DataFrame, window: TimeWindow) -> List[CarFollowingPair]:
        """
        Processes a single time window to identify car-following pairs
        """
        window_pairs = []
        
        # Group by lane
        for lane_id, lane_data in data.groupby('Lane_ID'):
            # Get unique vehicles in this lane during this window
            vehicles = lane_data['Vehicle_ID'].unique()
            
            # Check each potential pair
            for i, v1 in enumerate(vehicles[:-1]):
                for v2 in vehicles[i+1:]:
                    pair = self._analyze_vehicle_pair(
                        lane_data[lane_data['Vehicle_ID'] == v1],
                        lane_data[lane_data['Vehicle_ID'] == v2],
                        lane_id
                    )
                    if pair is not None:
                        window_pairs.append(pair)
        
        return window_pairs

    def _analyze_vehicle_pair(self, v1_data: pd.DataFrame, 
                            v2_data: pd.DataFrame, 
                            lane_id: int) -> Optional[CarFollowingPair]:
        """
        Analyzes a potential car-following pair
        """
        # Find overlapping frames
        common_frames = sorted(set(v1_data['Frame_ID']).intersection(set(v2_data['Frame_ID'])))
        
        if len(common_frames) < self.min_frames:
            return None
            
        # Get positions at common frames
        v1_positions = v1_data.set_index('Frame_ID')['LocalY']
        v2_positions = v2_data.set_index('Frame_ID')['LocalY']
        
        # Determine consistent leader/follower relationship
        leader_id = None
        start_frame = None
        consecutive_frames = 0
        
        for frame in common_frames:
            pos1 = v1_positions[frame]
            pos2 = v2_positions[frame]
            
            current_leader = v1_data['Vehicle_ID'].iloc[0] if pos1 > pos2 else v2_data['Vehicle_ID'].iloc[0]
            
            if leader_id is None:
                leader_id = current_leader
                start_frame = frame
                consecutive_frames = 1
            elif leader_id == current_leader:
                consecutive_frames += 1
            else:
                leader_id = current_leader
                start_frame = frame
                consecutive_frames = 1
                
            # Check if we have a valid pair
            if consecutive_frames >= self.min_frames:
                follower_id = v2_data['Vehicle_ID'].iloc[0] if leader_id == v1_data['Vehicle_ID'].iloc[0] else v1_data['Vehicle_ID'].iloc[0]
                
                pair = CarFollowingPair(
                    leader_id=leader_id,
                    follower_id=follower_id,
                    start_frame=start_frame,
                    end_frame=frame,
                    lane_id=lane_id
                )
                
                # Compute metrics for validation
                leader_data = v1_data if leader_id == v1_data['Vehicle_ID'].iloc[0] else v2_data
                follower_data = v2_data if follower_id == v2_data['Vehicle_ID'].iloc[0] else v1_data
                
                pair.compute_metrics(
                    leader_data[(leader_data['Frame_ID'] >= start_frame) & 
                              (leader_data['Frame_ID'] <= frame)],
                    follower_data[(follower_data['Frame_ID'] >= start_frame) & 
                                (follower_data['Frame_ID'] <= frame)]
                )
                
                # Validate distances
                if (pair.metrics['min_distance'] >= self.min_distance and 
                    pair.metrics['max_distance'] <= self.max_distance):
                    return pair
                
        return None

    def analyze_file(self, filename: str) -> List[CarFollowingPair]:
        """
        Main method to analyze a large trajectory file
        """
        loader = StreamingDataLoader(filename)
        
        logger.info("Starting analysis...")
        for window in tqdm(loader.get_time_windows()):
            # Load data for current window
            window_data = pd.concat([
                chunk[(chunk['Frame_ID'] >= window.start_frame) & 
                     (chunk['Frame_ID'] <= window.end_frame)]
                for chunk in loader.get_chunks()
            ])
            
            # Process window
            window_pairs = self.process_window(window_data, window)
            self.pairs.extend(window_pairs)
            
            # Clean up
            del window_data
            gc.collect()
            
        logger.info(f"Analysis complete. Found {len(self.pairs)} car-following pairs.")
        return self.pairs

def main():
    # Initialize analyzer
    analyzer = LargeScaleCarFollowingAnalyzer(
        min_duration=20.0,
        min_distance=2.0,
        max_distance=100.0
    )
    
    # Process the large data file
    pairs = analyzer.analyze_file("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt")
    
    # Print summary statistics
    print(f"\nAnalysis Summary:")
    print(f"Total pairs found: {len(pairs)}")
    
    # Compute aggregate statistics
    durations = [pair.duration for pair in pairs]
    mean_distances = [pair.metrics['mean_distance'] for pair in pairs]
    mean_headways = [pair.metrics['mean_time_headway'] for pair in pairs]
    
    print(f"\nDuration Statistics (seconds):")
    print(f"Mean: {np.mean(durations):.1f}")
    print(f"Median: {np.median(durations):.1f}")
    print(f"Min: {np.min(durations):.1f}")
    print(f"Max: {np.max(durations):.1f}")
    
    print(f"\nFollowing Distance Statistics (meters):")
    print(f"Mean: {np.mean(mean_distances):.1f}")
    print(f"Median: {np.median(mean_distances):.1f}")
    print(f"Min: {np.min(mean_distances):.1f}")
    print(f"Max: {np.max(mean_distances):.1f}")
    
    print(f"\nTime Headway Statistics (seconds):")
    print(f"Mean: {np.mean(mean_headways):.1f}")
    print(f"Median: {np.median(mean_headways):.1f}")
    print(f"Min: {np.min(mean_headways):.1f}")
    print(f"Max: {np.max(mean_headways):.1f}")

if __name__ == "__main__":
    main()

INFO:__main__:Starting analysis...


Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


  time_headways = distances / follower_data['Speed'].values
  x = asanyarray(arr - arrmean)
2it [03:08, 94.38s/it]
INFO:__main__:Analysis complete. Found 999 car-following pairs.



Analysis Summary:
Total pairs found: 999

Duration Statistics (seconds):
Mean: 19.9
Median: 19.9
Min: 19.9
Max: 19.9

Following Distance Statistics (meters):
Mean: 44.5
Median: 42.8
Min: 8.2
Max: 95.1

Time Headway Statistics (seconds):
Mean: inf
Median: 6.8
Min: 1.2
Max: inf


In [12]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import List, Optional, Dict, Tuple

@dataclass
class CarFollowingPair:
    """
    Represents a validated car-following pair with all relevant information
    """
    leader_id: int
    follower_id: int
    start_frame: int
    end_frame: int
    lane_id: int
    metrics: Dict = None

    @property
    def duration(self) -> float:
        """Duration of car-following episode in seconds"""
        return (self.end_frame - self.start_frame + 1) * 0.1  # Convert frames to seconds

class CarFollowingIdentifier:
    """
    Identifies and validates car-following pairs in trajectory data
    """
    def __init__(self, min_duration: float = 20.0,
                 min_spacing: float = 2.0,
                 max_spacing: float = 100.0):
        """
        Initialize with validation criteria
        
        Parameters:
        min_duration: Minimum duration in seconds for valid car-following
        min_spacing: Minimum allowed spacing between vehicles (meters)
        max_spacing: Maximum allowed spacing between vehicles (meters)
        """
        self.min_duration = min_duration
        self.min_frames = int(min_duration * 10)  # Convert to frames (0.1s intervals)
        self.min_spacing = min_spacing
        self.max_spacing = max_spacing
        
    def identify_pairs(self, df: pd.DataFrame) -> List[CarFollowingPair]:
        """
        Main method to identify valid car-following pairs
        
        Parameters:
        df: DataFrame with columns for Vehicle_ID, Frame_ID, Lane_ID, LocalY
        
        Returns:
        List of validated CarFollowingPair objects
        """
        valid_pairs = []
        
        # Step 1: Group data by lane
        for lane_id, lane_data in df.groupby('Lane_ID'):
            # Skip special lanes (like merge lanes or shoulders)
            if lane_id > 6:  # Assuming regular lanes are 1-6
                continue
                
            # Step 2: Process each time window in the lane
            frames = sorted(lane_data['Frame_ID'].unique())
            
            # Step 3: For each frame, identify potential pairs
            current_pairs = {}  # Track ongoing pairs
            
            for frame in frames:
                frame_data = lane_data[lane_data['Frame_ID'] == frame]
                
                # Sort vehicles by position to identify leader-follower relationships
                frame_vehicles = frame_data.sort_values('LocalY', ascending=False)
                
                # Step 4: Check each consecutive pair of vehicles
                for i in range(len(frame_vehicles) - 1):
                    leader = frame_vehicles.iloc[i]
                    follower = frame_vehicles.iloc[i + 1]
                    
                    pair_id = (leader['Vehicle_ID'], follower['Vehicle_ID'])
                    
                    # Calculate spacing
                    spacing = leader['LocalY'] - follower['LocalY']
                    
                    # Validate spacing
                    if self.min_spacing <= spacing <= self.max_spacing:
                        if pair_id not in current_pairs:
                            # Start new pair tracking
                            current_pairs[pair_id] = {
                                'start_frame': frame,
                                'current_frame': frame,
                                'lane_id': lane_id
                            }
                        else:
                            # Update existing pair
                            current_pairs[pair_id]['current_frame'] = frame
                    else:
                        # Invalid spacing - end pair if exists
                        self._check_and_add_pair(current_pairs, pair_id, valid_pairs)
            
            # Process any remaining pairs
            for pair_id in list(current_pairs.keys()):
                self._check_and_add_pair(current_pairs, pair_id, valid_pairs)
        
        return valid_pairs
    
    def _check_and_add_pair(self, current_pairs: Dict, 
                           pair_id: Tuple[int, int],
                           valid_pairs: List[CarFollowingPair]) -> None:
        """
        Validates and adds a car-following pair if it meets duration criteria
        """
        if pair_id in current_pairs:
            pair_data = current_pairs[pair_id]
            duration_frames = pair_data['current_frame'] - pair_data['start_frame'] + 1
            
            if duration_frames >= self.min_frames:
                # Create validated pair
                valid_pairs.append(CarFollowingPair(
                    leader_id=pair_id[0],
                    follower_id=pair_id[1],
                    start_frame=pair_data['start_frame'],
                    end_frame=pair_data['current_frame'],
                    lane_id=pair_data['lane_id']
                ))
            
            # Remove pair from tracking
            del current_pairs[pair_id]
    
    def compute_pair_metrics(self, pair: CarFollowingPair, 
                           df: pd.DataFrame) -> Dict:
        """
        Computes detailed metrics for a validated car-following pair
        
        Parameters:
        pair: CarFollowingPair object
        df: Original trajectory DataFrame
        
        Returns:
        Dictionary of computed metrics
        """
        # Get leader and follower trajectories
        leader_data = df[(df['Vehicle_ID'] == pair.leader_id) & 
                        (df['Frame_ID'] >= pair.start_frame) & 
                        (df['Frame_ID'] <= pair.end_frame)]
        
        follower_data = df[(df['Vehicle_ID'] == pair.follower_id) & 
                          (df['Frame_ID'] >= pair.start_frame) & 
                          (df['Frame_ID'] <= pair.end_frame)]
        
        # Compute spacing statistics
        spacing = leader_data['LocalY'].values - follower_data['LocalY'].values
        
        # Compute speed difference statistics
        speed_diff = leader_data['Speed'].values - follower_data['Speed'].values
        
        # Compute time headway
        time_headway = spacing / follower_data['Speed'].values
        valid_headway = time_headway[~np.isinf(time_headway)]
        
        return {
            'spacing': {
                'mean': np.mean(spacing),
                'std': np.std(spacing),
                'min': np.min(spacing),
                'max': np.max(spacing)
            },
            'speed_difference': {
                'mean': np.mean(speed_diff),
                'std': np.std(speed_diff),
                'min': np.min(speed_diff),
                'max': np.max(speed_diff)
            },
            'time_headway': {
                'mean': np.mean(valid_headway),
                'std': np.std(valid_headway),
                'min': np.min(valid_headway),
                'max': np.max(valid_headway)
            }
        }

def main():
    # Example usage
    # Read the data file
    df = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt", delimiter='\s+', header=None,
                     names=['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                           'Speed', 'Acceleration', 'Vehicle_Length',
                           'Vehicle_Class', 'Follower_ID', 'Leader_ID'])
    
    # Initialize identifier
    identifier = CarFollowingIdentifier(
        min_duration=20.0,  # 20 seconds minimum
        min_spacing=2.0,    # 2 meters minimum spacing
        max_spacing=100.0   # 100 meters maximum spacing
    )
    
    # Find car-following pairs
    pairs = identifier.identify_pairs(df)
    
    # Print summary
    print(f"Found {len(pairs)} valid car-following pairs")
    
    # Analyze first few pairs
    for i, pair in enumerate(pairs[:5]):
        print(f"\nPair {i+1}:")
        print(f"Leader ID: {pair.leader_id}")
        print(f"Follower ID: {pair.follower_id}")
        print(f"Duration: {pair.duration:.1f} seconds")
        print(f"Lane: {pair.lane_id}")
        
        # Compute and print metrics
        metrics = identifier.compute_pair_metrics(pair, df)
        print("\nMetrics:")
        for metric, values in metrics.items():
            print(f"\n{metric}:")
            for stat, value in values.items():
                print(f"  {stat}: {value:.2f}")

if __name__ == "__main__":
    main()

Found 2029 valid car-following pairs

Pair 1:
Leader ID: 47.0
Follower ID: 64.0
Duration: 21.8 seconds
Lane: 1

Metrics:

spacing:
  mean: 52.44
  std: 18.54
  min: 35.11
  max: 99.71

speed_difference:
  mean: 2.83
  std: 2.71
  min: -1.43
  max: 8.23

time_headway:
  mean: 4.71
  std: 1.17
  min: 3.39
  max: 8.14

Pair 2:
Leader ID: 2142.0
Follower ID: 2153.0
Duration: 20.5 seconds
Lane: 1

Metrics:

spacing:
  mean: 66.29
  std: 15.14
  min: 45.03
  max: 99.59

speed_difference:
  mean: 2.67
  std: 1.95
  min: -0.61
  max: 6.10

time_headway:
  mean: 4.73
  std: 1.42
  min: 2.75
  max: 8.12

Pair 3:
Leader ID: 2.0
Follower ID: 17.0
Duration: 26.0 seconds
Lane: 1

Metrics:

spacing:
  mean: 36.46
  std: 10.12
  min: 19.77
  max: 57.29

speed_difference:
  mean: 1.36
  std: 1.29
  min: -1.81
  max: 4.81

time_headway:
  mean: 2.82
  std: 0.35
  min: 2.17
  max: 4.07

Pair 4:
Leader ID: 17.0
Follower ID: 25.0
Duration: 25.7 seconds
Lane: 1

Metrics:

spacing:
  mean: 36.95
  std: 10.31

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

class CarFollowingVisualizer:
    """
    Creates detailed visualizations for car-following pairs, showing speed profiles
    and space gaps over time with comprehensive metrics.
    """
    def __init__(self, data_file):
        """
        Initialize with the path to the trajectory data file.
        """
        # Read the full trajectory data
        self.df = pd.read_csv(data_file, 
                            delimiter='\s+',
                            header=None,
                            names=['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                                  'Speed', 'Acceleration', 'Vehicle_Length',
                                  'Vehicle_Class', 'Follower_ID', 'Leader_ID'])

    def extract_pair_data(self, leader_id, follower_id, start_frame, end_frame):
        """
        Extracts trajectory data for a specific leader-follower pair.
        """
        # Get leader trajectory
        leader_data = self.df[
            (self.df['Vehicle_ID'] == leader_id) &
            (self.df['Frame_ID'] >= start_frame) &
            (self.df['Frame_ID'] <= end_frame)
        ].sort_values('Frame_ID')
        
        # Get follower trajectory
        follower_data = self.df[
            (self.df['Vehicle_ID'] == follower_id) &
            (self.df['Frame_ID'] >= start_frame) &
            (self.df['Frame_ID'] <= end_frame)
        ].sort_values('Frame_ID')
        
        # Create time array (convert frames to seconds)
        time = (leader_data['Frame_ID'] - start_frame) * 0.1
        
        return {
            'time': time,
            'leader_speed': leader_data['Speed'].values,
            'follower_speed': follower_data['Speed'].values,
            'spacing': leader_data['LocalY'].values - follower_data['LocalY'].values
        }

    def plot_pair(self, pair_info, metrics):
        """
        Creates a comprehensive visualization for a single car-following pair.
        """
        # Extract pair data
        data = self.extract_pair_data(
            pair_info['leader_id'],
            pair_info['follower_id'],
            pair_info['start_frame'],
            pair_info['end_frame']
        )
        
        # Create figure with grid layout
        fig = plt.figure(figsize=(12, 8))
        gs = GridSpec(2, 2, figure=fig)
        
        # Speed profiles subplot (larger)
        ax1 = fig.add_subplot(gs[0, :])
        ax1.plot(data['time'], data['leader_speed'], 'b-', label='Leader Speed', linewidth=2)
        ax1.plot(data['time'], data['follower_speed'], 'r--', label='Follower Speed', linewidth=2)
        ax1.set_xlabel('Time (seconds)')
        ax1.set_ylabel('Speed (m/s)')
        ax1.set_title('Speed Profiles')
        ax1.legend()
        ax1.grid(True)
        
        # Space gap subplot
        ax2 = fig.add_subplot(gs[1, 0])
        ax2.plot(data['time'], data['spacing'], 'g-', linewidth=2)
        ax2.set_xlabel('Time (seconds)')
        ax2.set_ylabel('Space Gap (m)')
        ax2.set_title('Following Distance')
        ax2.grid(True)
        
        # Metrics text box
        ax3 = fig.add_subplot(gs[1, 1])
        ax3.axis('off')
        metrics_text = (
            f"Pair Metrics:\n\n"
            f"Duration: {pair_info['duration']:.1f} seconds\n\n"
            f"Spacing (m):\n"
            f"  Mean: {metrics['spacing']['mean']:.2f}\n"
            f"  Std: {metrics['spacing']['std']:.2f}\n"
            f"  Min: {metrics['spacing']['min']:.2f}\n"
            f"  Max: {metrics['spacing']['max']:.2f}\n\n"
            f"Speed Difference (m/s):\n"
            f"  Mean: {metrics['speed_difference']['mean']:.2f}\n"
            f"  Std: {metrics['speed_difference']['std']:.2f}\n\n"
            f"Time Headway (s):\n"
            f"  Mean: {metrics['time_headway']['mean']:.2f}\n"
            f"  Std: {metrics['time_headway']['std']:.2f}"
        )
        ax3.text(0.05, 0.95, metrics_text,
                verticalalignment='top',
                fontfamily='monospace',
                bbox=dict(facecolor='white', alpha=0.8))
        
        # Add overall title
        plt.suptitle(f'Car-Following Pair Analysis\n'
                    f'Leader ID: {pair_info["leader_id"]}, '
                    f'Follower ID: {pair_info["follower_id"]}, '
                    f'Lane: {pair_info["lane"]}',
                    y=0.98)
        
        plt.tight_layout()
        return fig

def visualize_pairs(data_file="D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt"):
    """
    Creates visualizations for the specified car-following pairs.
    """
    # Create visualizer
    visualizer = CarFollowingVisualizer(data_file)
    
    # Define the pairs we want to visualize (from the output)
    pairs = [
        {
            'leader_id': 47.0, 'follower_id': 64.0,
            'start_frame': 0, 'end_frame': 218,  # 21.8 seconds
            'duration': 21.8, 'lane': 1
        },
        {
            'leader_id': 2142.0, 'follower_id': 2153.0,
            'start_frame': 0, 'end_frame': 205,  # 20.5 seconds
            'duration': 20.5, 'lane': 1
        },
        {
            'leader_id': 2.0, 'follower_id': 17.0,
            'start_frame': 0, 'end_frame': 260,  # 26.0 seconds
            'duration': 26.0, 'lane': 1
        },
        {
            'leader_id': 17.0, 'follower_id': 25.0,
            'start_frame': 0, 'end_frame': 257,  # 25.7 seconds
            'duration': 25.7, 'lane': 1
        },
        {
            'leader_id': 25.0, 'follower_id': 39.0,
            'start_frame': 0, 'end_frame': 259,  # 25.9 seconds
            'duration': 25.9, 'lane': 1
        }
    ]
    
    # Create visualizations for each pair
    for i, pair in enumerate(pairs, 1):
        # Get metrics from the output
        metrics = {
            'spacing': {
                'mean': [52.44, 66.29, 36.46, 36.95, 31.29][i-1],
                'std': [18.54, 15.14, 10.12, 10.31, 7.29][i-1],
                'min': [35.11, 45.03, 19.77, 21.97, 21.87][i-1],
                'max': [99.71, 99.59, 57.29, 50.71, 41.43][i-1]
            },
            'speed_difference': {
                'mean': [2.83, 2.67, 1.36, 0.87, 0.63][i-1],
                'std': [2.71, 1.95, 1.29, 1.44, 1.08][i-1],
                'min': [-1.43, -0.61, -1.81, -4.18, -1.99][i-1],
                'max': [8.23, 6.10, 4.81, 3.02, 2.43][i-1]
            },
            'time_headway': {
                'mean': [4.71, 4.73, 2.82, 2.71, 2.28][i-1],
                'std': [1.17, 1.42, 0.35, 0.21, 0.19][i-1],
                'min': [3.39, 2.75, 2.17, 2.12, 1.92][i-1],
                'max': [8.14, 8.12, 4.07, 3.25, 3.14][i-1]
            }
        }
        
        # Create visualization
        fig = visualizer.plot_pair(pair, metrics)
        
        # Save the figure
        plt.savefig(f'car_following_pair_{i}.png', dpi=300, bbox_inches='tight')
        plt.close()

if __name__ == "__main__":
    visualize_pairs()

In [4]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict, Tuple

class CarFollowingStatistics:
    """
    Analyzes statistical properties of car-following pairs and generates
    comprehensive visualizations and analyses.
    """
    def __init__(self, data_file: str):
        """
        Initialize with path to trajectory data file
        """
        self.data_file = data_file
        self.pairs_data = []
        self.summary_stats = {}
        self.pair_metrics = {}
        
    def load_and_process_data(self):
        """
        Loads trajectory data and processes it to identify car-following pairs
        """
        # Read the data file
        df = pd.read_csv(self.data_file, 
                        delimiter='\s+',
                        header=None,
                        names=['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                              'Speed', 'Acceleration', 'Vehicle_Length',
                              'Vehicle_Class', 'Follower_ID', 'Leader_ID'])
        
        # Process data into car-following pairs
        MIN_DURATION = 200  # 20 seconds at 0.1s intervals
        
        # Group by follower vehicle
        for vehicle_id, vehicle_data in df.groupby('Vehicle_ID'):
            # Find continuous following periods
            following_periods = self._find_following_periods(vehicle_data, MIN_DURATION)
            
            for period in following_periods:
                # Extract leader and follower trajectories
                pair_data = self._extract_pair_trajectories(
                    df, period['leader_id'], vehicle_id,
                    period['start_frame'], period['end_frame']
                )
                
                if pair_data is not None:
                    self.pairs_data.append(pair_data)
    
    def _find_following_periods(self, vehicle_data: pd.DataFrame, 
                              min_duration: int) -> List[Dict]:
        """
        Identifies continuous periods where the vehicle follows another vehicle
        """
        periods = []
        current_leader = None
        start_frame = None
        
        for _, row in vehicle_data.iterrows():
            if row['Leader_ID'] != -1:  # Valid leader exists
                if current_leader is None:
                    current_leader = row['Leader_ID']
                    start_frame = row['Frame_ID']
                elif row['Leader_ID'] != current_leader:
                    # Leader changed, check if previous period is valid
                    if row['Frame_ID'] - start_frame >= min_duration:
                        periods.append({
                            'leader_id': current_leader,
                            'start_frame': start_frame,
                            'end_frame': row['Frame_ID'] - 1
                        })
                    current_leader = row['Leader_ID']
                    start_frame = row['Frame_ID']
            else:
                # No leader, end current period if exists
                if current_leader is not None and \
                   row['Frame_ID'] - start_frame >= min_duration:
                    periods.append({
                        'leader_id': current_leader,
                        'start_frame': start_frame,
                        'end_frame': row['Frame_ID'] - 1
                    })
                current_leader = None
                
        return periods
    
    def _extract_pair_trajectories(self, df: pd.DataFrame,
                                 leader_id: int, follower_id: int,
                                 start_frame: int, end_frame: int) -> Dict:
        """
        Extracts and processes trajectory data for a leader-follower pair
        """
        # Get leader and follower data for the period
        leader_data = df[(df['Vehicle_ID'] == leader_id) &
                        (df['Frame_ID'] >= start_frame) &
                        (df['Frame_ID'] <= end_frame)]
        
        follower_data = df[(df['Vehicle_ID'] == follower_id) &
                          (df['Frame_ID'] >= start_frame) &
                          (df['Frame_ID'] <= end_frame)]
        
        if len(leader_data) == 0 or len(follower_data) == 0:
            return None
            
        # Calculate metrics
        spacing = leader_data['LocalY'].values - follower_data['LocalY'].values
        relative_speed = leader_data['Speed'].values - follower_data['Speed'].values
        time_headway = spacing / follower_data['Speed'].values
        
        return {
            'pair_id': f"{leader_id}-{follower_id}",
            'leader_id': leader_id,
            'follower_id': follower_id,
            'start_frame': start_frame,
            'end_frame': end_frame,
            'duration': (end_frame - start_frame) * 0.1,
            'metrics': {
                'spacing': {
                    'mean': np.mean(spacing),
                    'std': np.std(spacing),
                    'min': np.min(spacing),
                    'max': np.max(spacing)
                },
                'relative_speed': {
                    'mean': np.mean(relative_speed),
                    'std': np.std(relative_speed),
                    'min': np.min(relative_speed),
                    'max': np.max(relative_speed)
                },
                'time_headway': {
                    'mean': np.mean(time_headway[~np.isinf(time_headway)]),
                    'std': np.std(time_headway[~np.isinf(time_headway)]),
                    'min': np.min(time_headway[~np.isinf(time_headway)]),
                    'max': np.max(time_headway[~np.isinf(time_headway)])
                }
            },
            'trajectories': {
                'time': np.arange(0, (end_frame - start_frame + 1) * 0.1, 0.1),
                'leader_speed': leader_data['Speed'].values,
                'follower_speed': follower_data['Speed'].values,
                'spacing': spacing,
                'relative_speed': relative_speed,
                'time_headway': time_headway
            }
        }
    
    def generate_pair_visualizations(self, pair_data: Dict):
        """
        Generates comprehensive visualizations for a single car-following pair
        """
        fig, axes = plt.subplots(3, 1, figsize=(12, 15))
        
        # Speed profiles
        axes[0].plot(pair_data['trajectories']['time'],
                    pair_data['trajectories']['leader_speed'],
                    label='Leader')
        axes[0].plot(pair_data['trajectories']['time'],
                    pair_data['trajectories']['follower_speed'],
                    label='Follower')
        axes[0].set_xlabel('Time (s)')
        axes[0].set_ylabel('Speed (m/s)')
        axes[0].set_title('Speed Profiles')
        axes[0].legend()
        axes[0].grid(True)
        
        # Space gap
        axes[1].plot(pair_data['trajectories']['time'],
                    pair_data['trajectories']['spacing'])
        axes[1].set_xlabel('Time (s)')
        axes[1].set_ylabel('Space Gap (m)')
        axes[1].set_title('Following Distance')
        axes[1].grid(True)
        
        # Time headway
        valid_headway = pair_data['trajectories']['time_headway']
        valid_headway = valid_headway[~np.isinf(valid_headway)]
        valid_time = pair_data['trajectories']['time'][:len(valid_headway)]
        
        axes[2].plot(valid_time, valid_headway)
        axes[2].set_xlabel('Time (s)')
        axes[2].set_ylabel('Time Headway (s)')
        axes[2].set_title('Time Headway')
        axes[2].grid(True)
        
        plt.tight_layout()
        return fig
    
    def generate_summary_statistics(self):
        """
        Generates summary statistics across all car-following pairs
        """
        self.summary_stats = {
            'spacing': {
                'mean': np.mean([p['metrics']['spacing']['mean'] for p in self.pairs_data]),
                'std': np.mean([p['metrics']['spacing']['std'] for p in self.pairs_data]),
                'min': np.min([p['metrics']['spacing']['min'] for p in self.pairs_data]),
                'max': np.max([p['metrics']['spacing']['max'] for p in self.pairs_data])
            },
            'time_headway': {
                'mean': np.mean([p['metrics']['time_headway']['mean'] for p in self.pairs_data]),
                'std': np.mean([p['metrics']['time_headway']['std'] for p in self.pairs_data]),
                'min': np.min([p['metrics']['time_headway']['min'] for p in self.pairs_data]),
                'max': np.max([p['metrics']['time_headway']['max'] for p in self.pairs_data])
            },
            'relative_speed': {
                'mean': np.mean([p['metrics']['relative_speed']['mean'] for p in self.pairs_data]),
                'std': np.mean([p['metrics']['relative_speed']['std'] for p in self.pairs_data]),
                'min': np.min([p['metrics']['relative_speed']['min'] for p in self.pairs_data]),
                'max': np.max([p['metrics']['relative_speed']['max'] for p in self.pairs_data])
            }
        }
        
        return pd.DataFrame(self.summary_stats)

def main():
    # Initialize analyzer
    analyzer = CarFollowingStatistics("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt")
    
    # Load and process data
    analyzer.load_and_process_data()
    
    # Generate summary statistics
    summary_stats = analyzer.generate_summary_statistics()
    print("\nSummary Statistics:")
    print(summary_stats)
    
    # Generate visualizations for first 5 pairs
    print("\nGenerating visualizations for first 5 pairs...")
    for i, pair_data in enumerate(analyzer.pairs_data[:5]):
        fig = analyzer.generate_pair_visualizations(pair_data)
        fig.suptitle(f'Car-Following Pair {pair_data["pair_id"]}')
        plt.savefig(f'pair_{i+1}_analysis.png')
        plt.close()
        
        print(f"\nPair {i+1} Metrics:")
        for metric, values in pair_data['metrics'].items():
            print(f"\n{metric.title()}:")
            for stat, value in values.items():
                print(f"  {stat}: {value:.2f}")

if __name__ == "__main__":
    main()

  time_headway = spacing / follower_data['Speed'].values



Summary Statistics:
         spacing  time_headway  relative_speed
mean   19.713641      3.221234       -0.008538
std     4.778569      1.819268        1.255265
min     3.516840      0.387688      -10.545680
max   126.281730    546.928879       13.817540

Generating visualizations for first 5 pairs...

Pair 1 Metrics:

Spacing:
  mean: 13.24
  std: 7.48
  min: 6.52
  max: 38.02

Relative_Speed:
  mean: -0.99
  std: 2.43
  min: -7.16
  max: 2.13

Time_Headway:
  mean: 5.67
  std: 4.40
  min: 1.66
  max: 28.36

Pair 2 Metrics:

Spacing:
  mean: 14.51
  std: 4.76
  min: 7.36
  max: 25.12

Relative_Speed:
  mean: 0.17
  std: 1.15
  min: -2.28
  max: 2.67

Time_Headway:
  mean: 3.49
  std: 5.21
  min: 1.39
  max: 67.51

Pair 3 Metrics:

Spacing:
  mean: 18.18
  std: 7.66
  min: 8.13
  max: 36.55

Relative_Speed:
  mean: 0.07
  std: 1.46
  min: -4.34
  max: 3.84

Time_Headway:
  mean: 3.40
  std: 1.01
  min: 2.00
  max: 7.10

Pair 4 Metrics:

Spacing:
  mean: 13.97
  std: 2.88
  min: 8.60
 

In [10]:
%pip install numpy pandas matplotlib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging

# Configure logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Load and process data

# Load and process data
def load_data(file_path):
    try:
        data = pd.read_csv(file_path, delimiter=' ', skip_blank_lines=True, dtype=float)
        data.columns = ['vehicleId', 'frame', 'lane', 'position', 'speed', 'acceleration', 'vehicleLength', 'vehicleClass', 'followerId', 'leaderId']
        return data
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        return pd.DataFrame()

# Helper function to find car-following pairs
def find_car_following_pairs(data):
    vehicle_groups = data.groupby('vehicleId')
    pairs = []
    min_duration = 200  # 20 seconds (at 0.1s intervals)

    for vehicle_id, vehicle in vehicle_groups:
        unique_leaders = vehicle['leaderId'].unique()
        unique_leaders = unique_leaders[unique_leaders != -1]

        for leader_id in unique_leaders:
            start_frame = None
            current_frame = None

            for _, point in vehicle.iterrows():
                if point['leaderId'] == leader_id:
                    if start_frame is None:
                        start_frame = point['frame']
                    current_frame = point['frame']
                elif start_frame is not None:
                    if current_frame - start_frame >= min_duration:
                        pairs.append({
                            'leaderId': leader_id,
                            'followerId': vehicle_id,
                            'startFrame': start_frame,
                            'endFrame': current_frame,
                            'lane': point['lane']
                        })
                    start_frame = None

            if start_frame is not None and current_frame - start_frame >= min_duration:
                pairs.append({
                    'leaderId': leader_id,
                    'followerId': vehicle_id,
                    'startFrame': start_frame,
                    'endFrame': current_frame,
                    'lane': vehicle.iloc[-1]['lane']
                })

    return pairs

# Process data for selected pairs
def process_pair_data(data, selected_pairs):
    pair_data = []

    for pair in selected_pairs:
        pair_frames = np.arange(pair['startFrame'], pair['endFrame'] + 1)
        leader_data = data[(data['vehicleId'] == pair['leaderId']) & (data['frame'].isin(pair_frames))]
        follower_data = data[(data['vehicleId'] == pair['followerId']) & (data['frame'].isin(pair_frames))]

        time_data = []
        for frame in pair_frames:
            leader_point = leader_data[leader_data['frame'] == frame].iloc[0] if not leader_data[leader_data['frame'] == frame].empty else {}
            follower_point = follower_data[follower_data['frame'] == frame].iloc[0] if not follower_data[follower_data['frame'] == frame].empty else {}

            time_data.append({
                'time': (frame - pair['startFrame']) * 0.1,
                'leaderSpeed': leader_point.get('speed', np.nan),
                'followerSpeed': follower_point.get('speed', np.nan),
                'spacing': leader_point.get('position', np.nan) - follower_point.get('position', np.nan),
                'relativeSpeed': leader_point.get('speed', np.nan) - follower_point.get('speed', np.nan),
                'leaderAccel': leader_point.get('acceleration', np.nan),
                'followerAccel': follower_point.get('acceleration', np.nan)
            })

        pair_data.append({
            'pairId': f"{pair['leaderId']}-{pair['followerId']}",
            'timeData': time_data
        })

    return pair_data

# Visualization functions
def plot_pair_visualizations(pair_data):
    for pair in pair_data:
        time_data = pd.DataFrame(pair['timeData'])

        fig, axs = plt.subplots(3, 1, figsize=(10, 15))
        fig.suptitle(f"Car-Following Pair {pair['pairId']}")

        # Speed Profiles
        axs[0].plot(time_data['time'], time_data['leaderSpeed'], label='Leader Speed', color='blue')
        axs[0].plot(time_data['time'], time_data['followerSpeed'], label='Follower Speed', color='green')
        axs[0].set_xlabel('Time (s)')
        axs[0].set_ylabel('Speed (m/s)')
        axs[0].legend()
        axs[0].set_title('Speed Profiles')

        # Following Distance
        axs[1].plot(time_data['time'], time_data['spacing'], label='Space Gap', color='orange')
        axs[1].set_xlabel('Time (s)')
        axs[1].set_ylabel('Distance (m)')
        axs[1].legend()
        axs[1].set_title('Following Distance')

        # Relative Speed
        axs[2].plot(time_data['time'], time_data['relativeSpeed'], label='Relative Speed', color='red')
        axs[2].set_xlabel('Time (s)')
        axs[2].set_ylabel('Relative Speed (m/s)')
        axs[2].legend()
        axs[2].set_title('Relative Speed')

        plt.tight_layout()
        plt.show()

# Main function to run the analysis
def main():
    file_path = "D:/ASU Academics/Traffic Flow Theroy/MP-1/Reconstructed NGSIM I80-1 data/Data/DATA (NO MOTORCYCLES).txt"
    raw_data = load_data(file_path)
    selected_pairs = find_car_following_pairs(raw_data)[:5]
    pair_data = process_pair_data(raw_data, selected_pairs)
    plot_pair_visualizations(pair_data)

if __name__ == "__main__":
    main()

2025-02-04 23:57:02,205 - __main__ - ERROR - Error loading data: Error tokenizing data. C error: Expected 62 fields in line 1602, saw 63

2025-02-04 23:57:02,205 - __main__ - ERROR - Error loading data: Error tokenizing data. C error: Expected 62 fields in line 1602, saw 63

ERROR:__main__:Error loading data: Error tokenizing data. C error: Expected 62 fields in line 1602, saw 63



Note: you may need to restart the kernel to use updated packages.


KeyError: 'vehicleId'