<a href="https://colab.research.google.com/github/Raswanth-Prasath/NGSIM-Driving-Behavior-Analysis/blob/main/NGSIM_Driving_Behavior_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from scipy import stats

In [2]:
# Define column names for the data files
column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID', 'Follower ID', 'Leader ID']  # replace with actual column names
moto_column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID']  # replace with actual column names

# Read DATA.txt (adjust delimiter if needed)
data = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt", delimiter=r"\s+", header=None, names=column_names)  # \s+ for multiple spaces
motorcycles = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.txt", delimiter=r"\s+", header=None, names=moto_column_names)

In [3]:
data.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA.csv", index=False)
motorcycles.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.csv", index=False)

In [4]:
# Add missing columns to motorcycles (Leader ID, Follower ID)
motorcycles["Follower ID"] = -1
motorcycles["Leader ID"] = -1

# Combine datasets
combined = pd.concat([data, motorcycles], ignore_index=True)

In [27]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import List, Optional, Dict, Tuple

@dataclass
class CarFollowingPair:
    """
    Represents a validated car-following pair with all relevant information
    """
    leader_id: int
    follower_id: int
    start_frame: int
    end_frame: int
    lane_id: int
    metrics: Dict = None

    @property
    def duration(self) -> float:
        """Duration of car-following episode in seconds"""
        return (self.end_frame - self.start_frame + 1) * 0.1  # Convert frames to seconds

class CarFollowingIdentifier:
    """
    Identifies and validates car-following pairs in trajectory data
    """
    def __init__(self, min_duration: float = 20.0,
                 min_spacing: float = 2.0,
                 max_spacing: float = 100.0):
        """
        Initialize with validation criteria
        
        Parameters:
        min_duration: Minimum duration in seconds for valid car-following
        min_spacing: Minimum allowed spacing between vehicles (meters)
        max_spacing: Maximum allowed spacing between vehicles (meters)
        """
        self.min_duration = min_duration
        self.min_frames = int(min_duration * 10)  # Convert to frames (0.1s intervals)
        self.min_spacing = min_spacing
        self.max_spacing = max_spacing
        
    def identify_pairs(self, df: pd.DataFrame) -> List[CarFollowingPair]:
        """
        Main method to identify valid car-following pairs
        
        Parameters:
        df: DataFrame with columns for Vehicle_ID, Frame_ID, Lane_ID, LocalY
        
        Returns:
        List of validated CarFollowingPair objects
        """
        valid_pairs = []
        
        # Step 1: Group data by lane
        for lane_id, lane_data in df.groupby('Lane_ID'):
            # Skip special lanes (like merge lanes or shoulders)
            if lane_id > 6:  # Assuming regular lanes are 1-6
                continue
                
            # Step 2: Process each time window in the lane
            frames = sorted(lane_data['Frame_ID'].unique())
            
            # Step 3: For each frame, identify potential pairs
            current_pairs = {}  # Track ongoing pairs
            
            for frame in frames:
                frame_data = lane_data[lane_data['Frame_ID'] == frame]
                
                # Sort vehicles by position to identify leader-follower relationships
                frame_vehicles = frame_data.sort_values('LocalY', ascending=False)
                
                # Step 4: Check each consecutive pair of vehicles
                for i in range(len(frame_vehicles) - 1):
                    leader = frame_vehicles.iloc[i]
                    follower = frame_vehicles.iloc[i + 1]
                    
                    pair_id = (leader['Vehicle_ID'], follower['Vehicle_ID'])
                    
                    # Calculate spacing
                    spacing = leader['LocalY'] - follower['LocalY']
                    
                    # Validate spacing
                    if self.min_spacing <= spacing <= self.max_spacing:
                        if pair_id not in current_pairs:
                            # Start new pair tracking
                            current_pairs[pair_id] = {
                                'start_frame': frame,
                                'current_frame': frame,
                                'lane_id': lane_id
                            }
                        else:
                            # Update existing pair
                            current_pairs[pair_id]['current_frame'] = frame
                    else:
                        # Invalid spacing - end pair if exists
                        self._check_and_add_pair(current_pairs, pair_id, valid_pairs)
            
            # Process any remaining pairs
            for pair_id in list(current_pairs.keys()):
                self._check_and_add_pair(current_pairs, pair_id, valid_pairs)
        
        return valid_pairs
    
    def _check_and_add_pair(self, current_pairs: Dict, 
                           pair_id: Tuple[int, int],
                           valid_pairs: List[CarFollowingPair]) -> None:
        """
        Validates and adds a car-following pair if it meets duration criteria
        """
        if pair_id in current_pairs:
            pair_data = current_pairs[pair_id]
            duration_frames = pair_data['current_frame'] - pair_data['start_frame'] + 1
            
            if duration_frames >= self.min_frames:
                # Create validated pair
                valid_pairs.append(CarFollowingPair(
                    leader_id=pair_id[0],
                    follower_id=pair_id[1],
                    start_frame=pair_data['start_frame'],
                    end_frame=pair_data['current_frame'],
                    lane_id=pair_data['lane_id']
                ))
            
            # Remove pair from tracking
            del current_pairs[pair_id]
    
    def compute_pair_metrics(self, pair: CarFollowingPair, 
                           df: pd.DataFrame) -> Dict:
        """
        Computes detailed metrics for a validated car-following pair
        
        Parameters:
        pair: CarFollowingPair object
        df: Original trajectory DataFrame
        
        Returns:
        Dictionary of computed metrics
        """
        # Get leader and follower trajectories
        leader_data = df[(df['Vehicle_ID'] == pair.leader_id) & 
                        (df['Frame_ID'] >= pair.start_frame) & 
                        (df['Frame_ID'] <= pair.end_frame)]
        
        follower_data = df[(df['Vehicle_ID'] == pair.follower_id) & 
                          (df['Frame_ID'] >= pair.start_frame) & 
                          (df['Frame_ID'] <= pair.end_frame)]
        
        # Compute spacing statistics
        spacing = leader_data['LocalY'].values - follower_data['LocalY'].values
        
        # Compute speed difference statistics
        speed_diff = leader_data['Speed'].values - follower_data['Speed'].values
        
        # Compute time headway
        time_headway = spacing / follower_data['Speed'].values
        valid_headway = time_headway[~np.isinf(time_headway)]
        
        return {
            'spacing': {
                'mean': np.mean(spacing),
                'std': np.std(spacing),
                'min': np.min(spacing),
                'max': np.max(spacing)
            },
            'speed_difference': {
                'mean': np.mean(speed_diff),
                'std': np.std(speed_diff),
                'min': np.min(speed_diff),
                'max': np.max(speed_diff)
            },
            'time_headway': {
                'mean': np.mean(valid_headway),
                'std': np.std(valid_headway),
                'min': np.min(valid_headway),
                'max': np.max(valid_headway)
            }
        }

def main():
    # Example usage
    # Read the data file
    df = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt", delimiter='\s+', header=None,
                     names=['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                           'Speed', 'Acceleration', 'Vehicle_Length',
                           'Vehicle_Class', 'Follower_ID', 'Leader_ID'])
    
    # Initialize identifier
    identifier = CarFollowingIdentifier(
        min_duration=20.0,  # 20 seconds minimum
        min_spacing=2.0,    # 2 meters minimum spacing
        max_spacing=100.0   # 100 meters maximum spacing
    )
    
    # Find car-following pairs
    pairs = identifier.identify_pairs(df)
    
    # Print summary
    print(f"Found {len(pairs)} valid car-following pairs")
    
    # Analyze first few pairs
    for i, pair in enumerate(pairs[:5]):
        print(f"\nPair {i+1}:")
        print(f"Leader ID: {pair.leader_id}")
        print(f"Follower ID: {pair.follower_id}")
        print(f"Duration: {pair.duration:.1f} seconds")
        print(f"Lane: {pair.lane_id}")
        
        # Compute and print metrics
        metrics = identifier.compute_pair_metrics(pair, df)
        print("\nMetrics:")
        for metric, values in metrics.items():
            print(f"\n{metric}:")
            for stat, value in values.items():
                print(f"  {stat}: {value:.2f}")

if __name__ == "__main__":
    main()

Found 2029 valid car-following pairs

Pair 1:
Leader ID: 47.0
Follower ID: 64.0
Duration: 21.8 seconds
Lane: 1

Metrics:

spacing:
  mean: 52.44
  std: 18.54
  min: 35.11
  max: 99.71

speed_difference:
  mean: 2.83
  std: 2.71
  min: -1.43
  max: 8.23

time_headway:
  mean: 4.71
  std: 1.17
  min: 3.39
  max: 8.14

Pair 2:
Leader ID: 2142.0
Follower ID: 2153.0
Duration: 20.5 seconds
Lane: 1

Metrics:

spacing:
  mean: 66.29
  std: 15.14
  min: 45.03
  max: 99.59

speed_difference:
  mean: 2.67
  std: 1.95
  min: -0.61
  max: 6.10

time_headway:
  mean: 4.73
  std: 1.42
  min: 2.75
  max: 8.12

Pair 3:
Leader ID: 2.0
Follower ID: 17.0
Duration: 26.0 seconds
Lane: 1

Metrics:

spacing:
  mean: 36.46
  std: 10.12
  min: 19.77
  max: 57.29

speed_difference:
  mean: 1.36
  std: 1.29
  min: -1.81
  max: 4.81

time_headway:
  mean: 2.82
  std: 0.35
  min: 2.17
  max: 4.07

Pair 4:
Leader ID: 17.0
Follower ID: 25.0
Duration: 25.7 seconds
Lane: 1

Metrics:

spacing:
  mean: 36.95
  std: 10.31

Analyzes statistical properties of car-following pairs and generates comprehensive visualizations and analyses.

**Car-Following Visualization**

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging

# Configure logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

def load_data(file_path):
    """Load and process the trajectory data"""
    try:
        # Read data with numbered columns first
        data = pd.read_csv(file_path, delimiter='\s+', header=None)
        
        # Rename columns to match expected format
        data.columns = ['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'Local_Y', 
                       'Speed', 'Acceleration', 'Vehicle_Length',
                       'Vehicle_Class', 'Follower_ID', 'Leader_ID']
        
        logger.info(f"Successfully loaded data with {len(data)} rows")
        return data
        
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        return pd.DataFrame()

def find_car_following_pairs(data):
    """Identify car-following pairs that persist for at least 20 seconds"""
    pairs = []
    min_duration = 200  # 20 seconds (at 0.1s intervals)
    
    # Process each vehicle
    for vehicle_id in data['Vehicle_ID'].unique():
        vehicle_data = data[data['Vehicle_ID'] == vehicle_id].sort_values('Frame_ID')
        
        # Find unique leaders (excluding -1 which indicates no leader)
        leaders = vehicle_data['Leader_ID'].unique()
        leaders = leaders[leaders != -1]
        
        for leader_id in leaders:
            # Get frames where this vehicle follows the current leader
            following_frames = vehicle_data[vehicle_data['Leader_ID'] == leader_id]['Frame_ID']
            
            if len(following_frames) >= min_duration:
                pairs.append({
                    'leader_id': leader_id,
                    'follower_id': vehicle_id,
                    'start_frame': following_frames.iloc[0],
                    'end_frame': following_frames.iloc[-1],
                    'lane': vehicle_data['Lane_ID'].iloc[0],
                    'duration': len(following_frames) * 0.1  # Convert to seconds
                })
    
    logger.info(f"Found {len(pairs)} car-following pairs")
    return pairs

def process_pair_data(data, selected_pairs):
    """Process data for selected car-following pairs"""
    pair_data = []
    
    for pair in selected_pairs:
        # Get trajectory data for leader and follower
        frames = range(int(pair['start_frame']), int(pair['end_frame']) + 1)
        
        leader_data = data[
            (data['Vehicle_ID'] == pair['leader_id']) & 
            (data['Frame_ID'].isin(frames))
        ].sort_values('Frame_ID')
        
        follower_data = data[
            (data['Vehicle_ID'] == pair['follower_id']) & 
            (data['Frame_ID'].isin(frames))
        ].sort_values('Frame_ID')
        
        # Combine data
        merged_data = pd.merge(
            leader_data, 
            follower_data,
            on='Frame_ID',
            suffixes=('_leader', '_follower')
        )
        
        time_data = []
        for _, row in merged_data.iterrows():
            time_data.append({
                'time': (row['Frame_ID'] - pair['start_frame']) * 0.1,
                'leader_speed': row['Speed_leader'],
                'follower_speed': row['Speed_follower'],
                'spacing': row['Local_Y_leader'] - row['Local_Y_follower'],
                'relative_speed': row['Speed_leader'] - row['Speed_follower']
            })
        
        pair_data.append({
            'pair_id': f"{pair['leader_id']}-{pair['follower_id']}",
            'time_data': pd.DataFrame(time_data)
        })
    
    return pair_data

def plot_pair_visualizations(pair_data):
    """Create visualizations for car-following pairs"""
    for pair in pair_data:
        time_data = pair['time_data']
        
        fig, axes = plt.subplots(3, 1, figsize=(12, 15))
        fig.suptitle(f"Car-Following Pair Analysis\nPair {pair['pair_id']}")
        
        # Speed profiles
        axes[0].plot(time_data['time'], time_data['leader_speed'], 
                    label='Leader', color='blue', linewidth=2)
        axes[0].plot(time_data['time'], time_data['follower_speed'], 
                    label='Follower', color='red', linestyle='--', linewidth=2)
        axes[0].set_xlabel('Time (s)')
        axes[0].set_ylabel('Speed (m/s)')
        axes[0].set_title('Speed Profiles')
        axes[0].grid(True)
        axes[0].legend()
        
        # Space gap
        axes[1].plot(time_data['time'], time_data['spacing'], 
                    color='green', linewidth=2)
        axes[1].set_xlabel('Time (s)')
        axes[1].set_ylabel('Space Gap (m)')
        axes[1].set_title('Following Distance')
        axes[1].grid(True)
        
        # Relative speed
        axes[2].plot(time_data['time'], time_data['relative_speed'], 
                    color='purple', linewidth=2)
        axes[2].set_xlabel('Time (s)')
        axes[2].set_ylabel('Relative Speed (m/s)')
        axes[2].set_title('Relative Speed (Leader - Follower)')
        axes[2].grid(True)
        
        # Add summary statistics
        stats_text = (
            f"Mean Space Gap: {time_data['spacing'].mean():.1f}m\n"
            f"Mean Relative Speed: {time_data['relative_speed'].mean():.2f}m/s\n"
            f"Duration: {len(time_data)*0.1:.1f}s"
        )
        plt.figtext(0.02, 0.02, stats_text, fontsize=10, 
                   bbox=dict(facecolor='white', alpha=0.8))
        
        plt.tight_layout()
        plt.savefig(f'pair_{pair["pair_id"]}_analysis.png', dpi=300, bbox_inches='tight')
        plt.close()

def main():
    # Load data
    logger.info("Loading data...")
    data = load_data("D:/ASU Academics/Traffic Flow Theroy/MP-1/Reconstructed NGSIM I80-1 data/Data/DATA (NO MOTORCYCLES).txt")
    
    if len(data) == 0:
        logger.error("Failed to load data")
        return
        
    # Find car-following pairs
    logger.info("Finding car-following pairs...")
    pairs = find_car_following_pairs(data)
    
    if len(pairs) == 0:
        logger.error("No car-following pairs found")
        return
        
    # Select first 5 pairs
    selected_pairs = pairs[:5]
    
    # Process and visualize pairs
    logger.info("Processing and visualizing selected pairs...")
    pair_data = process_pair_data(data, selected_pairs)
    plot_pair_visualizations(pair_data)
    
    logger.info("Analysis complete!")

if __name__ == "__main__":
    main()

2025-02-06 13:44:25,758 - __main__ - INFO - Loading data...
2025-02-06 13:44:25,758 - __main__ - INFO - Loading data...
2025-02-06 13:44:25,758 - __main__ - INFO - Loading data...
2025-02-06 13:44:25,758 - __main__ - INFO - Loading data...
INFO:__main__:Loading data...
2025-02-06 13:44:28,439 - __main__ - INFO - Successfully loaded data with 1055801 rows
2025-02-06 13:44:28,439 - __main__ - INFO - Successfully loaded data with 1055801 rows
2025-02-06 13:44:28,439 - __main__ - INFO - Successfully loaded data with 1055801 rows
2025-02-06 13:44:28,439 - __main__ - INFO - Successfully loaded data with 1055801 rows
INFO:__main__:Successfully loaded data with 1055801 rows
2025-02-06 13:44:28,442 - __main__ - INFO - Finding car-following pairs...
2025-02-06 13:44:28,442 - __main__ - INFO - Finding car-following pairs...
2025-02-06 13:44:28,442 - __main__ - INFO - Finding car-following pairs...
2025-02-06 13:44:28,442 - __main__ - INFO - Finding car-following pairs...
INFO:__main__:Finding car

**Lane Change Analysis :** Count the lane-change occurrences in the dataset. Analyze where and when these lane changes occur, identifying any observable trends.

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

class LaneChangeAnalyzer:
    def __init__(self, min_duration: float = 0.5):
        self.min_duration = min_duration
        self.min_frames = int(min_duration * 10)
        self.lane_changes = []
        
    def analyze_trajectories(self, df: pd.DataFrame) -> None:
        """Analyzes trajectory data to identify lane changes"""
        # First, rename columns to match our expected format
        df.columns = ['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                     'Speed', 'Acceleration', 'Vehicle_Length',
                     'Vehicle_Class', 'Follower_ID', 'Leader_ID']
        
        # Group by vehicle
        for vehicle_id in df['Vehicle_ID'].unique():
            # Get vehicle's trajectory
            vehicle_data = df[df['Vehicle_ID'] == vehicle_id].sort_values('Frame_ID')
            
            # Initialize variables for tracking lane changes
            prev_lane = None
            start_frame = None
            from_lane = None
            
            # Analyze frame by frame
            for idx, row in vehicle_data.iterrows():
                curr_lane = row['Lane_ID']
                
                if prev_lane is not None and curr_lane != prev_lane:
                    if start_frame is None:
                        # Start of lane change
                        start_frame = row['Frame_ID']
                        from_lane = prev_lane
                    elif curr_lane != from_lane:
                        # End of lane change
                        end_frame = row['Frame_ID']
                        duration = (end_frame - start_frame) * 0.1
                        
                        if duration >= self.min_duration:
                            self.lane_changes.append({
                                'vehicle_id': vehicle_id,
                                'start_frame': start_frame,
                                'end_frame': end_frame,
                                'from_lane': from_lane,
                                'to_lane': curr_lane,
                                'position': row['LocalY'],
                                'speed': row['Speed'],
                                'duration': duration,
                                'direction': 'left' if curr_lane > from_lane else 'right'
                            })
                        
                        # Reset for next lane change
                        start_frame = None
                        from_lane = None
                
                prev_lane = curr_lane
    
    def generate_statistics(self) -> dict:
        """Generates summary statistics of lane changes"""
        if not self.lane_changes:
            return {
                'total_changes': 0,
                'direction_counts': {'left': 0, 'right': 0},
                'avg_duration': 0,
                'speed_stats': {'mean': 0, 'std': 0, 'min': 0, 'max': 0}
            }
            
        df_changes = pd.DataFrame(self.lane_changes)
        
        stats = {
            'total_changes': len(self.lane_changes),
            'direction_counts': df_changes['direction'].value_counts().to_dict(),
            'avg_duration': df_changes['duration'].mean(),
            'speed_stats': {
                'mean': df_changes['speed'].mean(),
                'std': df_changes['speed'].std(),
                'min': df_changes['speed'].min(),
                'max': df_changes['speed'].max()
            },
            'lane_transitions': pd.crosstab(
                df_changes['from_lane'],
                df_changes['to_lane']
            ).to_dict(),
            'position_dist': pd.cut(
                df_changes['position'],
                bins=np.arange(0, df_changes['position'].max() + 100, 100)
            ).value_counts().sort_index().to_dict()
        }
        
        return stats
    
    def plot_analysis(self, stats: dict) -> None:
        """Creates visualizations of lane change patterns"""
        fig = plt.figure(figsize=(15, 10))
        
        # 1. Direction Distribution (Top Left)
        plt.subplot(221)
        directions = list(stats['direction_counts'].keys())
        counts = list(stats['direction_counts'].values())
        plt.bar(directions, counts)
        plt.title('Lane Change Direction Distribution')
        plt.ylabel('Number of Lane Changes')
        
        # 2. Position Distribution (Top Right)
        plt.subplot(222)
        positions = list(stats['position_dist'].keys())
        position_counts = list(stats['position_dist'].values())
        plt.bar(range(len(positions)), position_counts)
        plt.title('Lane Change Position Distribution')
        plt.xlabel('Position (100m segments)')
        plt.ylabel('Number of Lane Changes')
        
        # 3. Lane Transitions Heatmap (Bottom Left)
        plt.subplot(223)
        if self.lane_changes:
            df_changes = pd.DataFrame(self.lane_changes)
            transition_matrix = pd.crosstab(
                df_changes['from_lane'],
                df_changes['to_lane']
            )
            sns.heatmap(transition_matrix, annot=True, fmt='d', cmap='YlOrRd')
            plt.title('Lane Change Transitions')
            plt.xlabel('To Lane')
            plt.ylabel('From Lane')
                    
        # 4. Speed Distribution (Bottom Right)
        plt.subplot(224)
        if self.lane_changes:
            speeds = [lc['speed'] for lc in self.lane_changes]
            plt.hist(speeds, bins=20)
            plt.title('Speed During Lane Changes')
            plt.xlabel('Speed (m/s)')
            plt.ylabel('Frequency')
                    
        plt.tight_layout()
        plt.savefig('lane_change_analysis.png', dpi=300, bbox_inches='tight')
        plt.close()

def main():
    # Read data file
    print("Reading data file...")
    df = pd.read_csv("D:/ASU Academics/Traffic Flow Theroy/MP-1/Reconstructed NGSIM I80-1 data/Data/DATA (NO MOTORCYCLES).txt", delimiter='\s+', header=None)
    
    # Initialize analyzer
    print("Analyzing lane changes...")
    analyzer = LaneChangeAnalyzer(min_duration=0.5)
    
    # Analyze trajectories
    analyzer.analyze_trajectories(df)
    
    # Generate statistics
    print("Generating statistics...")
    stats = analyzer.generate_statistics()
    
    # Print summary
    print("\nLane Change Analysis Summary:")
    print(f"Total lane changes: {stats['total_changes']}")
    print("\nDirection distribution:")
    for direction, count in stats['direction_counts'].items():
        print(f"{direction}: {count} ({count/stats['total_changes']*100:.1f}%)")
    print(f"\nAverage duration: {stats['avg_duration']:.2f} seconds")
    print("\nSpeed statistics during lane changes:")
    for stat, value in stats['speed_stats'].items():
        print(f"{stat}: {value:.2f} m/s")
    
    # Create visualizations
    print("\nCreating visualizations...")
    analyzer.plot_analysis(stats)
    print("Analysis complete. Visualizations saved as 'lane_change_analysis.png'")

if __name__ == "__main__":
    main()

Reading data file...
Analyzing lane changes...
Generating statistics...

Lane Change Analysis Summary:
Total lane changes: 164

Direction distribution:
right: 140 (85.4%)
left: 24 (14.6%)

Average duration: 15.44 seconds

Speed statistics during lane changes:
mean: 7.95 m/s
std: 2.52 m/s
min: 1.82 m/s
max: 14.77 m/s

Creating visualizations...
Analysis complete. Visualizations saved as 'lane_change_analysis.png'


**Time-Space Diagram :** Plot the lane-by-lane time-space diagram for all the NGSIM trajectory data. Based on the diagram, discuss traffic conditions and patterns of congestion.

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import seaborn as sns

class TimeSpaceDiagramAnalyzer:
    def __init__(self, data_file: str):
        """Initialize analyzer with data file path"""
        self.df = pd.read_csv(data_file, delimiter='\s+', header=None,
                             names=['Vehicle_ID', 'Frame_ID', 'Lane_ID', 'LocalY',
                                   'Speed', 'Acceleration', 'Vehicle_Length',
                                   'Vehicle_Class', 'Follower_ID', 'Leader_ID'])
        
        # Convert Frame_ID to time in seconds
        self.df['Time'] = self.df['Frame_ID'] * 0.1
        
    def plot_lane_diagram(self, lane_id: int, ax=None, cmap='viridis'):
        """Plot time-space diagram for a specific lane"""
        lane_data = self.df[self.df['Lane_ID'] == lane_id]
        
        lines = []
        colors = []
        
        for vehicle_id in lane_data['Vehicle_ID'].unique():
            vehicle_traj = lane_data[lane_data['Vehicle_ID'] == vehicle_id]
            if len(vehicle_traj) > 1:
                points = np.column_stack((vehicle_traj['Time'], 
                                       vehicle_traj['LocalY']))
                lines.append(points)
                colors.append(np.mean(vehicle_traj['Speed']))
        
        if not lines:
            return None
            
        lc = LineCollection(lines, cmap=plt.get_cmap(cmap))
        lc.set_array(np.array(colors))
        
        if ax is None:
            ax = plt.gca()
            
        line = ax.add_collection(lc)
        
        # Set axis limits
        times = lane_data['Time']
        positions = lane_data['LocalY']
        ax.set_xlim(times.min(), times.max())
        ax.set_ylim(positions.min(), positions.max())
        
        return line

    def create_full_diagram(self):
        """Create time-space diagrams for all lanes"""
        lanes = sorted(self.df['Lane_ID'].unique())
        n_lanes = len(lanes)
        
        fig, axes = plt.subplots(n_lanes, 1, figsize=(15, 4*n_lanes), sharex=True)
        if n_lanes == 1:
            axes = [axes]
            
        fig.suptitle('Time-Space Diagram by Lane', fontsize=16, y=0.92)
        
        for ax, lane_id in zip(axes, lanes):
            line = self.plot_lane_diagram(lane_id, ax=ax)
            if line is not None:
                plt.colorbar(line, ax=ax, label='Speed (m/s)')
            
            ax.set_ylabel('Position (m)')
            ax.set_title(f'Lane {lane_id}')
            ax.grid(True, linestyle='--', alpha=0.7)
            
        axes[-1].set_xlabel('Time (seconds)')
        
        plt.tight_layout()
        return fig, axes

    def plot_congestion_heatmap(self):
        """Create a heatmap showing average speeds by lane and time"""
        # Calculate time bins (5-minute intervals)
        time_bins = np.arange(0, self.df['Time'].max() + 300, 300)
        lanes = sorted(self.df['Lane_ID'].unique())
        
        # Create speed matrix
        speed_matrix = np.zeros((len(lanes), len(time_bins)-1))
        
        for i, lane_id in enumerate(lanes):
            lane_data = self.df[self.df['Lane_ID'] == lane_id]
            
            for j, (t_start, t_end) in enumerate(zip(time_bins[:-1], time_bins[1:])):
                mask = (lane_data['Time'] >= t_start) & (lane_data['Time'] < t_end)
                avg_speed = lane_data[mask]['Speed'].mean()
                speed_matrix[i, j] = avg_speed if not np.isnan(avg_speed) else 0
        
        # Create figure
        fig, ax = plt.subplots(figsize=(15, 5))
        
        # Create heatmap
        im = ax.imshow(speed_matrix, 
                      aspect='auto',
                      cmap='RdYlGn',
                      extent=[0, self.df['Time'].max()/60, len(lanes)-0.5, -0.5])
        
        # Add colorbar
        plt.colorbar(im, ax=ax, label='Average Speed (m/s)')
        
        # Configure axes
        ax.set_yticks(range(len(lanes)))
        ax.set_yticklabels([f'Lane {lane}' for lane in lanes])
        
        # Add time labels (in minutes)
        time_ticks = np.linspace(0, self.df['Time'].max()/60, 10)
        ax.set_xticks(time_ticks)
        ax.set_xticklabels([f'{t:.0f}' for t in time_ticks])
        
        plt.title('Traffic Speed Heatmap')
        plt.xlabel('Time (minutes)')
        plt.ylabel('Lane')
        
        return fig, ax

    def analyze_congestion(self):
        """Analyze congestion patterns"""
        congestion_threshold = 10  # m/s
        
        # Calculate average speeds in 5-minute windows
        self.df['time_window'] = pd.cut(self.df['Time'], 
                                      bins=np.arange(0, self.df['Time'].max() + 300, 300))
        
        speed_stats = self.df.groupby(['Lane_ID', 'time_window'])['Speed'].agg([
            'mean', 'std', 'count'
        ]).reset_index()
        
        # Identify congestion
        congestion = speed_stats[speed_stats['mean'] < congestion_threshold]
        
        # Calculate overall statistics
        lane_stats = self.df.groupby('Lane_ID')['Speed'].agg([
            'mean', 'std', 'min', 'max'
        ]).round(2)
        
        return {
            'congestion_periods': congestion,
            'lane_statistics': lane_stats,
            'congestion_threshold': congestion_threshold
        }

def main():
    # Initialize analyzer
    analyzer = TimeSpaceDiagramAnalyzer("D:/ASU Academics/Traffic Flow Theroy/MP-1/Reconstructed NGSIM I80-1 data/Data/DATA (NO MOTORCYCLES).txt")
    
    # Create time-space diagrams
    print("Creating time-space diagrams...")
    fig_ts, axes_ts = analyzer.create_full_diagram()
    fig_ts.savefig('time_space_diagram.png', dpi=300, bbox_inches='tight')
    plt.close(fig_ts)
    
    # Create congestion heatmap
    print("Creating congestion heatmap...")
    fig_heat, ax_heat = analyzer.plot_congestion_heatmap()
    fig_heat.savefig('congestion_heatmap.png', dpi=300, bbox_inches='tight')
    plt.close(fig_heat)
    
    # Analyze congestion
    print("\nAnalyzing congestion patterns...")
    stats = analyzer.analyze_congestion()
    
    # Print summary
    print("\nTraffic Analysis Summary:")
    print(f"\nCongestion threshold: {stats['congestion_threshold']} m/s")
    
    print("\nLane Statistics:")
    print(stats['lane_statistics'])
    
    print("\nCongestion Periods:")
    congestion = stats['congestion_periods']
    if not congestion.empty:
        for _, period in congestion.iterrows():
            print(f"Lane {period['Lane_ID']}: "
                  f"Time window {period['time_window']}, "
                  f"Average speed: {period['mean']:.1f} m/s")

if __name__ == "__main__":
    main()

Creating time-space diagrams...
Creating congestion heatmap...

Analyzing congestion patterns...

Traffic Analysis Summary:

Congestion threshold: 10 m/s

Lane Statistics:
          mean   std   min    max
Lane_ID                          
1        16.63  3.71  0.58  31.63
2         7.11  2.64  0.00  17.74
3         7.06  2.47  0.00  16.52
4         6.37  2.78  0.00  16.74
5         7.02  2.99  0.00  19.24
6         6.90  3.14  0.00  20.08
7         6.34  4.18  0.00  20.74
999      10.76  3.60  0.80  17.77

Congestion Periods:
Lane 2: Time window (0.0, 300.0], Average speed: 7.3 m/s
Lane 2: Time window (300.0, 600.0], Average speed: 7.5 m/s
Lane 2: Time window (600.0, 900.0], Average speed: 6.3 m/s
Lane 2: Time window (900.0, 1200.0], Average speed: 9.1 m/s
Lane 3: Time window (0.0, 300.0], Average speed: 7.6 m/s
Lane 3: Time window (300.0, 600.0], Average speed: 7.1 m/s
Lane 3: Time window (600.0, 900.0], Average speed: 6.4 m/s
Lane 3: Time window (900.0, 1200.0], Average speed: 8.2 m

  speed_stats = self.df.groupby(['Lane_ID', 'time_window'])['Speed'].agg([
