<a href="https://colab.research.google.com/github/Raswanth-Prasath/NGSIM-Driving-Behavior-Analysis/blob/main/NGSIM_Driving_Behavior_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from scipy import stats

In [4]:
# Define column names for the data files
column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID', 'Follower ID', 'Leader ID']  # replace with actual column names
moto_column_names = ['Vehicle ID', 'Frame ID', 'Lane ID', 'LocalY', 'Mean Speed', 'Mean Acceleration', 'Vehicle length', 'Vehicle Class ID']  # replace with actual column names

# Read DATA.txt (adjust delimiter if needed)
data = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA (NO MOTORCYCLES).txt", delimiter=r"\s+", header=None, names=column_names)  # \s+ for multiple spaces
motorcycles = pd.read_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.txt", delimiter=r"\s+", header=None, names=moto_column_names)

In [5]:
data.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\DATA.csv", index=False)
motorcycles.to_csv("D:\ASU Academics\Traffic Flow Theroy\MP-1\Reconstructed NGSIM I80-1 data\Data\MOTORCYCLES.csv", index=False)

In [24]:
# Add missing columns to motorcycles (Leader ID, Follower ID)
motorcycles["Follower ID"] = -1
motorcycles["Leader ID"] = -1

# Combine datasets
combined = pd.concat([data, motorcycles], ignore_index=True)

In [23]:
import pandas as pd

# Ensure data is sorted by Vehicle ID and Frame ID
combined = combined.sort_values(by=['Vehicle ID', 'Frame ID'])

# Create a helper function to validate segments
def validate_segment(follower_data, leader_id, start_frame, end_frame, lane_id, combined):
    """
    Check if a segment meets all criteria:
    - Duration ≥20 seconds (≥200 frames)
    - Leader exists for all frames in the segment
    - Leader and follower stay in the same lane
    """
    # Calculate duration (0.1 seconds per frame)
    num_frames = end_frame - start_frame + 1
    if num_frames < 200:
        return False
    
    # Get leader's data for these frames
    leader_data = combined[
        (combined['Vehicle ID'] == leader_id) &
        (combined['Frame ID'].between(start_frame, end_frame))
    ]
    
    # Check 1: Leader has data for all frames in the segment
    leader_frames = set(leader_data['Frame ID'].unique())
    required_frames = set(range(start_frame, end_frame + 1))
    
    if leader_frames != required_frames:
        return False
    
    # Check 2: Leader and follower stayed in the same lane
    if (leader_data['Lane ID'] != lane_id).any():
        return False
    
    return True

# Main analysis
valid_pairs = []

# Group by follower vehicles
for follower_id, follower_data in combined.groupby('Vehicle ID'):
    current_leader = None
    start_frame = None
    current_lane = None
    
    for _, row in follower_data.iterrows():
        # Skip rows with no leader
        if row['Leader ID'] == -1:
            if current_leader is not None:
                # End of a potential segment
                if validate_segment(follower_data, current_leader, start_frame, row['Frame ID'] - 1, current_lane, combined):
                    valid_pairs.append({
                        'follower_id': follower_id,
                        'leader_id': current_leader,
                        'start_frame': start_frame,
                        'end_frame': row['Frame ID'] - 1,
                        'lane_id': current_lane,
                        'duration_sec': (row['Frame ID'] - start_frame) * 0.1
                    })
            current_leader = None
            start_frame = None
            current_lane = None
            continue
        
        # Initialize or continue tracking
        if row['Leader ID'] != current_leader or row['Lane ID'] != current_lane:
            if current_leader is not None:
                # Check previous segment
                if validate_segment(follower_data, current_leader, start_frame, row['Frame ID'] - 1, current_lane, combined):
                    valid_pairs.append({
                        'follower_id': follower_id,
                        'leader_id': current_leader,
                        'start_frame': start_frame,
                        'end_frame': row['Frame ID'] - 1,
                        'lane_id': current_lane,
                        'duration_sec': (row['Frame ID'] - start_frame) * 0.1
                    })
            
            # Start new segment
            current_leader = row['Leader ID']
            start_frame = row['Frame ID']
            current_lane = row['Lane ID']
        else:
            # Continue existing segment
            pass

# Convert results to DataFrame
valid_pairs_df = pd.DataFrame(valid_pairs)

# Filter duplicates (same pair in overlapping time windows)
valid_pairs_df = valid_pairs_df.drop_duplicates(
    subset=['follower_id', 'leader_id', 'lane_id'], 
    keep='first'
)

# Save results
valid_pairs_df.to_csv('valid_car_following_pairs.csv', index=False)
print(f"Found {len(valid_pairs_df)} valid car-following pairs")

TypeError: 'numpy.float64' object cannot be interpreted as an integer