## Exract all the surrounding vehicle data

In [8]:
import pandas as pd

# --------------------------------------------------
# 1) Configuration
# --------------------------------------------------

# Path to your full dataset:
WHOLE_DATA_FILE = 'I80dataset2.csv'

# List of your lane‑changing event files:
LANE_CHANGING_FILES = [
    'lane_changing_2_to_3.csv',
    'lane_changing_3_to_2.csv',
    'lane_changing_3_to_4.csv',
    'lane_changing_4_to_3.csv',
    'lane_changing_4_to_5.csv',
    'lane_changing_5_to_4.csv',
    'lane_changing_5_to_6.csv',
    'lane_changing_6_to_5.csv',
    'lane_changing_6_to_7.csv',
    'lane_changing_7_to_6.csv'
]

# --------------------------------------------------
# 2) Helper: find the nearest follower/leader
# --------------------------------------------------
def find_nearest_vehicle(df_frame, y_change, position='follower'):
    """
    Within df_frame (same frame), find the nearest vehicle relative
    to y_change:
      - 'follower': y < y_change, pick max(y)
      - 'leader'  : y > y_change, pick min(y)
    Returns a pd.Series or None.
    """
    if position == 'follower':
        cands = df_frame[df_frame['5'] < y_change]
        if not cands.empty:
            return cands.loc[cands['5'].idxmax()]
    else:  # 'leader'
        cands = df_frame[df_frame['5'] > y_change]
        if not cands.empty:
            return cands.loc[cands['5'].idxmin()]
    return None

# --------------------------------------------------
# 3) Process one lane‑change file
# --------------------------------------------------
def process_lane_changing_file(file_name, whole_df):
    # Parse source/target lane numbers from filename
    base = file_name.replace('lane_changing_', '').replace('.csv', '')
    source_lane, target_lane = map(int, base.split('_to_'))

    # Load the lane‑change events
    lc_df  = pd.read_csv(file_name)
    src_df = whole_df[whole_df['lane_no'] == source_lane]
    tgt_df = whole_df[whole_df['lane_no'] == target_lane]

    out_rows = []

    for _, lc in lc_df.iterrows():
        frame = lc['1']
        y_pos = lc['5']

        # Same‑frame subsets
        tgt_frame = tgt_df[tgt_df['1'] == frame]
        src_frame = src_df[src_df['1'] == frame]

        # Find neighbors
        ft = find_nearest_vehicle(tgt_frame, y_pos, 'follower')
        Lt = find_nearest_vehicle(tgt_frame, y_pos, 'leader')
        Ls = find_nearest_vehicle(src_frame, y_pos, 'leader')
        lg = find_nearest_vehicle(src_frame, y_pos, 'follower')

        # Helper to build a one‑row DataFrame with proper column names
        def make_row(series, suffix):
            cols = whole_df.columns
            if series is not None:
                vals = series.reindex(cols).values
            else:
                vals = [pd.NA]*len(cols)
            return pd.DataFrame([vals], columns=[f"{c}{suffix}" for c in cols])

        # Lane‑change vehicle row (its own columns + suffix)
        lc_row = pd.DataFrame(
            [lc.values],
            columns=[f"{c}_lane_change" for c in lc.index]
        )

        # Concatenate: LC | follower_target | leader_target | leader_source | lag_source
        row = pd.concat([
            lc_row,
            make_row(ft, '_follower_target'),
            make_row(Lt, '_leader_target'),
            make_row(Ls, '_leader_source'),
            make_row(lg, '_lag_source'),
        ], axis=1)

        out_rows.append(row)

    # Write output if any rows were produced
    if out_rows:
        merged_df = pd.concat(out_rows, ignore_index=True)
        out_name = f"merged_{file_name}"
        merged_df.to_csv(out_name, index=False)
        print(f"✔️  {file_name}: {len(merged_df)} rows → '{out_name}'")
    else:
        print(f"⚠️  {file_name}: no rows merged")

# --------------------------------------------------
# 4) Main
# --------------------------------------------------
if __name__ == "__main__":
    whole_data = pd.read_csv(WHOLE_DATA_FILE)
    for fname in LANE_CHANGING_FILES:
        process_lane_changing_file(fname, whole_data)


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_2_to_3.csv: 5795 rows → 'merged_lane_changing_2_to_3.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_3_to_2.csv: 46694 rows → 'merged_lane_changing_3_to_2.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_3_to_4.csv: 11353 rows → 'merged_lane_changing_3_to_4.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_4_to_3.csv: 78336 rows → 'merged_lane_changing_4_to_3.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_4_to_5.csv: 7830 rows → 'merged_lane_changing_4_to_5.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_5_to_4.csv: 103364 rows → 'merged_lane_changing_5_to_4.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_5_to_6.csv: 27049 rows → 'merged_lane_changing_5_to_6.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_6_to_5.csv: 109924 rows → 'merged_lane_changing_6_to_5.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_6_to_7.csv: 11234 rows → 'merged_lane_changing_6_to_7.csv'


  merged_df = pd.concat(out_rows, ignore_index=True)


✔️  lane_changing_7_to_6.csv: 130560 rows → 'merged_lane_changing_7_to_6.csv'
