In [3]:
import pandas as pd
import numpy as np

#### Loading Data from CSV File

In [4]:
def load_data(file_path, chunk_size=1000, selected_columns=None):
    """
    Lazily read a CSV file as a sequence of DataFrame chunks.

    Parameters:
    - file_path: str, path to the CSV file
    - chunk_size: int, number of rows per chunk
    - selected_columns: list or None, columns to read from the CSV;
      None (the default) reads every column

    Returns:
    - generator of DataFrames, each holding at most `chunk_size` rows
    """
    # `usecols=None` is pandas' own default, so one call covers both the
    # "all columns" and the "selected columns" case.
    # The reader is used as a context manager so that the underlying file
    # handle is released as soon as this generator is exhausted or closed,
    # including when the caller stops iterating early.
    with pd.read_csv(file_path, usecols=selected_columns, chunksize=chunk_size) as reader:
        yield from reader

In [5]:
# Loading configuration.
# chunk_data=True reads only the first `num_chunks` chunks of `chunk_size` rows
# (restricted to `selected_columns`); chunk_data=False reads the whole file with
# every column, i.e. `selected_columns` is not applied on that path.
chunk_data = False
chunk_size = 10000
num_chunks = 10
selected_columns = [
    'Vehicle_ID', 'Frame_ID', 'Total_Frames', 'Global_Time',
    'Local_X', 'Local_Y', 'Global_X', 'Global_Y',
    'v_length', 'v_Width', 'v_Class', 'v_Vel', 'v_Acc',
    'Lane_ID', 'Section_ID', 'Direction', 'Movement',
    'Preceding', 'Following', 'Space_Headway', 'Time_Headway', 'Location',
]

if chunk_data:
    # Collect the chunks first and concatenate once: calling pd.concat inside
    # the loop re-copies all previously loaded rows on every iteration.
    chunks = []
    for chunk in load_data('../data/ngsim.csv', chunk_size=chunk_size, selected_columns=selected_columns):
        chunks.append(chunk)
        if len(chunks) >= num_chunks:
            break
    # pd.concat raises on an empty list, so fall back to an empty frame.
    trajectories_data = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
else:
    trajectories_data = pd.read_csv('../data/ngsim.csv')

In [6]:
# Preview the last rows of the loaded table
trajectories_data.tail()

Unnamed: 0,Vehicle_ID,Frame_ID,Total_Frames,Global_Time,Local_X,Local_Y,Global_X,Global_Y,v_length,v_Width,...,D_Zone,Int_ID,Section_ID,Direction,Movement,Preceding,Following,Space_Headway,Time_Headway,Location
11850521,1355,5622,938,1113438127100,18.275,1464.336,6042648.149,2134520.896,12.3,6.8,...,,,,,,1351,1361,26.91,15.92,i-80
11850522,1474,5688,401,1113438133700,5.509,560.412,6042767.84,2133624.549,17.8,7.9,...,,,,,,1468,1480,74.49,1.73,i-80
11850523,398,2368,654,1113437801700,18.903,635.841,6042771.741,2133701.076,15.2,8.5,...,,,,,,385,406,90.95,3.77,i-80
11850524,599,875,577,1113436854400,77.094,621.358,6042831.282,2133693.854,15.3,6.4,...,,,,,,0,611,0.0,0.0,i-80
11850525,2006,6461,879,1113437413000,73.816,695.035,6042818.859,2133766.666,14.8,6.9,...,,,,,,2009,2014,25.47,2.26,i-80


In [7]:
# (number of rows, number of columns) of the loaded table
trajectories_data.shape

(11850526, 25)

In [8]:
# Print every column label; the tail() preview elides the middle columns with '...'
print(trajectories_data.columns)

Index(['Vehicle_ID', 'Frame_ID', 'Total_Frames', 'Global_Time', 'Local_X',
       'Local_Y', 'Global_X', 'Global_Y', 'v_length', 'v_Width', 'v_Class',
       'v_Vel', 'v_Acc', 'Lane_ID', 'O_Zone', 'D_Zone', 'Int_ID', 'Section_ID',
       'Direction', 'Movement', 'Preceding', 'Following', 'Space_Headway',
       'Time_Headway', 'Location'],
      dtype='object')


#### Choosing a Location: (Iterate over later)

In [9]:
# Distinct values of the 'Location' column, in order of first appearance
locations = trajectories_data['Location'].unique()
locations

array(['us-101', 'i-80', 'lankershim', 'peachtree'], dtype=object)

In [10]:
# Location analysed in the rest of the notebook (must be one of `locations`)
chosen_location = 'us-101'

In [11]:
# Keep only the rows recorded at `chosen_location`
us_101_data = trajectories_data.loc[trajectories_data['Location'].eq(chosen_location)]
us_101_data.head()

Unnamed: 0,Vehicle_ID,Frame_ID,Total_Frames,Global_Time,Local_X,Local_Y,Global_X,Global_Y,v_length,v_Width,...,D_Zone,Int_ID,Section_ID,Direction,Movement,Preceding,Following,Space_Headway,Time_Headway,Location
0,515,2330,1123,1118848075000,30.034,188.062,6451203.729,1873252.549,13.0,6.9,...,,,,,,500,523,119.1,5.11,us-101
1,515,2330,1123,1118848075000,30.034,188.062,6451203.729,1873252.549,13.0,6.9,...,,,,,,500,523,119.1,5.11,us-101
3,2127,6459,567,1118847624800,19.632,1775.614,6452425.122,1872172.475,13.5,6.9,...,,,,,,2124,2132,48.92,1.3,us-101
4,1033,4827,592,1118848324700,6.202,1701.144,6452347.673,1872258.452,13.5,4.4,...,,,,,,1029,1040,38.81,0.92,us-101
5,1033,4827,592,1118848324700,6.202,1701.144,6452347.673,1872258.452,13.5,4.4,...,,,,,,1029,1040,38.81,0.92,us-101


#### For Each Location:

##### Drop duplicates:

In [12]:
# Keep the first row for every (Vehicle_ID, Frame_ID) pair.
# NOTE(review): this treats the pair as a unique key for an observation. If
# vehicle or frame numbering restarts between separate recordings of the same
# location, distinct observations would be merged here — confirm, and consider
# adding e.g. 'Global_Time' to the subset if so.
us_101_data_unique = us_101_data.drop_duplicates(subset=['Vehicle_ID', 'Frame_ID'])
us_101_data_unique.shape

(3614404, 25)

##### Choose lanes


In [13]:
# Lane IDs kept for the analysis
chosen_lanes = [3, 4, 5]

In [14]:
# Restrict the de-duplicated rows to the lanes listed in `chosen_lanes`
us_101_data_lanes = us_101_data_unique.loc[us_101_data_unique['Lane_ID'].isin(chosen_lanes)]
us_101_data_lanes.shape

(2055628, 25)

##### We have the dataset with -- One Location, 3 particular lanes, and unique combination of (vehicle_ID and frame_ID)

##### Sort via frame id:

In [15]:
# Preview the lane-filtered rows (row labels still come from the full table)
us_101_data_lanes.head()

Unnamed: 0,Vehicle_ID,Frame_ID,Total_Frames,Global_Time,Local_X,Local_Y,Global_X,Global_Y,v_length,v_Width,...,D_Zone,Int_ID,Section_ID,Direction,Movement,Preceding,Following,Space_Headway,Time_Headway,Location
0,515,2330,1123,1118848075000,30.034,188.062,6451203.729,1873252.549,13.0,6.9,...,,,,,,500,523,119.1,5.11,us-101
6,1890,9157,628,1118849672700,53.514,817.521,6451655.238,1872800.663,24.0,8.5,...,,,,,,1882,1897,102.65,2.27,us-101
7,744,3392,996,1118848181200,28.878,490.086,6451422.353,1873041.018,15.0,5.9,...,,,,,,740,752,37.8,1.54,us-101
15,496,2766,771,1118848118600,52.141,1656.454,6452284.002,1872253.037,15.0,4.9,...,,,,,,510,502,68.21,2.09,us-101
20,1779,5317,490,1118847510600,31.09,733.063,6451632.732,1872850.265,28.0,7.9,...,,,,,,1769,1784,189.54,3.8,us-101


In [16]:
# Expose the original row labels as an 'index' column so that rows can be
# traced back to the full table later on.
# The guard keeps this cell idempotent: running reset_index a second time would
# add a spurious 'level_0' column instead of leaving the frame unchanged.
if 'index' not in us_101_data_lanes.columns:
    us_101_data_lanes = us_101_data_lanes.reset_index(drop=False)
us_101_data_lanes.head()

Unnamed: 0,index,Vehicle_ID,Frame_ID,Total_Frames,Global_Time,Local_X,Local_Y,Global_X,Global_Y,v_length,...,D_Zone,Int_ID,Section_ID,Direction,Movement,Preceding,Following,Space_Headway,Time_Headway,Location
0,0,515,2330,1123,1118848075000,30.034,188.062,6451203.729,1873252.549,13.0,...,,,,,,500,523,119.1,5.11,us-101
1,6,1890,9157,628,1118849672700,53.514,817.521,6451655.238,1872800.663,24.0,...,,,,,,1882,1897,102.65,2.27,us-101
2,7,744,3392,996,1118848181200,28.878,490.086,6451422.353,1873041.018,15.0,...,,,,,,740,752,37.8,1.54,us-101
3,15,496,2766,771,1118848118600,52.141,1656.454,6452284.002,1872253.037,15.0,...,,,,,,510,502,68.21,2.09,us-101
4,20,1779,5317,490,1118847510600,31.09,733.063,6451632.732,1872850.265,28.0,...,,,,,,1769,1784,189.54,3.8,us-101


<p>We need to check for vehicles which are present in 70 continuous frames.</p>

<p>For this I was thinking: why not take 70 continuous frames and then check which vehicles are in those particular frames?</p>

<p>We get to directly use those rows as they will be valid; we can then keep the ones where we have at least one car in each of the chosen lanes.</p>

<p>Then we shift to the vehicles and make a new dataset from each row: just pick from the original dataset the values with this car and this frame ID. As they are already unique, this should give us the dataset we require.</p>

<p>Let's try.</p>

In [17]:
# Keep just the vehicle/frame keys plus the original row label ('index')
us_101_data_vf = us_101_data_lanes.loc[:, ['Vehicle_ID', 'Frame_ID', 'index']]
us_101_data_vf.head()

Unnamed: 0,Vehicle_ID,Frame_ID,index
0,515,2330,0
1,1890,9157,6
2,744,3392,7
3,496,2766,15
4,1779,5317,20


###### Attempt 1:

In [56]:
# Collapse to one row per frame: the set of vehicles observed in that frame and
# the set of source-row labels ('index') those observations came from.
# (Sets are unordered, so sorting the IDs before building the set is not needed.)
us_101_data_grouped_by_frame = (
    us_101_data_vf
    .groupby('Frame_ID')
    .agg({
        'Vehicle_ID': lambda ids: set(ids),
        'index': lambda labels: set(labels),
    })
    .reset_index()
)
us_101_data_grouped_by_frame['num_vehicles'] = us_101_data_grouped_by_frame['Vehicle_ID'].apply(len)
us_101_data_grouped_by_frame.head()


Unnamed: 0,Frame_ID,Vehicle_ID,index,num_vehicles
0,8,{5},{8242843},1
1,9,{5},{5805738},1
2,11,{5},{459432},1
3,13,{5},{1407728},1
4,16,{5},{5740198},1


In [57]:
# Window length: number of consecutive frames a vehicle must appear in.
# NOTE(review): the notes above talk about 70 continuous frames — confirm which
# value is intended.
num_frames = 30

In [58]:
# Frame IDs in ascending order, re-labelled with a fresh 0..n-1 index
frame_ids = (
    us_101_data_grouped_by_frame['Frame_ID']
    .sort_values()
    .reset_index(drop=True)
)
frame_ids

0           8
1           9
2          11
3          13
4          16
        ...  
9978     9996
9979     9997
9980     9998
9981     9999
9982    10000
Name: Frame_ID, Length: 9983, dtype: int64

In [59]:
# Within an unbroken run of frames both the frame ID and its position in the
# 0..n-1 index grow by 1 per row, so (frame ID - position) stays constant; it
# changes as soon as a frame is missing. Grouping on that difference therefore
# yields one group per run of consecutive frame IDs.
group = frame_ids - frame_ids.index
groups = frame_ids.groupby(group)


In [60]:
# Keep only the runs of consecutive frames that are at least `num_frames` long
continuous_sequences = [run.tolist() for _offset, run in groups if len(run) >= num_frames]
len(continuous_sequences)

1

In [61]:
# Flatten the qualifying runs and keep only the per-frame rows that belong to one of them
valid_frame_ids = [frame_id for run in continuous_sequences for frame_id in run]
in_valid_run = us_101_data_grouped_by_frame['Frame_ID'].isin(valid_frame_ids)
us_101_data_grouped_by_frame_filtered = us_101_data_grouped_by_frame[in_valid_run]
us_101_data_grouped_by_frame_filtered.head()

Unnamed: 0,Frame_ID,Vehicle_ID,index,num_vehicles
11,29,{8},{4975546},1
12,30,{8},{3392441},1
13,31,"{8, 5}","{5186391, 1972151}",2
14,32,"{8, 14}","{8561555, 8366927}",2
15,33,"{8, 14}","{652210, 667827}",2


In [62]:
# Number of frames that survive the run-length filter (rows) and columns
us_101_data_grouped_by_frame_filtered.shape

(9972, 4)

In [66]:
# Slide a window of `num_frames` consecutive frames over the filtered frames.
# For every window record
#   'Frame_ID'      - the frame IDs covered by the window,
#   'intersections' - the vehicles present in *every* frame of the window,
#   'index'         - the source-row labels of all observations in the window.
potential_egos = []

# Pull the columns out once instead of slicing the DataFrame for every window.
window_frame_ids = us_101_data_grouped_by_frame_filtered['Frame_ID'].tolist()
window_vehicle_sets = us_101_data_grouped_by_frame_filtered['Vehicle_ID'].tolist()
window_index_sets = us_101_data_grouped_by_frame_filtered['index'].tolist()

for start in range(len(window_frame_ids) - num_frames + 1):
    stop = start + num_frames
    frames = window_frame_ids[start:stop]
    if not frames:
        continue

    # The filtered table can be stitched together from several separate runs of
    # consecutive frames. Frame IDs are unique and ascending here (they are
    # groupby keys), so a window is gap-free exactly when its last and first
    # frame IDs differ by num_frames - 1; skip windows that straddle a gap.
    if frames[-1] - frames[0] != num_frames - 1:
        continue

    potential_egos.append({
        'Frame_ID': frames,
        'intersections': set.intersection(*window_vehicle_sets[start:stop]),
        'index': list(set().union(*window_index_sets[start:stop])),
    })

In [67]:
# Number of candidate windows collected
len(potential_egos)

9943

#### Looping through to find ego:

In [71]:
# Lane(s) an ego-vehicle candidate must be observed in
ego_lanes = [4]

In [178]:
def select_window_rows(lanes_df, window):
    """
    Return the observations that belong to one candidate window.

    Parameters:
    - lanes_df: DataFrame with at least 'index', 'Vehicle_ID' and 'Frame_ID' columns
    - window: dict with keys 'index', 'intersections' and 'Frame_ID'
      (one entry of `potential_egos`)

    Returns:
    - DataFrame restricted to the window's frames and to the vehicles present in
      every frame of the window, sorted by Frame_ID then Vehicle_ID
    """
    in_window = (
        lanes_df['index'].isin(window['index'])
        & lanes_df['Vehicle_ID'].isin(list(window['intersections']))
        & lanes_df['Frame_ID'].isin(window['Frame_ID'])
    )
    return lanes_df[in_window].sort_values(by=['Frame_ID', 'Vehicle_ID'])


def find_ego_candidates(window_rows, ego_lanes):
    """
    Return the IDs of the vehicles that may serve as ego vehicle for a window.

    A candidate is observed in one of `ego_lanes` and, in the earliest frame in
    which ego-lane vehicles are observed, has non-zero 'Preceding' and
    'Following' values (presumably 0 means "no such neighbour" — confirm
    against the dataset documentation).

    Parameters:
    - window_rows: DataFrame produced by `select_window_rows`
    - ego_lanes: list of lane IDs the ego vehicle may drive in

    Returns:
    - numpy array of candidate Vehicle_IDs, in order of first appearance
    """
    ego_lane_rows = window_rows[window_rows['Lane_ID'].isin(ego_lanes)]
    first_frame_rows = ego_lane_rows[ego_lane_rows['Frame_ID'] == ego_lane_rows['Frame_ID'].min()]

    lacks_neighbour = (first_frame_rows['Preceding'] == 0) | (first_frame_rows['Following'] == 0)
    excluded_ids = first_frame_rows.loc[lacks_neighbour, 'Vehicle_ID']

    is_candidate = ~ego_lane_rows['Vehicle_ID'].isin(excluded_ids)
    return ego_lane_rows.loc[is_candidate, 'Vehicle_ID'].unique()


def build_ego_frames(window_rows, ego_vehicle_id, neighbor_lanes=(3, 5)):
    """
    Build the per-frame neighbourhood of one ego candidate.

    For every frame of the window the result holds the rows of the ego vehicle,
    its preceding and following vehicles, and the vehicle closest to the ego in
    each of `neighbor_lanes`. A column 'd' holds each vehicle's Euclidean
    distance to the ego in (Local_X, Local_Y).

    Parameters:
    - window_rows: DataFrame produced by `select_window_rows`
    - ego_vehicle_id: Vehicle_ID of the candidate
    - neighbor_lanes: lane IDs in which the closest vehicle is looked up

    Returns:
    - DataFrame with the selected rows of all frames, or None when the
      neighbourhood cannot be built for every frame of the window (ego,
      preceding or following vehicle missing, or a neighbour lane empty).
      Rejecting the whole candidate avoids returning a partial sequence.
    """
    per_frame = []

    # groupby yields the frames in ascending Frame_ID order.
    for frame_id, frame_rows in window_rows.groupby('Frame_ID'):
        ego_rows = frame_rows[frame_rows['Vehicle_ID'] == ego_vehicle_id]
        if ego_rows.empty:
            print(f"ego_vehicle_data is empty for potential_ego_vehicle_id: {ego_vehicle_id} and frame_id: {frame_id}")
            return None

        # There should be exactly one ego row per frame.
        ego_row = ego_rows.iloc[0]
        ego_preceding = ego_row['Preceding']
        ego_following = ego_row['Following']

        ids_in_frame = frame_rows['Vehicle_ID']
        if not (ids_in_frame == ego_preceding).any() or not (ids_in_frame == ego_following).any():
            return None

        # assign() returns a new frame, so nothing is written through a slice
        # of `window_rows` (avoids SettingWithCopyWarning).
        frame_rows = frame_rows.assign(
            d=np.sqrt(
                (frame_rows['Local_X'] - ego_row['Local_X']) ** 2
                + (frame_rows['Local_Y'] - ego_row['Local_Y']) ** 2
            )
        )

        keep_ids = [ego_vehicle_id, ego_preceding, ego_following]
        for lane in neighbor_lanes:
            lane_rows = frame_rows[frame_rows['Lane_ID'] == lane]
            if lane_rows.empty:
                # idxmin() would raise on an empty selection.
                return None
            keep_ids.append(lane_rows.loc[lane_rows['d'].idxmin(), 'Vehicle_ID'])

        per_frame.append(frame_rows[frame_rows['Vehicle_ID'].isin(keep_ids)])

    if not per_frame:
        return None
    # Concatenate once at the end instead of growing a DataFrame per frame.
    return pd.concat(per_frame, ignore_index=True)


# Only window 100 is processed for now; widen the slice to scan more windows.
for potential_ego in potential_egos[100:101]:
    us_101_data_lanes_filtered = select_window_rows(us_101_data_lanes, potential_ego)

    potential_ego_vehicle_ids = find_ego_candidates(us_101_data_lanes_filtered, ego_lanes)
    print(f"Potential ego vehicle IDs: {potential_ego_vehicle_ids}")

    # Stays empty when no candidate has a complete neighbourhood.
    ego_df = pd.DataFrame()

    for potential_ego_vehicle_id in potential_ego_vehicle_ids:
        candidate_df = build_ego_frames(us_101_data_lanes_filtered, potential_ego_vehicle_id)
        if candidate_df is None:
            continue

        # The first candidate with a complete neighbourhood wins.
        ego_df = candidate_df
        print(ego_df[['Vehicle_ID', 'd', 'Frame_ID', 'Lane_ID', 'Preceding', 'Following']])
        ego_df.to_csv('ego_df.csv', index=False)
        break

Potential ego vehicle IDs: [ 8 21 25 34 40]
     Vehicle_ID           d  Frame_ID  Lane_ID  Preceding  Following
0             8   72.093286       129        4          5         21
1            18   35.488004       129        5         14         31
2            20   86.363773       129        3          9         47
3            21    0.000000       129        4          8         25
4            25   99.738175       129        4         21         34
..          ...         ...       ...      ...        ...        ...
145           8   65.849486       158        4          5         21
146          20   74.340334       158        3          9         47
147          21    0.000000       158        4          8         25
148          25   93.565290       158        4         21         34
149          31  102.540056       158        5         18         35

[150 rows x 6 columns]


##### Next we identify which of these is the ego car, and calculate distance to other cars

In [None]:
# smallest Frame_ID
