In [1]:
# Import libraries
import pandas as pd 
import numpy as np
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.metrics import r2_score, mean_absolute_error,mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the data
train_df = pd.read_csv("train_df.csv")
test_df = pd.read_csv("test_df.csv")


In [3]:
# Drop the 'Unnamed: 0' column from both the 'train_df' and 'test_df' DataFrames
train_df = train_df.drop(['Unnamed: 0'],axis = 1)
test_df = test_df.drop(['Unnamed: 0'],axis = 1)

In [4]:
# Use LabelEncoder to encode the 'L-F_Pair' column in both the 'train_df' and 'test_df' DataFrames
label_encoder = LabelEncoder()
train_df['L-F_Pair'] = label_encoder.fit_transform(train_df['L-F_Pair'])
test_df['L-F_Pair'] = label_encoder.fit_transform(test_df['L-F_Pair'])

The function create_prediction_columns() generates a time series prediction by shifting the actual data in the DataFrame 'df' up by a specified number of rows (n) for each unique 'L-F_Pair' group. It is assumed that the time difference between consecutive rows is 0.1 seconds. The function aims to create a time series-like structure where each row contains the actual values at a specific time step ('t') and the target values to predict at the next time step ('t+n').

In [5]:
 def create_prediction_columns(df, n):
        '''
        create the prediction pair by shifting the actual data up by the mentioned number(0.1*n seconds) to create the timeseries info
        '''
        df["nextframeAcc"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Acc"].shift(-1*n)
        df["nextframesvel"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Vel"].shift(-1*n)
        df["nextframeposition"] = df.groupby(
            ["L-F_Pair"], as_index=False)["Local_Y"].shift(-1*n)
        df["nextFrameSpacing"] = df.groupby(
            ["L-F_Pair"], as_index=False)["Space_Gap"].shift(-1*n)
        df["precnextframesvel"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Vel_preceding"].shift(-1*n)
        df = df[df['nextframeposition'].notna()]
        df = df[df['nextframesvel'].notna()]
        df = df[df['nextframeAcc'].notna()]
        df = df[df['nextFrameSpacing'].notna()]
        df = df[df['precnextframesvel'].notna()]

        return df

In [6]:
# Assuming we want to create 5 prediction columns in the train_df DataFrame.
train_df_5 = create_prediction_columns(train_df,5)

In [7]:
# Assuming test_df is the DataFrame that contains the test data.
test_df_5 = create_prediction_columns(test_df,5)

### K-NEAREST NEIGHBOR

In [8]:
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.metrics import r2_score,mean_squared_error

In [9]:
train_df.columns

Index(['Vehicle_ID', 'Frame_ID', 'Local_Y', 'v_Length', 'v_Width', 'v_Class',
       'v_Vel', 'v_Acc', 'Lane_ID', 'Preceding', 'Space_Headway',
       'Time_Headway', 'Location', 'Period', 'Vehicle_ID_preceding',
       'Local_Y_preceding', 'v_Length_preceding', 'v_Class_preceding',
       'v_Vel_preceding', 'v_Acc_preceding', 'Lane_ID_preceding',
       'Space_Headway_preceding', 'Time_Headway_preceding', 'L-F_Pair',
       'pair_Time_Duration', 'total_pair_duration', 'Vehicle_Class',
       'Space_Gap', 'Difference_of_speed', 'nextframeAcc', 'nextframesvel',
       'nextframeposition', 'nextFrameSpacing', 'precnextframesvel'],
      dtype='object')

In [10]:
# X_train contains the training data with the target variable and unnecessary columns removed.
X_train = train_df_5.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
# y_train contains the target variable 'nextframeAcc' for training the model.
y_train = train_df_5['nextframeAcc']
X_test = test_df_5.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_5['nextframeAcc']

In [11]:
# Creating an instance of the KNeighborsRegressor with 15 neighbors as the hyperparameter.
knn_regressor = KNeighborsRegressor(n_neighbors=15)
# Training the KNeighborsRegressor model with the training data (X_train) and corresponding target variable (y_train).
knn_regressor.fit(X_train, y_train)


In [12]:
def predict_velocity_and_spacing(df, target_variable='nextframeAcc', delta_time = 0.1 ,model=knn_regressor):
    predicted_df = []

    for current_pair in df['L-F_Pair'].unique():
        input_df = df[df['L-F_Pair'] == current_pair].copy()

        vel = np.zeros(input_df.shape[0])
        pred_acc = np.zeros(input_df.shape[0])
        spacing = np.zeros(input_df.shape[0])
        local_y_subject = np.zeros(input_df.shape[0])
        local_y_preceding = np.zeros(input_df.shape[0])

        vel[0] = input_df.iloc[0]['v_Vel']
        spacing[0] = input_df.iloc[0]['Space_Gap']
        pred_acc[0] = input_df.iloc[0][target_variable]
        local_y_subject[0] = input_df.iloc[0]['Local_Y']
        local_y_preceding[0] = input_df.iloc[0]['Local_Y_preceding']

        for j in range(1, len(input_df)):
            vel[j] = vel[j - 1] + (pred_acc[j - 1] * delta_time)
            if vel[j] < 0:
                vel[j] = 0
            spacing[j] = spacing[j - 1] + (vel[j - 1] * delta_time) + (0.5 * pred_acc[j - 1] * pow(delta_time, 2))
            local_y_subject[j] = local_y_subject[j - 1] + (vel[j - 1] * delta_time) + (0.5 * pred_acc[j - 1] * pow(delta_time, 2))
            local_y_preceding[j] = input_df.iloc[j]['Local_Y_preceding']

            # Calculate the predicted acceleration for the next time step
            #pred_acc[j] = model.predict(np.array([spacing[j], input_df.iloc[j]['v_Class_preceding'], input_df.iloc[j]['v_Class'], vel[j], vel[j] - input_df.iloc[j]['v_Vel_preceding']]).reshape(1, -1))

        input_df['predicted_velocity'] = vel
        input_df['predicted_spacing'] = spacing
        predicted_df.append(input_df)

    result = pd.concat(predicted_df)
    return result


In [13]:
# Using the trained KNeighborsRegressor model (knn_regressor), we predict 'nextframeAcc' for the test data (X_test).
pred_knn_5=knn_regressor.predict(X_test)

In [14]:
# Assigning the predicted values (pred_knn_5) to the "nextframeAcc" column in the X_test DataFrame.
X_test["nextframeAcc"]=pred_knn_5

In [15]:
# Using the function predict_velocity_and_spacing with X_test as input and 'nextframeAcc' as the target variable.
predictions_knn_5 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_knn_5

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.696004,320.406890,58.680096,21.800717,-0.108180,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.624633,318.340859,58.402728,21.843626,-0.094440,21.081130,61.035638
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.559887,316.281633,57.817512,21.882249,-0.062800,21.081130,63.143751
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.503401,314.228468,57.540144,21.917777,-0.050483,21.081130,65.251864
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.455171,312.180539,57.619392,21.949883,-0.042851,21.081130,67.359977
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67745,1749,7302,433.735630,5.73024,8.0,2,14.102466,0.031511,1,1747,...,33.3,26.523696,1.577324,14.199165,440.807041,26.862024,14.872017,0.750097,12.122363,411.850590
67746,1749,7303,435.146254,5.73024,8.0,2,14.110007,0.119349,1,1747,...,33.3,26.508456,1.462328,14.218998,442.227949,27.008328,14.641252,0.841657,12.122363,413.062827
67747,1749,7304,436.558090,5.73024,8.0,2,14.126702,0.214642,1,1747,...,33.3,26.545032,1.309236,14.236483,443.650723,27.166824,14.391028,0.841657,12.122363,414.275063
67748,1749,7305,437.971950,5.73024,8.0,2,14.150506,0.261567,1,1747,...,33.3,26.618184,1.121355,14.249134,445.075004,27.322272,14.121275,0.841657,12.122363,415.487299


In [16]:
# Calculating the R-squared score to evaluate the performance of the KNeighborsRegressor model.
r2_score(y_test, pred_knn_5)

-0.2950302585315323

In [17]:
# Calculating the mean squared error between the true target values (y_test) and the predicted values (pred_knn_5).
mse = mean_squared_error(y_test,pred_knn_5)

In [18]:
# Calculating the root mean squared error (RMSE) and printing its value.
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.698048378672784


### Reaction time of 1s

In [19]:
# Creating train_df_10 DataFrame with 10 prediction columns using the create_prediction_columns function.
train_df_10 = create_prediction_columns(train_df,10)

In [20]:
# Creating test_df_10 DataFrame with 10 prediction columns using the create_prediction_columns function.
test_df_10 = create_prediction_columns(test_df,10)

In [21]:
# Extracting the features for training (X_train) by dropping the columns 'nextframeAcc', 'Vehicle_Class', 'Location', and 'Period'.
X_train = train_df_10.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
# Extracting the target variable for training (y_train), which is 'nextframeAcc'.
y_train = train_df_10['nextframeAcc']
X_test = test_df_10.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_10['nextframeAcc']

In [22]:
# Creating and training the KNeighborsRegressor model with 5 neighbors using X_train as features and y_train as the target variable.
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, y_train)


In [23]:
# Using the trained KNeighborsRegressor model (knn_regressor), we predict 'nextframeAcc' for the test data (X_test).
pred_knn_1=knn_regressor.predict(X_test)

In [24]:
# Assigning the predicted values (pred_knn_1) to the "nextframeAcc" column in the X_test DataFrame.
X_test["nextframeAcc"]=pred_knn_1

In [25]:
# Using the function predict_velocity_and_spacing with X_test as input and 'nextframeAcc' as the target variable.
predictions_knn_1 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_knn_1

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.414137,310.137074,57.476136,21.976547,-0.137980,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.378891,308.097423,57.305448,21.996704,-0.112065,21.078150,61.035489
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.347913,306.061083,57.159144,22.011139,-0.098674,21.078150,63.143304
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.319577,304.027708,57.006744,22.019051,-0.084036,21.078150,65.251119
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.292164,301.997121,56.854344,22.018117,-0.069171,21.078150,67.358934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67740,1749,7297,426.690658,5.73024,8.0,2,14.067385,0.059409,1,1747,...,33.3,26.298144,1.710587,14.199165,440.807041,26.862024,14.872017,0.443495,12.109831,405.390256
67741,1749,7298,428.097873,5.73024,8.0,2,14.076924,0.131416,1,1747,...,33.3,26.334720,1.742030,14.218998,442.227949,27.008328,14.641252,0.443495,12.109831,406.601239
67742,1749,7299,429.506189,5.73024,8.0,2,14.089403,0.118241,1,1747,...,33.3,26.401776,1.739197,14.236483,443.650723,27.166824,14.391028,0.443495,12.109831,407.812222
67743,1749,7300,430.915552,5.73024,8.0,2,14.097843,0.050603,1,1747,...,33.3,26.477976,1.710217,14.249134,445.075004,27.322272,14.121275,0.443495,12.109831,409.023205


In [26]:
# Calculating the R-squared score to evaluate the performance of the KNeighborsRegressor model on the test data.
r2_score(y_test, pred_knn_1)

-0.4312861780864454

In [27]:
# Calculating and printing the root mean squared error (RMSE) for the KNeighborsRegressor model's predictions on the test data.
mse = mean_squared_error(y_test,pred_knn_1)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.7227964724094532


### Reaction time of 1.5s

In [28]:
# Creating train_df_15 DataFrame with 15 prediction columns using the create_prediction_columns function.
train_df_15 = create_prediction_columns(train_df,15)

In [29]:
# Creating test_df_15 DataFrame with 15 prediction columns using the create_prediction_columns function.
test_df_15 = create_prediction_columns(test_df,15)

In [30]:
# Extracting features for training (X_train) and the target variable (y_train) from train_df_15 DataFrame.
X_train = train_df_15.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_train = train_df_15['nextframeAcc']
# Extracting features for testing (X_test) and the target variable (y_test) from test_df_15 DataFrame.
X_test = test_df_15.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_15['nextframeAcc']

In [31]:
# Creating a KNeighborsRegressor model with 5 neighbors and fitting it to the training data.
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, y_train)


In [32]:
# Using the trained KNeighborsRegressor model (knn_regressor) to predict 'nextframeAcc' for the test data (X_test).
pred_knn_15=knn_regressor.predict(X_test)

In [33]:
# Updating the "nextframeAcc" column in the X_test DataFrame with the predicted values (pred_knn_15).
X_test["nextframeAcc"]=pred_knn_15

In [34]:
# Using the function predict_velocity_and_spacing with X_test as input and 'nextframeAcc' as the target variable.
# The function will use the provided KNeighborsRegressor model to predict the target variable for the test data
predictions_knn_15 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_knn_15

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.264070,299.969309,56.701944,22.009592,-0.151659,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.234104,297.944400,56.549544,21.998897,-0.137980,21.076782,61.035421
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.201120,295.922639,56.406288,21.988775,-0.112065,21.076782,63.143099
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.163002,293.904433,56.275224,21.974874,-0.098674,21.076782,65.250777
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.116615,291.890452,56.156352,21.950872,-0.084036,21.076782,67.358455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67735,1749,7292,419.641662,5.73024,8.0,2,14.153647,-0.239191,1,1747,...,33.3,26.093928,0.913004,14.199165,440.807041,26.862024,14.872017,0.443495,12.055050,397.617964
67736,1749,7293,421.055774,5.73024,8.0,2,14.128588,-0.262126,1,1747,...,33.3,26.103072,1.150887,14.218998,442.227949,27.008328,14.641252,0.443495,12.055050,398.823469
67737,1749,7294,422.467337,5.73024,8.0,2,14.102675,-0.256263,1,1747,...,33.3,26.148792,1.353574,14.236483,443.650723,27.166824,14.391028,0.443495,12.055050,400.028974
67738,1749,7295,423.876494,5.73024,8.0,2,14.080449,-0.188385,1,1747,...,33.3,26.215848,1.517180,14.249134,445.075004,27.322272,14.121275,0.443495,12.055050,401.234479


In [35]:
# Calculating the R-squared score to evaluate the performance of the KNeighborsRegressor model on the test data.
r2_score(y_test, pred_knn_15)

-0.4569726208308087

In [36]:
# Calculating and printing the root mean squared error (RMSE) for the KNeighborsRegressor model's predictions on the test data.
mse = mean_squared_error(y_test,pred_knn_15)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.7193696666241902


### Reaction time of 2s

In [37]:
# Creating train_df_20 DataFrame with 20 prediction columns using the create_prediction_columns function.
train_df_20 = create_prediction_columns(train_df,20)

In [38]:
# Creating test_df_20 DataFrame with 20 prediction columns using the create_prediction_columns function.
test_df_20 = create_prediction_columns(test_df,20)

In [39]:
# Extracting features for training (X_train) and the target variable (y_train) from train_df_20 DataFrame.
X_train = train_df_20.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_train = train_df_20['nextframeAcc']
# Extracting features for testing (X_test) and the target variable (y_test) from test_df_20 DataFrame.
X_test = test_df_20.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_20['nextframeAcc']

In [40]:
# Creating a KNeighborsRegressor model with 5 neighbors and fitting it to the training data.
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, y_train)


In [41]:
# Using the trained KNeighborsRegressor model (knn_regressor) to predict 'nextframeAcc' for the test data (X_test).
pred_knn_2=knn_regressor.predict(X_test)

In [42]:
# Updating the "nextframeAcc" column in the X_test DataFrame with the predicted values (pred_knn_2).
X_test["nextframeAcc"]=pred_knn_2

In [43]:
# Using the function predict_velocity_and_spacing with X_test as input and 'nextframeAcc' as the target variable.
# The function use the provided KNeighborsRegressor model to predict the target variable for the test data.
predictions_knn_2 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_knn_2

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.060325,289.881605,56.043576,21.915466,-0.173419,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,19.998221,287.878678,55.912512,21.872338,-0.151659,21.074606,61.035312
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,19.940316,285.881751,55.787544,21.824385,-0.137980,21.074606,63.142772
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,19.895098,283.889980,55.650384,21.768902,-0.124746,21.074606,65.250233
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,19.862745,281.902088,55.491888,21.700426,-0.112065,21.074606,67.357694
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67730,1749,7287,412.547431,5.73024,8.0,2,14.189132,0.163324,1,1747,...,33.3,26.124408,-0.135149,14.199165,440.807041,26.862024,14.872017,0.443495,12.070287,392.060507
67731,1749,7288,413.966918,5.73024,8.0,2,14.200602,0.066145,1,1747,...,33.3,26.100024,-0.060484,14.218998,442.227949,27.008328,14.641252,0.443495,12.070287,393.267536
67732,1749,7289,415.387038,5.73024,8.0,2,14.201801,-0.042164,1,1747,...,33.3,26.084784,0.106068,14.236483,443.650723,27.166824,14.391028,0.443495,12.070287,394.474564
67733,1749,7290,416.806767,5.73024,8.0,2,14.192784,-0.138212,1,1747,...,33.3,26.093928,0.358509,14.249134,445.075004,27.322272,14.121275,0.443495,12.070287,395.681593


In [44]:
# Calculating the R-squared score to evaluate the performance of the KNeighborsRegressor model on the test data.
r2_score(y_test, pred_knn_2)

-0.449987241804201

In [45]:
# Calculating and printing the root mean squared error (RMSE) for the KNeighborsRegressor model's predictions on the test data.
mse = mean_squared_error(y_test,pred_knn_2)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.7098914076452463
