In [1]:
# Import libraries
import numpy as np 
import pandas as pd 
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import Cleanup
import warnings
# Ignore specific warning messages from a package
warnings.filterwarnings("ignore")


In [2]:
# Read the data
train_df = pd.read_csv("train_df.csv")
test_df = pd.read_csv("test_df.csv")


In [3]:
# Drops the column named 'Unnamed: 0' from two DataFrames, 'train_df' and 'test_df'
train_df = train_df.drop(['Unnamed: 0'],axis = 1)
test_df = test_df.drop(['Unnamed: 0'],axis = 1)

In [4]:
# Create a LabelEncoder object
label_encoder = LabelEncoder()
# Encode the 'L-F_Pair' column in 'train_df' and 'test_df' using the LabelEncoder and update the DataFrame
train_df['L-F_Pair'] = label_encoder.fit_transform(train_df['L-F_Pair'])
test_df['L-F_Pair'] = label_encoder.fit_transform(test_df['L-F_Pair'])

In [5]:
 def create_prediction_columns(df, n):
        '''
        create the prediction pair by shifting the actual data up by the mentioned number(0.1*n seconds) to create the timeseries info
        '''
        df["nextframeAcc"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Acc"].shift(-1*n)
        df["nextframesvel"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Vel"].shift(-1*n)
        df["nextframeposition"] = df.groupby(
            ["L-F_Pair"], as_index=False)["Local_Y"].shift(-1*n)
        df["nextFrameSpacing"] = df.groupby(
            ["L-F_Pair"], as_index=False)["Space_Gap"].shift(-1*n)
        df["precnextframesvel"] = df.groupby(
            ["L-F_Pair"], as_index=False)["v_Vel_preceding"].shift(-1*n)
        df = df[df['nextframeposition'].notna()]
        df = df[df['nextframesvel'].notna()]
        df = df[df['nextframeAcc'].notna()]
        df = df[df['nextFrameSpacing'].notna()]
        df = df[df['precnextframesvel'].notna()]

        return df

In [6]:
# Use the function 'create_prediction_columns' to create prediction pairs for the DataFrame 'train_df' with a shifting window of 5 time steps
train_df = create_prediction_columns(train_df,5)

In [7]:
#  Use the function 'create_prediction_columns' to create prediction pairs for the DataFrame 'test_df' with a shifting window of 5 time steps
test_df = create_prediction_columns(test_df,5)

In [8]:
train_df

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,pair_Time_Duration,total_pair_duration,Vehicle_Class,Space_Gap,Difference_of_speed,nextframeAcc,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel
0,1760,5683,425.375767,6.49224,8.5,3,20.498565,-0.283574,1,1754,...,0.0,11.6,Heavy Vehicle,36.277296,1.090135,-0.679115,20.775275,415.056862,35.911536,22.732894
1,1760,5682,423.323483,6.49224,8.5,3,20.547120,-0.474961,1,1754,...,0.1,11.6,Heavy Vehicle,36.030408,1.327031,-0.626724,20.899921,412.973103,35.856672,22.853260
2,1760,5681,421.265679,6.49224,8.5,3,20.608958,-0.491087,1,1754,...,0.2,11.6,Heavy Vehicle,35.920680,1.527489,-0.649260,20.943266,410.880943,35.728656,22.929306
3,1760,5680,419.201729,6.49224,8.5,3,20.670048,-0.463286,1,1754,...,0.3,11.6,Heavy Vehicle,35.832288,1.700222,-0.481942,20.991363,408.784212,35.661600,22.960584
4,1760,5679,417.131926,6.49224,8.5,3,20.725999,-0.777079,1,1754,...,0.4,11.6,Heavy Vehicle,35.814000,1.844612,-0.714591,21.120779,404.573626,35.585400,22.892418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273941,1755,7317,432.553507,5.42544,8.4,2,13.974013,0.180951,1,1749,...,31.9,32.8,Car,16.831056,1.133864,0.372719,14.113733,439.571071,16.773144,15.278563
273942,1755,7318,433.951878,5.42544,8.4,2,13.993415,0.207085,1,1749,...,32.0,32.8,Car,16.821912,1.212407,0.369981,14.150868,440.984301,16.684752,15.222014
273943,1755,7319,435.352370,5.42544,8.4,2,14.016417,0.252962,1,1749,...,32.1,32.8,Car,16.843248,1.254982,0.370157,14.187875,442.401238,16.605504,15.138900
273944,1755,7320,436.755413,5.42544,8.4,2,14.044461,0.307920,1,1749,...,32.2,32.8,Car,16.858488,1.260305,0.568790,14.234822,443.822373,16.565880,15.030022


### SUPPORT VECTOR MACHINE

In [9]:
# Prepare the feature matrix 'X_train' by dropping the columns 'nextframeAcc', 'Vehicle_Class', 'Location', and 'Period' from the DataFrame 'train_df'
X_train = train_df.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
# Prepare the target array 'y_train' containing the values from the 'nextframeAcc' column of the DataFrame 'train_df'
y_train = train_df['nextframeAcc']
X_test = test_df.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df['nextframeAcc']

The code performs feature scaling using the StandardScaler to standardize the features in both the training and test datasets. Standardization is a preprocessing step that scales the features to have a mean of 0 and a standard deviation of 1, which is a common requirement for many machine learning algorithms.

In [10]:
# Create a StandardScaler object named 'sc'
sc = StandardScaler()
#  Fit the StandardScaler on the training feature matrix 'X_train' to compute the mean and standard deviation of the features
sc.fit(X_train)
# # Use the fitted StandardScaler to transform the training feature matrix 'X_train' and 'X_test'to standardized values, and store the result in 'X_train_std' and 'X_test'
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [11]:
# Instantiate the Support Vector Regressor 
svr= SVR(C=1.0, epsilon=0.2)
 
# Fit the model
svr.fit(X_train_std, y_train)

The code defines a function named 'predict_velocity_and_spacing', which takes a DataFrame 'df' as input and returns a new DataFrame containing the predicted velocity and spacing values for each time step in the original DataFrame. The function uses a shifting window approach to predict the velocity and spacing for each time step based on previous time step values.

In [12]:
def predict_velocity_and_spacing(df, target_variable='nextframeAcc', delta_time = 0.1,model=svr):
    predicted_df = []

    for current_pair in df['L-F_Pair'].unique():
        input_df = df[df['L-F_Pair'] == current_pair].copy()
# Initialize arrays to store velocity, predicted acceleration, spacing, and local_y for each time step 
        vel = np.zeros(input_df.shape[0])
        pred_acc = np.zeros(input_df.shape[0])
        spacing = np.zeros(input_df.shape[0])
        local_y_subject = np.zeros(input_df.shape[0])
        local_y_preceding = np.zeros(input_df.shape[0])
  # Initialize the first time step values from the input DataFrame
        vel[0] = input_df.iloc[0]['v_Vel']
        spacing[0] = input_df.iloc[0]['Space_Gap']
        pred_acc[0] = input_df.iloc[0][target_variable]
        local_y_subject[0] = input_df.iloc[0]['Local_Y']
        local_y_preceding[0] = input_df.iloc[0]['Local_Y_preceding']

        for j in range(1, len(input_df)):
            # Calculate the velocity for the current time step using the previous velocity and predicted acceleration
            vel[j] = vel[j - 1] + (pred_acc[j - 1] * delta_time)
            if vel[j] < 0:
                vel[j] = 0
            spacing[j] = spacing[j - 1] + (vel[j - 1] * delta_time) + (0.5 * pred_acc[j - 1] * pow(delta_time, 2))
            local_y_subject[j] = local_y_subject[j - 1] + (vel[j - 1] * delta_time) + (0.5 * pred_acc[j - 1] * pow(delta_time, 2))
            local_y_preceding[j] = input_df.iloc[j]['Local_Y_preceding']

            # Calculate the predicted acceleration for the next time step
            #pred_acc[j] = model.predict(np.array([spacing[j], input_df.iloc[j]['v_Class_preceding'], input_df.iloc[j]['v_Class'], vel[j], vel[j] - input_df.iloc[j]['v_Vel_preceding']]).reshape(1, -1))

        input_df['predicted_velocity'] = vel
        input_df['predicted_spacing'] = spacing
        predicted_df.append(input_df)

    result = pd.concat(predicted_df)
    return result


In [13]:
# Use the trained SVR model 'svr' to predict the target variable for the test feature matrix 'X_test'
pred_5 = svr.predict(X_test)

In [14]:
# Assign the predicted values ('pred_5') to the 'nextframeAcc' column of the test feature matrix 'X_test'
X_test["nextframeAcc"]=pred_5

In [15]:
# Assuming you have trained the Random Forest model (rf) and X_test is the test DataFrame
predictions_svm_5= predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_svm_5

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.696004,320.406890,58.680096,21.800717,0.194716,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.624633,318.340859,58.402728,21.843626,0.194716,21.111420,61.037152
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.559887,316.281633,57.817512,21.882249,0.194716,21.111420,63.148294
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.503401,314.228468,57.540144,21.917777,0.194716,21.111420,65.259436
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.455171,312.180539,57.619392,21.949883,0.194716,21.111420,67.370578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67745,1749,7302,433.735630,5.73024,8.0,2,14.102466,0.031511,1,1747,...,33.3,26.523696,1.577324,14.199165,440.807041,26.862024,14.872017,0.194716,12.130866,412.125653
67746,1749,7303,435.146254,5.73024,8.0,2,14.110007,0.119349,1,1747,...,33.3,26.508456,1.462328,14.218998,442.227949,27.008328,14.641252,0.194716,12.130866,413.338739
67747,1749,7304,436.558090,5.73024,8.0,2,14.126702,0.214642,1,1747,...,33.3,26.545032,1.309236,14.236483,443.650723,27.166824,14.391028,0.194716,12.130866,414.551826
67748,1749,7305,437.971950,5.73024,8.0,2,14.150506,0.261567,1,1747,...,33.3,26.618184,1.121355,14.249134,445.075004,27.322272,14.121275,0.194716,12.130866,415.764912


### Calculating R2 and RMSE 

In [17]:
from sklearn.metrics import r2_score, mean_absolute_error,mean_squared_error

r2_score(y_test, pred_5) #evaluating the model's performance against the ground truth on the test set

-0.08083108158840346

In [18]:
mse = mean_squared_error(y_test,pred_5)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.637711835686083


### Reaction time of 1s

In [19]:
# Create a new DataFrame 'train_df_1' with additional columns representing time-series information based on a shifting window of 10 time steps
train_df_1 = create_prediction_columns(train_df,10)

In [20]:
# Create a new DataFrame 'test_df_1' with additional columns representing time-series information based on a shifting window of 10 time steps
test_df_1 = create_prediction_columns(test_df,10)

In [21]:
# Prepare the training and test datasets by separating features and target variables
# For the training dataset:
X_train = train_df_1.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_train = train_df_1['nextframeAcc']
# For the test dataset:
X_test = test_df_1.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_1['nextframeAcc']

In [22]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
sc = StandardScaler()
# Fit the StandardScaler on the training features to learn the mean and standard deviation of each feature
sc.fit(X_train)
# Transform the training features using the learned mean and standard deviation
X_train_std = sc.transform(X_train)
# Transform the test features using the same mean and standard deviation as learned from the training data
X_test_std = sc.transform(X_test)

In [23]:
# Instantiate the Support Vector Regressor 
svr= SVR(C=1.0, epsilon=0.2)
 
# Fit the model
svr.fit(X_train_std, y_train)

In [24]:
# Use the trained SVR model to predict the target variable ('nextframeAcc') for the test features ('X_test')
pred_1 = svr.predict(X_test)
# Update the 'nextframeAcc' column in the test feature matrix ('X_test') with the predicted values
X_test["nextframeAcc"]=pred_1

In [25]:
# Assuming you have trained the Random Forest model (rf) and X_test is the test DataFrame
predictions_svm_1 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_svm_1

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.414137,310.137074,57.476136,21.976547,0.118068,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.378891,308.097423,57.305448,21.996704,0.118068,21.103755,61.036769
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.347913,306.061083,57.159144,22.011139,0.118068,21.103755,63.147145
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.319577,304.027708,57.006744,22.019051,0.118068,21.103755,65.257520
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.292164,301.997121,56.854344,22.018117,0.118068,21.103755,67.367896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67735,1749,7292,419.641662,5.73024,8.0,2,14.153647,-0.239191,1,1747,...,33.3,26.093928,0.913004,14.102466,433.735630,26.523696,15.679791,0.118068,12.123201,399.754494
67736,1749,7293,421.055774,5.73024,8.0,2,14.128588,-0.262126,1,1747,...,33.3,26.103072,1.150887,14.110007,435.146254,26.508456,15.572335,0.118068,12.123201,400.966814
67737,1749,7294,422.467337,5.73024,8.0,2,14.102675,-0.256263,1,1747,...,33.3,26.148792,1.353574,14.126702,436.558090,26.545032,15.435939,0.118068,12.123201,402.179134
67738,1749,7295,423.876494,5.73024,8.0,2,14.080449,-0.188385,1,1747,...,33.3,26.215848,1.517180,14.150506,437.971950,26.618184,15.271861,0.118068,12.123201,403.391454


### Calculating R2 and RMSE 

In [26]:
r2_score(y_test, pred_1) #evaluating the model's performance against the ground truth on the test set

-0.029176397104716534

In [27]:
mse = mean_squared_error(y_test,pred_1)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.6022977374084251


### Reaction Time 1.5 s

In [28]:
# Create a new DataFrame 'train_df_15' with additional columns representing time-series information based on a shifting window of 15 time steps
train_df_15 = create_prediction_columns(train_df,15)

In [29]:
# Create a new DataFrame 'test_df_15' with additional columns representing time-series information based on a shifting window of 15 time steps
test_df_15 = create_prediction_columns(test_df,15)

In [30]:
# Prepare the training and test datasets by separating features and target variables
# For the training dataset:
X_train = train_df_15.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_train = train_df_15['nextframeAcc']
# For the test dataset:
X_test = test_df_15.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_15['nextframeAcc']

In [31]:
sc = StandardScaler()
# Fit the StandardScaler on the training features to learn the mean and standard deviation of each feature
sc.fit(X_train)
# Transform the training features using the learned mean and standard deviation
X_train_std = sc.transform(X_train)
# Transform the test features using the same mean and standard deviation as learned from the training data
X_test_std = sc.transform(X_test)

In [32]:
# Instantiate the Support Vector Regressor 
svr= SVR(C=1.0, epsilon=0.2)
 
# Fit the model
svr.fit(X_train_std, y_train)

In [33]:
# Use the trained Support Vector Regressor (SVR) model to predict the target variable ('nextframeAcc') for the test features ('X_test')
pred_15 = svr.predict(X_test)
# Update the 'nextframeAcc' column in the test feature matrix ('X_test') with the predicted values
X_test["nextframeAcc"]=pred_15

In [34]:
# Assuming you have trained the Random Forest model (rf) and X_test is the test DataFrame
predictions_svm_15 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_svm_15

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.264070,299.969309,56.701944,22.009592,0.065506,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,20.234104,297.944400,56.549544,21.998897,0.065506,21.098499,61.036506
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,20.201120,295.922639,56.406288,21.988775,0.065506,21.098499,63.146356
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,20.163002,293.904433,56.275224,21.974874,0.065506,21.098499,65.256206
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,20.116615,291.890452,56.156352,21.950872,0.065506,21.098499,67.366056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67730,1749,7287,412.547431,5.73024,8.0,2,14.189132,0.163324,1,1747,...,33.3,26.124408,-0.135149,14.102466,433.735630,26.523696,15.679791,0.065506,12.117945,393.530739
67731,1749,7288,413.966918,5.73024,8.0,2,14.200602,0.066145,1,1747,...,33.3,26.100024,-0.060484,14.110007,435.146254,26.508456,15.572335,0.065506,12.117945,394.742534
67732,1749,7289,415.387038,5.73024,8.0,2,14.201801,-0.042164,1,1747,...,33.3,26.084784,0.106068,14.126702,436.558090,26.545032,15.435939,0.065506,12.117945,395.954328
67733,1749,7290,416.806767,5.73024,8.0,2,14.192784,-0.138212,1,1747,...,33.3,26.093928,0.358509,14.150506,437.971950,26.618184,15.271861,0.065506,12.117945,397.166123


### Calculating R2 and RMSE 

In [35]:
r2_score(y_test, pred_15) #evaluating the model's performance against the ground truth on the test set

-0.006265331881197822

In [36]:
mse = mean_squared_error(y_test,pred_15)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.5872164380413628


### Reaction Time of 2s

In [37]:
# Create a new DataFrame 'train_df_2' with additional columns representing time-series information based on a shifting window of 20 time steps
train_df_2= create_prediction_columns(train_df,20)

In [38]:
# Create a new DataFrame 'test_df_2' with additional columns representing time-series information based on a shifting window of 20 time steps
test_df_2= create_prediction_columns(test_df,20)

In [39]:
# Prepare the training and test datasets by separating features and target variables
# For the training dataset:
X_train = train_df_2.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_train = train_df_2['nextframeAcc']
# For the test dataset:
X_test = test_df_2.drop(['nextframeAcc', 'Vehicle_Class', 'Location', 'Period'], axis = 1)
y_test = test_df_2['nextframeAcc']

In [40]:
sc = StandardScaler()
# Fit the StandardScaler on the training features to learn the mean and standard deviation of each feature
sc.fit(X_train)
# Transform the training features using the learned mean and standard deviation
X_train_std = sc.transform(X_train)
# Transform the test features using the same mean and standard deviation as learned from the training data
X_test_std = sc.transform(X_test)

In [41]:
# Instantiate the Support Vector Regressor 
svr= SVR(C=1.0, epsilon=0.2)
 
# Fit the model
svr.fit(X_train_std, y_train)

In [42]:
# Use the trained Support Vector Regressor (SVR) model to predict the target variable ('nextframeAcc') for the test features ('X_test')
pred_2 = svr.predict(X_test)
# Update the 'nextframeAcc' column in the test feature matrix ('X_test') with the predicted values
X_test["nextframeAcc"]=pred_2

In [43]:
# Assuming you have trained the Random Forest model (rf) and X_test is the test DataFrame
predictions_svm_2 = predict_velocity_and_spacing(X_test, target_variable='nextframeAcc')
predictions_svm_2

Unnamed: 0,Vehicle_ID,Frame_ID,Local_Y,v_Length,v_Width,v_Class,v_Vel,v_Acc,Lane_ID,Preceding,...,total_pair_duration,Space_Gap,Difference_of_speed,nextframesvel,nextframeposition,nextFrameSpacing,precnextframesvel,nextframeAcc,predicted_velocity,predicted_spacing
0,1768,5669,330.851725,4.66344,6.9,2,21.091948,0.842936,1,1760,...,18.0,58.926984,0.377951,20.060325,289.881605,56.043576,21.915466,0.011777,21.091948,58.926984
1,1768,5668,328.746742,4.66344,6.9,2,21.007727,0.838071,1,1760,...,18.0,59.216544,0.545885,19.998221,287.878678,55.912512,21.872338,0.011777,21.093126,61.036238
2,1768,5667,326.650013,4.66344,6.9,2,20.926841,0.803230,1,1760,...,18.0,59.268360,0.702362,19.940316,285.881751,55.787544,21.824385,0.011777,21.093126,63.145550
3,1768,5666,324.561248,4.66344,6.9,2,20.848459,0.787251,1,1760,...,18.0,58.960512,0.846721,19.895098,283.889980,55.650384,21.768902,0.011777,21.093126,65.254863
4,1768,5665,322.480258,4.66344,6.9,2,20.771348,0.777447,1,1760,...,18.0,58.829448,0.980495,19.862745,281.902088,55.491888,21.700426,0.011777,21.093126,67.364176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67725,1749,7282,405.483324,5.73024,8.0,2,14.059508,0.279937,1,1747,...,33.3,25.996392,-0.194523,14.102466,433.735630,26.523696,15.679791,0.011777,12.112572,387.308701
67726,1749,7283,406.890678,5.73024,8.0,2,14.087571,0.281485,1,1747,...,33.3,25.987248,-0.178934,14.110007,435.146254,26.508456,15.572335,0.011777,12.112572,388.519958
67727,1749,7284,408.300854,5.73024,8.0,2,14.115954,0.286330,1,1747,...,33.3,26.029920,-0.169573,14.126702,436.558090,26.545032,15.435939,0.011777,12.112572,389.731215
67728,1749,7285,409.713849,5.73024,8.0,2,14.143936,0.273447,1,1747,...,33.3,26.090880,-0.164321,14.150506,437.971950,26.618184,15.271861,0.011777,12.112572,390.942472


### Calculating R2 and RMSE 

In [44]:
r2_score(y_test, pred_2) #evaluating the model's performance against the ground truth on the test set

-0.00038888987204099834

In [45]:
mse = mean_squared_error(y_test,pred_2)
rmse = np.sqrt(mse)

print("RMSE:", rmse)

RMSE: 0.5786614676837969
