In [1]:
import pandas as pd # for data frame processing
import numpy as np # for some statistical procedures
from scipy.optimize import minimize # optimisation routine for parameter estimation
from scipy.stats import norm # normal distribution density function
import numdifftools as nd # we use this to calculate t-ratios and p-values
import csv # we need this to store our parameters as csv

In [2]:
# Label for this model; it is used as the prefix of saved output files
# (for example the '<model_name>_iterations.csv' file written during estimation).
model_name = "car_following_model"

In [3]:
# Estimation switches: the value 1 turns a feature on, any other value leaves it off.
# panel  -> the data is panel data (probabilities are multiplied per ID)
# mixing -> mixing is applied (probabilities are averaged across columns per ID)
panel = 0
mixing = 0

In [4]:
# Read the data file into a data frame
data = pd.read_table('I80_data0.txt')

# Number of observations = number of rows (needed to calculate the goodness-of-fit indices)
Nobs = len(data)

In [5]:
# Display the data frame to inspect it (run the cell with Shift + Return).
# For a long frame pandas shows only the first and last rows, separated by "...".
data

Unnamed: 0,ID,Time,Position,Length,Width,Type,Speed,Acceleration,Lane,Leader,...,lead04,lane01,lane02,lane03,lane04,tasks,task_index,timelag,dd,idd
0,4,26,83.033604,4.084802,-9999.99,2,6.847780,-2.266411,5,21,...,1,1,1,1,1,28,5,25,1,4
1,4,27,87.759061,4.084802,-9999.99,2,3.295830,-1.349331,5,21,...,1,1,1,1,1,28,6,26,1,4
2,4,28,90.960723,4.084802,-9999.99,2,2.820909,-2.040139,5,21,...,1,1,1,1,1,28,7,27,1,4
3,4,29,92.047244,4.084802,-9999.99,2,0.276981,0.345661,5,21,...,1,1,1,1,1,28,8,28,1,4
4,4,30,92.601824,4.084802,-9999.99,2,0.634731,0.365870,5,21,...,1,1,1,1,1,28,9,29,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18421,3340,110,397.345232,4.359100,-9999.99,2,10.053292,-0.356570,2,117,...,1,1,1,1,1,55,51,109,1,3340
18422,3340,111,406.291822,4.359100,-9999.99,2,8.161121,-0.587411,2,117,...,1,1,1,1,1,55,52,110,1,3340
18423,3340,112,414.387694,4.359100,-9999.99,2,8.106680,0.110621,2,117,...,1,1,1,1,1,55,53,111,1,3340
18424,3340,113,422.633135,4.359100,-9999.99,2,8.298771,-0.182639,2,117,...,1,1,1,1,1,55,54,112,1,3340


In [6]:
# List the column names available in the data frame
print(data.columns)

Index(['ID', 'Time', 'Position', 'Length', 'Width', 'Type', 'Speed',
       'Acceleration', 'Lane', 'Leader', 'Follower', 'Space_headway',
       'Time_headway', 'Acceleration_lead', 'Speed_lead', 'Position_lead',
       'Density', 'distanceToEnd', 'lane_change', 'lead_change', 'lag_leader0',
       'lag_leader1', 'lag_leader2', 'lag_leader3', 'lag_leader4', 'lag_lane0',
       'lag_lane1', 'lag_lane2', 'lag_lane3', 'lag_lane4', 'lag_acceleration0',
       'lag_acceleration1', 'lag_acceleration2', 'lag_acceleration3',
       'lag_acceleration4', 'lag_acceleration_lead0', 'lag_acceleration_lead1',
       'lag_acceleration_lead2', 'lag_acceleration_lead3',
       'lag_acceleration_lead4', 'lag_speed0', 'lag_speed1', 'lag_speed2',
       'lag_speed3', 'lag_speed4', 'lag_speed_lead0', 'lag_speed_lead1',
       'lag_speed_lead2', 'lag_speed_lead3', 'lag_speed_lead4',
       'lag_position_lead0', 'lag_position_lead1', 'lag_position_lead2',
       'lag_position_lead3', 'lag_position_lead4', '

In [7]:
# Observed regime for every row: 1 when the acceleration is non-negative
# (a zero acceleration therefore counts as 1), -1 when it is negative.
# np.where evaluates the whole column at once instead of calling a Python
# lambda once per row; missing values still fall into the -1 branch because
# the comparison NaN >= 0 is False.
data['choice'] = np.where(data['Acceleration'] >= 0, 1, -1)


In [8]:
# Pull the model variables out of the data frame as column vectors of shape (rows, 1).
# Pattern:
# Variable_name = np.array(data['Variable_name'])[:, None]
# A reaction time of 1s is assumed (presumably the reason the headway and the
# relative speed are built from the lag-1 columns - confirm).
choice = np.array(data['choice'])[:, None]
Acceleration = np.array(data['Acceleration'])[:, None]
Speed = np.array(data['Speed'])[:, None]
Space_headway = np.array(data['lag_s_headway1'])[:, None]
# Relative speed = lagged speed of the leader minus lagged speed of the subject vehicle
Relative_speed = np.array(data['lag_speed_lead1'] - data['lag_speed1'])[:, None]


In [9]:
# Identifier of each row's individual, kept as a flat 1-D array
# (unlike the model variables, ID does not need to be reshaped)
ID = np.array(data.loc[:, 'ID'])

In [10]:
# Starting values (all zero) for the parameters to be estimated.
# The order of the keys matters: LL() assigns the i-th element of the parameter
# vector to the i-th key of this dictionary.
# Each name appears exactly once. The previous literal listed "beta_acc" and
# "beta_dec" twice; Python silently keeps a single entry for a repeated key, so
# the model always had these 14 parameters.
# NOTE(review): LL() uses beta_acc / beta_dec both in the regime utilities and in
# the sensitivity terms - if separate parameters were intended, add them here
# under distinct names and use those names in LL().
betas_start = {
    "asc_acc": 0, "asc_dec": 0,
    "beta_acc": 0, "beta_dec": 0,
    "alpha_acc": 0, "alpha_dec": 0,
    "gamma_acc": 0, "gamma_dec": 0,
    "lamda_acc": 0, "lamda_dec": 0,
    "sigma_acc": 0, "sigma_dec": 0,
    "sigma_nt": 0, "constant": 0,
}

In [19]:
def LL(betas): # betas is a vector with the parameters we want to estimate
    """Return the log of the model probability of every observation (or every ID).

    Parameters
    ----------
    betas : sequence of numbers
        Parameter values, in the same order as the keys of 'betas_start'.

    Returns
    -------
    numpy.ndarray
        np.log(P). P has one row per observation when neither 'panel' nor
        'mixing' equals 1; otherwise the rows are multiplied per ID first, so
        there is one entry per ID.

    Notes
    -----
    The function has a side effect: every parameter is written to a
    module-level global named after its key in 'betas_start'. It also reads the
    module-level arrays Relative_speed, Speed, Space_headway, Acceleration,
    choice and ID, and the switches 'panel' and 'mixing'.
    """
   
    # First let's define the parameters to be estimated.
    # The parameter names are imported directly from 'betas_start' that we defined earlier:
    # betas[pn] is stored in a global variable named after the pn-th key, and the
    # expressions below read those globals by name.
    
    for pn in range(len(betas_start.values())):
        globals()[np.array(list(betas_start.keys()))[pn]] = betas[pn]
        
    # Then we need to define the main model specification
    
    # We need to start by defining the utility functions
    # Please make sure that you are using the same names for the parameters as those defined in 'betas_start'
    # Three regimes: accelerate (acc), decelerate (dec) and do nothing (dn, utility fixed at 0)
    
    U_acc = asc_acc + beta_acc * Relative_speed 
    U_dec = asc_dec + beta_dec * Relative_speed
    U_dn = 0



    #######################################################################################
    #######################################################################################
    
    # TASK ALERT #1!! How about adding the headway in the utility function?? #

    # TASK ALERT #2!! How about adding alternative specific parameters per travel mode?? #

    #######################################################################################
    #######################################################################################
    
    # And we need to take the exponents of utilities as
    U_acc = np.exp(U_acc)
    U_dec = np.exp(U_dec)
    U_dn = np.exp(U_dn)
    
    # We need the sum of all utilities to calculate our choice probabilities
    U_sum = U_acc + U_dec + U_dn
    
    # And then we need to calculate our choice probabilities
    P_acc = U_acc / U_sum
    P_dec = U_dec / U_sum
    P_dn = U_dn / U_sum


    # Sensitivity terms: exp(alpha) * (Speed + np.exp(-50))**beta / Space_headway**gamma,
    # with a leading minus sign for the deceleration regime.
    # np.exp(-50) is a tiny positive offset (presumably there to keep the base away from 0 - confirm).
    # NOTE(review): beta_acc / beta_dec are the same parameters that multiply
    # Relative_speed in the utilities above - confirm that sharing them is intended.
    sensitivity_acc = np.exp(alpha_acc) * ((Speed+np.exp(-50))**beta_acc) / (Space_headway**gamma_acc)
    sensitivity_dec = -np.exp(alpha_dec) * ((Speed+np.exp(-50))**beta_dec) / (Space_headway**gamma_dec)

    # Stimulus terms. As written, lamda_* is the exponent of np.exp(-50) only; that
    # value is added to Relative_speed before np.abs is taken, and the boolean mask
    # is the outer exponent, so the term equals 1 wherever the mask is False.
    # NOTE(review): the placement of the parentheses may be unintended (lamda_* does
    # not act on the relative speed itself) - confirm against the model specification.
    stimulus_acc = (
        np.abs(Relative_speed + np.exp(-50)**lamda_acc)**(Relative_speed>=0)
    )
    stimulus_dec = (
        np.abs(Relative_speed + np.exp(-50)**lamda_dec)**(Relative_speed<0)
    )

    # Mean acceleration of each regime = sensitivity * stimulus
    acc = sensitivity_acc*stimulus_acc
    dec = sensitivity_dec*stimulus_dec

    # Normal densities of the observed acceleration in each regime. The standard
    # deviations are exp(sigma_*), hence always positive.
    # Pa is divided by the probability mass above 0 and Pb by the mass below 0
    # (normal densities renormalised to one side of zero); Pc is an ordinary
    # normal density with mean 'constant'.
    Pa = norm.pdf(Acceleration, acc, np.exp(sigma_acc))/(1-norm.cdf(0,acc,np.exp(sigma_acc)))
    Pb = norm.pdf(Acceleration, dec, np.exp(sigma_dec))/(norm.cdf(0,dec,np.exp(sigma_dec)))
    Pc = norm.pdf(Acceleration, constant, np.exp(sigma_nt))
    
    # The total probability is then:
    # rows with choice == 1 combine the acc and dn regimes, rows with choice == -1 the dec and dn regimes
    P = (P_acc * Pa + P_dn * Pc) * (choice==1) + (P_dec * Pb + P_dn * Pc) * (choice==-1) 


    ############################################################################################################
    ############################################################################################################
    # - Now this below is relevant if we have panel data and apply mixing (Do not change this piece of code!) -#
    if panel == 1:
    # Do it as panel
        P = pd.DataFrame(P)
        P = pd.concat([pd.Series(ID), pd.DataFrame(P)], axis=1, ignore_index=True)
        P.rename(columns={P.columns[0]: 'ID'},inplace=True)
    
        # We take the product of probabilities per individual per draw and then delete the ID column
        P = P.groupby('ID', as_index=False).prod()
        P = P.drop('ID', axis=1)
   
    if mixing == 1:
        # We take the average per row to get the average probability per individual (if mixing == 1)
        
        if panel == 1:
            P['mean'] = P.mean(axis=1)
            P = np.array(P['mean'])
        
        if panel == 0:
            P = pd.DataFrame(P)
            P = pd.concat([pd.Series(ID), pd.DataFrame(P)], axis=1, ignore_index=True)
            P.rename(columns={P.columns[0]: 'ID'},inplace=True)
    
            # We take the product of probabilities per individual per draw and then delete the ID column
            P = P.groupby('ID', as_index=False).prod()
            P = P.drop('ID', axis=1)
            P['mean'] = P.mean(axis=1)
            P = np.array(P['mean'])
            
    P = np.array(P)
    ### --- This is where the panel data approach ends. --- ###
    ############################################################################################################
    ############################################################################################################
    
    # We then take the log of the density function
    logprob = np.log(P)
    
    return logprob

In [20]:
def SLL(betas):
    """Return the negative sum of the log-probabilities produced by LL(betas).

    This is the objective that is minimised during estimation.

    The total is taken with np.sum over the first axis. That matches what the
    built-in sum() returned before (a column vector of shape (n, 1) gives a
    one-element array, a 1-D vector gives a scalar), but the addition runs
    inside NumPy instead of looping over the rows in Python on every call.
    """
    return -np.sum(LL(betas), axis=0)

In [21]:
import warnings
warnings.filterwarnings("ignore")

In [22]:
# Progress report: prints the log-likelihood at the optimiser's current parameter values
def callback1(betas):
    """Print the log-likelihood (the negative of SLL) evaluated at 'betas'."""
    current_ll = -SLL(betas)
    print("Current log likelihood:", current_ll)

# History of parameter vectors, one entry per iteration, so that the estimates
# can be stored while the optimiser is running (callback2 appends to this list).
# The first entry is the vector of starting values.
parameter_values = [np.array([*betas_start.values()])]
# Then define the function
def callback2(betas):
    """Record the current parameter vector and rewrite the iterations CSV file.

    The vector is appended to the module-level list 'parameter_values'. The file
    '<model_name>_iterations.csv' is then rewritten from scratch: a header row
    with the parameter names from 'betas_start', followed by one row per stored
    vector. Rewriting the whole file means it is complete after every iteration.
    """
    # Keep a private copy: the optimiser owns 'betas', and storing the array
    # itself would let any later in-place update change rows recorded earlier.
    parameter_values.append(np.array(betas, copy=True))
    column_names = list(betas_start.keys())
    with open(f'{model_name}_iterations.csv','w',newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(column_names)
        writer.writerows(parameter_values)

# Single callback for the optimiser: report progress first, then store the parameters
def combined_callback(betas):
    """Call callback1 and then callback2 with the same parameter vector."""
    for hook in (callback1, callback2):
        hook(betas)
        
# Starting vector, built once and reused for the initial report and for the optimiser
start_values = np.array(list(betas_start.values()))
print("Initial log likelihood:", -SLL(start_values))

# Choose optimisation routine (preferably BFGS)
optimiser = 'BFGS' # BFGS or L-BFGS-B or nelder-mead

# combined_callback prints the log-likelihood and stores the parameters after every iteration
result = minimize(SLL, start_values, method=optimiser, callback=combined_callback,
                  options={'disp': False})

# minimize() does not raise when it fails to converge, so check the flag and say so
# explicitly; otherwise a failed run looks exactly like a successful one.
if not result.success:
    print("Optimiser did not converge:", result.message)
print("Final log likelihood:", -result.fun)

Initial log likelihood: [-29925.06184088]
Current log likelihood: [-26239.37886121]
Current log likelihood: [-23148.62883853]
Current log likelihood: [-22176.22288294]
Current log likelihood: [-20982.46364238]
Current log likelihood: [-20407.55746632]
Current log likelihood: [-20183.41175786]
Current log likelihood: [-20135.33655926]
Current log likelihood: [-20109.63589254]
Current log likelihood: [-20095.6713379]
Current log likelihood: [-20093.01254144]
Current log likelihood: [-20089.07362587]
Current log likelihood: [-20058.36302074]
Current log likelihood: [-19999.55772991]
Current log likelihood: [-19894.53469452]
Current log likelihood: [-19707.41347987]
Current log likelihood: [-19658.07162116]
Current log likelihood: [-19655.99367803]
Current log likelihood: [-19583.09782053]
Current log likelihood: [-19510.81037335]
Current log likelihood: [-19501.73616573]
Current log likelihood: [-19489.76594368]
Current log likelihood: [-19485.02757197]
Current log likelihood: [-19482.206