In [4]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Problem 1: Binary Logit Model

In [5]:
# Location of the commute survey CSV. The default is the original,
# machine-specific absolute path; set the COMMUTE_BINARY_CSV environment
# variable to load a copy stored elsewhere without editing the notebook.
DATA_PATH = os.environ.get(
    'COMMUTE_BINARY_CSV',
    'C:\\Users\\moham\\My Drive\\Projects\\Structural Econometrics Projects\\Topics in Advanced Econometrics\\2- Logit\\commute_binary.csv',
)
df = pd.read_csv(DATA_PATH)

In [15]:
# Binary indicators (1 when the condition holds, 0 otherwise).
# 'output' flags commuters whose chosen mode is the car.
df['output'] = df['mode'].eq('car').astype(int)
# 'Married' flags respondents whose marital status is 'married'.
df['Married'] = df['marital_status'].eq('married').astype(int)

In [16]:
# Inspect the column names available in the data frame.
df.columns

Index(['id', 'mode', 'time.car', 'cost.car', 'time.bus', 'cost.bus',
       'price_gas', 'snowfall', 'construction', 'bus_detour', 'age', 'income',
       'marital_status', 'output', 'Married'],
      dtype='object')

In [93]:
import statsmodels.api as sm

# Outcome: the 0/1 'output' indicator (1 = the commuter chose the car).
y = df['output']

# Regressors: car travel time, car cost and bus travel time, with an
# intercept column prepended by add_constant.
X = sm.add_constant(df[['time.car', 'cost.car', 'time.bus']])

# Estimate the binary logit and report the fitted model.
logit_model = sm.Logit(y, X)
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.600470
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 output   No. Observations:                 1000
Model:                          Logit   Df Residuals:                      996
Method:                           MLE   Df Model:                            3
Date:                Sat, 27 Jul 2024   Pseudo R-squ.:                  0.1205
Time:                        14:12:48   Log-Likelihood:                -600.47
converged:                       True   LL-Null:                       -682.74
Covariance Type:            nonrobust   LLR p-value:                 1.914e-35
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.2333      0.347      6.443      0.000       1.554       2.913
time.car      -0.3322      0.

In [94]:
# Classify an observation as "car" (1) when its fitted probability is at
# least 0.5 and as 0 otherwise. A vectorized comparison replaces the
# per-row Python lambda.
predicted_values = (result.predict(X) >= 0.5).astype(int)

# Share of observations whose predicted class matches the observed choice.
accuracy = (predicted_values == df['output']).mean()

# Express the hit rate as a percentage.
success_rate = accuracy * 100

success_rate

66.8

## Marginal Effects
By marginal effect (ME), we mean the effect of a change in each attribute on the probability of choosing to drive a car.

From the theory, we know that the marginal effect of an attribute $z$ on the probability of choosing to drive is:
$$
\beta_z P_{n i}\left(1-P_{n i}\right)
$$

In [95]:
# Fitted choice probability for every observation under the current model.
predictions = result.predict(X)

# Estimated coefficients of the three alternative attributes.
coefficients = result.params
timecar_coefficient = coefficients['time.car']
timebus_coefficient = coefficients['time.bus']
costcar_coefficient = coefficients['cost.car']

# Observation-level marginal effect of each attribute: beta * P * (1 - P).
for me_column, beta in (
    ('ME_timecar', timecar_coefficient),
    ('ME_timebus', timebus_coefficient),
    ('ME_costcar', costcar_coefficient),
):
    df[me_column] = beta * predictions * (1 - predictions)

In [96]:
# Distribution of the three marginal-effect columns: keep only the
# extremes, the quartiles and the mean from describe(), in that order.
new_columns = ['ME_timecar', 'ME_timebus', 'ME_costcar']
summary = (
    df.loc[:, new_columns]
    .describe()
    .loc[['min', '25%', '50%', '75%', 'max', 'mean']]
)
summary

Unnamed: 0,ME_timecar,ME_timebus,ME_costcar
min,-0.083054,0.000963,-0.519289
25%,-0.081579,0.024823,-0.510069
50%,-0.076326,0.030459,-0.477225
75%,-0.062203,0.032555,-0.38892
max,-0.002413,0.033144,-0.015087
mean,-0.069001,0.027536,-0.431426


At the mean, these marginal effects are very similar to the marginal effects from a linear probability model (LPM). However, the logit marginal effects are heterogeneous: they vary across observations around that mean. For this reason, the logit is the better model to work with.

## Finding the Dollar value of each attribute

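Because the car-cost attribute is measured in money terms, its coefficient gives the marginal utility (MU) of money. Therefore, dividing any other coefficient by the cost coefficient converts it into a dollar value.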

In [98]:
# Dollar value of travel time: the size of the ratio of each time
# coefficient to the car-cost coefficient, scaled by 60.
# NOTE(review): the factor 60 presumably converts per-minute values to
# per-hour values (i.e. travel times are recorded in minutes) - confirm.
dollor_value_of_time_bus = abs(timebus_coefficient / costcar_coefficient) * 60
dollor_value_of_time_car = abs(timecar_coefficient / costcar_coefficient) * 60

print(f"Dollar Value of Time Bus: ${dollor_value_of_time_bus:.2f}")
print(f"Dollar Value of Time Car: ${dollor_value_of_time_car:.2f}")

Dollar Value of Time Bus: $3.83
Dollar Value of Time Car: $9.60


This is the agent's willingness to pay to reduce travel time in each mode by one hour.

## Logit model with individual attributes

In [99]:
# Outcome: the same 0/1 'output' indicator as before.
y = df['output']

# Car cost expressed relative to the individual's income.
df['cost.car_Norm'] = df['cost.car'].div(df['income'])

# Regressors for this specification: car time, income-normalised car cost
# and bus time, with an intercept column prepended by add_constant.
X = sm.add_constant(df[['time.car', 'cost.car_Norm', 'time.bus']])

# Estimate the binary logit and report the fitted model.
logit_model = sm.Logit(y, X)
result = logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.597449
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 output   No. Observations:                 1000
Model:                          Logit   Df Residuals:                      996
Method:                           MLE   Df Model:                            3
Date:                Sat, 27 Jul 2024   Pseudo R-squ.:                  0.1249
Time:                        14:16:33   Log-Likelihood:                -597.45
converged:                       True   LL-Null:                       -682.74
Covariance Type:            nonrobust   LLR p-value:                 9.498e-37
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             2.2654      0.331      6.842      0.000       1.616       2.914
time.car         -0.