Import the necessary libraries

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

Import the dataset

In [3]:
data_path = os.path.join('data', 'trip_generation.csv')
df = pd.read_csv(data_path)
df.head(5)

Unnamed: 0,zone,dwelling_unit_type,number_of_persons,number_of_vehicles,licensed_drivers,fulltime_workers,partime_workers,work_at_home_persons,number_of_students,number_of_females,number_of_males,number_of_children,number_of_adults,daily_work_trips,daily_non_work_trips
0,29,2,3,1,3,1,1,0,1,2,1,0,0,2,9
1,6,2,2,1,2,2,0,0,0,1,1,0,0,3,7
2,11,2,1,0,0,0,0,0,0,1,0,0,1,0,0
3,14,2,1,0,1,0,1,0,1,1,0,0,0,0,3
4,24,2,2,0,0,0,0,0,2,1,1,0,0,0,4


## Research Questions

1. What are the key determinants of daily work trip generation?

2. What are the key determinants of daily non-work trip generation?

3. What are the factors affecting the daily trip-generation in a metropolitan area?

In [4]:
df. columns

Index(['zone', 'dwelling_unit_type', 'number_of_persons', 'number_of_vehicles',
       'licensed_drivers', 'fulltime_workers', 'partime_workers',
       'work_at_home_persons', 'number_of_students', 'number_of_females',
       'number_of_males', 'number_of_children', 'number_of_adults',
       'daily_work_trips', 'daily_non_work_trips'],
      dtype='object')

### 1. What are the key determinants of daily work trip generation?
#### Multiple Regression Model 1 - Daily Work Trip

In [7]:
y = df['daily_work_trips']
x = df[['fulltime_workers', 'partime_workers', 'work_at_home_persons']]
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       daily_work_trips   R-squared:                       0.481
Model:                            OLS   Adj. R-squared:                  0.481
Method:                 Least Squares   F-statistic:                     611.9
Date:                Sat, 06 Jul 2024   Prob (F-statistic):          2.52e-281
Time:                        10:50:56   Log-Likelihood:                -2188.8
No. Observations:                1982   AIC:                             4386.
Df Residuals:                    1978   BIC:                             4408.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.0512 

### 2. What are the key determinants of daily non-work trip generation?
#### Multiple Regression Model 2 - Daily Non-Work Trip

In [8]:
y = df['daily_non_work_trips']
x = df[['number_of_persons', 'licensed_drivers', 'number_of_students']]
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                             OLS Regression Results                             
Dep. Variable:     daily_non_work_trips   R-squared:                       0.335
Model:                              OLS   Adj. R-squared:                  0.334
Method:                   Least Squares   F-statistic:                     332.1
Date:                  Sat, 06 Jul 2024   Prob (F-statistic):          1.26e-174
Time:                          10:56:37   Log-Likelihood:                -4125.3
No. Observations:                  1982   AIC:                             8259.
Df Residuals:                      1978   BIC:                             8281.
Df Model:                             3                                         
Covariance Type:              nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const           

### 3. What are the factors affecting the daily trip-generation in a metropolitan area?

In [10]:
y = df['daily_work_trips']
x = df[['fulltime_workers', 'partime_workers', 'work_at_home_persons', 
      'licensed_drivers', 'number_of_students']]
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       daily_work_trips   R-squared:                       0.483
Model:                            OLS   Adj. R-squared:                  0.482
Method:                 Least Squares   F-statistic:                     369.6
Date:                Sat, 06 Jul 2024   Prob (F-statistic):          4.00e-280
Time:                        11:07:36   Log-Likelihood:                -2185.2
No. Observations:                1982   AIC:                             4382.
Df Residuals:                    1976   BIC:                             4416.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.0507 

In [11]:
df['total_trips'] = ''

In [12]:
df['total_trips'] = df['daily_work_trips'] + df['daily_non_work_trips']

In [13]:
df.head(5)

Unnamed: 0,zone,dwelling_unit_type,number_of_persons,number_of_vehicles,licensed_drivers,fulltime_workers,partime_workers,work_at_home_persons,number_of_students,number_of_females,number_of_males,number_of_children,number_of_adults,daily_work_trips,daily_non_work_trips,total_trips
0,29,2,3,1,3,1,1,0,1,2,1,0,0,2,9,11
1,6,2,2,1,2,2,0,0,0,1,1,0,0,3,7,10
2,11,2,1,0,0,0,0,0,0,1,0,0,1,0,0,0
3,14,2,1,0,1,0,1,0,1,1,0,0,0,0,3,3
4,24,2,2,0,0,0,0,0,2,1,1,0,0,0,4,4


In [14]:
y = df['total_trips']
x = df[['daily_non_work_trips', 'fulltime_workers', 'partime_workers', 'work_at_home_persons',
       ]]
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:            total_trips   R-squared:                       0.935
Model:                            OLS   Adj. R-squared:                  0.935
Method:                 Least Squares   F-statistic:                     9529.
Date:                Sat, 06 Jul 2024   Prob (F-statistic):               0.00
Time:                        11:13:31   Log-Likelihood:                -2182.4
No. Observations:                1982   AIC:                             4373.
Df Residuals:                    1978   BIC:                             4395.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.0187 

In [18]:
y = df['total_trips']
x = df[['licensed_drivers', 'fulltime_workers', 'partime_workers', 'daily_work_trips',
        'number_of_children', 'number_of_students', 'work_at_home_persons']]
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:            total_trips   R-squared:                       0.558
Model:                            OLS   Adj. R-squared:                  0.557
Method:                 Least Squares   F-statistic:                     356.5
Date:                Sat, 06 Jul 2024   Prob (F-statistic):               0.00
Time:                        11:18:51   Log-Likelihood:                -4085.6
No. Observations:                1982   AIC:                             8187.
Df Residuals:                    1974   BIC:                             8232.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.7782 