In [1]:
import __init__
#
from IPython.display import HTML, display
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
def significance(pvalue):
    """Return the star marker for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p < 0.1 and an empty
    string otherwise.
    """
    # Thresholds are checked from the strictest to the loosest; the first
    # one the p-value falls under decides the marker.
    for threshold, stars in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue < threshold:
            return stars
    return ''

In [3]:
def display_res(Y2009, Y2010, inDepV, depV=None):
    """Fit the same OLS model on both yearly tables and print the results side by side.

    For every regressor and for the intercept ('const') one line shows the
    two coefficients with their significance stars, and the next line shows
    the two standard errors in parentheses. A summary follows with the
    number of observations, R-squared, adjusted R-squared and F-statistic
    of both fits.

    Parameters
    ----------
    Y2009, Y2010 : table indexable by column name (pandas DataFrames in this notebook)
        Must contain the dependent-variable column and every column in `inDepV`.
    inDepV : list of str
        Column names of the regressors. Must be a list, because 'const' is
        appended to it when reporting.
    depV : str, optional
        Column name of the dependent variable. When omitted, the
        notebook-level variable `dep_v` is used, which keeps existing
        three-argument calls working unchanged.
    """
    if depV is None:
        # Backward-compatible fallback to the notebook global.
        depV = dep_v
    # Rows with missing values are dropped by the estimator (missing='drop').
    res2009, res2010 = [
        sm.OLS(df[depV], sm.add_constant(df[inDepV]), missing='drop').fit()
        for df in (Y2009, Y2010)
    ]
    for idv in inDepV + ['const']:
        coef2009 = '%.4f' % res2009.params[idv] + significance(res2009.pvalues[idv])
        coef2010 = '%.4f' % res2010.params[idv] + significance(res2010.pvalues[idv])
        print('%s:' % idv + ','.join([coef2009, coef2010]))
        # Read the standard errors from the fit instead of rebuilding them
        # as params / tvalues, which is 0/0 (NaN) for a coefficient of exactly zero.
        print('(%.4f),(%.4f)' % (res2009.bse[idv], res2010.bse[idv]))
    print('')

    fvalue2009 = '%.4f' % res2009.fvalue + significance(res2009.f_pvalue)
    fvalue2010 = '%.4f' % res2010.fvalue + significance(res2010.f_pvalue)
    print('N:%d,%d' % (res2009.nobs, res2010.nobs))
    print('R-squared:%.4f,%.4f' % (res2009.rsquared, res2010.rsquared))
    print('Adj R-squared:%.4f,%.4f' % (res2009.rsquared_adj, res2010.rsquared_adj))
    print('F-statistics:%s,%s' % (fvalue2009, fvalue2010))

In [4]:
from information_boards import statisticsAllDrivers_ns_dpath
from information_boards import statisticsAllDriversMonth_ns2023_prefix
# Load one statistics CSV per year; the file name is the shared prefix
# followed by the year, inside the statistics directory.
Y2009, Y2010 = (
    pd.read_csv('%s/%s%d.csv' % (statisticsAllDrivers_ns_dpath,
                                 statisticsAllDriversMonth_ns2023_prefix,
                                 year))
    for year in (2009, 2010)
)
# Number of distinct driverID values in each year's table.
print('%d %d' % (len(set(Y2009['driverID'])), len(set(Y2010['driverID']))))

7233 7560


In [5]:
# Model specifications
# Dependent-variable column shared by every model.
dep_v = 'QTime/locTrip'
# Regressor that appears in every model.
ib_impact = ['locInRatio']
# Control-variable sets: cv0 is the common base, the others extend it.
cv0 = ['wleTripNumber', 'locTripNumber', 'wleProductivity']
cv1 = cv0 + ['EP/locTrip']
cv2 = cv0 + ['locProductivity']
cv3 = cv1 + ['locProductivity']
# M1 uses the regressor alone; M2a-M2d add one control set each.
m1_inDepV = ib_impact
m2a_inDepV, m2b_inDepV, m2c_inDepV, m2d_inDepV = (
    ib_impact + controls for controls in (cv0, cv1, cv2, cv3)
)

In [6]:
%time
print ''
# M1
display_res(Y2009, Y2010, m1_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs

locInRatio:-15.2840***,-39.5741***
(2.5833),(4.4439)
const:38.8788***,57.9199***
(1.3665),(2.3160)

N:18945,19780
R-squared:0.0018,0.0040
Adj R-squared:0.0018,0.0039
F-statistics:35.0035***,79.3029***


In [7]:
%time
print ''
# M2a
display_res(Y2009, Y2010, m2a_inDepV)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.05 µs

locInRatio:-13.4527***,-38.5651***
(2.6904),(4.6100)
wleTripNumber:0.0414,-0.5577
(0.2848),(0.4621)
locTripNumber:2.8744,7.3663**
(1.7485),(3.0662)
wleProductivity:0.7111***,2.0062***
(0.2387),(0.3920)
const:15.5623***,-1.6528
(5.8221),(10.5882)

N:18945,19780
R-squared:0.0035,0.0059
Adj R-squared:0.0033,0.0057
F-statistics:16.5570***,29.1813***


In [8]:
%time
print ''
# M2b
display_res(Y2009, Y2010, m2b_inDepV)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.96 µs

locInRatio:-1.6414***,-1.1229
(0.3748),(0.6862)
wleTripNumber:-0.1105***,-0.0979
(0.0397),(0.0687)
locTripNumber:0.8252***,1.0522**
(0.2435),(0.4557)
wleProductivity:0.0491,0.0484
(0.0332),(0.0583)
EP/locTrip:-2.2998***,-2.0158***
(0.0023),(0.0022)
const:14.6738***,11.1166***
(0.8106),(1.5735)

N:18945,19780
R-squared:0.9807,0.9780
Adj R-squared:0.9807,0.9780
F-statistics:192311.6441***,176195.6903***


In [9]:
%time
print ''
# M2c
display_res(Y2009, Y2010, m2c_inDepV)

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 5.96 µs

locInRatio:-33.3066***,-67.0916***
(2.5685),(4.3316)
wleTripNumber:-0.0693,-0.6878
(0.2685),(0.4310)
locTripNumber:-0.3809,-1.0496
(1.6496),(2.8640)
wleProductivity:4.4252***,8.1240***
(0.2376),(0.3825)
locProductivity:-6.6883***,-11.4510***
(0.1372),(0.2105)
const:92.1123***,149.0078***
(5.7082),(10.2567)

N:18945,19780
R-squared:0.1146,0.1352
Adj R-squared:0.1144,0.1350
F-statistics:490.3776***,618.4637***


In [10]:
%time
print ''
# M2d
display_res(Y2009, Y2010, m2d_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs

locInRatio:-1.5841***,-1.4592**
(0.3809),(0.6941)
wleTripNumber:-0.1103***,-0.0998
(0.0397),(0.0687)
locTripNumber:0.8334***,0.9760**
(0.2437),(0.4562)
wleProductivity:0.0389,0.1125*
(0.0354),(0.0616)
EP/locTrip:-2.3006***,-2.0131***
(0.0025),(0.0023)
locProductivity:0.0181,-0.1151***
(0.0215),(0.0360)
const:14.4666***,12.6138***
(0.8473),(1.6413)

N:18945,19780
R-squared:0.9807,0.9781
Adj R-squared:0.9807,0.9781
F-statistics:160257.3240***,146900.0191***
