In [1]:
import __init__
#
from IPython.display import HTML, display
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
def significance(pvalue):
    """Return the significance marker for a p-value.

    '***' when pvalue < 0.01, '**' when pvalue < 0.05, '*' when
    pvalue < 0.1, and an empty string otherwise.
    """
    # Thresholds are checked from strictest to loosest; the first match wins.
    for cutoff, stars in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue < cutoff:
            return stars
    return ''

In [3]:
def display_res(Y2009, Y2010, inDepV, depV=None):
    """Fit one OLS model per year and print the two results side by side.

    For each of the two data frames an OLS regression of the dependent
    column on the columns in ``inDepV`` plus an added constant is fitted
    (rows with missing values are dropped by statsmodels). For every
    regressor and the constant, one line with the coefficients (4 decimals,
    followed by the stars from ``significance``) and one line with the
    standard errors in parentheses is printed, 2009 first, then 2010.
    A summary with N, R-squared, adjusted R-squared and the F-statistic
    follows.

    Parameters
    ----------
    Y2009, Y2010 : data frames indexed by column name (the notebook passes
        pandas DataFrames).
    inDepV : list of str
        Names of the independent-variable columns.
    depV : str, optional
        Name of the dependent-variable column. When omitted, the
        module-level ``dep_v`` is used, as before.

    Returns
    -------
    None; the table is written to stdout.
    """
    if depV is None:
        # Backward-compatible fallback to the notebook-level setting.
        depV = dep_v

    fitted = []
    for df in (Y2009, Y2010):
        X = sm.add_constant(df[inDepV])
        fitted.append(sm.OLS(df[depV], X, missing='drop').fit())
    res2009, res2010 = fitted

    for idv in inDepV + ['const']:
        coefs = ['%.4f' % res.params[idv] + significance(res.pvalues[idv])
                 for res in fitted]
        print(('%s:' % idv) + ','.join(coefs))
        # Read the standard error directly; deriving it as params / tvalues
        # yields NaN (0/0) whenever a coefficient is exactly zero.
        print('(%.4f),(%.4f)' % (res2009.bse[idv], res2010.bse[idv]))
    print('')

    fvalues = ['%.4f' % res.fvalue + significance(res.f_pvalue)
               for res in fitted]
    print('N:%d,%d' % (res2009.nobs, res2010.nobs))
    print('R-squared:%.4f,%.4f' % (res2009.rsquared, res2010.rsquared))
    print('Adj R-squared:%.4f,%.4f' % (res2009.rsquared_adj, res2010.rsquared_adj))
    print('F-statistics:%s,%s' % (fvalues[0], fvalues[1]))

In [5]:
from information_boards import statisticsAllDrivers_ns_dpath
from information_boards import statisticsAllDriversMonth_ns1517_prefix
# Read the 2009 and 2010 CSV files; both paths are <dpath>/<prefix><year>.csv.
Y2009, Y2010 = (
    pd.read_csv('%s/%s%d.csv' % (statisticsAllDrivers_ns_dpath,
                                 statisticsAllDriversMonth_ns1517_prefix,
                                 year))
    for year in (2009, 2010)
)
# Number of distinct driverID values in each year's frame.
# A single formatted argument prints the same text on Python 2 and 3.
print('%d %d' % (len(set(Y2009['driverID'])), len(set(Y2010['driverID']))))

6492 7361


In [6]:
# Models
# Column used as the regression target by display_res.
dep_v = 'QTime/locTrip'
# Regressor present in every model.
ib_impact = ['locInRatio']
# Control-variable sets: cv0 is the base set, cv1/cv2 each add one column,
# cv3 adds both.
cv0 = ['wleTripNumber', 'locTripNumber', 'wleProductivity']
cv1 = cv0 + ['EP/locTrip']
cv2 = cv0 + ['locProductivity']
cv3 = cv1 + ['locProductivity']
# M1 uses the impact regressor alone (same list object as ib_impact);
# M2a-M2d prepend it to the control sets cv0-cv3 respectively.
m1_inDepV = ib_impact
m2a_inDepV, m2b_inDepV, m2c_inDepV, m2d_inDepV = (
    ib_impact + controls for controls in (cv0, cv1, cv2, cv3)
)

In [7]:
%time
print ''
# M1
display_res(Y2009, Y2010, m1_inDepV)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs

locInRatio:-76.3637***,-135.8951***
(6.8238),(8.6328)
const:86.1902***,141.7538***
(4.7206),(6.4269)

N:11098,11766
R-squared:0.0112,0.0206
Adj R-squared:0.0111,0.0205
F-statistics:125.2347***,247.8015***


In [8]:
%time
print ''
# M2a
display_res(Y2009, Y2010, m2a_inDepV)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs

locInRatio:-79.4859***,-138.1359***
(7.1849),(9.0469)
wleTripNumber:0.3940,-0.1515
(1.2367),(1.4423)
locTripNumber:5.3165,9.4939
(5.5170),(6.2600)
wleProductivity:2.0065***,2.6264***
(0.4772),(0.5548)
const:27.3182**,58.3679***
(13.5469),(17.2146)

N:11098,11766
R-squared:0.0131,0.0229
Adj R-squared:0.0128,0.0226
F-statistics:36.9308***,68.8984***


In [9]:
%time
print ''
# M2b
display_res(Y2009, Y2010, m2b_inDepV)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs

locInRatio:-3.9749***,-2.9592***
(0.8245),(0.7399)
wleTripNumber:-0.3055**,-0.0412
(0.1412),(0.1169)
locTripNumber:2.2316***,0.4133
(0.6299),(0.5072)
wleProductivity:0.1275**,0.0828*
(0.0545),(0.0450)
EP/locTrip:-2.4529***,-2.4095***
(0.0027),(0.0018)
const:16.1918***,14.0755***
(1.5468),(1.3951)

N:11098,11766
R-squared:0.9871,0.9936
Adj R-squared:0.9871,0.9936
F-statistics:170230.7301***,364412.3548***


In [10]:
%time
print ''
# M2c
display_res(Y2009, Y2010, m2c_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs

locInRatio:-62.0852***,-93.7240***
(6.7230),(8.2278)
wleTripNumber:-2.1597*,-1.7733
(1.1565),(1.3048)
locTripNumber:-1.4712,-15.7267***
(5.1545),(5.6829)
wleProductivity:7.6337***,8.8031***
(0.4669),(0.5161)
locProductivity:-14.5879***,-21.4777***
(0.3614),(0.4197)
const:309.5504***,579.6945***
(14.4534),(18.6049)

N:11098,11766
R-squared:0.1396,0.2009
Adj R-squared:0.1392,0.2005
F-statistics:359.8257***,591.2481***


In [11]:
%time
print ''
# M2d
display_res(Y2009, Y2010, m2d_inDepV)

CPU times: user 3 µs, sys: 4 µs, total: 7 µs
Wall time: 8.11 µs

locInRatio:-3.8548***,-3.2419***
(0.8240),(0.7391)
wleTripNumber:-0.3437**,-0.0172
(0.1413),(0.1166)
locTripNumber:2.1323***,0.7575
(0.6296),(0.5081)
wleProductivity:0.2186***,-0.0145
(0.0577),(0.0467)
EP/locTrip:-2.4481***,-2.4161***
(0.0029),(0.0020)
locProductivity:-0.2264***,0.3141***
(0.0472),(0.0416)
const:20.5947***,6.3312***
(1.7975),(1.7291)

N:11098,11766
R-squared:0.9872,0.9936
Adj R-squared:0.9872,0.9936
F-statistics:142144.0163***,305131.8118***
