In [1]:
import __init__
#
from IPython.display import HTML, display
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
def significance(pvalue):
    """Return the star marker for a p-value.

    '***' for pvalue < 0.01, '**' for pvalue < 0.05, '*' for pvalue < 0.1,
    and an empty string otherwise (including when no comparison holds,
    e.g. for NaN).
    """
    # Cutoffs are checked from strictest to loosest; the first match wins.
    for cutoff, stars in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue < cutoff:
            return stars
    return ''

In [3]:
def display_res(Y2009, Y2010, inDepV, depV=None):
    """Fit one OLS model per year and print both fits side by side.

    For each of the two data sets, the dependent variable is regressed on
    the columns in ``inDepV`` plus an added constant, using
    ``sm.OLS(..., missing='drop')``. For every regressor (and then the
    constant) one line is printed with the 2009 and 2010 coefficients,
    each followed by its significance stars, and a second line with the
    standard errors in parentheses. After a blank line the number of
    observations, R-squared, adjusted R-squared and the F-statistic
    (with stars from its p-value) are printed.

    Parameters
    ----------
    Y2009, Y2010 : table-like objects indexable by column name
        (the notebook passes the frames loaded with ``pd.read_csv``).
    inDepV : list of str
        Names of the independent-variable columns. Must be a list because
        it is concatenated with ``['const']``.
    depV : str, optional
        Name of the dependent-variable column. When omitted, the
        notebook-level ``dep_v`` is used, as before.

    Returns
    -------
    None. Results are only printed.
    """
    if depV is None:
        # Backward-compatible default: fall back to the notebook-level setting,
        # looked up at call time so it may be defined in a later cell.
        depV = dep_v
    results = []
    for df in [Y2009, Y2010]:
        y = df[depV]
        X = sm.add_constant(df[inDepV])
        results.append(sm.OLS(y, X, missing='drop').fit())
    res2009, res2010 = results
    # Regressors are reported in the given order, with the constant last.
    for idv in inDepV + ['const']:
        coef2009 = '%.4f' % res2009.params[idv] + significance(res2009.pvalues[idv])
        coef2010 = '%.4f' % res2010.params[idv] + significance(res2010.pvalues[idv])
        print('%s:' % idv + ','.join([coef2009, coef2010]))
        # Read the standard errors directly instead of reconstructing them as
        # params / tvalues, which yields NaN (0/0) for a coefficient of zero.
        print('(%.4f),(%.4f)' % (res2009.bse[idv], res2010.bse[idv]))
    print('')

    fvalue2009 = '%.4f' % res2009.fvalue + significance(res2009.f_pvalue)
    fvalue2010 = '%.4f' % res2010.fvalue + significance(res2010.f_pvalue)
    print('N:%d,%d' % (res2009.nobs, res2010.nobs))
    print('R-squared:%.4f,%.4f' % (res2009.rsquared, res2010.rsquared))
    print('Adj R-squared:%.4f,%.4f' % (res2009.rsquared_adj, res2010.rsquared_adj))
    print('F-statistics:%s,%s' % (fvalue2009, fvalue2010))

In [4]:
from information_boards import statisticsAllDrivers_ap_dpath
from information_boards import statisticsAllDriversMonth_ap_prefix
# Each year's filtered statistics are stored in their own CSV file in the same directory.
path2009 = '%s/Filtered-%s2009.csv' % (statisticsAllDrivers_ap_dpath, statisticsAllDriversMonth_ap_prefix)
path2010 = '%s/Filtered-%s2010.csv' % (statisticsAllDrivers_ap_dpath, statisticsAllDriversMonth_ap_prefix)
Y2009 = pd.read_csv(path2009)
Y2010 = pd.read_csv(path2010)
# Quick check: number of distinct driverID values in each year.
numDrivers2009 = len(set(Y2009['driverID']))
numDrivers2010 = len(set(Y2010['driverID']))
print('%d %d' % (numDrivers2009, numDrivers2010))

16022 19183


In [5]:
# Model specifications
# Dependent variable shared by every model.
dep_v = 'QTime/locTrip'
# Regressor of interest.
ib_impact = ['locInRatio']
# Control-variable sets: cv0 is the base set; cv1/cv2 each add one control,
# cv3 adds both.
cv0 = ['wleTripNumber', 'locTripNumber', 'wleProductivity']
cv1 = cv0 + ['EP/locTrip']
cv2 = cv0 + ['locProductivity']
cv3 = cv1 + ['locProductivity']
# M1 uses the regressor of interest alone; M2a-M2d add the control sets in turn.
m1_inDepV = ib_impact
m2a_inDepV, m2b_inDepV, m2c_inDepV, m2d_inDepV = (
    ib_impact + controls for controls in (cv0, cv1, cv2, cv3)
)

In [6]:
%time
print ''
# M1
display_res(Y2009, Y2010, m1_inDepV)

CPU times: user 1 µs, sys: 1e+03 ns, total: 2 µs
Wall time: 4.05 µs

locInRatio:-2.8314***,-3.6538***
(0.1075),(0.0831)
const:37.6371***,32.1873***
(0.0678),(0.0576)

N:100725,128898
R-squared:0.0068,0.0148
Adj R-squared:0.0068,0.0148
F-statistics:693.5361***,1932.8223***


In [7]:
%time
print ''
# M2a
display_res(Y2009, Y2010, m2a_inDepV)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.91 µs

locInRatio:0.9789***,-0.9082***
(0.1199),(0.0951)
wleTripNumber:0.0134***,0.0083***
(0.0004),(0.0003)
locTripNumber:0.0130***,0.0219***
(0.0023),(0.0019)
wleProductivity:-0.9708***,-0.8568***
(0.0094),(0.0069)
const:56.4660***,51.5178***
(0.2458),(0.2035)

N:100725,128898
R-squared:0.1310,0.1424
Adj R-squared:0.1310,0.1424
F-statistics:3796.3326***,5349.4829***


In [8]:
%time
print ''
# M2b
display_res(Y2009, Y2010, m2b_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 72 µs

locInRatio:-5.7634***,-5.3174***
(0.0638),(0.0523)
wleTripNumber:0.0034***,0.0031***
(0.0002),(0.0001)
locTripNumber:-0.0071***,-0.0020*
(0.0012),(0.0010)
wleProductivity:-0.0506***,-0.0881***
(0.0052),(0.0040)
EP/locTrip:-2.2534***,-2.0450***
(0.0043),(0.0037)
const:35.9838***,32.5828***
(0.1340),(0.1157)

N:100725,128898
R-squared:0.7642,0.7468
Adj R-squared:0.7642,0.7467
F-statistics:65284.9176***,76015.7818***


In [9]:
%time
print ''
# M2c
display_res(Y2009, Y2010, m2c_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs

locInRatio:-3.9866***,-5.1062***
(0.0685),(0.0562)
wleTripNumber:-0.0006***,-0.0019***
(0.0002),(0.0002)
locTripNumber:0.0113***,0.0163***
(0.0013),(0.0011)
wleProductivity:0.0592***,0.0762***
(0.0057),(0.0044)
locProductivity:-1.9963***,-1.6399***
(0.0043),(0.0033)
const:80.1405***,71.2616***
(0.1477),(0.1253)

N:100725,128898
R-squared:0.7237,0.7071
Adj R-squared:0.7237,0.7070
F-statistics:52765.3872***,62217.9167***


In [10]:
%time
print ''
# M2d
display_res(Y2009, Y2010, m2d_inDepV)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs

locInRatio:-5.4719***,-5.5354***
(0.0604),(0.0496)
wleTripNumber:0.0011***,0.0007***
(0.0002),(0.0001)
locTripNumber:-0.0008,0.0040***
(0.0011),(0.0010)
wleProductivity:0.0564***,0.0343***
(0.0050),(0.0039)
EP/locTrip:-1.4649***,-1.3289***
(0.0083),(0.0069)
locProductivity:-0.8314***,-0.6883***
(0.0076),(0.0057)
const:53.0113***,47.5009***
(0.2000),(0.1660)

N:100725,128898
R-squared:0.7895,0.7721
Adj R-squared:0.7895,0.7721
F-statistics:62946.2158***,72789.8538***
