In [2]:
using CSV, DataFrames, GLM, RegressionTables
using Statistics

In [None]:
"""
    scale(df::DataFrame; cols::Vector{String} = names(df))

Return a copy of `df` in which every column listed in `cols` is replaced by
`(x .- mean(x)) ./ std(x)`, computed from that column of `df`. Columns not listed
in `cols` are carried over unchanged, and `df` itself is not modified.
"""
function scale(df::DataFrame; cols::Vector{String} = names(df))
    scaled = copy(df)
    for name in cols
        column = df[!, name]
        center = mean(column)
        spread = std(column)
        scaled[!, name] = (column .- center) ./ spread
    end
    return scaled
end

"""
    scale(x::AbstractVector)

Return a new vector holding `x` with its mean subtracted and the result divided by
`std(x)`.
"""
function scale(x::AbstractVector)
    μ = mean(x)
    σ = std(x)
    return @. (x - μ) / σ
end

scale (generic function with 2 methods)

In [179]:
# Read the merged per-city CSV files into one DataFrame each.
bp = CSV.read("../src/regression/budapest/bp_socioecon_merged5.csv", DataFrame)
madrid = CSV.read("../src/regression/madrid/madrid_socioecon_merged2.csv", DataFrame)
helsinki = CSV.read("../src/regression/helsinki/helsinki_socioecon_merged4.csv", DataFrame)
;

In [None]:
# Names of the amenity columns used below: nine amenity types, each present once
# with the `_multimodal` suffix and once with the `_walk15` suffix.
cols = String[
    # multimodal variants
    "cultural_institutions_multimodal",
    "drugstores_multimodal",
    "groceries_multimodal",
    "healthcare_multimodal",
    "parks_multimodal",
    "religious_organizations_multimodal",
    "restaurants_multimodal",
    "schools_multimodal",
    "services_multimodal",
    # walk15 variants
    "cultural_institutions_walk15",
    "drugstores_walk15",
    "groceries_walk15",
    "healthcare_walk15",
    "parks_walk15",
    "religious_organizations_walk15",
    "restaurants_walk15",
    "schools_walk15",
    "services_walk15",
];


In [None]:
"""
    is_there_amenity(x)

Return `1` when `x > 0` and `0` otherwise.
"""
is_there_amenity(x) = x > 0 ? 1 : 0

# Replace every amenity column listed in `cols` by its 0/1 indicator, for each city
# table. The broadcast assignment updates the table itself, so the table variables
# are deliberately not rebound to the value of the `.=` expression.
for city in (bp, madrid, helsinki)
    city[!, cols] .= is_there_amenity.(city[!, cols])
end
;

In [None]:
# For every city table add two row-wise totals over the nine amenity types:
# `walk_sum` adds the `<amenity>_walk15` columns and `multimod_sum` adds the
# `<amenity>_multimodal` columns. Within each table `walk_sum` is created before
# `multimod_sum`, so the resulting column order matches the hand-written sums
# this loop replaces.
let
    amenities = (
        "cultural_institutions",
        "drugstores",
        "groceries",
        "healthcare",
        "parks",
        "religious_organizations",
        "restaurants",
        "schools",
        "services",
    )
    totals = ("walk_sum" => "walk15", "multimod_sum" => "multimodal")

    for city in (bp, helsinki, madrid), (target, suffix) in totals
        # Element-wise sum of the nine columns for this travel mode.
        city[!, target] = sum(city[!, string(amenity, "_", suffix)] for amenity in amenities)
    end
end
;


In [183]:
# Main regressions without interactions

# Response variables: multimodal value minus walk15 value of each measure.
# Budapest has two Gini pairs; Helsinki and Madrid share one set of column names.
bp.gini_diff = bp.gini_multimodal .- bp.gini_walk15
bp.gini_diff_house = bp.gini_house_multimodal .- bp.gini_house_walk15

for city in (helsinki, madrid)
    city.gini_diff = city.weighted_gini_multi .- city.weighted_gini_walk
end

# Difference between the two amenity totals, computed the same way for all tables.
for city in (bp, madrid, helsinki)
    city.access_diff = city.multimod_sum .- city.walk_sum
end
;

In [184]:
# Save the Budapest table, including the derived columns, next to the notebook.
bp |> CSV.write("bp.jl.csv")

"bp.jl.csv"

In [None]:
# Budapest: `gini_diff` on `gini_walk15` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(gini_diff ~ gini_walk15 + area_difference + ellipticity +
#                distance_betweenness, data = bp)
bp1_noint = lm(
    @formula(
        gini_diff ~
        gini_walk15 +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    bp,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

gini_diff ~ 1 + gini_walk15 + area_difference + ellipticity + distance_betweenness

Coefficients:
────────────────────────────────────────────────────────────────────────────────────────────
                            Coef.   Std. Error       t  Pr(>|t|)     Lower 95%     Upper 95%
────────────────────────────────────────────────────────────────────────────────────────────
(Intercept)            0.0755072   0.00205188    36.80    <1e-99   0.0714845     0.0795299
gini_walk15           -0.342529    0.00856171   -40.01    <1e-99  -0.359314     -0.325744
area_difference        6.98962e-5  0.000132644    0.53    0.5983  -0.000190153   0.000329945
ellipticity            0.0161454   0.00148294    10.89    <1e-26   0.0132381     0.0190527
distance_betweenness  -0.00332919  9.77919e-5   -34.04    <1e-99 

In [None]:
# Budapest: `gini_diff_house` on `gini_walk15` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(bp$gini_diff_house ~ gini_walk15 + area_difference + ellipticity +
#                distance_betweenness, data = bp)
bp2_noint = lm(
    @formula(
        gini_diff_house ~
        gini_walk15 +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    bp,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

gini_diff_house ~ 1 + gini_walk15 + area_difference + ellipticity + distance_betweenness

Coefficients:
────────────────────────────────────────────────────────────────────────────────────────────
                             Coef.   Std. Error      t  Pr(>|t|)     Lower 95%     Upper 95%
────────────────────────────────────────────────────────────────────────────────────────────
(Intercept)            0.00236003   0.0031714     0.74    0.4568  -0.00385751    0.00857756
gini_walk15            0.0917521    0.0132331     6.93    <1e-11   0.0658087     0.117696
area_difference        0.00128462   0.000205016   6.27    <1e-09   0.000882687   0.00168656
ellipticity            0.00113118   0.00229205    0.49    0.6217  -0.00336239    0.00562475
distance_betweenness  -0.000950724  0.000151148  -6.29    

In [None]:
# Helsinki: `gini_diff` on `weighted_gini_walk` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(gini_diff ~ weighted_gini_walk + area_difference + ellipticity +
#                distance_betweenness, data = helsinki)
helsinki1_noint = lm(
    @formula(
        gini_diff ~
        weighted_gini_walk +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    helsinki,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

gini_diff ~ 1 + weighted_gini_walk + area_difference + ellipticity + distance_betweenness

Coefficients:
────────────────────────────────────────────────────────────────────────────────────────────
                             Coef.  Std. Error       t  Pr(>|t|)     Lower 95%     Upper 95%
────────────────────────────────────────────────────────────────────────────────────────────
(Intercept)            0.134816     0.0030788    43.79    <1e-99   0.128779      0.140853
weighted_gini_walk    -0.420133     0.0093998   -44.70    <1e-99  -0.438565     -0.401702
area_difference        0.000135629  6.06152e-5    2.24    0.0253   1.67715e-5    0.000254486
ellipticity           -0.000761388  0.001023     -0.74    0.4568  -0.00276734    0.00124456
distance_betweenness  -0.000172356  5.94224e-5   -2.90    

In [None]:
# Madrid: `gini_diff` on `weighted_gini_walk` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(gini_diff ~ weighted_gini_walk + area_difference + ellipticity +
#                distance_betweenness, data = madrid)
madrid1_noint = lm(
    @formula(
        gini_diff ~
        weighted_gini_walk +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    madrid,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

gini_diff ~ 1 + weighted_gini_walk + area_difference + ellipticity + distance_betweenness

Coefficients:
───────────────────────────────────────────────────────────────────────────────────────
                           Coef.  Std. Error       t  Pr(>|t|)   Lower 95%    Upper 95%
───────────────────────────────────────────────────────────────────────────────────────
(Intercept)            6.35123    0.171134     37.11    <1e-99   6.01574     6.68673
weighted_gini_walk    -0.178915   0.00470709  -38.01    <1e-99  -0.188143   -0.169687
area_difference       -0.0144015  0.0103702    -1.39    0.1650  -0.0347314   0.00592842
ellipticity           -0.222714   0.0849451    -2.62    0.0088  -0.389242   -0.0561854
distance_betweenness  -0.0809069  0.00592679  -13.65    <1e-40  -0.0925259  -0.0692879
─────

In [None]:
# Budapest: `access_diff` on `walk_sum` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(access_diff ~ walk_sum + area_difference + ellipticity +
#                distance_betweenness, data = bp)
bp1a_noint = lm(
    @formula(
        access_diff ~
        walk_sum +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    bp,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

access_diff ~ 1 + walk_sum + area_difference + ellipticity + distance_betweenness

Coefficients:
────────────────────────────────────────────────────────────────────────────────────────
                           Coef.  Std. Error        t  Pr(>|t|)   Lower 95%    Upper 95%
────────────────────────────────────────────────────────────────────────────────────────
(Intercept)            7.81914    0.0630147    124.08    <1e-99   7.6956      7.94268
walk_sum              -0.871587   0.00552744  -157.68    <1e-99  -0.882424   -0.860751
area_difference        0.0521302  0.00369288    14.12    <1e-43   0.0448903   0.0593701
ellipticity           -0.150181   0.0421979     -3.56    0.0004  -0.23291    -0.067452
distance_betweenness  -0.0143395  0.00224907    -6.38    <1e-09  -0.0187488  -0.00993018
──────

In [None]:
# Helsinki: `access_diff` on `walk_sum` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(access_diff ~ walk_sum + area_difference + ellipticity +
#                distance_betweenness, data = helsinki)
helsinki1a_noint = lm(
    @formula(
        access_diff ~
        walk_sum +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    helsinki,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

access_diff ~ 1 + walk_sum + area_difference + ellipticity + distance_betweenness

Coefficients:
──────────────────────────────────────────────────────────────────────────────────────
                           Coef.  Std. Error       t  Pr(>|t|)   Lower 95%   Upper 95%
──────────────────────────────────────────────────────────────────────────────────────
(Intercept)            6.43826    0.0994535    64.74    <1e-99   6.24324     6.63327
walk_sum              -0.754409   0.0100793   -74.85    <1e-99  -0.774173   -0.734645
area_difference        0.0830943  0.00435907   19.06    <1e-75   0.0745469   0.0916418
ellipticity            0.26535    0.0736195     3.60    0.0003   0.120993    0.409706
distance_betweenness  -0.0471888  0.00469146  -10.06    <1e-22  -0.056388   -0.0379896
──────────────────

In [None]:
# Madrid: `access_diff` on `walk_sum` plus the three covariates
# `area_difference`, `ellipticity` and `distance_betweenness`.
# R original: lm(access_diff ~ walk_sum + area_difference + ellipticity +
#                distance_betweenness, data = madrid)
madrid1a_noint = lm(
    @formula(
        access_diff ~
        walk_sum +
        area_difference +
        ellipticity +
        distance_betweenness
    ),
    madrid,
)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

access_diff ~ 1 + walk_sum + area_difference + ellipticity + distance_betweenness

Coefficients:
──────────────────────────────────────────────────────────────────────────────────────
                           Coef.  Std. Error       t  Pr(>|t|)   Lower 95%   Upper 95%
──────────────────────────────────────────────────────────────────────────────────────
(Intercept)            5.10217    0.0837467    60.92    <1e-99   4.93799     5.26635
walk_sum              -0.570749   0.00815176  -70.02    <1e-99  -0.58673    -0.554768
area_difference        0.0918017  0.00510078   18.00    <1e-69   0.081802    0.101801
ellipticity            0.0950967  0.0420187     2.26    0.0237   0.0127225   0.177471
distance_betweenness  -0.0418843  0.0031428   -13.33    <1e-39  -0.0480455  -0.0357231
───────────────────

In [None]:
# h1a = helsinki1a_noint
# m1a = madrid1a_noint
# h1 = helsinki1_noint
# m1 = madrid1_noint
# b1a = bp1a_noint
# b1 = bp1_noint
# b2 = bp2_noint

# stargazer(h1a, m1a, b1a, h1, m1, b2, b1,
#           type="latex",
#           style="aer",
#           column.labels = c("Helsinki - Access",
#                             "Madrid - Access",
#                             "BP - Access",
#                             "Helsinki - Gini",
#                             "Madrid - Gini",
#                             "BP residential - Gini",
#                             "BP experienced - Gini"),
#           dep.var.labels.include = F,
#           out="SI_Reg_1_noint.tex")

In [None]:
# Side-by-side ASCII summary of the seven no-interaction models. The trailing
# comments give the dependent variable of each group of columns.
regtable(
    helsinki1a_noint, madrid1a_noint, bp1a_noint,  # access_diff
    helsinki1_noint, madrid1_noint,                # gini_diff
    bp2_noint,                                     # gini_diff_house
    bp1_noint;                                     # gini_diff
    render = AsciiTable(),
    group = false,
    # Not applied: labels = Dict("access_diff" => "Helsinki - Access")
)



--------------------------------------------------------------------------------------------------------------
                                  access_diff                    gini_diff         gini_diff_house   gini_diff
                       ---------------------------------   ---------------------   ---------------   ---------
                             (1)         (2)         (3)         (4)         (5)               (6)         (7)
--------------------------------------------------------------------------------------------------------------
(Intercept)             6.438***    5.102***    7.819***    0.135***    6.351***             0.002    0.076***
                         (0.099)     (0.084)     (0.063)     (0.003)     (0.171)           (0.003)     (0.002)
walk_sum               -0.754***   -0.571***   -0.872***                                                      
                         (0.010)     (0.008)     (0.006)                                                      
