In [4]:
# Standard library first: `os` is needed to configure geopandas before it loads.
import os
import sys

# USE_PYGEOS is only consulted when geopandas is first imported, so it must be
# set before geopandas itself -- and before the third-party packages below,
# which may import geopandas on their own -- is loaded.
os.environ["USE_PYGEOS"] = "0"

from pysal.lib import weights
from matplotlib import pyplot as plt

import pandas as pd
import geopandas as gpd
import folium
import segregation as seg

# Make the project's helper modules importable from the notebook's directory.
sys.path.insert(0, "../src/helper/")

from landprice_merger import (
    zensus_landprice_merger,
    landprice_neighborhood_merger,
    combine_landprice_with_geodata,
)
from csv_merger import combine_csvs
from principal_component_analysis import combine_PCA_datasets


In [5]:
# Cities covered by the data set; the notebook analyses cities[0].
cities = ["Berlin", "Bremen", "Dresden", "Frankfurt", "Köln"]

# Repository root, expressed relative to the notebook's working directory.
main_path = os.path.join(os.getcwd(), "..")

# All three inputs live side by side under res/data/DLR.
dlr_dir = os.path.join(main_path, "res", "data", "DLR")
path_zensus = os.path.join(dlr_dir, "2 Zensus")
path_land = os.path.join(dlr_dir, "1 Land Prices")
path_neigh = os.path.join(dlr_dir, "3 Neighborhoods")


In [6]:
# Load the census tables through the project helpers and attach them to the
# first configured city.
city = cities[0]
df_zensus = combine_csvs(str_path=path_zensus)
gdf_zensus = combine_PCA_datasets(df_zensus=df_zensus, str_city=city, str_path=path_zensus)


In [7]:
# Load the land-price tables, join them with the city's geodata, then attach
# neighborhood information -- all via the project helpers.
city = cities[0]
df_land_prices = combine_csvs(str_path=path_land)
gdf_landprices = combine_landprice_with_geodata(
    df_landprice=df_land_prices,
    str_city=city,
    str_path=path_land,
)
gdf_landprices_names = landprice_neighborhood_merger(
    gdf_landprice=gdf_landprices,
    str_city=city,
    str_path_neigh=path_neigh,
)


In [41]:
# Combine the neighborhood-tagged land prices with the census frame.
result = zensus_landprice_merger(gdf_landprices=gdf_landprices_names, gdf_zensus=gdf_zensus)

In [42]:
# Display the column index of the merged frame to see which variables are available.
result.columns

Index(['Grid_Code', 'City_Code', 'geometry', 'buildings_total_units',
       'n_owned_without_current_household',
       'n_rented_without_current_household', 'n_vacant',
       'we_private_individuals', 'we_private_companies_legal_entities',
       'we_public_companies', 'we_housing_cooperative', 'w_less_30', 'r_6',
       'r_7_and_more', 'gs_other_building_with_living_space', 'j_1979_1995',
       'j_1996_2008', 'j_2009_and_later', 'e_community_of_apartment_owners',
       'e_private_housing_company', 'e_other_private_company',
       'e_federal_or_state_government', 'e_nonprofit_organization',
       'gt_semidetached_house', 'gt_other_type_of_building',
       'gg_detached_single_family_house', 'gg_detached_twofamily',
       'gg_twofamily_semidetached_house', 'gg_twofamily_terraced_house',
       'gg_apartment_3_6', 'h_floor', 'h_stoves_night', 'h_no_heating',
       'z_1_apart', 'z_2_apart', 'z_3_6_apart', 'City', 'families_total_units',
       'famk_couple_children_more_18', 'fam

In [43]:
# Columns used by the segregation indices: two group counts and the total.
single_var_1 = "hhgr_6_more"
single_var_2 = "hhleb_single_mothers"
total_pop_var = "households_total_units"

# Cast the count columns to integers (TODO from the author: do this in utils).
for count_column in (total_pop_var, single_var_1, single_var_2):
    result[count_column] = result[count_column].astype(int)

In [44]:
# A-spatial segregation index: one Interaction estimate per group column,
# both measured against the same total-population column.
int1, int2 = (
    seg.singlegroup.Interaction(
        data=result, group_pop_var=group_var, total_pop_var=total_pop_var
    )
    for group_var in (single_var_1, single_var_2)
)

interaction_template = "Interaction of owner category %s: %.2f and Interaction of owner category %s: %.2f"
print(interaction_template % (single_var_1, int1.statistic, single_var_2, int2.statistic))


Interaction of owner category hhgr_6_more: 0.92 and Interaction of owner category hhleb_single_mothers: 0.88


In [45]:
# Average every numeric column per neighborhood.
# numeric_only=True is required on pandas >= 2.0, where mean() raises a
# TypeError on non-numeric columns (e.g. geometry or city names) instead of
# silently dropping them as older versions did.
result_grouped = (
    result.groupby("Neighborhood_Name")
    .mean(numeric_only=True)
    .reset_index()
)

In [46]:
# Neighborhood polygons for the analysed city, read from the project's data
# directory rather than a machine-specific absolute path.
# NOTE(review): assumes the file is stored in `path_neigh` as
# "Neighborhoods_<City>.gpkg" -- confirm against the repository layout.
# NOTE: despite its name, this frame holds the neighborhoods of cities[0].
zensus_bremen_grid = gpd.read_file(
    os.path.join(path_neigh, "Neighborhoods_" + cities[0] + ".gpkg")
)

idx_column = "Neighborhood_Name"

# Attach the neighborhood geometry to the per-neighborhood aggregates.
result_grouped = result_grouped.merge(
    zensus_bremen_grid, on=[idx_column, "Neighborhood_FID"]
)

In [47]:
# Promote the merged frame to a GeoDataFrame, using its "geometry" column
# as the active geometry.
neighborhood_geometry = result_grouped["geometry"]
gdf = gpd.GeoDataFrame(result_grouped, geometry=neighborhood_geometry)


In [48]:
# Interactive base map showing the neighborhood polygons in a single colour.
m = gdf.explore(
    height=500,
    width=1000,
    color="gray",
    name="Zensus Neighborhood Cells",
)

In [49]:
# Add a layer switcher to the map, then display the map as the cell output.
layer_control = folium.LayerControl()
layer_control.add_to(m)
m

In [50]:
# Spatial counterpart of the interaction index, computed on the
# neighborhood-level GeoDataFrame for both group columns.
dint1 = seg.singlegroup.DistanceDecayInteraction(
    data=gdf, group_pop_var=single_var_1, total_pop_var=total_pop_var
)
dint2 = seg.singlegroup.DistanceDecayInteraction(
    data=gdf, group_pop_var=single_var_2, total_pop_var=total_pop_var
)

print(
    "Interaction (spatial) of households category %s: %.2f and Interaction (spatial) of households category %s: %.2f"
    % (single_var_1, dint1.statistic, single_var_2, dint2.statistic)
)

# Share of the first group in percent of all households; neighborhoods with a
# zero total get 0 instead of a division-by-zero result.
perc_column = single_var_1 + "_perc"
group_counts = gdf[single_var_1]
total_counts = gdf[total_pop_var]
gdf[perc_column] = (group_counts * 100 / total_counts).where(total_counts != 0, 0)

# Choropleth of that share. The layer is named after the plotted variable
# (the previous label, "Seniors > 65yo", did not match the column shown).
m = gdf.explore(
    height=500,
    width=1000,
    name="Share of " + single_var_1 + " (%)",
    column=perc_column,
    scheme="EqualInterval",
    cmap="inferno",
    legend=True,
)

folium.LayerControl().add_to(m)
m

Interaction (spatial) of households category hhgr_6_more: 1.01 and Interaction (spatial) of households category hhleb_single_mothers: 0.92
