# Create NumPy grids for training models such as CNNs

In [1]:
import geopandas as gpd
import pandas as pd
import shapely
import numpy as np
import pickle
import os

In [2]:
# Load the C28992R100 -> id lookup from cbs_id_koppel.pickle (the file is
# written by the "create cbs C28992R100 to id file" cells of this notebook).
with open('cbs_id_koppel.pickle', 'rb') as lookup_file:
    b = pickle.load(lookup_file)

In [3]:
def _city_label(filename):
    """Return ``filename`` without a trailing ``_filled_.csv``/``_filled.csv``/``.csv``."""
    # str.strip() removes a *set of characters* from both ends rather than a
    # suffix (the saved output shows a mangled "ndhoven15"), so cut a real
    # suffix instead. The exact suffix is not visible here, hence the candidates.
    for suffix in ("_filled_.csv", "_filled.csv", ".csv"):
        if filename.endswith(suffix):
            return filename[:-len(suffix)]
    return filename


def to_np_grid(year, grid_size = 100):
    """Rasterise a filled city CSV of ``year`` into a dense numpy grid.

    The CSV is read from ``../Data/filled/<year>``, its WKT ``geometry``
    column is parsed, ``C28992R100`` is mapped through the module-level
    lookup ``b`` and ``BU_CODE`` is dropped. The geometries are translated so
    the lower-left corner of the total bounds is (0, 0); every row is then
    written to the grid cell addressed by the lower-left corner of its
    geometry.

    Args:
        year: Name of the sub-directory of ``../Data/filled`` to read
            (converted with ``str``).
        grid_size: Edge length of one grid cell, in the units of the
            geometry coordinates. Defaults to 100, the value that used to be
            hard-coded.

    Returns:
        ``(grid, df)`` where ``grid`` is a float array of shape
        ``(x, y, n_columns - 1)`` (all columns except ``geometry``; cells
        without a row stay 0) and ``df`` is the translated frame.
        NOTE(review): the ``return`` sits inside the file loop, so only the
        first entry yielded by ``os.listdir`` is processed; this is kept
        because existing cells unpack exactly that result. ``None`` is
        returned when the directory has no entries.

    Raises:
        ValueError: If ``grid_size`` is not positive.
    """
    if grid_size <= 0:
        raise ValueError("grid_size must be positive, got {!r}".format(grid_size))

    for filename in os.listdir("../Data/filled/" + str(year)):
        print(_city_label(filename), year)

        df = pd.read_csv("../Data/filled/{}/{}".format(year, filename), index_col = 0)
        df.geometry = df.geometry.apply(shapely.wkt.loads)
        df = df.set_geometry("geometry")
        df.C28992R100 = df.C28992R100.map(b)
        df = df.drop(["BU_CODE"], axis = 1)

        # Translate the geometry so that the lower-left corner is (0, 0).
        minx, miny = df.geometry.total_bounds[:2]
        df["geometry"] = df["geometry"].translate(-minx, -miny)
        print(df.shape)

        # After the translation the upper bounds give the grid extent.
        max_x, max_y = df.total_bounds[2:]
        x = int(max_x / grid_size)
        y = int(max_y / grid_size)
        z = df.shape[1] - 1  # number of columns without the geometry column
        grid = np.zeros((x, y, z))

        # Write every row into the cell of its geometry's lower-left corner.
        for _, row in df.iterrows():
            cell_minx, cell_miny = row.geometry.bounds[:2]
            grid[int(cell_minx / grid_size), int(cell_miny / grid_size)] = row.drop("geometry")

        return grid, df
    # Saving the grids to disk is currently disabled:
#         np.save("../Data/filled/grids/{}/{}".format(year, _city_label(filename)), grid)


In [8]:
# to_np_grid returns from inside its file loop, so this yields the grid and
# frame of the first file listed for 2015 (the output below shows 's-Gravenhage15).
grid, df = to_np_grid(2015)

's-Gravenhage15 2015
(8818, 43)


In [10]:
grid.shape

(164, 135, 42)

In [16]:
grid[0,0]

array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       2.0000000e+00, 1.4950000e+02, 1.0700000e+03, 0.0000000e+00,
       1.0280374e-01, 2.0093457e-01, 2.2897196e-01, 3.1308413e-01,
       1.6822430e-01, 1.9626169e-01, 9.3457940e-03, 8.4000000e+01,
       1.5000000e+01, 0.0000000e+00, 3.0600000e+01, 4.6600000e+03,
       2.2900000e+03, 2.8037382e-02, 1.3000000e+00, 1.5000000e+00,
       1.1000000e+00, 1.5000000e+00, 9.0000000e-01, 8.3000000e+00,
       1.1900000e+01, 3.4800000e+02, 4.4300000e+01, 2.1100000e+01,
       1.9700000e+03, 1.9700000e+03, 1.9700000e+03, 0.0000000e+00,
       2.2388080e+06, 0.0000000e+00])

In [15]:
df.iloc[0]

bijeenkomstfunctiesum                                                           0.0
gezondheidszorgfunctiesum                                                       0.0
industriefunctiesum                                                             0.0
kantoorfunctiesum                                                               0.0
logiesfunctiesum                                                                0.0
onderwijsfunctiesum                                                             0.0
sportfunctiesum                                                                 0.0
winkelfunctiesum                                                                0.0
woonfunctiesum                                                                  1.0
oppervlakteVerblijfsobjectmean                                                148.0
inwoner_g                                                                     395.0
geboorte_g                                                                  

In [6]:
# Run to_np_grid for every study year and keep each result under its year.
# The previous loop rebound `grid` to the (grid, df) tuple of the last year,
# which discarded the other years and replaced the array that the inspection
# cells further down index with `grid[...]`.
grids_by_year = {}
for year in [2015,2016,2017,2018,2019]:
    grids_by_year[year] = to_np_grid(year)


's-Gravenhage15 2015
(8818, 43)
's-Hertogenbosch15 2015
(3528, 43)
Alkmaar15 2015
(1978, 43)
Amersfoortvbo.csv15 2015
(3835, 43)
Apeldoorn15 2015
(3495, 43)
Breda15 2015
(4404, 43)
Deventer15 2015
(1693, 43)
ndhoven15 2015
(6133, 43)
Enschede15 2015
(5908, 43)
Leeuwardenvbo.csv15 2015
(1758, 43)
Maastricht15 2015
(3284, 43)
Middelburg15 2015
(952, 43)
Nijmegen15 2015
(3494, 43)
Tilburg15 2015
(3550, 43)
Utrecht15 2015
(6288, 43)
Venlovbo.csv15 2015
(2336, 43)
Zwolle15 2015
(3947, 43)
's-Gravenhage16 2016
(8926, 43)
's-Hertogenbosch16 2016
(3501, 43)
Alkmaar16 2016
(1987, 43)
Amersfoortvbo.csv16 2016
(3870, 43)
Apeldoorn16 2016
(3504, 43)
Breda16 2016
(4434, 43)
Deventer16 2016
(1721, 43)
ndhoven16 2016
(6161, 43)
Enschede16 2016
(5929, 43)
Leeuwardenvbo.csv16 2016
(1770, 43)
Maastricht16 2016
(3283, 43)
Middelburg16 2016
(967, 43)
Nijmegen16 2016
(3571, 43)
Tilburg16 2016
(3834, 43)
Utrecht16 2016
(6423, 43)
Venlovbo.csv16 2016
(2336, 43)
Zwolle16 2016
(3965, 43)
's-Gravenhage17 2017
(

In [41]:
# Count the entries equal to 1 in the slice taken at the last index of axis 1.
# NOTE(review): on the 3-D grid `grid[:, -1]` selects along axis 1, not the
# last feature channel (that would be `grid[:, :, -1]`) - confirm the intent.
(grid[:,-1] == 1).sum()

6

In [39]:
# Find the (axis 0, axis 1, channel) position holding the value 2008981.
# NOTE(review): presumably a mapped C28992R100 id - confirm.
np.where(grid == 2008981)

(array([150], dtype=int64),
 array([132], dtype=int64),
 array([40], dtype=int64))

In [51]:
# Distinct values present in the third-to-last feature channel of the grid.
np.unique(grid[:,:,-3])

array([0., 1.])

In [47]:
grid.shape

(164, 135, 42)

# #############################################

Create the CBS C28992R100-to-id lookup file

In [232]:
# Read the NL_vierkant100m shapefile into a GeoDataFrame.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# `vierkant` used below presumably stems from an earlier run - re-run this
# cell to completion before relying on it.
vierkant = gpd.read_file("../Data/NLvierkant/NL_vierkant100m.shp")

KeyboardInterrupt: 

In [174]:
vierkant.reset_index().rename(columns = {"index" : "id"}).id.values

array([      0,       1,       2, ..., 3723285, 3723286, 3723287],
      dtype=int64)

In [193]:
# Pair every C28992R100 code with the frame's index value (exposed as "id").
# The reset/rename copy is built once instead of twice; the saved output of
# the id column runs up to 3723287, so each copy is expensive.
vierkant_with_id = vierkant.reset_index().rename(columns = {"index" : "id"})
zip_iterator = zip(vierkant_with_id.C28992R100.values, vierkant_with_id.id.values)

In [194]:
# Materialise the (C28992R100, id) pairs into a lookup dict. A zip object can
# be consumed only once, so zip_iterator is exhausted after this cell.
cbs_vierkant_dict = dict(zip_iterator)

In [200]:
# Replace the C28992R100 codes in `br` with their ids from the lookup dict;
# Series.map yields NaN for codes that are not keys of the dict.
# NOTE(review): `br` is not defined in any visible cell - confirm its origin.
br.C28992R100 = br.C28992R100.map(cbs_vierkant_dict)

In [198]:
br.C28992R100.iloc[0]

'E1824N3269'

In [204]:
# Persist the C28992R100 -> id lookup dict with the highest pickle protocol
# available, so other cells can reload it from cbs_id_koppel.pickle.
with open('cbs_id_koppel.pickle', 'wb') as out_file:
    pickle.dump(cbs_vierkant_dict, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
def load_data(year):
    """Load every filled CSV of ``year`` into one GeoDataFrame.

    All entries of ``../Data/filled/<year>/`` are read with ``pd.read_csv``
    and concatenated in ``os.listdir`` order. The WKT ``geometry`` column is
    parsed into shapely geometries and the ``Unnamed: 0`` column written by
    the CSV export is dropped.

    Args:
        year: Name of the sub-directory of ``../Data/filled`` to read
            (converted with ``str``).

    Returns:
        A GeoDataFrame with a fresh 0..n-1 index holding the rows of all
        files. The name of every file containing a missing geometry is
        printed while loading.

    Raises:
        ValueError: If the directory contains no entries.
    """
    path = "../Data/filled/" + str(year) + "/"

    # Collect the frames and concatenate once; growing a frame with
    # pd.concat inside the loop copies all previous rows on every iteration.
    frames = []
    for filename in os.listdir(path):
        df1 = pd.read_csv(path + filename)
        if df1.geometry.isna().any():
            print(filename)
        frames.append(df1)
    if not frames:
        raise ValueError("no files to load in " + path)

    # ignore_index=True replaces the former reset_index() + drop("index").
    df = pd.concat(frames, ignore_index=True)

    # Parse the WKT strings before building the GeoDataFrame so the geometry
    # column is valid when it is registered as the active geometry.
    df["geometry"] = df["geometry"].apply(shapely.wkt.loads)
    df = gpd.GeoDataFrame(df, geometry="geometry")

    df = df.drop(["Unnamed: 0"], axis = 1)
    return df

In [7]:
import libpysal

In [8]:
# Load the combined frame of every study year, 2015 through 2019, in order.
df15, df16, df17, df18, df19 = [
    load_data(study_year) for study_year in (2015, 2016, 2017, 2018, 2019)
]

In [9]:
df15

Unnamed: 0,bijeenkomstfunctiesum,gezondheidszorgfunctiesum,industriefunctiesum,kantoorfunctiesum,logiesfunctiesum,onderwijsfunctiesum,sportfunctiesum,winkelfunctiesum,woonfunctiesum,oppervlakteVerblijfsobjectmean,...,mean_WOZ,PLaagste40Inkomen,PHoogste20Inkomen,bouwjaarmean,bouwjaarmin,bouwjaarmax,C28992R100,geometry,y,BU_CODE
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,88.000000,...,357.786037,44.9,16.1,1978.000000,1948.000000,2008.000000,E1459N4184,"POLYGON ((145900.000 418500.000, 146000.000 41...",0.0,{'BU02630209': 4}
1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,200.000000,...,356.589383,44.9,16.1,1974.000000,1930.000000,1994.000000,E1462N4184,"POLYGON ((146200.000 418500.000, 146300.000 41...",0.0,{'BU02630209': 6}
2,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,375.666667,...,356.589383,44.9,16.1,1989.500000,1970.000000,2011.000000,E1463N4184,"POLYGON ((146300.000 418500.000, 146400.000 41...",0.0,{'BU02630209': 9}
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,228.000000,...,359.841218,42.0,16.0,1973.000000,1973.000000,1973.000000,E1496N4184,"POLYGON ((149600.000 418500.000, 149700.000 41...",0.0,{'BU02630009': 2}
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,121.000000,...,346.000000,44.2,20.9,1970.851618,1949.308454,1983.316989,E1514N4184,"POLYGON ((151400.000 418500.000, 151500.000 41...",1.0,{'BU02630002': 2}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,141.000000,...,275.000000,39.0,21.1,1961.500000,1959.000000,1964.000000,E2057N4952,"POLYGON ((205700.000 495300.000, 205800.000 49...",0.0,{'BU01935200': 2}
56363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,246.000000,...,282.000000,31.3,20.2,1985.000000,1985.000000,1985.000000,E2101N4952,"POLYGON ((210100.000 495300.000, 210200.000 49...",0.0,{'BU01771008': 7}
56364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,116.333333,...,499.000000,39.3,23.7,1983.000000,1969.000000,1997.000000,E2046N4951,"POLYGON ((204600.000 495200.000, 204700.000 49...",0.0,{'BU01935210': 4}
56365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,188.000000,...,499.000000,39.3,23.7,2001.500000,1999.000000,2004.000000,E2054N4951,"POLYGON ((205400.000 495200.000, 205500.000 49...",0.0,{'BU01935210': 2}


In [5]:
# Reload the C28992R100 -> id lookup stored in cbs_id_koppel.pickle.
with open('cbs_id_koppel.pickle', 'rb') as lookup_file:
    b = pickle.load(lookup_file)

In [13]:
# Disabled: index every frame by its mapped id before building the weights.
# df15["id"] = df15.C28992R100.map(b)
# df16["id"] = df16.C28992R100.map(b)
# df17["id"] = df17.C28992R100.map(b)
# df18["id"] = df18.C28992R100.map(b)
# df19["id"] = df19.C28992R100.map(b)

# df15 = df15.set_index("id")
# df16 = df16.set_index("id")
# df17 = df17.set_index("id")
# df18 = df18.set_index("id")
# df19 = df19.set_index("id")

# Binary distance-band weights per year with a threshold of 150
# (NOTE(review): presumably metres, given the 100-unit squares - confirm).
w15, w16, w17, w18, w19 = [
    libpysal.weights.DistanceBand.from_dataframe(
        yearly_frame, threshold=150, binary = True, silence_warnings = True
    )
    for yearly_frame in (df15, df16, df17, df18, df19)
]

In [14]:
# Persist each year's weights object to its own pickle file, using the
# highest pickle protocol available.
for pickle_name, weights in (
    ('w15c.pickle', w15),
    ('w16c.pickle', w16),
    ('w17c.pickle', w17),
    ('w18c.pickle', w18),
    ('w19c.pickle', w19),
):
    with open(pickle_name, 'wb') as f:
        pickle.dump(weights, f, pickle.HIGHEST_PROTOCOL)