# Clean and Preprocess Transaction Data 

In [1]:
import pandas as pd

In [2]:
df_total = pd.read_parquet('../../housing-data/original/df_total_ml.parquet')
df_spatial = pd.read_parquet('../../housing-data/original/ml_dataset_spatial.parquet')

In [3]:
extra_total = df_total.columns.difference(df_spatial.columns)
df_combined = df_spatial.merge(df_total[extra_total.append(pd.Index(["TRANSID"]))], on="TRANSID", how='inner')

In [4]:
# Repeated column
df_combined.drop(["POSTCODE4"], inplace=True, axis=1)

In [5]:
# Define Transaction Features

transaction_features = ['TRANSID', 'DATUM', 'KOOPSOM', 'WONINGTYPE', 'SOC', 'CALCOPP', 'KAVOPP',
       'BOUWJAAR', 'PROVINCIECODE','COROPPLUSCODE', 'GEMEENTECODE', 'WIJKCODE', 'BUURTCODE',
       'POSTCODE', 'LAT', 'LON', 'BESTEMMING', 'DUMMY_BERGING',
       'DUMMY_GARAGE', 'DUMMY_MONUMENT', 'FOTOWIJZER_TYPE']

In [6]:
# Drop all other neighborhood Features
df_combined = df_combined[transaction_features]
df_combined

Unnamed: 0,TRANSID,DATUM,KOOPSOM,WONINGTYPE,SOC,CALCOPP,KAVOPP,BOUWJAAR,PROVINCIECODE,COROPPLUSCODE,...,WIJKCODE,BUURTCODE,POSTCODE,LAT,LON,BESTEMMING,DUMMY_BERGING,DUMMY_GARAGE,DUMMY_MONUMENT,FOTOWIJZER_TYPE
0,8324024,13/3/2023,378000,T,1131,124,119,1976,28,3002,...,WK197804,BU19780402,3381CE,51.850627,4.893862,permanent,1,0,0,6
1,4695633,1/6/2016,257000,T,1131,145,208,2000,27,2000,...,WK037507,BU03750702,1945RW,52.492774,4.660815,permanent,1,0,0,6
2,6267016,16/11/2020,397500,T,1131,140,130,2014,25,1500,...,WK026809,BU02680970,6663JD,51.875469,5.868624,permanent,1,0,0,6
3,6428993,1/2/2021,1100000,T,1136,261,162,1912,28,2601,...,WK051809,BU05180907,2582JB,52.091945,4.272226,permanent,1,0,0,6
4,5382171,4/12/2018,180000,A,1181,50,0,1974,26,1701,...,WK073601,BU07360102,3641MC,52.213655,4.862490,permanent,1,0,0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1613935,4527050,2/9/2015,196000,T,1131,125,155,1975,26,1702,...,WK034206,BU03420601,3766BB,52.167385,5.285358,permanent,1,0,0,6
1613936,8518792,27/11/2023,359037,T,1138,123,98,1970,25,1300,...,WK022803,BU02280301,6714MA,52.046115,5.638586,permanent,0,1,0,6
1613937,6068581,16/6/2020,246000,A,1184,93,0,2001,26,1702,...,WK030724,BU03072403,3824VS,52.199698,5.369747,permanent,1,0,0,10
1613938,4943077,5/5/2017,350000,V,1111,165,367,1935,28,3002,...,WK192708,BU19270801,2964GB,51.940161,4.853196,permanent,1,0,0,1


## Handle NaNs

In [7]:
nan_percentages = df_combined.isna().mean()*100
nan_percentages

TRANSID            0.000000
DATUM              0.000000
KOOPSOM            0.000000
WONINGTYPE         0.000000
SOC                0.000000
CALCOPP            0.000000
KAVOPP             0.000000
BOUWJAAR           0.000000
PROVINCIECODE      0.000000
COROPPLUSCODE      0.000000
GEMEENTECODE       0.024226
WIJKCODE           0.024226
BUURTCODE          0.024226
POSTCODE           0.024226
LAT                0.457266
LON                0.457266
BESTEMMING         0.000000
DUMMY_BERGING      0.000000
DUMMY_GARAGE       0.000000
DUMMY_MONUMENT     0.000000
FOTOWIJZER_TYPE    0.000000
dtype: float64

In [8]:
# Drop rows where Buurtcode is NaN
df_combined.dropna(subset=["BUURTCODE"], inplace=True)
nan_percentages = df_combined.isna().mean()*100
nan_percentages

TRANSID            0.000000
DATUM              0.000000
KOOPSOM            0.000000
WONINGTYPE         0.000000
SOC                0.000000
CALCOPP            0.000000
KAVOPP             0.000000
BOUWJAAR           0.000000
PROVINCIECODE      0.000000
COROPPLUSCODE      0.000000
GEMEENTECODE       0.000000
WIJKCODE           0.000000
BUURTCODE          0.000000
POSTCODE           0.000000
LAT                0.442751
LON                0.442751
BESTEMMING         0.000000
DUMMY_BERGING      0.000000
DUMMY_GARAGE       0.000000
DUMMY_MONUMENT     0.000000
FOTOWIJZER_TYPE    0.000000
dtype: float64

In [9]:
# Drop rows where LAT is NaN
df_combined.dropna(subset=["LAT"], inplace=True)
nan_percentages = df_combined.isna().mean()*100
nan_percentages = nan_percentages[nan_percentages>0]
nan_percentages

Series([], dtype: float64)

## Log Transform Target

In [10]:
import numpy as np
df_combined["LOG_KOOPSOM"] = np.log(df_combined["KOOPSOM"])

## Split Date into Year and Month

In [11]:
df_combined["DATUM"] = pd.to_datetime(df_combined["DATUM"])

  df_combined["DATUM"] = pd.to_datetime(df_combined["DATUM"])


In [12]:
df_combined.drop(["KOOPSOM"], axis=1, inplace=True)

## Remove other location features apart from BUURTCODE

In [13]:
df_combined.drop([ 'WIJKCODE', 'POSTCODE'], axis=1, inplace=True)
# df_rotterdam.drop([ 'WIJKCODE', 'POSTCODE'], axis=1, inplace=True)

## Encode Categorical Columns

In [14]:
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
cat_cols = ['WONINGTYPE','BESTEMMING']
df_combined[cat_cols] = enc.fit_transform(df_combined[cat_cols])
enc.categories_

[array(['A', 'H', 'K', 'T', 'V'], dtype=object),
 array(['doelgroep', 'permanent', 'recreatief'], dtype=object)]

## Create Train-Test Splits

In [37]:
df_combined.dtypes

TRANSID                     int64
DATUM              datetime64[ns]
WONINGTYPE                float64
SOC                         int64
CALCOPP                     int64
KAVOPP                      int64
BOUWJAAR                    int64
PROVINCIECODE               int64
COROPPLUSCODE               int64
GEMEENTECODE               object
BUURTCODE                  object
LAT                       float64
LON                       float64
BESTEMMING                float64
DUMMY_BERGING               int64
DUMMY_GARAGE                int64
DUMMY_MONUMENT              int64
FOTOWIJZER_TYPE             int64
LOG_KOOPSOM               float64
dtype: object

## Save Files

In [38]:
df_combined.to_csv("../../housing-data/transaction_data.csv", index=False)

# df_rotterdam.to_csv("../../housing-data/rotterdam_transaction_data.csv", index=False)

In [39]:
df_combined = pd.read_csv("../../housing-data/transaction_data.csv")


df_combined

Unnamed: 0,TRANSID,DATUM,WONINGTYPE,SOC,CALCOPP,KAVOPP,BOUWJAAR,PROVINCIECODE,COROPPLUSCODE,GEMEENTECODE,BUURTCODE,LAT,LON,BESTEMMING,DUMMY_BERGING,DUMMY_GARAGE,DUMMY_MONUMENT,FOTOWIJZER_TYPE,LOG_KOOPSOM
0,8324024,2023-03-13,3.0,1131,124,119,1976,28,CP3002,GM1978,BU19780402,51.850627,4.893862,1.0,1,0,0,6,12.842649
1,4695633,2016-06-01,3.0,1131,145,208,2000,27,CP2000,GM0375,BU03750702,52.492774,4.660815,1.0,1,0,0,6,12.456831
2,6267016,2020-11-16,3.0,1131,140,130,2014,25,CP1500,GM0268,BU02680970,51.875469,5.868624,1.0,1,0,0,6,12.892950
3,6428993,2021-02-01,3.0,1136,261,162,1912,28,CP2601,GM0518,BU05180907,52.091945,4.272226,1.0,1,0,0,6,13.910821
4,5382171,2018-12-04,0.0,1181,50,0,1974,26,CP1701,GM0736,BU07360102,52.213655,4.862490,1.0,1,0,0,9,12.100712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606347,4527050,2015-09-02,3.0,1131,125,155,1975,26,CP1702,GM0342,BU03420601,52.167385,5.285358,1.0,1,0,0,6,12.185870
1606348,8518792,2023-11-27,3.0,1138,123,98,1970,25,CP1300,GM0228,BU02280301,52.046115,5.638586,1.0,0,1,0,6,12.791181
1606349,6068581,2020-06-16,0.0,1184,93,0,2001,26,CP1702,GM0307,BU03072403,52.199698,5.369747,1.0,1,0,0,10,12.413087
1606350,4943077,2017-05-05,4.0,1111,165,367,1935,28,CP3002,GM1978,BU19780602,51.940161,4.853196,1.0,1,0,0,1,12.765688


In [None]:
adj_2023 = pd.read_csv("../../housing-data/buurt_adj_2023.csv",index_col=0)
adj_2023

In [None]:
set(df_combined["BUURTCODE"]).difference(set(adj_2023.columns.values))

## Extract COROP-plus region of Rotterdam (CP:2910)

In [40]:

df_rotterdam = df_combined[df_combined["COROPPLUSCODE"] =="CP2910"]
df_rotterdam

Unnamed: 0,TRANSID,DATUM,WONINGTYPE,SOC,CALCOPP,KAVOPP,BOUWJAAR,PROVINCIECODE,COROPPLUSCODE,GEMEENTECODE,BUURTCODE,LAT,LON,BESTEMMING,DUMMY_BERGING,DUMMY_GARAGE,DUMMY_MONUMENT,FOTOWIJZER_TYPE,LOG_KOOPSOM
34,7537440,2021-11-29,3.0,1131,97,132,1976,28,CP2910,GM0489,BU04890512,51.849001,4.541944,1.0,1,0,0,6,12.608199
41,7308578,2021-09-10,2.0,1121,181,457,1976,28,CP2910,GM0489,BU04890512,51.848052,4.544069,1.0,1,1,0,2,13.270783
47,4689877,2016-06-15,3.0,1131,90,109,2000,28,CP2910,GM0599,BU05990664,51.949338,4.512931,1.0,1,0,0,6,12.301383
54,5421309,2019-02-01,4.0,1111,196,346,1993,28,CP2910,GM0613,BU06130309,51.856716,4.403271,1.0,1,0,0,1,13.011432
55,6215385,2020-10-29,3.0,1131,87,111,1900,28,CP2910,GM0622,BU06220402,51.908115,4.350812,1.0,1,0,0,6,12.528156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606291,5490122,2019-05-08,0.0,1183,85,0,1968,28,CP2910,GM0502,BU05020335,51.932054,4.596247,1.0,1,0,0,9,12.107357
1606293,5022491,2017-08-31,4.0,1111,232,756,1992,28,CP2910,GM1992,BU19920505,51.847523,4.156492,1.0,1,1,0,1,13.226904
1606296,5014000,2017-08-01,3.0,1171,111,104,1980,28,CP2910,GM0599,BU05991467,51.944781,4.566412,1.0,1,0,0,4,12.122691
1606320,4641789,2016-03-02,2.0,1121,100,104,1935,28,CP2910,GM0502,BU05020229,51.923281,4.586171,1.0,1,0,0,2,12.100712


In [41]:
import duckdb as db


gemeente_rotterdam = list(db.query("""select coroppluscode, gemeentecode from df_rotterdam group by coroppluscode, gemeentecode """).df()["GEMEENTECODE"].values)

In [42]:
gemeente_rotterdam

['GM1930',
 'GM0502',
 'GM0622',
 'GM0597',
 'GM0556',
 'GM0489',
 'GM0606',
 'GM1621',
 'GM1992',
 'GM0613',
 'GM0542',
 'GM0599']

In [43]:
import geopandas as gpd
import libpysal as lp

map_2023_path = "../../housing-data/cbsgebiedsindelingen2016-2025/cbsgebiedsindelingen2023.gpkg"


def read_file(file_path, layer="buurt_gegeneraliseerd"):
    gdf = gpd.read_file(file_path, layer=layer)
    #gdf = gdf[gdf['indelingswijziging_wijken_en_buurten'] > 0] -- Wel of niet???
    gdf_neighbors = lp.weights.Queen.from_dataframe(gdf, use_index=False)
    gdf_neighbors.to_sparse()
    codes = gdf.iloc[:, :1].to_numpy().flatten()
    adj_df = pd.DataFrame.sparse.from_spmatrix(
        gdf_neighbors.to_sparse(), index=codes, columns=codes
    )
    return adj_df, gdf

adj_df_2023, gdf_2023 = read_file(map_2023_path)

 There are 10 disconnected components.
 There are 4 islands with ids: 3025, 10085, 10252, 10810.
  W.__init__(self, neighbors, ids=ids, **kw)


In [48]:
buurten_rotterdam = list(gdf_2023[gdf_2023["gm_code"].isin(gemeente_rotterdam)]["statcode"].values)
print(buurten_rotterdam)
# gdf_2023[gdf_2023["gm_code"].isin(gemeente_rotterdam)]
# gdf_2023[gdf_2023["statcode"] == "BU19920505"]

['BU04890101', 'BU04890102', 'BU04890103', 'BU04890204', 'BU04890205', 'BU04890206', 'BU04890207', 'BU04890308', 'BU04890409', 'BU04890410', 'BU04890511', 'BU04890512', 'BU04890513', 'BU04890614', 'BU04890715', 'BU04890816', 'BU04890817', 'BU04890918', 'BU04890919', 'BU04890920', 'BU04890921', 'BU04890922', 'BU04891023', 'BU04891124', 'BU04891225', 'BU04891226', 'BU04891227', 'BU04891328', 'BU04891429', 'BU04891430', 'BU04891431', 'BU04891432', 'BU04891533', 'BU04891634', 'BU04891735', 'BU04891836', 'BU04891937', 'BU04892039', 'BU04893050', 'BU04893051', 'BU04893052', 'BU04893053', 'BU04893054', 'BU04893160', 'BU04893161', 'BU04893162', 'BU04893163', 'BU04893164', 'BU04895070', 'BU04895071', 'BU04895072', 'BU04895073', 'BU04895074', 'BU04895075', 'BU04895076', 'BU04895077', 'BU04895078', 'BU04895080', 'BU04895081', 'BU04895082', 'BU04895083', 'BU04895084', 'BU05020110', 'BU05020111', 'BU05020112', 'BU05020113', 'BU05020114', 'BU05020115', 'BU05020116', 'BU05020117', 'BU05020118', 'BU05

In [49]:
rotterdam_adj_2023 = adj_2023.loc[buurten_rotterdam, buurten_rotterdam]
rotterdam_adj_2023

Unnamed: 0,BU04890101,BU04890102,BU04890103,BU04890204,BU04890205,BU04890206,BU04890207,BU04890308,BU04890409,BU04890410,BU04890511,BU04890512,BU04890513,BU04890614,BU04890715,BU04890816,BU04890817,BU04890918,BU04890919,BU04890920,BU04890921,BU04890922,BU04891023,BU04891124,BU04891225,BU04891226,BU04891227,BU04891328,BU04891429,BU04891430,BU04891431,BU04891432,BU04891533,BU04891634,BU04891735,BU04891836,BU04891937,BU04892039,BU04893050,BU04893051,...,BU06060403,BU06060404,BU06060501,BU06060502,BU06060503,BU06060504,BU06060505,BU06060506,BU06060507,BU06060601,BU06060602,BU06060603,BU06060604,BU06060605,BU06060606,BU06060607,BU06060608,BU06060609,BU06060610,BU06060701,BU06060702,BU06060703,BU06060704,BU06060705,BU06060706,BU06060801,BU06060802,BU06060901,BU06060902,BU06060903,BU06060904,BU06061001,BU06061002,BU06061003,BU06061004,BU06061005,BU06061006,BU06061007,BU06061101,BU06061102
BU04890101,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BU04890102,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BU04890103,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BU04890204,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BU04890205,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BU06061005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
BU06061006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
BU06061007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
BU06061101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0


In [50]:
rotterdam_adj_2023.to_csv("../../housing-data/rotterdam_adj_2023.csv")

In [52]:
df_combined.drop(["PROVINCIECODE","COROPPLUSCODE","GEMEENTECODE"],axis=1, inplace=True)
df_rotterdam.drop(["PROVINCIECODE","COROPPLUSCODE","GEMEENTECODE"],axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rotterdam.drop(["PROVINCIECODE","COROPPLUSCODE","GEMEENTECODE"],axis=1, inplace=True)


In [53]:
df_combined.to_csv("../../housing-data/transaction_data.csv", index=False)

df_rotterdam.to_csv("../../housing-data/rotterdam_transaction_data.csv", index=False)

Encoding buurten for rotterdam

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [58]:
transactions_rot = pd.read_csv("../../housing-data/rotterdam_transaction_data.csv")
features_rot = pd.read_csv("../../housing-data/all_neighborhood_features_rotterdam.csv")
adj_rot = pd.read_csv("../../housing-data/rotterdam_adj_2023.csv", index_col=0)

In [59]:
buurten = adj_rot.columns.values
label_encoder = LabelEncoder()
label_encoder.fit(buurten)

transactions_rot["BUURTCODE"] = label_encoder.transform(transactions_rot["BUURTCODE"])
features_rot["BUURTCODE"] = label_encoder.transform(features_rot["BUURTCODE"])

features_rot

Unnamed: 0,BUURTCODE,YEAR,LEEFBAAROMETER,GROEN,EC,NO2,PM2_5,PM10,GELUIDSHINDERTOTAAL,AFSTANDTOTHUISARTSENPRAKTIJK,HUISARTSENPRAKTIJKBINNEN1KM,HUISARTSENPRAKTIJKBINNEN3KM,HUISARTSENPRAKTIJKBINNEN5KM,AFSTANDTOTHUISARTSENPOST,AFSTANDTOTAPOTHEEK,AFSTANDTOTZIEKENHUISINCLBUITENPOLIKLINIEK,ZIEKENHUISINCLBUITENPOLIKLINIEKBINNEN5KM,ZIEKENHUISINCLBUITENPOLIKLINIEKBINNEN10KM,ZIEKENHUISINCLBUITENPOLIKLINIEKBINNEN20KM,AFSTANDTOTZIEKENHUISEXCLBUITENPOLIKLINIEK,ZIEKENHUISEXCLBUITENPOLIKLINIEKBINNEN5KM,ZIEKENHUISEXCLBUITENPOLIKLINIEKBINNEN10KM,ZIEKENHUISEXCLBUITENPOLIKLINIEKBINNEN20KM,AFSTANDTOTGROTESUPERMARKT,GROTESUPERMARKTBINNEN1KM,GROTESUPERMARKTBINNEN3KM,GROTESUPERMARKTBINNEN5KM,AFSTANDTOTOVDAGELLEVENSMIDDELEN,OVDAGELLEVENSMIDDELENBINNEN1KM,OVDAGELLEVENSMIDDELENBINNEN3KM,OVDAGELLEVENSMIDDELENBINNEN5KM,AFSTANDTOTWARENHUIS,WARENHUISBINNEN5KM,WARENHUISBINNEN10KM,WARENHUISBINNEN20KM,AFSTANDTOTCAFEED,CAFEEDBINNEN1KM,CAFEEDBINNEN3KM,CAFEEDBINNEN5KM,AFSTANDTOTCAFETARIAED,...,AARDGASVERBRUIKTWEEONDEREENKAPWONING,AARDGASVERBRUIKVRIJSTAANDEWONING,AARDGASVERBRUIKHUURWONING,AARDGASVERBRUIKEIGENWONING,AANTALINKOMENSONTVANGERS,GEMIDDELDINKOMENPERINKOMENSONTVANGER,GEMIDDELDINKOMENPERINWONER,K_40PERSONENMETLAAGSTEINKOMEN,K_20PERSONENMETHOOGSTEINKOMEN,K_40HUISHOUDENSMETLAAGSTEINKOMEN,K_20HUISHOUDENSMETHOOGSTEINKOMEN,HUISHOUDENSMETEENLAAGINKOMEN,HUISHONDEROFRONDSOCIAALMINIMUM,PERSONENPERSOORTUITKERINGBIJSTAND,PERSONENPERSOORTUITKERINGAO,PERSONENPERSOORTUITKERINGWW,PERSONENPERSOORTUITKERINGAOW,BEDRIJFSVESTIGINGENTOTAAL,ALANDBOUWBOSBOUWENVISSERIJ,BFNIJVERHEIDENENERGIE,GIHANDELENHORECA,HJVERVOERINFORMATIEENCOMMUNICATIE,KLFINANCIELEDIENSTENONROERENDGOED,MNZAKELIJKEDIENSTVERLENING,RUCULTUURRECREATIEOVERIGEDIENSTEN,PERSONENAUTOSTOTAAL,PERSONENAUTOSBRANDSTOFBENZINE,PERSONENAUTOSOVERIGEBRANDSTOF,PERSONENAUTOSPERHUISHOUDEN,PERSONENAUTOSNAAROPPERVLAKTE,MOTORFIETSEN,AFSTANDTOTSCHOOL,SCHOLENBINNEN3KM,OPPERVLAKTETOTAAL,OPPERVLAKTELAND,OPPERVLAKTEWATER,MEESTVOORKOMENDEPOSTCODE,DEKKINGSPERCENTAGE,MATEVANSTEDELIJKHEID,OMGEVINGSADRESSENDICHTHEID
0,0,2014,4.085167,0.019481,0.000689,0.021756,0.010006,0.017313,0.323869,0.400000,6.799998,10.399997,25.399992,3.199999,0.400000,3.399999,2.399999,7.899998,23.999992,3.399999,1.400000,5.899998,13.999996,0.200000,2.599999,3.999999,16.099995,0.100000,18.499994,21.299993,64.299980,0.500000,2.399999,12.699996,42.799987,0.600000,2.999999,3.299999,10.899997,0.200000,...,1649.999480,2549.999196,1249.999606,1649.999480,299.999905,28.099991,22.199993,44.399986,20.999993,38.999988,19.799994,10.099997,6.699998,9.999997,9.999997,0.000000,89.999972,154.999951,0.0,9.999997,94.999970,0.000000,14.999995,14.999995,19.999994,209.999934,184.999942,24.999992,1.100000,1633.999485,9.999997,0.800000,9.099997,12.999996,12.999996,0.0,2990.999057,1.000000,1.999999,2086.999342
1,1,2014,4.034464,0.041062,0.000660,0.020684,0.009676,0.016755,0.304635,0.293726,4.200780,8.895091,20.523754,3.519244,0.293726,3.616934,1.954893,5.864679,24.043927,3.617044,0.977446,3.909786,14.270008,0.293562,2.931302,3.910168,13.389487,0.293507,18.660324,20.526376,46.914099,0.586959,1.954893,11.336959,40.954243,0.684650,1.758420,2.932339,7.818370,0.293671,...,2052.856079,2736.822816,1417.133524,1710.422087,293.725532,27.275126,19.845878,40.560205,19.065887,32.251582,17.504267,4.986725,5.571991,9.790851,9.807237,9.790851,107.611968,19.570778,0.0,4.889963,4.887232,4.887232,4.889963,4.895426,0.005462,161.579084,142.005575,19.573509,0.879811,896.259696,9.796313,0.293398,8.797291,17.599499,17.599499,0.0,2923.542404,0.977446,1.954893,1982.046788
2,2,2014,4.110514,0.031598,0.000684,0.022700,0.009859,0.017139,0.322656,0.200000,6.899998,11.699997,32.599991,2.799999,0.400000,2.999999,3.099999,8.999998,24.899993,2.999999,2.099999,6.999998,14.899996,0.500000,1.899999,5.899998,21.399994,0.300000,17.599995,27.399993,93.299975,0.500000,2.999999,12.499997,42.699988,0.300000,2.999999,4.599999,21.299994,0.600000,...,1449.999609,2549.999313,999.999730,1499.999596,499.999865,28.499992,22.599994,42.299989,18.299995,49.899987,15.599996,9.299997,8.399998,19.999995,29.999992,19.999995,189.999949,74.999980,0.0,9.999997,19.999995,9.999997,9.999997,14.999996,9.999997,329.999911,289.999922,39.999989,0.900000,2419.999348,29.999992,0.300000,12.199997,13.999996,13.999996,0.0,2991.999193,1.000000,1.999999,1891.999490
3,3,2014,4.110993,0.037557,0.000684,0.022711,0.009891,0.017191,0.325598,0.400000,6.999998,10.999997,33.099992,2.799999,0.400000,2.999999,2.999999,8.999998,24.999994,2.999999,1.999999,6.999998,14.999996,0.500000,1.999999,5.799999,19.699995,0.400000,17.899996,28.099993,86.699978,0.300000,2.999999,12.399997,41.999989,0.600000,2.999999,4.299999,20.399995,0.500000,...,1899.999525,3149.999212,1299.999675,1649.999587,399.999900,28.499993,20.999995,42.299989,17.699996,39.999990,19.099995,10.499997,9.099998,19.999995,9.999997,9.999997,99.999975,29.999992,0.0,0.000000,4.999999,4.999999,4.999999,9.999997,4.999999,199.999950,179.999955,19.999995,0.900000,379.999905,9.999997,0.700000,11.199997,51.999987,51.999987,0.0,2990.999252,1.000000,1.999999,1676.999580
4,4,2014,4.048586,0.038952,0.000666,0.021961,0.009820,0.017036,0.301614,0.500000,4.999998,10.399997,25.499992,3.199999,0.400000,3.399999,2.499999,8.099997,23.999992,3.399999,1.500000,6.099998,13.999995,0.700000,1.999999,3.999999,15.599995,0.300000,17.899994,21.299993,63.699979,0.500000,2.499999,12.399996,41.999986,0.600000,2.999999,3.399999,10.399997,0.400000,...,1599.999478,3149.998973,1099.999641,1449.999527,399.999870,25.399992,18.199994,43.199986,13.099996,37.199988,13.299996,8.199997,6.799998,9.999997,19.999993,19.999993,79.999974,19.999993,0.0,4.999998,4.999998,0.000000,0.000000,4.999998,4.999998,219.999928,199.999935,14.999995,1.000000,3051.999005,19.999993,0.900000,9.099997,6.999998,6.999998,0.0,2990.999025,1.000000,1.999999,1578.999485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6870,620,2024,4.042706,0.040761,0.000251,0.017493,0.007697,0.015455,0.327098,3.400000,0.000000,0.000000,2.000000,10.700000,3.300000,4.800000,1.000000,3.000000,7.000000,10.700000,0.000000,0.000000,2.000000,0.200000,1.000000,1.000000,4.600000,3.400000,0.000000,0.000000,9.600000,5.200000,0.000000,3.200000,13.000000,3.300000,0.000000,0.000000,4.300000,0.400000,...,1340.000000,1570.000000,940.000000,1170.000000,500.000000,42.300000,35.300000,35.700000,24.700000,30.300000,27.200000,1.900000,4.300000,10.000000,20.000000,10.000000,110.000000,75.000000,0.0,15.000000,10.000000,0.000000,5.000000,15.000000,10.000000,345.000000,285.000000,55.000000,1.300000,1686.000000,30.000000,0.400000,1.500000,21.000000,20.000000,0.0,3238.000000,1.000000,5.000000,281.000000
6871,621,2024,4.061659,0.047161,0.000250,0.017436,0.007677,0.015409,0.321582,3.600000,0.000000,0.000000,1.500000,11.000000,3.500000,5.000000,0.500000,3.000000,6.800000,11.000000,0.000000,0.000000,2.000000,0.600000,1.000000,1.000000,3.300000,3.700000,0.000000,0.000000,8.600000,5.400000,0.000000,3.000000,12.800000,3.700000,0.000000,0.000000,4.000000,0.800000,...,1020.000000,1840.000000,1030.000000,1030.000000,800.000000,42.300000,35.300000,32.400000,29.400000,19.800000,32.900000,1.200000,1.200000,0.000000,20.000000,10.000000,190.000000,70.000000,0.0,10.000000,5.000000,5.000000,5.000000,20.000000,5.000000,585.000000,515.000000,70.000000,1.400000,2180.000000,60.000000,0.800000,1.000000,30.000000,27.000000,4.0,3238.000000,3.000000,5.000000,284.000000
6872,622,2024,4.269247,0.046067,0.000249,0.017399,0.007667,0.015397,0.330565,3.800000,0.000000,0.000000,1.000000,11.100000,3.700000,5.200000,0.000000,3.000000,6.200000,11.100000,0.000000,0.000000,2.000000,0.500000,1.000000,1.000000,2.100000,3.700000,0.000000,0.000000,8.000000,5.700000,0.000000,3.000000,12.500000,3.700000,0.000000,0.000000,4.000000,0.400000,...,1280.000000,1690.000000,1060.000000,1570.000000,100.000000,42.300000,35.300000,34.100000,28.400000,24.000000,30.800000,1.700000,2.600000,0.000000,0.000000,0.000000,20.000000,10.000000,15.0,30.000000,30.000000,15.000000,25.000000,45.000000,25.000000,45.000000,35.000000,10.000000,1.400000,134.000000,10.000000,0.500000,1.000000,17.000000,11.000000,6.0,3238.000000,1.000000,5.000000,278.000000
6873,623,2024,4.166958,0.032288,0.000240,0.017008,0.007543,0.015186,0.315714,3.592785,0.000000,0.199599,2.395190,11.177554,3.592785,4.690581,0.698597,2.894188,5.987975,11.177554,0.000000,0.099800,1.496994,2.095791,0.199599,0.898196,4.291382,2.694589,0.000000,1.297395,13.572744,5.289378,0.299399,3.193587,11.876151,3.492985,0.000000,0.299399,4.690581,2.195591,...,1556.873535,1536.913618,1516.953701,1496.993784,199.599171,42.215225,35.229254,37.724243,29.740277,26.646489,32.734264,2.594789,3.492985,0.000000,19.959917,9.979959,69.859710,60.000000,15.0,5.000000,5.000000,5.000000,10.000000,10.000000,5.000000,195.000000,155.000000,40.000000,1.600000,24.000000,25.000000,2.195591,1.197595,884.000000,809.000000,75.0,3238.000000,1.000000,5.000000,95.000000


In [60]:
transactions_rot

Unnamed: 0,TRANSID,DATUM,WONINGTYPE,SOC,CALCOPP,KAVOPP,BOUWJAAR,BUURTCODE,LAT,LON,BESTEMMING,DUMMY_BERGING,DUMMY_GARAGE,DUMMY_MONUMENT,FOTOWIJZER_TYPE,LOG_KOOPSOM
0,7537440,2021-11-29,3.0,1131,97,132,1976,11,51.849001,4.541944,1.0,1,0,0,6,12.608199
1,7308578,2021-09-10,2.0,1121,181,457,1976,11,51.848052,4.544069,1.0,1,1,0,2,13.270783
2,4689877,2016-06-15,3.0,1131,90,109,2000,209,51.949338,4.512931,1.0,1,0,0,6,12.301383
3,5421309,2019-02-01,4.0,1111,196,346,1993,346,51.856716,4.403271,1.0,1,0,0,1,13.011432
4,6215385,2020-10-29,3.0,1131,87,111,1900,382,51.908115,4.350812,1.0,1,0,0,6,12.528156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108994,5490122,2019-05-08,0.0,1183,85,0,1968,82,51.932054,4.596247,1.0,1,0,0,9,12.107357
108995,5022491,2017-08-31,4.0,1111,232,756,1992,582,51.847523,4.156492,1.0,1,1,0,1,13.226904
108996,5014000,2017-08-01,3.0,1171,111,104,1980,239,51.944781,4.566412,1.0,1,0,0,4,12.122691
108997,4641789,2016-03-02,2.0,1121,100,104,1935,78,51.923281,4.586171,1.0,1,0,0,2,12.100712


In [62]:
transactions_rot.to_csv("../../housing-data/rotterdam_transaction_data.csv", index=False)
features_rot.to_csv("../../housing-data/all_neighborhood_features_rotterdam.csv", index=False)


Encode BUURTCODE for all transaction data

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
transactions_rot = pd.read_csv("../../housing-data/transaction_data.csv")
features_rot = pd.read_csv("../../housing-data/all_neighborhood_features.csv")
adj_rot = pd.read_csv("../../housing-data/buurt_adj_2023.csv", index_col=0)

In [3]:
buurten = adj_rot.columns.values
label_encoder = LabelEncoder()
label_encoder.fit(buurten)

transactions_rot["BUURTCODE"] = label_encoder.transform(transactions_rot["BUURTCODE"])
features_rot["BUURTCODE"] = label_encoder.transform(features_rot["BUURTCODE"])

features_rot

Unnamed: 0,BUURTCODE,YEAR,LEEFBAAROMETER,GROEN,EC,NO2,PM2_5,PM10,GELUIDSHINDERTOTAAL,AFSTANDTOTHUISARTSENPRAKTIJK,...,MOTORFIETSEN,AFSTANDTOTSCHOOL,SCHOLENBINNEN3KM,OPPERVLAKTETOTAAL,OPPERVLAKTELAND,OPPERVLAKTEWATER,MEESTVOORKOMENDEPOSTCODE,DEKKINGSPERCENTAGE,MATEVANSTEDELIJKHEID,OMGEVINGSADRESSENDICHTHEID
0,0,2014,3.995231,0.006693,0.000461,0.016060,0.007822,0.014309,0.298703,0.284183,...,49.888193,0.375635,14.757827,18.490892,17.547828,0.943063,9154.228572,0.942569,0.942569,5887.160036
1,1,2014,4.007582,0.008081,0.000455,0.016122,0.007893,0.014397,0.283425,0.190341,...,166.508675,0.666068,17.033365,58.045759,52.336699,5.709061,9241.205004,0.951622,0.951622,5664.661950
2,2,2014,3.641874,0.006982,0.000415,0.014185,0.007163,0.013116,0.288608,0.260024,...,69.329417,0.691056,13.847204,20.233320,18.501707,1.725773,8415.392590,0.866582,0.866582,4884.304548
3,3,2014,0.753490,0.003809,0.000085,0.003003,0.001451,0.002648,0.059165,0.085903,...,6.063573,0.121271,2.891182,8.553599,8.039100,0.514499,1696.287864,0.174554,0.174554,943.576910
4,4,2014,0.894338,0.005688,0.000090,0.002953,0.001722,0.003162,0.068266,0.146036,...,12.225972,0.108682,3.353267,14.388711,13.969846,0.418865,2090.053623,0.215098,0.215098,1141.582224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158626,14416,2024,4.042706,0.040761,0.000251,0.017493,0.007697,0.015455,0.327098,3.400000,...,30.000000,0.400000,1.500000,21.000000,20.000000,0.000000,3238.000000,1.000000,5.000000,281.000000
158627,14417,2024,4.061659,0.047161,0.000250,0.017436,0.007677,0.015409,0.321582,3.600000,...,60.000000,0.800000,1.000000,30.000000,27.000000,4.000000,3238.000000,3.000000,5.000000,284.000000
158628,14418,2024,4.269247,0.046067,0.000249,0.017399,0.007667,0.015397,0.330565,3.800000,...,10.000000,0.500000,1.000000,17.000000,11.000000,6.000000,3238.000000,1.000000,5.000000,278.000000
158629,14419,2024,4.166958,0.032288,0.000240,0.017008,0.007543,0.015186,0.315714,3.592785,...,25.000000,2.195591,1.197595,884.000000,809.000000,75.000000,3238.000000,1.000000,5.000000,95.000000


In [5]:
features_rot[features_rot["BUURTCODE"] == 1]

Unnamed: 0,BUURTCODE,YEAR,LEEFBAAROMETER,GROEN,EC,NO2,PM2_5,PM10,GELUIDSHINDERTOTAAL,AFSTANDTOTHUISARTSENPRAKTIJK,...,MOTORFIETSEN,AFSTANDTOTSCHOOL,SCHOLENBINNEN3KM,OPPERVLAKTETOTAAL,OPPERVLAKTELAND,OPPERVLAKTEWATER,MEESTVOORKOMENDEPOSTCODE,DEKKINGSPERCENTAGE,MATEVANSTEDELIJKHEID,OMGEVINGSADRESSENDICHTHEID
1,1,2014,4.007582,0.008081,0.000455,0.016122,0.007893,0.014397,0.283425,0.190341,...,166.508675,0.666068,17.033365,58.045759,52.336699,5.709061,9241.205004,0.951622,0.951622,5664.66195
14422,1,2015,4.035009,0.008133,0.000459,0.016232,0.007947,0.014495,0.285366,0.28744,...,167.565383,0.670352,17.148205,56.507148,52.676887,3.830261,9304.437302,0.958134,0.958134,5502.864153
28843,1,2016,4.078002,0.008099,0.000456,0.016157,0.00791,0.014428,0.284083,0.191088,...,171.506978,0.667121,16.973428,56.237708,52.426067,3.811641,9261.894134,0.953753,0.953753,5574.211013
43264,1,2017,4.09673,0.008147,0.000459,0.016232,0.007947,0.014495,0.285394,0.287554,...,181.91465,0.670352,17.052506,56.507148,52.676887,3.830261,9304.437302,0.958134,0.958134,5720.390059
57685,1,2018,4.049475,0.008089,0.000456,0.016157,0.00791,0.014428,0.283997,0.286295,...,166.747303,0.667124,16.973158,56.237931,52.426275,3.811656,9261.930844,0.953756,0.953756,5695.355118
72106,1,2019,4.049475,0.00812,0.000456,0.016155,0.00791,0.014427,0.284082,0.286295,...,104.82896,0.667124,16.973327,56.237931,52.426275,3.811656,9261.930844,0.953756,0.953756,5806.006035
86527,1,2020,4.07356,0.008121,0.000329,0.01318,0.00678,0.013309,0.284079,0.286295,...,95.29982,0.667124,16.878288,56.237931,52.426275,3.811656,9261.93093,0.953756,0.953756,5901.37163
100948,1,2021,4.07356,0.008118,0.000329,0.013179,0.00678,0.013309,0.284096,0.286295,...,104.803684,0.666956,16.496954,56.237931,52.426275,3.811656,9261.93093,0.953756,0.953756,5971.022815
115369,1,2022,4.07548,0.008109,0.000217,0.01221,0.007187,0.014209,0.284047,0.286295,...,90.547889,0.666956,16.496954,56.237931,52.426275,3.811656,9261.93093,0.953756,0.953756,6120.752473
129790,1,2023,4.272947,0.008485,0.000228,0.012802,0.007535,0.014897,0.297746,0.3,...,110.0,0.7,17.3,59.0,55.0,4.0,9711.0,1.0,1.0,6586.0


In [4]:
transactions_rot

Unnamed: 0,TRANSID,DATUM,WONINGTYPE,SOC,CALCOPP,KAVOPP,BOUWJAAR,BUURTCODE,LAT,LON,BESTEMMING,DUMMY_BERGING,DUMMY_GARAGE,DUMMY_MONUMENT,FOTOWIJZER_TYPE,LOG_KOOPSOM
0,8324024,2023-03-13,3.0,1131,124,119,1976,13959,51.850627,4.893862,1.0,1,0,0,6,12.842649
1,4695633,2016-06-01,3.0,1131,145,208,2000,4785,52.492774,4.660815,1.0,1,0,0,6,12.456831
2,6267016,2020-11-16,3.0,1131,140,130,2014,2955,51.875469,5.868624,1.0,1,0,0,6,12.892950
3,6428993,2021-02-01,3.0,1136,261,162,1912,6448,52.091945,4.272226,1.0,1,0,0,6,13.910821
4,5382171,2018-12-04,0.0,1181,50,0,1974,7828,52.213655,4.862490,1.0,1,0,0,9,12.100712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606347,4527050,2015-09-02,3.0,1131,125,155,1975,3787,52.167385,5.285358,1.0,1,0,0,6,12.185870
1606348,8518792,2023-11-27,3.0,1138,123,98,1970,2583,52.046115,5.638586,1.0,0,1,0,6,12.791181
1606349,6068581,2020-06-16,0.0,1184,93,0,2001,3515,52.199698,5.369747,1.0,1,0,0,10,12.413087
1606350,4943077,2017-05-05,4.0,1111,165,367,1935,13973,51.940161,4.853196,1.0,1,0,0,1,12.765688


In [6]:
transactions_rot.to_csv("../../housing-data/transaction_data.csv", index=False)
features_rot.to_csv("../../housing-data/all_neighborhood_features.csv", index=False)