# Environment & Data


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os


In [2]:
df_bridges = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_bridges.csv')
df_cinemas = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_cinemas.csv')
df_cars = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_passenger_cars.csv')
df_guests = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_tourism_guests_overnightstays_2023_2024.csv')
df_overnight_stays = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_tourism_overnightstays_changes_2023_2024.csv')
df_trees = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_trees.csv')
df_libraries = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_libraries_2023.csv')
df_toilets = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_public_toilets.csv')

# Cleaning Functions

In [3]:
# Rename bezirk values for consistency
def clean_bezirk(bezirk):
    # Replace ö with oe
    bezirk = bezirk.replace('ö', 'oe').replace('Ö', 'oe')

    # Remove dots
    bezirk = bezirk.replace('.', '')

    # Lowercase and replace spaces with hyphens
    bezirk = bezirk.lower().replace(' ', '-')

    # Mapping for specific bezirk names
    bezirk_mapping = {
        'charlbg-wilmersd': 'charlottenburg-wilmersdorf',
        'friedrh-kreuzb': 'friedrichshain-kreuzberg',
        'marzahn-hellersd': 'marzahn-hellersdorf',
        'steglitz-zehlend': 'steglitz-zehlendorf',
        'tempelh-schoeneb': 'tempelhof-schoeneberg'
    }

    # Check if the bezirk is in the mapping, otherwise return the cleaned version
    return bezirk_mapping.get(bezirk, bezirk) or bezirk

# Bridges

In [4]:
df_bridges.head(12)

Unnamed: 0,Bezirk,Total bridges¹,City streets,Green spaces
0,Mitte,123,91,32
1,Friedrh.-Kreuzb.,40,34,5
2,Pankow,114,67,23
3,Charlbg.-Wilmersd.,126,54,9
4,Spandau,77,60,14
5,Steglitz-Zehlend.,88,48,20
6,Tempelh.-Schöneb.,49,19,6
7,Neukölln,54,14,30
8,Treptow-Köpenick,124,93,14
9,Marzahn-Hellersd.,59,53,6


In [5]:
df_bridges.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Bezirk          12 non-null     object
 1   Total bridges¹  12 non-null     int64 
 2   City streets    12 non-null     int64 
 3   Green spaces    12 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 516.0+ bytes


In [6]:
# Rename columns for consistency
df_bridges.rename(columns={
    'Bezirk': 'bezirk',
    'Total bridges¹': 'district_total_bridges',
    'City streets': 'district_bridges_city_streets',
    'Green spaces': 'district_bridges_green_spaces'
}, inplace=True)

In [7]:
# Rename bezirk values 
df_bridges['bezirk'] = df_bridges['bezirk'].apply(clean_bezirk)

In [8]:
df_bridges.head(12)

Unnamed: 0,bezirk,district_total_bridges,district_bridges_city_streets,district_bridges_green_spaces
0,mitte,123,91,32
1,friedrichshain-kreuzberg,40,34,5
2,pankow,114,67,23
3,charlottenburg-wilmersdorf,126,54,9
4,spandau,77,60,14
5,steglitz-zehlendorf,88,48,20
6,tempelhof-schoeneberg,49,19,6
7,neukoelln,54,14,30
8,treptow-koepenick,124,93,14
9,marzahn-hellersdorf,59,53,6


# Cinemas

In [9]:
df_cinemas.head(12)

Unnamed: 0,Bezirk,Movie Theaters
0,,
1,Mitte,16.0
2,Friedrichshain-Kreuzberg,11.0
3,Pankow,10.0
4,Charlottenburg-Wilmersdorf,12.0
5,Spandau,2.0
6,Steglitz-Zehlendorf,6.0
7,Tempelhof-Schöneberg,5.0
8,Neukölln,7.0
9,Treptow-Köpenick,9.0


In [10]:
df_cinemas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Bezirk          12 non-null     object 
 1   Movie Theaters  12 non-null     float64
dtypes: float64(1), object(1)
memory usage: 340.0+ bytes


In [11]:
# Drop NaN
df_cinemas.dropna(subset=['Bezirk'], inplace=True)

# Rename columns for consistency
df_cinemas.rename(columns={
    'Bezirk': 'bezirk',
    'Movie Theaters': 'district_movie_theaters'
}, inplace=True)

# Clean bezirk values for consistency
df_cinemas['bezirk'] = df_cinemas['bezirk'].apply(clean_bezirk)

# Convert data type
df_cinemas['district_movie_theaters'] = pd.to_numeric(df_cinemas['district_movie_theaters']).astype('Int64')

In [12]:
df_cinemas.head(12)

Unnamed: 0,bezirk,district_movie_theaters
1,mitte,16
2,friedrichshain-kreuzberg,11
3,pankow,10
4,charlottenburg-wilmersdorf,12
5,spandau,2
6,steglitz-zehlendorf,6
7,tempelhof-schoeneberg,5
8,neukoelln,7
9,treptow-koepenick,9
10,marzahn-hellersdorf,3


# Libraries

In [13]:
df_libraries.head(12)

Unnamed: 0,Bezirk,Libraries,visits,borrowings
0,Mitte,12,926691,2057975
1,Friedrichshain-Kreuzberg,6,528940,1572879
2,Pankow,8,516623,1952782
3,Charlottenburg-Wilmersdorf,7,593574,1419015
4,Spandau,7,601339,1366015
5,Steglitz-Zehlendorf,6,598644,2143456
6,Tempelhof-Schöneberg,7,543409,1565512
7,Neukölln,4,393484,1442335
8,Treptow-Köpenick,8,351997,1170322
9,Marzahn-Hellersdorf,6,507402,1235637


In [14]:
df_libraries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Bezirk      12 non-null     object
 1   Libraries   12 non-null     int64 
 2   visits      12 non-null     object
 3   borrowings  12 non-null     object
dtypes: int64(1), object(3)
memory usage: 516.0+ bytes


In [15]:
# Renaming columns for consistency
df_libraries.rename(columns={
    'Bezirk': 'bezirk',
    'Libraries': 'district_libraries',
    'visits': 'district_libraries_visits',
    'borrowings': 'district_libraries_borrowings'
}, inplace=True)

# Cleaning up the bezirk values
df_libraries['bezirk'] = df_libraries['bezirk'].apply(clean_bezirk)

# Converting visits and borrowings to numeric float
df_libraries['district_libraries_visits'] = (
    df_libraries['district_libraries_visits']
    .str.replace(",", "")
    .astype(int)
)

df_libraries['district_libraries_borrowings'] = (
    df_libraries['district_libraries_borrowings']
    .str.replace(",", "")
    .astype(int)
)

In [16]:
df_libraries.head(12)

Unnamed: 0,bezirk,district_libraries,district_libraries_visits,district_libraries_borrowings
0,mitte,12,926691,2057975
1,friedrichshain-kreuzberg,6,528940,1572879
2,pankow,8,516623,1952782
3,charlottenburg-wilmersdorf,7,593574,1419015
4,spandau,7,601339,1366015
5,steglitz-zehlendorf,6,598644,2143456
6,tempelhof-schoeneberg,7,543409,1565512
7,neukoelln,4,393484,1442335
8,treptow-koepenick,8,351997,1170322
9,marzahn-hellersdorf,6,507402,1235637


# Passenger Cars

In [17]:
df_cars.head(12)

Unnamed: 0,Bezirk,Total,Privat,Private cars per 100 inhabitants
0,Mitte,95 590,71 760,18.1
1,Friedrichshain-Kreuzberg,60 196,50 147,17.1
2,Pankow,121 013,110 940,26.1
3,Charlottenburg-Wilmersdorf,135 928,96 143,28.0
4,Spandau,88 508,83 118,32.3
5,Steglitz-Zehlendorf,121 445,113 621,36.6
6,Tempelhof-Schöneberg,122 165,103 056,29.0
7,Neukölln,93 298,85 890,26.0
8,Treptow-Köpenick,106 550,96 803,32.9
9,Marzahn-Hellersdorf,100 950,94 915,32.5


In [18]:
df_cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Bezirk                            12 non-null     object 
 1   Total                             12 non-null     object 
 2   Privat                            12 non-null     object 
 3   Private cars per 100 inhabitants  12 non-null     float64
dtypes: float64(1), object(3)
memory usage: 516.0+ bytes


In [19]:
# Rename columns for consistency
df_cars.rename(columns={
    'Bezirk': 'bezirk',
    'Total': 'district_total_cars',
    'Privat': 'district_private_cars',
    'Private cars per 100 inhabitants': 'district_private_cars_per_100_inhabitants'
}, inplace=True)

# Cleaning up the bezirk values
df_cars['bezirk'] = df_cars['bezirk'].apply(clean_bezirk)

# Remove white space and Convert data types 
df_cars['district_total_cars'] = df_cars['district_total_cars'].str.replace(" ", "").astype(int)
df_cars['district_private_cars'] = df_cars['district_private_cars'].str.replace(" ", "").astype(int)
df_cars['district_private_cars_per_100_inhabitants'] = df_cars['district_private_cars_per_100_inhabitants'].astype(float)

In [20]:
df_cars.head(12)

Unnamed: 0,bezirk,district_total_cars,district_private_cars,district_private_cars_per_100_inhabitants
0,mitte,95590,71760,18.1
1,friedrichshain-kreuzberg,60196,50147,17.1
2,pankow,121013,110940,26.1
3,charlottenburg-wilmersdorf,135928,96143,28.0
4,spandau,88508,83118,32.3
5,steglitz-zehlendorf,121445,113621,36.6
6,tempelhof-schoeneberg,122165,103056,29.0
7,neukoelln,93298,85890,26.0
8,treptow-koepenick,106550,96803,32.9
9,marzahn-hellersdorf,100950,94915,32.5


# Public Toilets

In [21]:
df_toilets.head()

Unnamed: 0,ID,Bezirk,Standort,Vertrag,Betreiber,Modelltyp,Symbol,Öffnungszeiten,Nutzungsentgelt,Zahlungsart,Barrierefrei,Barrierearm,Wickeltisch
0,2,Mitte,WC-Center Alexanderplatz 1,Toilettenvertrag mit Wall,Wall GmbH,WC-Center,WC,Mo-Sa 07:00-22:00 So 10:00-22:00,0.5,Bargeld,nein,ja,ja
1,4,Spandau,WC-Center Spandauer Markt,Toilettenvertrag mit Wall,Wall GmbH,WC-Center,WC,Mo-Sa 07:00-22:00 So 10:00-22:00,0.2,Bargeld,nein,ja,ja
2,6,Charlottenburg-Wilmersdorf,Hohenzollerndamm vor Rheinbabenallee (Roseneck...,Toilettenvertrag mit Wall,Wall GmbH,Berliner Toilette mit 1 Platz,WC,24h,0.5,Karte/App/Paypal/BVG-Guthabenkarte,ja,ja,nein
3,7,Spandau,"Rathaus Spandau, Altstädter Ring hinter Stabho...",Toilettenvertrag mit Wall,Wall GmbH,Berliner Toilette mit 2 Plätzen,WC,24h,0.5,Karte/App/Paypal/BVG-Guthabenkarte,ja,ja,ja
4,9,Steglitz-Zehlendorf,Schloßstr. ggü. 33-35 auf dem Hermann-Ehlers-P...,Toilettenvertrag mit Wall,Wall GmbH,Berliner Toilette mit 2 Plätzen,WC,24h,0.5,Karte/App/Paypal/BVG-Guthabenkarte,ja,ja,nein


In [22]:
df_toilets.shape

(509, 13)

In [23]:
# Aggregate the data by bezirk
df_toilets_district = (
    df_toilets
    .groupby('Bezirk')
    .agg(district_public_toilets=('ID', 'count'))
    .reset_index()
)

In [24]:
df_toilets_district.head(12)

Unnamed: 0,Bezirk,district_public_toilets
0,Charlottenburg-Wilmersdorf,58
1,Friedrichshain-Kreuzberg,57
2,Lichtenberg,38
3,Marzahn-Hellersdorf,23
4,Mitte,60
5,Neukölln,41
6,Pankow,34
7,Reinickendorf,39
8,Spandau,32
9,Steglitz-Zehlendorf,37


In [25]:
df_toilets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509 entries, 0 to 508
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ID               509 non-null    int64 
 1   Bezirk           509 non-null    object
 2   Standort         509 non-null    object
 3   Vertrag          509 non-null    object
 4   Betreiber        509 non-null    object
 5   Modelltyp        509 non-null    object
 6   Symbol           509 non-null    object
 7   Öffnungszeiten   509 non-null    object
 8   Nutzungsentgelt  509 non-null    object
 9   Zahlungsart      509 non-null    object
 10  Barrierefrei     509 non-null    object
 11  Barrierearm      509 non-null    object
 12  Wickeltisch      509 non-null    object
dtypes: int64(1), object(12)
memory usage: 51.8+ KB


In [26]:
# rename columns for consistency
df_toilets_district.rename(columns={
    'Bezirk': 'bezirk'
}, inplace=True)

# Clean up the bezirk values
df_toilets_district['bezirk'] = df_toilets_district['bezirk'].apply(clean_bezirk)

# Tourism - Overnight Stays

In [27]:
df_overnight_stays.head(12)

Unnamed: 0,Bezirke,overnight stays,Overnight stays Change
0,Mitte,13758148,501729
1,Friedrichshain-Kreuzberg,4419262,273921
2,Pankow,1190689,-76501
3,Charlottenburg-Wilmersdorf,4789079,23580
4,Spandau,606680,6307
5,Steglitz-Zehlendorf,379056,5023
6,Tempelhof-Schöneberg,1710674,-24580
7,Neukölln,783484,24242
8,Treptow-Köpenick,1009383,189079
9,Marzahn-Hellersdorf,319598,61765


In [28]:
df_overnight_stays.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 3 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Bezirke                 12 non-null     object
 1   overnight stays         12 non-null     object
 2   Overnight stays Change  12 non-null     object
dtypes: object(3)
memory usage: 420.0+ bytes


In [29]:
# Renaming columns for consistency
df_overnight_stays.rename(columns={
    'Bezirke': 'bezirk',
    'overnight stays': 'district_tourism_overnightstays_2024',
    'Overnight stays Change': 'district_tourism_overnightstays_change_2023_2024'
}, inplace=True)

# Clean up the bezirk values
df_overnight_stays['bezirk'] = df_overnight_stays['bezirk'].apply(clean_bezirk)

# Change data types
df_overnight_stays['district_tourism_overnightstays_2024'] = df_overnight_stays['district_tourism_overnightstays_2024'].str.replace(",", "").astype(int)
df_overnight_stays['district_tourism_overnightstays_change_2023_2024'] = df_overnight_stays['district_tourism_overnightstays_change_2023_2024'].str.replace(",", "").astype(int)


In [30]:
df_overnight_stays.head(12)

Unnamed: 0,bezirk,district_tourism_overnightstays_2024,district_tourism_overnightstays_change_2023_2024
0,mitte,13758148,501729
1,friedrichshain-kreuzberg,4419262,273921
2,pankow,1190689,-76501
3,charlottenburg-wilmersdorf,4789079,23580
4,spandau,606680,6307
5,steglitz-zehlendorf,379056,5023
6,tempelhof-schoeneberg,1710674,-24580
7,neukoelln,783484,24242
8,treptow-koepenick,1009383,189079
9,marzahn-hellersdorf,319598,61765


# Tourism Guests

In [31]:
df_guests.head(24)

Unnamed: 0,Bezirke,Year,Guests,Overnightstays
0,Mitte,2023,5544415,13256419
1,Friedrichshain-Kreuzberg,2023,1607258,4145341
2,Pankow,2023,472021,1267190
3,Charlottenburg-Wilmersdorf,2023,2006271,4765499
4,Spandau,2023,210873,600373
5,Steglitz-Zehlendorf,2023,155827,374033
6,Tempelhof-Schöneberg,2023,701111,1735254
7,Neukölln,2023,342509,759242
8,Treptow-Köpenick,2023,352977,820304
9,Marzahn-Hellersdorf,2023,84099,257833


In [32]:
df_guests.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Bezirke         24 non-null     object
 1   Year            24 non-null     int64 
 2   Guests          24 non-null     object
 3   Overnightstays  24 non-null     object
dtypes: int64(1), object(3)
memory usage: 900.0+ bytes


In [33]:
# FILTER FOR 2024
df_guests = df_guests[df_guests['Year'] == 2024]

# Remove columns that are not needed
df_guests.drop(columns=['Year', 'Overnightstays'], inplace=True)

# Rename columns for consistency
df_guests.rename(columns={
    'Bezirke': 'bezirk',
    'Guests': 'district_tourism_guests_2024'
}, inplace=True)

# Clean up the bezirk values
df_guests['bezirk'] = df_guests['bezirk'].apply(clean_bezirk)

# Convert data type
df_guests['district_tourism_guests_2024'] = df_guests['district_tourism_guests_2024'].str.replace(",", "").astype(int)

In [34]:
df_guests.head(12)

Unnamed: 0,bezirk,district_tourism_guests_2024
12,mitte,5850255
13,friedrichshain-kreuzberg,1745035
14,pankow,445141
15,charlottenburg-wilmersdorf,2035496
16,spandau,231159
17,steglitz-zehlendorf,161211
18,tempelhof-schoeneberg,702564
19,neukoelln,358146
20,treptow-koepenick,452348
21,marzahn-hellersdorf,91306


# Street Trees

In [35]:
df_trees.head(12)

Unnamed: 0,Bezirk,Street\ntrees\ntotal
0,Mitte,26 299
1,Friedrichshain-Kreuzberg,16 520
2,Pankow,42 206
3,Charlottenburg-Wilmersdorf,42 263
4,Spandau,23 800
5,Steglitz-Zehlendorf,59 650
6,Tempelhof-Schöneberg,35 126
7,Neukölln,20 632
8,Treptow-Köpenick,44 446
9,Marzahn-Hellersdorf,46 087


In [36]:
# Renaming columns for consistency
df_trees.rename(columns={
    'Bezirk': 'bezirk',
    'Street\ntrees\ntotal': 'district_street_trees'
}, inplace=True)

# Clean up the bezirk values
df_trees['bezirk'] = df_trees['bezirk'].apply(clean_bezirk)

# Convert data type
df_trees['district_street_trees'] = df_trees['district_street_trees'].str.replace(" ", "").astype(int)

# Master Table

In [37]:
# Join all dataframes on bezirk
df_master_district = df_bridges.merge(df_cinemas, on='bezirk', how='outer') \
    .merge(df_cars, on='bezirk', how='outer') \
    .merge(df_guests, on='bezirk', how='outer') \
    .merge(df_overnight_stays, on='bezirk', how='outer') \
    .merge(df_trees, on='bezirk', how='outer') \
    .merge(df_libraries, on='bezirk', how='outer')

In [38]:
df_master_district.head(12)

Unnamed: 0,bezirk,district_total_bridges,district_bridges_city_streets,district_bridges_green_spaces,district_movie_theaters,district_total_cars,district_private_cars,district_private_cars_per_100_inhabitants,district_tourism_guests_2024,district_tourism_overnightstays_2024,district_tourism_overnightstays_change_2023_2024,district_street_trees,district_libraries,district_libraries_visits,district_libraries_borrowings
0,charlottenburg-wilmersdorf,126,54,9,12,135928,96143,28.0,2035496,4789079,23580,42263,7,593574,1419015
1,friedrichshain-kreuzberg,40,34,5,11,60196,50147,17.1,1745035,4419262,273921,16520,6,528940,1572879
2,lichtenberg,22,18,4,1,84768,79716,25.6,537558,1275081,42363,31635,4,479442,1581303
3,marzahn-hellersdorf,59,53,6,3,100950,94915,32.5,91306,319598,61765,46087,6,507402,1235637
4,mitte,123,91,32,16,95590,71760,18.1,5850255,13758148,501729,26299,12,926691,2057975
5,neukoelln,54,14,30,7,93298,85890,26.0,358146,783484,24242,20632,4,393484,1442335
6,pankow,114,67,23,10,121013,110940,26.1,445141,1190689,-76501,42206,8,516623,1952782
7,reinickendorf,88,32,27,1,101724,92071,34.3,107171,365950,-9287,44105,7,424593,1480990
8,spandau,77,60,14,2,88508,83118,32.3,231159,606680,6307,23800,7,601339,1366015
9,steglitz-zehlendorf,88,48,20,6,121445,113621,36.6,161211,379056,5023,59650,6,598644,2143456


In [39]:
# Save as CSV
# df_master_district.to_csv('../data/cleaned_data/berlin_berzirk_enrichment.csv', index=False)

In [40]:
df_census = pd.read_csv('../data/master_tables/berlin_census_2024.csv')

In [41]:
df_census.head(12)

Unnamed: 0,district,central_heating_percentage,district_heating,district_heating_percentage,floor_heating,floor_heating_percentage,block_heating,block_heating_percentage,stove_heating,stove_heating_percentage,...,60-69_percentage,70-79,70-79_percentage,>80,>80_percentage,district_min_rent_m2,district_avg_rent_m2,district_max_rent_m2,district_min_Buy_m2,district_avg_buy_m2
0,Charlottenburg-Wilmersdorf,45.4,8428.0,43.7,1828.0,9.5,161,0.8,91,0.5,...,12.3,33657.0,10.6,23227.0,7.3,11.9675,16.9825,28.5675,3635.5,6172.25
1,Friedrichshain-Kreuzberg,40.8,4128.0,37.6,2016.0,18.4,207,1.9,147,1.3,...,8.0,11259.0,4.3,7947.0,3.0,14.17,16.73,29.185,3245.5,5253.5
2,Lichtenberg,43.6,7574.0,41.3,1854.0,10.1,311,1.7,544,3.0,...,11.0,22488.0,7.7,20039.0,6.9,9.588889,13.2,21.69,3296.777778,3967.555556
3,Marzahn-Hellersdorf,62.1,7411.0,22.6,3909.0,11.9,499,1.5,423,1.3,...,15.1,24091.0,8.9,15601.0,5.8,9.186,12.83,20.272,2678.4,3512.4
4,Mitte,39.0,6618.0,45.2,1902.0,13.0,229,1.6,161,1.1,...,8.6,19092.0,5.3,13924.0,3.9,12.273333,16.106667,29.12,2804.0,4967.666667
5,Neukölln,63.7,5596.0,19.5,3684.0,12.9,318,1.1,711,2.5,...,10.3,23386.0,7.7,17113.0,5.6,9.68,13.66,22.16,3061.5,3996.75
6,Pankow,58.3,8095.0,20.6,6519.0,16.6,909,2.3,710,1.8,...,9.0,22840.0,5.7,22431.0,5.6,10.796364,14.030909,22.812727,3329.363636,4171.181818
7,Reinickendorf,76.0,3026.0,8.5,4046.0,11.3,574,1.6,793,2.2,...,12.1,25916.0,10.2,20290.0,8.0,10.326667,13.957778,22.038889,3469.444444,4169.777778
8,Spandau,68.3,3822.0,13.2,3559.0,12.3,884,3.1,794,2.7,...,11.6,21542.0,9.1,17403.0,7.3,10.1,13.336667,21.436667,3349.0,3916.5
9,Steglitz-Zehlendorf,73.3,6538.0,15.8,3564.0,8.6,522,1.3,341,0.8,...,12.2,33168.0,11.2,27082.0,9.2,12.067143,16.101429,24.895714,3856.0,5703.571429


In [42]:
# Show all columns
pd.set_option('display.max_columns', None)
df_census.head()

Unnamed: 0,district,central_heating_percentage,district_heating,district_heating_percentage,floor_heating,floor_heating_percentage,block_heating,block_heating_percentage,stove_heating,stove_heating_percentage,no_heating,no_heating_percentage,gas_energy,gas_energy_percentage,oil_energy,oil_energy_percentage,mixed_energy_sources,mixed_energy_sources_percentage,solar_energy,solar_energy_percentage,wood_pellets_energy,wood_pellets_energy_percentage,biomass_energy,biomass_energy_percentage,electric_energy,electric_energy_percentage,coal_energy,coal_energy_percentage,no_energy_source,no_energy_source_percentage,<1950,<1950_percentage,1950-1969,1950-1969_percentage,1970-1989,1970-1989_percentage,1990-2009,1990-2009_percentage,>2010,>2010_percentage,total_apartments,occupied_by_owner,occupied_by_owner_percentage,residentual_rental,residentual_rental_percentage,vacation_leisure_rental,vacation_leisure_rental_percentage,empty,empty_percentage,avarage_cold_rent_m2,avarage_living_space_m2_x,Unnamed: 51,vacancy_rate,employed,employed_percentage,unemployed,unemployed_percentage,not_working,not_working_percentage,labor_force,male_labor_force,female_labor_force,total_households,single_household,single_household_percentage,couples_without_children,couples_without_children_percentage,couples_with_children,couples_with_children_percentage,single_parents,single_parents_percentage,WG,WG_percentage,only_seniors,only_seniors_percentage,seniors_and_young_adults,seniors_and_young_adults_percentage,owner,owner_percentage,tenant,tenant_percentage,EUR_per_squared_meter,1_person_EUR_per_squared_meter,2_person_EUR_per_squared_meter,3_person_EUR_per_squared_meter,4_person_EUR_per_squared_meter,5_person_EUR_per_squared_meter,6_person_EUR_per_squared_meter,average_rooms,1_person_average_rooms,2_person_average_rooms,3_person_average_rooms,4_person_average_rooms,5_person_average_rooms,6_person_average_rooms,average_person_per_household,average_years_of_residence,avarage_living_space_m2_y,full_time_employees,median_income,total_population,men_population,men_population_percentage,women_population,women_population_percentage,single,single_percentage,couples,couples_percentage,widowed,widowed_percentage,divorced,divorced_percentage,other_civil_status,other_civil_status_percentage,average_age,<18,<18_percentage,18-29,18-29_percentage,30-49,30-49_percentage,50-64,50-64_percentage,>65,>65_percentage,<10,<10_percentage,10-19,10-19_percentage,20-29,20-29_percentage,30-39,30-39_percentage,40-49,40-49_percentage,50-59,50-59_percentage,60-69,60-69_percentage,70-79,70-79_percentage,>80,>80_percentage,district_min_rent_m2,district_avg_rent_m2,district_max_rent_m2,district_min_Buy_m2,district_avg_buy_m2
0,Charlottenburg-Wilmersdorf,45.4,8428.0,43.7,1828.0,9.5,161,0.8,91,0.5,11,0.1,7337.0,38.1,3148.0,16.3,8428.0,43.7,176.0,0.9,60,0.3,3,-,105,0.5,15,0.1,11,0.1,10545.0,54.7,5045.0,26.2,2090.0,10.8,882.0,4.6,719.0,3.7,194159.0,31643.0,16.3,157028.0,80.9,663,0.3,4821.0,2.5,8.43,77.7,,2.4,157160.0,51.1,13520.0,4.4,136880.0,44.5,170680.0,85750.0,84930.0,187477.0,120247.0,64.1,25457.0,13.6,22500.0,12.0,11882.0,6.3,7392.0,3.9,49968.0,26.7,10613.0,5.7,31398.0,16.7,156006.0,83.2,8.44,8.46,8.31,8.55,8.56,8.33,8.25,3.3,3.0,3.8,4.0,4.4,4.6,4.5,1.7,13.2,77.7,79654,4398,317079.0,150770.0,47.6,166238.0,52.4,162685.0,51.3,100977.0,31.9,18499.0,5.8,33622.0,10.6,1230.0,0.4,45.0,44463.0,14.0,45878.0,14.5,84811.0,26.8,67044.0,21.1,74816.0,23.6,25835.0,8.1,23348.0,7.4,41155.0,13.0,47036.0,14.8,37772.0,11.9,45909.0,14.5,39070.0,12.3,33657.0,10.6,23227.0,7.3,11.9675,16.9825,28.5675,3635.5,6172.25
1,Friedrichshain-Kreuzberg,40.8,4128.0,37.6,2016.0,18.4,207,1.9,147,1.3,3,-,6120.0,55.8,477.0,4.3,4128.0,37.6,35.0,0.3,44,0.4,35,0.3,83,0.8,55,0.5,3,-,6677.0,60.8,1844.0,16.8,1197.0,10.9,698.0,6.4,556.0,5.1,157555.0,13217.0,8.4,140960.0,89.5,199,0.1,3181.0,2.0,8.19,67.86,,1.9,151700.0,58.7,12760.0,4.9,94010.0,36.4,164470.0,86880.0,77590.0,153287.0,97888.0,63.9,15331.0,10.0,17855.0,11.6,12004.0,7.8,10212.0,6.7,17732.0,11.6,5108.0,3.3,13153.0,8.6,140111.0,91.4,8.27,8.43,8.18,8.02,7.66,7.26,7.68,3.1,2.8,3.5,3.7,4.0,4.1,4.1,1.7,11.8,67.86,73484,4526,264170.0,134035.0,50.8,130001.0,49.2,176057.0,66.7,60575.0,22.9,7386.0,2.8,18980.0,7.2,1031.0,0.4,38.3,41668.0,15.8,43274.0,16.4,105860.0,40.1,45261.0,17.1,27969.0,10.6,25570.0,9.7,19810.0,7.5,39556.0,15.0,62512.0,23.7,43350.0,16.4,32895.0,12.5,21132.0,8.0,11259.0,4.3,7947.0,3.0,14.17,16.73,29.185,3245.5,5253.5
2,Lichtenberg,43.6,7574.0,41.3,1854.0,10.1,311,1.7,544,3.0,58,0.3,9503.0,51.8,330.0,1.8,7574.0,41.3,563.0,3.1,75,0.4,8,-,191,1.0,43,0.2,58,0.3,6293.0,34.3,2012.0,11.0,3753.0,20.5,3140.0,17.1,3150.0,17.2,163232.0,10637.0,6.5,149897.0,91.8,61,0.0,2639.0,1.6,7.15,66.23,,1.6,137030.0,49.9,11370.0,4.1,126140.0,45.9,148400.0,77480.0,70920.0,159151.0,87517.0,55.0,28507.0,17.9,22364.0,14.1,13998.0,8.8,6762.0,4.2,35216.0,22.1,6663.0,4.2,10637.0,6.7,148503.0,93.3,7.1,7.35,6.68,6.9,6.91,6.68,7.04,3.4,2.9,3.7,4.1,4.4,4.6,4.6,1.8,14.1,66.23,80280,3613,290814.0,142883.0,49.2,147662.0,50.8,156038.0,53.7,89697.0,30.9,16704.0,5.7,26394.0,9.1,1709.0,0.6,41.8,51008.0,17.6,40028.0,13.8,90419.0,31.1,52386.0,18.0,56700.0,19.5,31366.0,10.8,24048.0,8.3,35622.0,12.3,54082.0,18.6,36336.0,12.5,34629.0,11.9,31927.0,11.0,22488.0,7.7,20039.0,6.9,9.588889,13.2,21.69,3296.777778,3967.555556
3,Marzahn-Hellersdorf,62.1,7411.0,22.6,3909.0,11.9,499,1.5,423,1.3,162,0.5,21209.0,64.7,1595.0,4.9,7411.0,22.6,1675.0,5.1,218,0.7,10,-,389,1.2,91,0.3,162,0.5,6904.0,21.1,1298.0,4.0,7740.0,23.6,12736.0,38.9,4082.0,12.5,140979.0,25563.0,18.1,113257.0,80.3,119,0.1,2036.0,1.4,6.29,72.72,,1.4,126110.0,48.0,10470.0,4.0,126050.0,48.0,136580.0,71210.0,65370.0,138234.0,65892.0,47.7,31850.0,23.0,22065.0,16.0,14321.0,10.4,4106.0,3.0,33619.0,24.3,8854.0,6.4,25563.0,18.5,112662.0,81.5,6.31,6.54,6.0,6.11,6.05,6.09,6.04,3.6,3.0,4.0,4.3,4.6,4.8,4.9,1.9,15.1,72.72,72606,3314,270010.0,132017.0,48.9,137991.0,51.1,126605.0,46.9,99564.0,36.9,15819.0,5.9,26895.0,10.0,1127.0,0.4,43.4,48900.0,18.1,30260.0,11.2,73473.0,27.2,58410.0,21.6,58966.0,21.8,28549.0,10.6,24728.0,9.2,25885.0,9.6,41214.0,15.3,32258.0,11.9,36825.0,13.6,40863.0,15.1,24091.0,8.9,15601.0,5.8,9.186,12.83,20.272,2678.4,3512.4
4,Mitte,39.0,6618.0,45.2,1902.0,13.0,229,1.6,161,1.1,13,0.1,6497.0,44.4,1110.0,7.6,6618.0,45.2,107.0,0.7,115,0.8,4,-,114,0.8,51,0.3,13,0.1,7492.0,51.2,2806.0,19.2,2097.0,14.3,1180.0,8.1,1051.0,7.2,211686.0,16107.0,7.6,190337.0,89.9,509,0.2,4736.0,2.2,8.3,66.91,,2.1,186360.0,53.9,18730.0,5.4,140570.0,40.7,205090.0,110580.0,94510.0,205002.0,131301.0,64.0,21302.0,10.4,25130.0,12.3,14416.0,7.0,12849.0,6.3,29794.0,14.5,8327.0,4.1,16074.0,7.8,188835.0,92.1,8.43,8.61,8.29,8.16,7.84,7.37,7.56,3.0,2.7,3.3,3.6,3.9,4.0,4.1,1.7,11.2,66.91,93275,4327,357322.0,182387.0,51.0,174931.0,49.0,220601.0,61.7,92491.0,25.9,13182.0,3.7,28702.0,8.0,2345.0,0.7,38.6,58113.0,16.3,69351.0,19.4,121889.0,34.1,61740.0,17.3,46229.0,12.9,34646.0,9.7,29294.0,8.2,63526.0,17.8,74888.0,21.0,46999.0,13.2,44286.0,12.4,30666.0,8.6,19092.0,5.3,13924.0,3.9,12.273333,16.106667,29.12,2804.0,4967.666667


In [43]:
df_census.rename(columns={
    'district': 'bezirk',
    'central_heating_percentage': 'district_central_heating_percentage',
    'floor_heating_percentage': 'district_floor_heating_percentage',
    'block_heating_percentage': 'district_block_heating_percentage',
    'stove_heating_percentage': 'district_stove_heating_percentage',
    'no_heating_percentage': 'district_no_heating_percentage',
    'gas_energy_percentage': 'district_gas_energy_percentage',
    'oil_energy_percentage': 'district_oil_energy_percentage',
    'mixed_energy_sources_percentage': 'district_mixed_energy_sources_percentage',
    'solar_energy_percentage': 'district_solar_energy_percentage',
    'wood_pellets_energy_percentage': 'district_wood_pellets_energy_percentage',
    'biomass_energy_percentage': 'district_biomass_energy_percentage',
    'electric_energy_percentage': 'district_electric_energy_percentage',
    'coal_energy_percentage': 'district_coal_energy_percentage',
    'no_energy_source_percentage': 'district_no_energy_source_percentage',
    '<1950_percentage': 'district_housing_built_before_1950_percentage',
    '>2010_percentage': 'district_housing_built_after_2010_percentage',
    'total_apartments': 'district_total_apartments',
    'occupied_by_owner_percentage': 'district_occupied_by_owner_percentage',
    'residentual_rental_percentage': 'district_residential_rental_percentage',
    'vacation_leisure_rental_percentage': 'district_vacation_leisure_rental_percentage',
    'empty': 'district_empty_apartments',
    'empty_percentage': 'district_empty_apartments_percentage',
    'avarage_living_space_m2_x': 'district_average_living_space_m2',
    'employed': 'district_employed',
    'unemployed': 'district_unemployed',
    'employed_percentage': 'district_employed_percentage',
    'unemployed_percentage': 'district_unemployed_percentage',
    'not_working': 'district_not_working',
    'not_working_percentage': 'district_not_working_percentage',
    'labor_force': 'district_labor_force',
    'male_labor_force': 'district_male_labor_force',
    'female_labor_force': 'district_female_labor_force',
    'total_households': 'district_total_households',
    'single_household': 'district_single_households',
    'couples_without_children': 'district_couples_without_children',
    'couples_with_children': 'district_couples_with_children',
    'single_parents': 'district_single_parents',
    'WG': 'district_shared_apartments',
    'only_seniors': 'district_only_seniors_households',
    'owner_percentage': 'district_apartment_owner_percentage',
    'tenant_percentage': 'district_apartment_tenant_percentage',
    'average_rooms': 'district_apartment_average_rooms',
    'average_person_per_household': 'district_average_persons_per_household',
    'average_years_of_residence': 'district_average_years_of_residence',
    'full_time_employees': 'district_full_time_employees',
    'median_income': 'district_median_income',
    'total_population': 'district_total_population',
    'men_population': 'district_male_population',
    'women_population': 'district_female_population',
    'single': 'district_single_population',
    'couples': 'district_couples_population',
    'widowed': 'district_widowed_population',
    'divorced': 'district_divorced_population',
    'other_civil_status': 'district_other_civil_status_population',
    'average_age': 'district_average_age',
    '<18': 'district_population_under_18',
    '18-29': 'district_population_18_29',
    '30-49': 'district_population_30_49',
    '50-64': 'district_population_50_64',
    '>65': 'district_population_65_plus',
    '<18_percentage': 'district_population_under_18_percentage',
    '18-29_percentage': 'district_population_18_29_percentage',
    '30-49_percentage': 'district_population_30_49_percentage',
    '50-64_percentage': 'district_population_50_64_percentage',
    '>65_percentage': 'district_population_65_plus_percentage',
    'couples_without_children_percentage': 'district_couples_without_children_percentage',
    'couples_with_children_percentage': 'district_couples_with_children_percentage',
    'owner': 'district_apartment_ownes',
    'tenant': 'district_apartment_tenants',
    'men_population_percentage': 'district_male_population_percentage',
    'women_population_percentage': 'district_female_population_percentage'
}, inplace=True)

In [44]:
df_census.head()

Unnamed: 0,bezirk,district_central_heating_percentage,district_heating,district_heating_percentage,floor_heating,district_floor_heating_percentage,block_heating,district_block_heating_percentage,stove_heating,district_stove_heating_percentage,no_heating,district_no_heating_percentage,gas_energy,district_gas_energy_percentage,oil_energy,district_oil_energy_percentage,mixed_energy_sources,district_mixed_energy_sources_percentage,solar_energy,district_solar_energy_percentage,wood_pellets_energy,district_wood_pellets_energy_percentage,biomass_energy,district_biomass_energy_percentage,electric_energy,district_electric_energy_percentage,coal_energy,district_coal_energy_percentage,no_energy_source,district_no_energy_source_percentage,<1950,district_housing_built_before_1950_percentage,1950-1969,1950-1969_percentage,1970-1989,1970-1989_percentage,1990-2009,1990-2009_percentage,>2010,district_housing_built_after_2010_percentage,district_total_apartments,occupied_by_owner,district_occupied_by_owner_percentage,residentual_rental,district_residential_rental_percentage,vacation_leisure_rental,district_vacation_leisure_rental_percentage,district_empty_apartments,district_empty_apartments_percentage,avarage_cold_rent_m2,district_average_living_space_m2,Unnamed: 51,vacancy_rate,district_employed,district_employed_percentage,district_unemployed,district_unemployed_percentage,district_not_working,district_not_working_percentage,district_labor_force,district_male_labor_force,district_female_labor_force,district_total_households,district_single_households,single_household_percentage,district_couples_without_children,district_couples_without_children_percentage,district_couples_with_children,district_couples_with_children_percentage,district_single_parents,single_parents_percentage,district_shared_apartments,WG_percentage,district_only_seniors_households,only_seniors_percentage,seniors_and_young_adults,seniors_and_young_adults_percentage,district_apartment_ownes,district_apartment_owner_percentage,district_apartment_tenants,district_apartment_tenant_percentage,EUR_per_squared_meter,1_person_EUR_per_squared_meter,2_person_EUR_per_squared_meter,3_person_EUR_per_squared_meter,4_person_EUR_per_squared_meter,5_person_EUR_per_squared_meter,6_person_EUR_per_squared_meter,district_apartment_average_rooms,1_person_average_rooms,2_person_average_rooms,3_person_average_rooms,4_person_average_rooms,5_person_average_rooms,6_person_average_rooms,district_average_persons_per_household,district_average_years_of_residence,avarage_living_space_m2_y,district_full_time_employees,district_median_income,district_total_population,district_male_population,district_male_population_percentage,district_female_population,district_female_population_percentage,district_single_population,single_percentage,district_couples_population,couples_percentage,district_widowed_population,widowed_percentage,district_divorced_population,divorced_percentage,district_other_civil_status_population,other_civil_status_percentage,district_average_age,district_population_under_18,district_population_under_18_percentage,district_population_18_29,district_population_18_29_percentage,district_population_30_49,district_population_30_49_percentage,district_population_50_64,district_population_50_64_percentage,district_population_65_plus,district_population_65_plus_percentage,<10,<10_percentage,10-19,10-19_percentage,20-29,20-29_percentage,30-39,30-39_percentage,40-49,40-49_percentage,50-59,50-59_percentage,60-69,60-69_percentage,70-79,70-79_percentage,>80,>80_percentage,district_min_rent_m2,district_avg_rent_m2,district_max_rent_m2,district_min_Buy_m2,district_avg_buy_m2
0,Charlottenburg-Wilmersdorf,45.4,8428.0,43.7,1828.0,9.5,161,0.8,91,0.5,11,0.1,7337.0,38.1,3148.0,16.3,8428.0,43.7,176.0,0.9,60,0.3,3,-,105,0.5,15,0.1,11,0.1,10545.0,54.7,5045.0,26.2,2090.0,10.8,882.0,4.6,719.0,3.7,194159.0,31643.0,16.3,157028.0,80.9,663,0.3,4821.0,2.5,8.43,77.7,,2.4,157160.0,51.1,13520.0,4.4,136880.0,44.5,170680.0,85750.0,84930.0,187477.0,120247.0,64.1,25457.0,13.6,22500.0,12.0,11882.0,6.3,7392.0,3.9,49968.0,26.7,10613.0,5.7,31398.0,16.7,156006.0,83.2,8.44,8.46,8.31,8.55,8.56,8.33,8.25,3.3,3.0,3.8,4.0,4.4,4.6,4.5,1.7,13.2,77.7,79654,4398,317079.0,150770.0,47.6,166238.0,52.4,162685.0,51.3,100977.0,31.9,18499.0,5.8,33622.0,10.6,1230.0,0.4,45.0,44463.0,14.0,45878.0,14.5,84811.0,26.8,67044.0,21.1,74816.0,23.6,25835.0,8.1,23348.0,7.4,41155.0,13.0,47036.0,14.8,37772.0,11.9,45909.0,14.5,39070.0,12.3,33657.0,10.6,23227.0,7.3,11.9675,16.9825,28.5675,3635.5,6172.25
1,Friedrichshain-Kreuzberg,40.8,4128.0,37.6,2016.0,18.4,207,1.9,147,1.3,3,-,6120.0,55.8,477.0,4.3,4128.0,37.6,35.0,0.3,44,0.4,35,0.3,83,0.8,55,0.5,3,-,6677.0,60.8,1844.0,16.8,1197.0,10.9,698.0,6.4,556.0,5.1,157555.0,13217.0,8.4,140960.0,89.5,199,0.1,3181.0,2.0,8.19,67.86,,1.9,151700.0,58.7,12760.0,4.9,94010.0,36.4,164470.0,86880.0,77590.0,153287.0,97888.0,63.9,15331.0,10.0,17855.0,11.6,12004.0,7.8,10212.0,6.7,17732.0,11.6,5108.0,3.3,13153.0,8.6,140111.0,91.4,8.27,8.43,8.18,8.02,7.66,7.26,7.68,3.1,2.8,3.5,3.7,4.0,4.1,4.1,1.7,11.8,67.86,73484,4526,264170.0,134035.0,50.8,130001.0,49.2,176057.0,66.7,60575.0,22.9,7386.0,2.8,18980.0,7.2,1031.0,0.4,38.3,41668.0,15.8,43274.0,16.4,105860.0,40.1,45261.0,17.1,27969.0,10.6,25570.0,9.7,19810.0,7.5,39556.0,15.0,62512.0,23.7,43350.0,16.4,32895.0,12.5,21132.0,8.0,11259.0,4.3,7947.0,3.0,14.17,16.73,29.185,3245.5,5253.5
2,Lichtenberg,43.6,7574.0,41.3,1854.0,10.1,311,1.7,544,3.0,58,0.3,9503.0,51.8,330.0,1.8,7574.0,41.3,563.0,3.1,75,0.4,8,-,191,1.0,43,0.2,58,0.3,6293.0,34.3,2012.0,11.0,3753.0,20.5,3140.0,17.1,3150.0,17.2,163232.0,10637.0,6.5,149897.0,91.8,61,0.0,2639.0,1.6,7.15,66.23,,1.6,137030.0,49.9,11370.0,4.1,126140.0,45.9,148400.0,77480.0,70920.0,159151.0,87517.0,55.0,28507.0,17.9,22364.0,14.1,13998.0,8.8,6762.0,4.2,35216.0,22.1,6663.0,4.2,10637.0,6.7,148503.0,93.3,7.1,7.35,6.68,6.9,6.91,6.68,7.04,3.4,2.9,3.7,4.1,4.4,4.6,4.6,1.8,14.1,66.23,80280,3613,290814.0,142883.0,49.2,147662.0,50.8,156038.0,53.7,89697.0,30.9,16704.0,5.7,26394.0,9.1,1709.0,0.6,41.8,51008.0,17.6,40028.0,13.8,90419.0,31.1,52386.0,18.0,56700.0,19.5,31366.0,10.8,24048.0,8.3,35622.0,12.3,54082.0,18.6,36336.0,12.5,34629.0,11.9,31927.0,11.0,22488.0,7.7,20039.0,6.9,9.588889,13.2,21.69,3296.777778,3967.555556
3,Marzahn-Hellersdorf,62.1,7411.0,22.6,3909.0,11.9,499,1.5,423,1.3,162,0.5,21209.0,64.7,1595.0,4.9,7411.0,22.6,1675.0,5.1,218,0.7,10,-,389,1.2,91,0.3,162,0.5,6904.0,21.1,1298.0,4.0,7740.0,23.6,12736.0,38.9,4082.0,12.5,140979.0,25563.0,18.1,113257.0,80.3,119,0.1,2036.0,1.4,6.29,72.72,,1.4,126110.0,48.0,10470.0,4.0,126050.0,48.0,136580.0,71210.0,65370.0,138234.0,65892.0,47.7,31850.0,23.0,22065.0,16.0,14321.0,10.4,4106.0,3.0,33619.0,24.3,8854.0,6.4,25563.0,18.5,112662.0,81.5,6.31,6.54,6.0,6.11,6.05,6.09,6.04,3.6,3.0,4.0,4.3,4.6,4.8,4.9,1.9,15.1,72.72,72606,3314,270010.0,132017.0,48.9,137991.0,51.1,126605.0,46.9,99564.0,36.9,15819.0,5.9,26895.0,10.0,1127.0,0.4,43.4,48900.0,18.1,30260.0,11.2,73473.0,27.2,58410.0,21.6,58966.0,21.8,28549.0,10.6,24728.0,9.2,25885.0,9.6,41214.0,15.3,32258.0,11.9,36825.0,13.6,40863.0,15.1,24091.0,8.9,15601.0,5.8,9.186,12.83,20.272,2678.4,3512.4
4,Mitte,39.0,6618.0,45.2,1902.0,13.0,229,1.6,161,1.1,13,0.1,6497.0,44.4,1110.0,7.6,6618.0,45.2,107.0,0.7,115,0.8,4,-,114,0.8,51,0.3,13,0.1,7492.0,51.2,2806.0,19.2,2097.0,14.3,1180.0,8.1,1051.0,7.2,211686.0,16107.0,7.6,190337.0,89.9,509,0.2,4736.0,2.2,8.3,66.91,,2.1,186360.0,53.9,18730.0,5.4,140570.0,40.7,205090.0,110580.0,94510.0,205002.0,131301.0,64.0,21302.0,10.4,25130.0,12.3,14416.0,7.0,12849.0,6.3,29794.0,14.5,8327.0,4.1,16074.0,7.8,188835.0,92.1,8.43,8.61,8.29,8.16,7.84,7.37,7.56,3.0,2.7,3.3,3.6,3.9,4.0,4.1,1.7,11.2,66.91,93275,4327,357322.0,182387.0,51.0,174931.0,49.0,220601.0,61.7,92491.0,25.9,13182.0,3.7,28702.0,8.0,2345.0,0.7,38.6,58113.0,16.3,69351.0,19.4,121889.0,34.1,61740.0,17.3,46229.0,12.9,34646.0,9.7,29294.0,8.2,63526.0,17.8,74888.0,21.0,46999.0,13.2,44286.0,12.4,30666.0,8.6,19092.0,5.3,13924.0,3.9,12.273333,16.106667,29.12,2804.0,4967.666667


In [45]:
# Remove columns that are not needed
df_census.drop(columns=[
    'floor_heating', 'block_heating', 'stove_heating', 'no_heating', 'gas_energy',
    'oil_energy', 'mixed_energy_sources', 'solar_energy', 'wood_pellets_energy',
    'biomass_energy', 'electric_energy', 'coal_energy', 'no_energy_source', '<1950',
    '1950-1969', '1970-1989', '1990-2009', '1990-2009', '>2010', 'occupied_by_owner',
    'residentual_rental', 'vacation_leisure_rental', 'avarage_cold_rent_m2',
    'Unnamed: 51', 'vacancy_rate', 'single_household_percentage', 'single_parents_percentage',
    'WG_percentage', 'only_seniors_percentage', 'seniors_and_young_adults', 'seniors_and_young_adults_percentage',
    'EUR_per_squared_meter', '1_person_EUR_per_squared_meter',
    '3_person_EUR_per_squared_meter', '4_person_EUR_per_squared_meter',
    '5_person_EUR_per_squared_meter', '6_person_EUR_per_squared_meter',
    '1_person_average_rooms', '3_person_average_rooms',
    '4_person_average_rooms', '5_person_average_rooms', '6_person_average_rooms',
    'avarage_living_space_m2_y', 'single_percentage', 'couples_percentage', 'widowed_percentage',
    'divorced_percentage', 'other_civil_status_percentage', '<10', '10-19', '20-29', '30-39', '40-49',
    '50-59', '60-69', '70-79', '>80', '<10_percentage', '10-19_percentage', '20-29_percentage',
    '30-39_percentage', '40-49_percentage', '50-59_percentage', '60-69_percentage',
    '70-79_percentage', '>80_percentage'], inplace=True)

In [46]:
# Clean up the bezirk values
df_census['bezirk'] = df_census['bezirk'].apply(clean_bezirk)

In [47]:
df_census.head(12)

Unnamed: 0,bezirk,district_central_heating_percentage,district_heating,district_heating_percentage,district_floor_heating_percentage,district_block_heating_percentage,district_stove_heating_percentage,district_no_heating_percentage,district_gas_energy_percentage,district_oil_energy_percentage,district_mixed_energy_sources_percentage,district_solar_energy_percentage,district_wood_pellets_energy_percentage,district_biomass_energy_percentage,district_electric_energy_percentage,district_coal_energy_percentage,district_no_energy_source_percentage,district_housing_built_before_1950_percentage,1950-1969_percentage,1970-1989_percentage,1990-2009_percentage,district_housing_built_after_2010_percentage,district_total_apartments,district_occupied_by_owner_percentage,district_residential_rental_percentage,district_vacation_leisure_rental_percentage,district_empty_apartments,district_empty_apartments_percentage,district_average_living_space_m2,district_employed,district_employed_percentage,district_unemployed,district_unemployed_percentage,district_not_working,district_not_working_percentage,district_labor_force,district_male_labor_force,district_female_labor_force,district_total_households,district_single_households,district_couples_without_children,district_couples_without_children_percentage,district_couples_with_children,district_couples_with_children_percentage,district_single_parents,district_shared_apartments,district_only_seniors_households,district_apartment_ownes,district_apartment_owner_percentage,district_apartment_tenants,district_apartment_tenant_percentage,2_person_EUR_per_squared_meter,district_apartment_average_rooms,2_person_average_rooms,district_average_persons_per_household,district_average_years_of_residence,district_full_time_employees,district_median_income,district_total_population,district_male_population,district_male_population_percentage,district_female_population,district_female_population_percentage,district_single_population,district_couples_population,district_widowed_population,district_divorced_population,district_other_civil_status_population,district_average_age,district_population_under_18,district_population_under_18_percentage,district_population_18_29,district_population_18_29_percentage,district_population_30_49,district_population_30_49_percentage,district_population_50_64,district_population_50_64_percentage,district_population_65_plus,district_population_65_plus_percentage,district_min_rent_m2,district_avg_rent_m2,district_max_rent_m2,district_min_Buy_m2,district_avg_buy_m2
0,charlottenburg-wilmersdorf,45.4,8428.0,43.7,9.5,0.8,0.5,0.1,38.1,16.3,43.7,0.9,0.3,-,0.5,0.1,0.1,54.7,26.2,10.8,4.6,3.7,194159.0,16.3,80.9,0.3,4821.0,2.5,77.7,157160.0,51.1,13520.0,4.4,136880.0,44.5,170680.0,85750.0,84930.0,187477.0,120247.0,25457.0,13.6,22500.0,12.0,11882.0,7392.0,49968.0,31398.0,16.7,156006.0,83.2,8.31,3.3,3.8,1.7,13.2,79654,4398,317079.0,150770.0,47.6,166238.0,52.4,162685.0,100977.0,18499.0,33622.0,1230.0,45.0,44463.0,14.0,45878.0,14.5,84811.0,26.8,67044.0,21.1,74816.0,23.6,11.9675,16.9825,28.5675,3635.5,6172.25
1,friedrichshain-kreuzberg,40.8,4128.0,37.6,18.4,1.9,1.3,-,55.8,4.3,37.6,0.3,0.4,0.3,0.8,0.5,-,60.8,16.8,10.9,6.4,5.1,157555.0,8.4,89.5,0.1,3181.0,2.0,67.86,151700.0,58.7,12760.0,4.9,94010.0,36.4,164470.0,86880.0,77590.0,153287.0,97888.0,15331.0,10.0,17855.0,11.6,12004.0,10212.0,17732.0,13153.0,8.6,140111.0,91.4,8.18,3.1,3.5,1.7,11.8,73484,4526,264170.0,134035.0,50.8,130001.0,49.2,176057.0,60575.0,7386.0,18980.0,1031.0,38.3,41668.0,15.8,43274.0,16.4,105860.0,40.1,45261.0,17.1,27969.0,10.6,14.17,16.73,29.185,3245.5,5253.5
2,lichtenberg,43.6,7574.0,41.3,10.1,1.7,3.0,0.3,51.8,1.8,41.3,3.1,0.4,-,1.0,0.2,0.3,34.3,11.0,20.5,17.1,17.2,163232.0,6.5,91.8,0.0,2639.0,1.6,66.23,137030.0,49.9,11370.0,4.1,126140.0,45.9,148400.0,77480.0,70920.0,159151.0,87517.0,28507.0,17.9,22364.0,14.1,13998.0,6762.0,35216.0,10637.0,6.7,148503.0,93.3,6.68,3.4,3.7,1.8,14.1,80280,3613,290814.0,142883.0,49.2,147662.0,50.8,156038.0,89697.0,16704.0,26394.0,1709.0,41.8,51008.0,17.6,40028.0,13.8,90419.0,31.1,52386.0,18.0,56700.0,19.5,9.588889,13.2,21.69,3296.777778,3967.555556
3,marzahn-hellersdorf,62.1,7411.0,22.6,11.9,1.5,1.3,0.5,64.7,4.9,22.6,5.1,0.7,-,1.2,0.3,0.5,21.1,4.0,23.6,38.9,12.5,140979.0,18.1,80.3,0.1,2036.0,1.4,72.72,126110.0,48.0,10470.0,4.0,126050.0,48.0,136580.0,71210.0,65370.0,138234.0,65892.0,31850.0,23.0,22065.0,16.0,14321.0,4106.0,33619.0,25563.0,18.5,112662.0,81.5,6.0,3.6,4.0,1.9,15.1,72606,3314,270010.0,132017.0,48.9,137991.0,51.1,126605.0,99564.0,15819.0,26895.0,1127.0,43.4,48900.0,18.1,30260.0,11.2,73473.0,27.2,58410.0,21.6,58966.0,21.8,9.186,12.83,20.272,2678.4,3512.4
4,mitte,39.0,6618.0,45.2,13.0,1.6,1.1,0.1,44.4,7.6,45.2,0.7,0.8,-,0.8,0.3,0.1,51.2,19.2,14.3,8.1,7.2,211686.0,7.6,89.9,0.2,4736.0,2.2,66.91,186360.0,53.9,18730.0,5.4,140570.0,40.7,205090.0,110580.0,94510.0,205002.0,131301.0,21302.0,10.4,25130.0,12.3,14416.0,12849.0,29794.0,16074.0,7.8,188835.0,92.1,8.29,3.0,3.3,1.7,11.2,93275,4327,357322.0,182387.0,51.0,174931.0,49.0,220601.0,92491.0,13182.0,28702.0,2345.0,38.6,58113.0,16.3,69351.0,19.4,121889.0,34.1,61740.0,17.3,46229.0,12.9,12.273333,16.106667,29.12,2804.0,4967.666667
5,neukoelln,63.7,5596.0,19.5,12.9,1.1,2.5,0.3,49.1,26.7,19.5,1.6,0.6,-,2.0,0.3,0.3,36.3,20.6,27.2,11.4,4.5,167353.0,14.2,83.5,0.1,3577.0,2.1,69.82,141930.0,47.6,15560.0,5.2,140580.0,47.2,157490.0,84110.0,73380.0,162764.0,93451.0,22703.0,13.9,24215.0,14.9,12446.0,9946.0,33202.0,23779.0,14.6,138969.0,85.4,7.4,3.3,3.6,1.9,13.0,66456,3684,305405.0,150204.0,49.2,154818.0,50.8,166541.0,93961.0,16946.0,26528.0,1046.0,41.4,50826.0,16.7,44104.0,14.5,98245.0,32.2,57515.0,18.9,54334.0,17.8,9.68,13.66,22.16,3061.5,3996.75
6,pankow,58.3,8095.0,20.6,16.6,2.3,1.8,0.5,68.2,4.1,20.6,4.2,0.8,-,1.2,0.6,0.5,44.2,10.2,7.3,28.3,10.0,224212.0,15.9,82.0,0.2,4292.0,1.9,73.01,221880.0,57.1,13710.0,3.5,152800.0,39.3,235590.0,118710.0,116880.0,218848.0,122955.0,33914.0,15.5,34065.0,15.6,19293.0,8620.0,37365.0,35496.0,16.2,183282.0,83.7,7.86,3.4,3.7,1.8,13.4,114050,4424,400507.0,194348.0,48.5,206062.0,51.5,231392.0,117418.0,17643.0,32686.0,1274.0,40.8,72348.0,18.1,49501.0,12.4,136137.0,34.0,81843.0,20.4,60581.0,15.1,10.796364,14.030909,22.812727,3329.363636,4171.181818
7,reinickendorf,76.0,3026.0,8.5,11.3,1.6,2.2,0.3,53.9,32.0,8.5,2.5,0.7,-,1.9,0.1,0.3,39.9,21.3,23.8,10.7,4.3,132678.0,22.3,75.5,0.2,2724.0,2.1,78.04,107650.0,43.9,10670.0,4.4,126840.0,51.7,118320.0,62570.0,55750.0,128677.0,65714.0,25821.0,20.1,21710.0,16.9,11292.0,4145.0,35451.0,29315.0,22.8,99341.0,77.2,7.03,3.7,4.0,1.9,14.0,54364,3656,252941.0,123026.0,48.6,129916.0,51.4,115412.0,93866.0,17763.0,24918.0,979.0,44.3,43997.0,17.4,33295.0,13.2,61597.0,24.4,54289.0,21.5,59762.0,23.6,10.326667,13.957778,22.038889,3469.444444,4169.777778
8,spandau,68.3,3822.0,13.2,12.3,3.1,2.7,0.4,53.4,26.0,13.2,3.4,0.9,-,2.6,0.1,0.4,30.7,21.1,22.0,18.1,8.1,126796.0,18.0,79.5,0.1,3028.0,2.4,74.01,102020.0,44.4,10430.0,4.5,117280.0,51.1,112450.0,59310.0,53140.0,121144.0,62696.0,22087.0,18.2,20414.0,16.9,11929.0,4017.0,30762.0,22443.0,18.5,98687.0,81.5,7.05,3.5,3.8,1.9,13.1,54518,3512,237759.0,115185.0,48.4,122573.0,51.6,111666.0,85270.0,15946.0,23597.0,1276.0,43.0,44020.0,18.5,32257.0,13.6,60930.0,25.6,49334.0,20.7,51214.0,21.5,10.1,13.336667,21.436667,3349.0,3916.5
9,steglitz-zehlendorf,73.3,6538.0,15.8,8.6,1.3,0.8,0.2,51.9,28.3,15.8,2.3,0.5,0.1,0.8,-,0.2,43.8,23.8,18.0,9.7,4.7,162192.0,26.7,70.7,0.2,3919.0,2.4,85.95,139990.0,48.9,8910.0,3.1,137370.0,48.0,148900.0,74210.0,74690.0,155286.0,82458.0,30370.0,19.6,25973.0,16.7,11554.0,4931.0,46169.0,42413.0,27.3,112828.0,72.7,7.98,3.8,4.1,1.9,14.1,63689,4356,295130.0,137793.0,46.7,157329.0,53.3,133672.0,111516.0,20627.0,28419.0,883.0,46.1,47597.0,16.1,36074.0,12.2,69170.0,23.4,65574.0,22.2,76705.0,26.0,12.067143,16.101429,24.895714,3856.0,5703.571429


In [48]:
df_census.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 84 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   bezirk                                         12 non-null     object 
 1   district_central_heating_percentage            12 non-null     float64
 2   district_heating                               12 non-null     float64
 3   district_heating_percentage                    12 non-null     float64
 4   district_floor_heating_percentage              12 non-null     float64
 5   district_block_heating_percentage              12 non-null     float64
 6   district_stove_heating_percentage              12 non-null     float64
 7   district_no_heating_percentage                 12 non-null     object 
 8   district_gas_energy_percentage                 12 non-null     float64
 9   district_oil_energy_percentage                 12 non-nu

In [49]:
for col in df_census.columns.difference(['bezirk']):
    try:
        # Convert everything to string for replacement
        df_census[col] = df_census[col].astype(str).str.replace(",", "").str.strip()
        
        # Use float if any value contains "."
        if df_census[col].str.contains(".").any():
            df_census[col] = pd.to_numeric(df_census[col], errors='coerce').astype(float)
        else:
            df_census[col] = pd.to_numeric(df_census[col], errors='coerce').astype(int)
    
    except Exception as e:
        print(f"Error processing column '{col}': {e}")

In [50]:
df_census.head(12)

Unnamed: 0,bezirk,district_central_heating_percentage,district_heating,district_heating_percentage,district_floor_heating_percentage,district_block_heating_percentage,district_stove_heating_percentage,district_no_heating_percentage,district_gas_energy_percentage,district_oil_energy_percentage,district_mixed_energy_sources_percentage,district_solar_energy_percentage,district_wood_pellets_energy_percentage,district_biomass_energy_percentage,district_electric_energy_percentage,district_coal_energy_percentage,district_no_energy_source_percentage,district_housing_built_before_1950_percentage,1950-1969_percentage,1970-1989_percentage,1990-2009_percentage,district_housing_built_after_2010_percentage,district_total_apartments,district_occupied_by_owner_percentage,district_residential_rental_percentage,district_vacation_leisure_rental_percentage,district_empty_apartments,district_empty_apartments_percentage,district_average_living_space_m2,district_employed,district_employed_percentage,district_unemployed,district_unemployed_percentage,district_not_working,district_not_working_percentage,district_labor_force,district_male_labor_force,district_female_labor_force,district_total_households,district_single_households,district_couples_without_children,district_couples_without_children_percentage,district_couples_with_children,district_couples_with_children_percentage,district_single_parents,district_shared_apartments,district_only_seniors_households,district_apartment_ownes,district_apartment_owner_percentage,district_apartment_tenants,district_apartment_tenant_percentage,2_person_EUR_per_squared_meter,district_apartment_average_rooms,2_person_average_rooms,district_average_persons_per_household,district_average_years_of_residence,district_full_time_employees,district_median_income,district_total_population,district_male_population,district_male_population_percentage,district_female_population,district_female_population_percentage,district_single_population,district_couples_population,district_widowed_population,district_divorced_population,district_other_civil_status_population,district_average_age,district_population_under_18,district_population_under_18_percentage,district_population_18_29,district_population_18_29_percentage,district_population_30_49,district_population_30_49_percentage,district_population_50_64,district_population_50_64_percentage,district_population_65_plus,district_population_65_plus_percentage,district_min_rent_m2,district_avg_rent_m2,district_max_rent_m2,district_min_Buy_m2,district_avg_buy_m2
0,charlottenburg-wilmersdorf,45.4,8428.0,43.7,9.5,0.8,0.5,0.1,38.1,16.3,43.7,0.9,0.3,,0.5,0.1,0.1,54.7,26.2,10.8,4.6,3.7,194159.0,16.3,80.9,0.3,4821.0,2.5,77.7,157160.0,51.1,13520.0,4.4,136880.0,44.5,170680.0,85750.0,84930.0,187477.0,120247.0,25457.0,13.6,22500.0,12.0,11882.0,7392.0,49968.0,31398.0,16.7,156006.0,83.2,8.31,3.3,3.8,1.7,13.2,79654.0,4398.0,317079.0,150770.0,47.6,166238.0,52.4,162685.0,100977.0,18499.0,33622.0,1230.0,45.0,44463.0,14.0,45878.0,14.5,84811.0,26.8,67044.0,21.1,74816.0,23.6,11.9675,16.9825,28.5675,3635.5,6172.25
1,friedrichshain-kreuzberg,40.8,4128.0,37.6,18.4,1.9,1.3,,55.8,4.3,37.6,0.3,0.4,0.3,0.8,0.5,,60.8,16.8,10.9,6.4,5.1,157555.0,8.4,89.5,0.1,3181.0,2.0,67.86,151700.0,58.7,12760.0,4.9,94010.0,36.4,164470.0,86880.0,77590.0,153287.0,97888.0,15331.0,10.0,17855.0,11.6,12004.0,10212.0,17732.0,13153.0,8.6,140111.0,91.4,8.18,3.1,3.5,1.7,11.8,73484.0,4526.0,264170.0,134035.0,50.8,130001.0,49.2,176057.0,60575.0,7386.0,18980.0,1031.0,38.3,41668.0,15.8,43274.0,16.4,105860.0,40.1,45261.0,17.1,27969.0,10.6,14.17,16.73,29.185,3245.5,5253.5
2,lichtenberg,43.6,7574.0,41.3,10.1,1.7,3.0,0.3,51.8,1.8,41.3,3.1,0.4,,1.0,0.2,0.3,34.3,11.0,20.5,17.1,17.2,163232.0,6.5,91.8,0.0,2639.0,1.6,66.23,137030.0,49.9,11370.0,4.1,126140.0,45.9,148400.0,77480.0,70920.0,159151.0,87517.0,28507.0,17.9,22364.0,14.1,13998.0,6762.0,35216.0,10637.0,6.7,148503.0,93.3,6.68,3.4,3.7,1.8,14.1,80280.0,3613.0,290814.0,142883.0,49.2,147662.0,50.8,156038.0,89697.0,16704.0,26394.0,1709.0,41.8,51008.0,17.6,40028.0,13.8,90419.0,31.1,52386.0,18.0,56700.0,19.5,9.588889,13.2,21.69,3296.777778,3967.555556
3,marzahn-hellersdorf,62.1,7411.0,22.6,11.9,1.5,1.3,0.5,64.7,4.9,22.6,5.1,0.7,,1.2,0.3,0.5,21.1,4.0,23.6,38.9,12.5,140979.0,18.1,80.3,0.1,2036.0,1.4,72.72,126110.0,48.0,10470.0,4.0,126050.0,48.0,136580.0,71210.0,65370.0,138234.0,65892.0,31850.0,23.0,22065.0,16.0,14321.0,4106.0,33619.0,25563.0,18.5,112662.0,81.5,6.0,3.6,4.0,1.9,15.1,72606.0,3314.0,270010.0,132017.0,48.9,137991.0,51.1,126605.0,99564.0,15819.0,26895.0,1127.0,43.4,48900.0,18.1,30260.0,11.2,73473.0,27.2,58410.0,21.6,58966.0,21.8,9.186,12.83,20.272,2678.4,3512.4
4,mitte,39.0,6618.0,45.2,13.0,1.6,1.1,0.1,44.4,7.6,45.2,0.7,0.8,,0.8,0.3,0.1,51.2,19.2,14.3,8.1,7.2,211686.0,7.6,89.9,0.2,4736.0,2.2,66.91,186360.0,53.9,18730.0,5.4,140570.0,40.7,205090.0,110580.0,94510.0,205002.0,131301.0,21302.0,10.4,25130.0,12.3,14416.0,12849.0,29794.0,16074.0,7.8,188835.0,92.1,8.29,3.0,3.3,1.7,11.2,93275.0,4327.0,357322.0,182387.0,51.0,174931.0,49.0,220601.0,92491.0,13182.0,28702.0,2345.0,38.6,58113.0,16.3,69351.0,19.4,121889.0,34.1,61740.0,17.3,46229.0,12.9,12.273333,16.106667,29.12,2804.0,4967.666667
5,neukoelln,63.7,5596.0,19.5,12.9,1.1,2.5,0.3,49.1,26.7,19.5,1.6,0.6,,2.0,0.3,0.3,36.3,20.6,27.2,11.4,4.5,167353.0,14.2,83.5,0.1,3577.0,2.1,69.82,141930.0,47.6,15560.0,5.2,140580.0,47.2,157490.0,84110.0,73380.0,162764.0,93451.0,22703.0,13.9,24215.0,14.9,12446.0,9946.0,33202.0,23779.0,14.6,138969.0,85.4,7.4,3.3,3.6,1.9,13.0,66456.0,3684.0,305405.0,150204.0,49.2,154818.0,50.8,166541.0,93961.0,16946.0,26528.0,1046.0,41.4,50826.0,16.7,44104.0,14.5,98245.0,32.2,57515.0,18.9,54334.0,17.8,9.68,13.66,22.16,3061.5,3996.75
6,pankow,58.3,8095.0,20.6,16.6,2.3,1.8,0.5,68.2,4.1,20.6,4.2,0.8,,1.2,0.6,0.5,44.2,10.2,7.3,28.3,10.0,224212.0,15.9,82.0,0.2,4292.0,1.9,73.01,221880.0,57.1,13710.0,3.5,152800.0,39.3,235590.0,118710.0,116880.0,218848.0,122955.0,33914.0,15.5,34065.0,15.6,19293.0,8620.0,37365.0,35496.0,16.2,183282.0,83.7,7.86,3.4,3.7,1.8,13.4,114050.0,4424.0,400507.0,194348.0,48.5,206062.0,51.5,231392.0,117418.0,17643.0,32686.0,1274.0,40.8,72348.0,18.1,49501.0,12.4,136137.0,34.0,81843.0,20.4,60581.0,15.1,10.796364,14.030909,22.812727,3329.363636,4171.181818
7,reinickendorf,76.0,3026.0,8.5,11.3,1.6,2.2,0.3,53.9,32.0,8.5,2.5,0.7,,1.9,0.1,0.3,39.9,21.3,23.8,10.7,4.3,132678.0,22.3,75.5,0.2,2724.0,2.1,78.04,107650.0,43.9,10670.0,4.4,126840.0,51.7,118320.0,62570.0,55750.0,128677.0,65714.0,25821.0,20.1,21710.0,16.9,11292.0,4145.0,35451.0,29315.0,22.8,99341.0,77.2,7.03,3.7,4.0,1.9,14.0,54364.0,3656.0,252941.0,123026.0,48.6,129916.0,51.4,115412.0,93866.0,17763.0,24918.0,979.0,44.3,43997.0,17.4,33295.0,13.2,61597.0,24.4,54289.0,21.5,59762.0,23.6,10.326667,13.957778,22.038889,3469.444444,4169.777778
8,spandau,68.3,3822.0,13.2,12.3,3.1,2.7,0.4,53.4,26.0,13.2,3.4,0.9,,2.6,0.1,0.4,30.7,21.1,22.0,18.1,8.1,126796.0,18.0,79.5,0.1,3028.0,2.4,74.01,102020.0,44.4,10430.0,4.5,117280.0,51.1,112450.0,59310.0,53140.0,121144.0,62696.0,22087.0,18.2,20414.0,16.9,11929.0,4017.0,30762.0,22443.0,18.5,98687.0,81.5,7.05,3.5,3.8,1.9,13.1,54518.0,3512.0,237759.0,115185.0,48.4,122573.0,51.6,111666.0,85270.0,15946.0,23597.0,1276.0,43.0,44020.0,18.5,32257.0,13.6,60930.0,25.6,49334.0,20.7,51214.0,21.5,10.1,13.336667,21.436667,3349.0,3916.5
9,steglitz-zehlendorf,73.3,6538.0,15.8,8.6,1.3,0.8,0.2,51.9,28.3,15.8,2.3,0.5,0.1,0.8,,0.2,43.8,23.8,18.0,9.7,4.7,162192.0,26.7,70.7,0.2,3919.0,2.4,85.95,139990.0,48.9,8910.0,3.1,137370.0,48.0,148900.0,74210.0,74690.0,155286.0,82458.0,30370.0,19.6,25973.0,16.7,11554.0,4931.0,46169.0,42413.0,27.3,112828.0,72.7,7.98,3.8,4.1,1.9,14.1,63689.0,4356.0,295130.0,137793.0,46.7,157329.0,53.3,133672.0,111516.0,20627.0,28419.0,883.0,46.1,47597.0,16.1,36074.0,12.2,69170.0,23.4,65574.0,22.2,76705.0,26.0,12.067143,16.101429,24.895714,3856.0,5703.571429


In [51]:
# Replace NaN with 0
df_census.fillna(0, inplace=True)

In [53]:
# Merge dataframes df_master_districts and df_census on bezirk
df_master_districts_final = df_master_district.merge(df_census, on='bezirk', how='outer')

In [1]:
df_master_districts_final.shape

NameError: name 'df_master_districts_final' is not defined

In [55]:
# Save to master tables folder as csv
df_master_districts_final.to_csv('../data/cleaned_data/berlin_bezrik_master_table.csv', index=False)