In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df_ort_population = pd.read_csv('../data/raw_data/berlin_housing/extracted/berlin_subdistrict_population_2020.csv')

In [3]:
df_ort_population.head(15)

Unnamed: 0,Bezirk,Ortsteil,Gender,Nationality,Age group,Frequency
0,Mitte,Mitte,1,A,00_05,618
1,Mitte,Mitte,1,A,05_10,461
2,Mitte,Mitte,1,A,10_15,351
3,Mitte,Mitte,1,A,15_20,330
4,Mitte,Mitte,1,A,20_25,1155
5,Mitte,Mitte,1,A,25_30,2368
6,Mitte,Mitte,1,A,30_35,3149
7,Mitte,Mitte,1,A,35_40,2468
8,Mitte,Mitte,1,A,40_45,1644
9,Mitte,Mitte,1,A,45_50,1117


In [4]:
# Pivot the table
# Pivot by Ortsteil and Age group
df_subdistrict_age = df_ort_population.pivot_table(
    index=['Bezirk', 'Ortsteil'],
    columns='Age group',
    values='Frequency',
    aggfunc='sum'  # In case of duplicates
)

# Reset index to bring Bezirk + Ortsteil back as columns
df_subdistrict_age = df_subdistrict_age.reset_index()


In [5]:
df_subdistrict_age.head()

Age group,Bezirk,Ortsteil,00_05,05_10,10_15,15_20,20_25,25_30,30_35,35_40,...,50_55,55_60,60_65,65_70,70_75,75_80,80_85,85_90,90_95,95 und älter
0,Charlottenburg-Wilmersdorf,Charlottenburg,5217.0,4445.0,4102.0,4245.0,7582.0,10992.0,12145.0,10253.0,...,9354.0,9996.0,8469.0,7377.0,6772.0,5507.0,3737.0,1567.0,632.0,201.0
1,Charlottenburg-Wilmersdorf,Charlottenburg-Nord,976.0,893.0,872.0,792.0,1108.0,1317.0,1525.0,1296.0,...,1342.0,1381.0,1178.0,987.0,832.0,877.0,861.0,424.0,192.0,64.0
2,Charlottenburg-Wilmersdorf,Grunewald,479.0,505.0,461.0,466.0,450.0,484.0,558.0,622.0,...,841.0,838.0,695.0,633.0,700.0,857.0,659.0,297.0,161.0,88.0
3,Charlottenburg-Wilmersdorf,Halensee,630.0,507.0,492.0,534.0,782.0,1130.0,1348.0,1151.0,...,1168.0,1219.0,1004.0,978.0,852.0,821.0,580.0,216.0,83.0,33.0
4,Charlottenburg-Wilmersdorf,Schmargendorf,889.0,909.0,871.0,842.0,1162.0,1223.0,1393.0,1497.0,...,1701.0,1791.0,1402.0,1352.0,1408.0,1530.0,1138.0,546.0,227.0,111.0


In [6]:
df_subdistrict_age = df_subdistrict_age.rename(columns={
    'Bezirk': 'bezirk',
    'Ortsteil': 'ortsteil',
    '00_05': 'subdistrict_population_age_0_5',
    '05_10': 'subdistrict_population_age_5_10',
    '10_15': 'subdistrict_population_age_10_15',
    '15_20': 'subdistrict_population_age_15_20',
    '20_25': 'subdistrict_population_age_20_25',
    '25_30': 'subdistrict_population_age_25_30',
    '30_35': 'subdistrict_population_age_30_35',
    '35_40': 'subdistrict_population_age_35_40',
    '40_45': 'subdistrict_population_age_40_45',
    '45_50': 'subdistrict_population_age_45_50',
    '50_55': 'subdistrict_population_age_50_55',
    '55_60': 'subdistrict_population_age_55_60',
    '60_65': 'subdistrict_population_age_60_65',
    '65_70': 'subdistrict_population_age_65_70',
    '70_75': 'subdistrict_population_age_70_75',
    '75_80': 'subdistrict_population_age_75_80',
    '80_85': 'subdistrict_population_age_80_85',
    '85_90': 'subdistrict_population_age_85_90',
    '90_95': 'subdistrict_population_age_90_95',
    '95 und älter': 'subdistrict_population_age_95_plus'
})

# Rename index
df_subdistrict_age.reset_index(drop=True, inplace=True)


In [7]:
df_subdistrict_gender = df_ort_population.pivot_table(
    index=['Bezirk', 'Ortsteil'],
    columns='Gender',
    values='Frequency',
    aggfunc='sum'  # In case of duplicates
)

df_subdistrict_gender = df_subdistrict_gender.reset_index()


In [8]:
df_subdistrict_gender.head()

Gender,Bezirk,Ortsteil,1,2
0,Charlottenburg-Wilmersdorf,Charlottenburg,63894,65465
1,Charlottenburg-Wilmersdorf,Charlottenburg-Nord,9491,9931
2,Charlottenburg-Wilmersdorf,Grunewald,5353,5823
3,Charlottenburg-Wilmersdorf,Halensee,7567,7930
4,Charlottenburg-Wilmersdorf,Schmargendorf,10570,12163


In [9]:
# Rename columns
df_subdistrict_gender.rename(columns={
    'Bezirk': 'bezirk',
    'Ortsteil': 'ortsteil',
    1: 'subdistrict_male_population',
    2: 'subdistrict_female_population'
}, inplace=True)

In [10]:
# Rename bezirk values for consistency
def clean_bezirk(bezirk):
    # Replace ö with oe
    bezirk = bezirk.replace('ö', 'oe').replace('Ö', 'oe')

    # Remove dots
    bezirk = bezirk.replace('.', '')

    # Lowercase and replace spaces with hyphens
    bezirk = bezirk.lower().replace(' ', '-')

    # Mapping for specific bezirk names
    bezirk_mapping = {
        'charlbg-wilmersd': 'charlottenburg-wilmersdorf',
        'friedrh-kreuzb': 'friedrichshain-kreuzberg',
        'marzahn-hellersd': 'marzahn-hellersdorf',
        'steglitz-zehlend': 'steglitz-zehlendorf',
        'tempelh-schoeneb': 'tempelhof-schoeneberg'
    }

    # Check if the bezirk is in the mapping, otherwise return the cleaned version
    return bezirk_mapping.get(bezirk, bezirk) or bezirk
    

In [11]:
df_subdistrict_age['bezirk'] = df_subdistrict_age['bezirk'].apply(clean_bezirk)
df_subdistrict_gender['bezirk'] = df_subdistrict_gender['bezirk'].apply(clean_bezirk)

In [12]:
def clean_ortsteil(ortsteil):
    ortsteil = ortsteil.lower()

    return ortsteil

In [13]:
df_subdistrict_age['ortsteil'] = df_subdistrict_age['ortsteil'].apply(clean_ortsteil)
df_subdistrict_gender['ortsteil'] = df_subdistrict_gender['ortsteil'].apply(clean_ortsteil)

In [14]:
df_subdistrict_population_final = pd.merge(
    df_subdistrict_age,
    df_subdistrict_gender,
    on=['ortsteil', 'bezirk'],
    how='outer'
)

In [15]:
df_subdistrict_population_final.head(100)

Unnamed: 0,bezirk,ortsteil,subdistrict_population_age_0_5,subdistrict_population_age_5_10,subdistrict_population_age_10_15,subdistrict_population_age_15_20,subdistrict_population_age_20_25,subdistrict_population_age_25_30,subdistrict_population_age_30_35,subdistrict_population_age_35_40,...,subdistrict_population_age_60_65,subdistrict_population_age_65_70,subdistrict_population_age_70_75,subdistrict_population_age_75_80,subdistrict_population_age_80_85,subdistrict_population_age_85_90,subdistrict_population_age_90_95,subdistrict_population_age_95_plus,subdistrict_male_population,subdistrict_female_population
0,treptow-koepenick,adlershof,1177.0,846.0,710.0,671.0,1410.0,1765.0,2274.0,1833.0,...,991.0,833.0,607.0,764.0,762.0,383.0,150.0,22.0,10200,10010
1,lichtenberg,alt-hohenschönhausen,2960.0,2689.0,2308.0,1975.0,2360.0,2943.0,4609.0,3963.0,...,2956.0,2891.0,2392.0,2175.0,1729.0,869.0,344.0,64.0,24655,25415
2,treptow-koepenick,alt-treptow,840.0,727.0,560.0,450.0,457.0,895.0,1617.0,1685.0,...,506.0,339.0,252.0,284.0,230.0,146.0,59.0,10.0,6579,6588
3,treptow-koepenick,altglienicke,1724.0,1788.0,1631.0,1391.0,1417.0,1411.0,2162.0,2134.0,...,1960.0,1424.0,1043.0,1056.0,901.0,420.0,156.0,31.0,14638,14957
4,treptow-koepenick,baumschulenweg,880.0,732.0,620.0,569.0,866.0,1374.0,2107.0,1669.0,...,915.0,749.0,569.0,775.0,1257.0,744.0,238.0,73.0,9070,9824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,pankow,wilhelmsruh,409.0,365.0,355.0,314.0,343.0,406.0,626.0,653.0,...,484.0,409.0,350.0,420.0,434.0,179.0,33.0,10.0,3857,4266
93,spandau,wilhelmstadt,1807.0,1769.0,1745.0,1674.0,2230.0,2706.0,2891.0,2713.0,...,2593.0,2288.0,2073.0,2207.0,1832.0,781.0,289.0,72.0,19787,20676
94,charlottenburg-wilmersdorf,wilmersdorf,3957.0,3699.0,3599.0,3534.0,5174.0,7008.0,8202.0,7237.0,...,6665.0,6308.0,5920.0,5510.0,4065.0,1669.0,671.0,288.0,49306,52571
95,reinickendorf,wittenau,952.0,923.0,831.0,969.0,1383.0,1506.0,1602.0,1431.0,...,1685.0,1459.0,1358.0,1654.0,1426.0,666.0,231.0,59.0,12430,12296


In [16]:
df_subdistrict_population_final['total_population'] = df_subdistrict_population_final['subdistrict_male_population'] + df_subdistrict_population_final['subdistrict_female_population']

In [17]:
df_subdistrict_population_final['subdistrict_senior_population'] = (
    df_subdistrict_population_final['subdistrict_population_age_65_70'] +
    df_subdistrict_population_final['subdistrict_population_age_70_75'] +
    df_subdistrict_population_final['subdistrict_population_age_75_80'] +
    df_subdistrict_population_final['subdistrict_population_age_80_85'] +
    df_subdistrict_population_final['subdistrict_population_age_85_90'] +
    df_subdistrict_population_final['subdistrict_population_age_90_95'] +
    df_subdistrict_population_final['subdistrict_population_age_95_plus']
)

df_subdistrict_population_final['subdistrict_youth_population'] = (
    df_subdistrict_population_final['subdistrict_population_age_0_5'] +
    df_subdistrict_population_final['subdistrict_population_age_5_10'] +
    df_subdistrict_population_final['subdistrict_population_age_10_15'] +
    df_subdistrict_population_final['subdistrict_population_age_15_20']
)

df_subdistrict_population_final['subdistrict_senior_share'] = df_subdistrict_population_final['subdistrict_senior_population'] / df_subdistrict_population_final['total_population']
df_subdistrict_population_final['subdistrict_youth_share'] = df_subdistrict_population_final['subdistrict_youth_population'] / df_subdistrict_population_final['total_population']

df_subdistrict_population_final['subdistrict_middle_age_population'] = df_subdistrict_population_final['total_population'] - (
    df_subdistrict_population_final['subdistrict_youth_population'] + df_subdistrict_population_final['subdistrict_senior_population']
)

df_subdistrict_population_final['subdistrict_middle_age_population'] = df_subdistrict_population_final['subdistrict_middle_age_population'] / df_subdistrict_population_final['total_population']


In [18]:
# Save to CSV
#df_subdistrict_population_final.to_csv('../data/cleaned_data/berlin_ortsteil_population.csv', index=False)

# Map Street Level Data to Districts and Subdistricts

In [19]:
df_streets = pd.read_csv('../data/raw_data/spatial_data/berlin_street_level.csv')

In [20]:
df_streets.head()

Unnamed: 0,element_nr,strassensc,strassenna,str_bez,strassenkl,strassen_1,strassen_2,verkehrsri,bezirk,stadtteil,verkehrseb,beginnt_be,endet_bei_,laenge,gueltig_vo,okstra_id
0,34610003_34610004.01,2,Aalemannufer,,IV,G,STRA,B,Spandau,Hakenfelde,0,34610003,34610004,262.5,2010/01/01 00:00:00.000,D62521E5E27544729878420C54E6C59C
1,40540001_41540003.01,5,Abbestraße,,V,G,STRA,B,Charlottenburg-Wilmersdorf,Charlottenburg,0,40540001,41540003,182.45,2010/01/01 00:00:00.000,275EE05309AF45DCA49E046BBA0CBBCC
2,42590002_42590001.01,22,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590002,42590001,65.36,2010/01/01 00:00:00.000,18F55F73EB5346F6A7A719E970B9D4EC
3,42590003_42590004.01,22,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590003,42590004,61.41,2010/01/01 00:00:00.000,AE661AB3DB344DD183550F05E15A4BD7
4,42590005_42590006.01,22,Afrikanische Straße,,II,G,STRA,B,Mitte,Wedding,0,42590005,42590006,45.36,2010/01/01 00:00:00.000,D47D9FE02F13481FA79966646BA478FA


In [21]:
df_streets['strassenna'] = df_streets['strassenna'].str.strip().str.lower()
df_streets['bezirk'] = df_streets['bezirk'].str.strip().str.lower()
df_streets['stadtteil'] = df_streets['stadtteil'].str.strip().str.lower()

In [22]:
street_lookup = (
    df_streets[['strassenna', 'bezirk', 'stadtteil']]
    .drop_duplicates()
    .rename(columns={
        'strassenna': 'street_name',
        'stadtteil': 'ortsteil'
    })
)

In [23]:
df_miet = pd.read_csv('../data/cleaned_data/berlin_mietspiegel_street_directory_cleaned_2024.csv')

df_miet['street_name'] = df_miet['street_name'].str.strip().str.lower()
df_miet['district'] = df_miet['district'].str.strip().str.lower()

In [24]:
df_miet = df_miet.merge(
    street_lookup,
    left_on=['street_name', 'district'],
    right_on=['street_name', 'bezirk'],
    how='left'
)

In [25]:
df_income = pd.read_csv('../data/cleaned_data/berlin_plr_median_income.csv')

In [26]:
df_income.head()

Unnamed: 0,PLR,full_time_employees,median_income_eur
0,Stülerstraße,866,4721
1,Großer Tiergarten,619,6279
2,Lützowstraße,1250,4895
3,Körnerstraße,1126,5062
4,Wilhelmstraße,828,5384


In [27]:
df_income['street_name'] = df_income['PLR'].str.strip().str.lower()

In [28]:
plr_mapped = df_income.merge(
    street_lookup,
    on='street_name',
    how='left'
)

In [29]:
df_subdistrict = df_miet.merge(
    plr_mapped,
    on=['bezirk', 'ortsteil'],
    how='outer'
)

In [30]:
df_subdistrict.head()

Unnamed: 0,street_name_x,district,territorial_side,house_number_range,house_number_scheme_code,house_number_scheme,classification,bezirk,ortsteil,PLR,full_time_employees,median_income_eur,street_name_y
0,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,mittel,charlottenburg-wilmersdorf,charlottenburg,Uhlandstraße,2126,4355,uhlandstraße
1,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,mittel,charlottenburg-wilmersdorf,charlottenburg,Behaimstraße,1942,4267,behaimstraße
2,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,mittel,charlottenburg-wilmersdorf,charlottenburg,Klausenerplatz,2033,4073,klausenerplatz
3,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,mittel,charlottenburg-wilmersdorf,charlottenburg,Schloßstraße,2583,4723,schloßstraße
4,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,mittel,charlottenburg-wilmersdorf,charlottenburg,Amtsgerichtsplatz,2727,4786,amtsgerichtsplatz


In [31]:
df_subdistrict['bezirk'] = df_subdistrict['bezirk'].str.strip().str.lower()
df_subdistrict['ortsteil'] = df_subdistrict['ortsteil'].str.strip().str.lower()

In [32]:
df_subdistrict.dtypes

street_name_x                object
district                     object
territorial_side            float64
house_number_range           object
house_number_scheme_code     object
house_number_scheme          object
classification               object
bezirk                       object
ortsteil                     object
PLR                          object
full_time_employees          object
median_income_eur            object
street_name_y                object
dtype: object

In [33]:
classification_map = {
    'einfach': 1,
    'mittel': 2,
    'gut': 3
}

df_subdistrict['classification'] = df_subdistrict['classification'].str.strip().str.lower().map(classification_map)

In [34]:
df_subdistrict['full_time_employees'] = pd.to_numeric(df_subdistrict['full_time_employees'], errors='coerce')
df_subdistrict['median_income_eur'] = pd.to_numeric(df_subdistrict['median_income_eur'], errors='coerce')

In [35]:
df_subdistrict.head()

Unnamed: 0,street_name_x,district,territorial_side,house_number_range,house_number_scheme_code,house_number_scheme,classification,bezirk,ortsteil,PLR,full_time_employees,median_income_eur,street_name_y
0,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,2.0,charlottenburg-wilmersdorf,charlottenburg,Uhlandstraße,2126.0,4355.0,uhlandstraße
1,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,2.0,charlottenburg-wilmersdorf,charlottenburg,Behaimstraße,1942.0,4267.0,behaimstraße
2,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,2.0,charlottenburg-wilmersdorf,charlottenburg,Klausenerplatz,2033.0,4073.0,klausenerplatz
3,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,2.0,charlottenburg-wilmersdorf,charlottenburg,Schloßstraße,2583.0,4723.0,schloßstraße
4,abbestraße,charlottenburg-wilmersdorf,,whole street,K,Complete street,2.0,charlottenburg-wilmersdorf,charlottenburg,Amtsgerichtsplatz,2727.0,4786.0,amtsgerichtsplatz


In [36]:
df_ortsteil_agg = df_subdistrict.groupby(['bezirk', 'ortsteil']).agg({
    'classification': 'mean',            # Average Mietspiegel classification
    'full_time_employees': 'sum',        # Total full-time employees in the area
    'median_income_eur': 'mean',         # Average median income across PLRs
}).reset_index()

In [37]:
df_ortsteil_agg = df_ortsteil_agg.rename(columns={
    'classification': 'subdistrict_avg_mietspiegel_classification',
    'full_time_employees': 'subdistrict_total_full_time_employees',
    'median_income_eur': 'subdistrict_avg_median_income_eur'
})

In [38]:
df_ortsteil_agg.head(100)

Unnamed: 0,bezirk,ortsteil,subdistrict_avg_mietspiegel_classification,subdistrict_total_full_time_employees,subdistrict_avg_median_income_eur
0,charlottenburg-wilmersdorf,charlottenburg,2.195730,9204717.0,4566.785714
1,charlottenburg-wilmersdorf,charlottenburg-nord,1.040000,286400.0,
2,charlottenburg-wilmersdorf,grunewald,2.854369,603786.0,4554.750000
3,charlottenburg-wilmersdorf,halensee,2.560976,0.0,
4,charlottenburg-wilmersdorf,schmargendorf,2.846939,709226.0,4548.000000
...,...,...,...,...,...
91,treptow-köpenick,niederschöneweide,1.725490,0.0,
92,treptow-köpenick,oberschöneweide,1.528090,621665.0,4078.000000
93,treptow-köpenick,plänterwald,1.837209,0.0,
94,treptow-köpenick,rahnsdorf,1.287770,347222.0,4007.500000


In [39]:
def classify_affordability(x):
    if x < 1.5:
        return 'affordable'
    elif x < 2.5:
        return 'moderate'
    else:
        return 'expensive'

df_ortsteil_agg['classification_category'] = df_ortsteil_agg['subdistrict_avg_mietspiegel_classification'].apply(classify_affordability)

In [40]:
# Fill NaN values for median income with district average
df_ortsteil_agg['subdistrict_avg_median_income_eur'] = df_ortsteil_agg.groupby('bezirk')['subdistrict_avg_median_income_eur'].transform(
    lambda x: x.fillna(x.mean())
)

In [41]:
def clean_merge_keys(df):
    return (
        df.assign(
            bezirk=df['bezirk'].str.strip().str.lower().str.replace("ö", "oe").str.replace("ü", "ue").str.replace("ä", "ae").str.replace("ß", "ss"),
            ortsteil=df['ortsteil'].str.strip().str.lower().str.replace("ö", "oe").str.replace("ü", "ue").str.replace("ä", "ae").str.replace("ß", "ss")
        )
    )

df_subdistrict_population_final = clean_merge_keys(df_subdistrict_population_final)
df_ortsteil_agg = clean_merge_keys(df_ortsteil_agg)

In [48]:
df_ortsteil_agg.head()

Unnamed: 0,bezirk,ortsteil,subdistrict_avg_mietspiegel_classification,subdistrict_total_full_time_employees,subdistrict_avg_median_income_eur,classification_category
0,charlottenburg-wilmersdorf,charlottenburg,2.19573,9204717.0,4566.785714,moderate
1,charlottenburg-wilmersdorf,charlottenburg-nord,1.04,286400.0,4522.648195,affordable
2,charlottenburg-wilmersdorf,grunewald,2.854369,603786.0,4554.75,expensive
3,charlottenburg-wilmersdorf,halensee,2.560976,0.0,4522.648195,expensive
4,charlottenburg-wilmersdorf,schmargendorf,2.846939,709226.0,4548.0,expensive


In [49]:
df_ortsteil_agg.to_csv('../data/cleaned_data/berlin_ortsteil_rent_income.csv')

In [42]:
df_ortsteil_master = df_subdistrict_population_final.merge(
    df_ortsteil_agg,
    on=['bezirk', 'ortsteil'],
    how='inner'
)

In [43]:
print(df_subdistrict_population_final.duplicated(subset=['bezirk', 'ortsteil']).sum())
print(df_ortsteil_agg.duplicated(subset=['bezirk', 'ortsteil']).sum())

0
0


In [44]:
df_ortsteil_master.head()

Unnamed: 0,bezirk,ortsteil,subdistrict_population_age_0_5,subdistrict_population_age_5_10,subdistrict_population_age_10_15,subdistrict_population_age_15_20,subdistrict_population_age_20_25,subdistrict_population_age_25_30,subdistrict_population_age_30_35,subdistrict_population_age_35_40,...,total_population,subdistrict_senior_population,subdistrict_youth_population,subdistrict_senior_share,subdistrict_youth_share,subdistrict_middle_age_population,subdistrict_avg_mietspiegel_classification,subdistrict_total_full_time_employees,subdistrict_avg_median_income_eur,classification_category
0,treptow-koepenick,adlershof,1177.0,846.0,710.0,671.0,1410.0,1765.0,2274.0,1833.0,...,20210,3521.0,3404.0,0.174221,0.168431,0.657348,1.806202,428667.0,4388.0,moderate
1,lichtenberg,alt-hohenschoenhausen,2960.0,2689.0,2308.0,1975.0,2360.0,2943.0,4609.0,3963.0,...,50070,10464.0,9932.0,0.208987,0.198362,0.59265,1.523013,4613178.0,3757.875,moderate
2,treptow-koepenick,alt-treptow,840.0,727.0,560.0,450.0,457.0,895.0,1617.0,1685.0,...,13167,1320.0,2577.0,0.100251,0.195717,0.704033,2.306122,193648.0,3998.0,moderate
3,treptow-koepenick,altglienicke,1724.0,1788.0,1631.0,1391.0,1417.0,1411.0,2162.0,2134.0,...,29595,5031.0,6534.0,0.169995,0.220781,0.609225,1.5,142688.0,4138.0,moderate
4,treptow-koepenick,baumschulenweg,880.0,732.0,620.0,569.0,866.0,1374.0,2107.0,1669.0,...,18894,4405.0,2801.0,0.233143,0.148248,0.618609,1.690141,0.0,3955.611111,moderate


In [45]:
df_ortsteil_master.shape

(96, 34)

In [47]:
df_ortsteil_master.to_csv('../data/master_tables/berlin_ortsteil_master_table.csv', index=False)