In [1]:
import pandas as pd

In [21]:
# Directory that holds the three raw input files used in this notebook.
file_path = './data/area/'

# Full paths of the individual inputs (directory prefix + file name).
land_use_file = file_path + 'Flächennutzung.xls'
residential_quality_file = file_path + 'Wohnlage.csv'
settlement_structure_file = file_path + 'Siedlungsstruktur.xlsx'

# Two Excel workbooks and one semicolon-separated CSV.
parks = pd.read_excel(land_use_file)
quality = pd.read_csv(residential_quality_file, sep=';')
bld_style = pd.read_excel(settlement_structure_file)

### Area in m² per LOR (incl. parks and living area)

In [3]:
# Replace the '-' placeholder with 0 so the affected columns can be summed.
# (Presumably '-' marks "no area of this type" in the source sheet - confirm.)
# `infer_objects()` makes the conversion of the cleaned object columns to
# numeric dtypes explicit instead of relying on `replace` doing it implicitly,
# and rebinding (rather than `inplace=True`) keeps the cell safe to re-run.
parks = parks.replace('-', 0).infer_objects()

In [4]:
# Left-pad the planning-area number with zeros to a width of 8 characters.
# (Presumably this restores leading zeros lost when the ID was read as a
# number - confirm against the raw file.)
planning_area_id = parks['Nummer Planungsraum'].map(lambda raw_id: format(raw_id, '0>8'))
parks['Nummer Planungsraum'] = planning_area_id

# The LOR key is the first six characters of the padded planning-area number.
parks['LOR'] = planning_area_id.str[:6]

In [5]:
# Aggregate the planning-area rows to LOR level by summing every numeric
# column. `numeric_only=True` is passed explicitly: without it, newer pandas
# versions also "sum" (i.e. concatenate) string columns such as the padded
# planning-area number, which adds unwanted columns to the result.
parks_per_lor = parks.groupby('LOR').sum(numeric_only=True).reset_index()

In [6]:
# Show the column labels of the aggregated frame; they are replaced by
# short English names in the following cell.
parks_per_lor.columns

Index(['LOR', 'Größe des Planungsraums [m²]',
       'Summe der Block- Blockteilflächen [m²]', 'Straßen [m²]',
       'Wohngebiete [m²]', 'Mischgebiete [m²]', 'Kerngebiete [m²]',
       'Gewerbe- und Industriegebiete [m²]',
       'Gemeinbedarfs- und Sondernutzungen [m²]',
       'Ver- und Entsorgungseinrichtungen [m²]', 'Wochenendhausgebiete [m²]',
       'Verkehrsflächen (außer Straßenland) [m²]', 'Wald [m²]',
       'Grünland (Wiesen und Weiden) [m²]', 'Ackerland [m²]',
       'Park, Grünflächen [m²]', 'Stadtplätze/Promenaden [m²]',
       'Friedhöfe [m²]', 'Kleingärten [m²]',
       'Brachfläche, vegetationsfrei [m²]',
       'Brachfläche, wiesenartiger Vegetationsbestand [m²]',
       'Brachfläche, Mischbestand aus Wiesen, Gebüsch und Bäumen [m²]',
       'Baumschule / Gartenbau [m²]', 'Gewässer [m²]'],
      dtype='object')

In [7]:
# Map every German source column to its short English name. Renaming by
# name (instead of assigning a positional list) cannot mislabel a column if
# the order or number of aggregated columns ever changes, and re-running the
# cell is a no-op.
AREA_COLUMN_NAMES = {
    'Größe des Planungsraums [m²]': 'area',
    'Summe der Block- Blockteilflächen [m²]': 'block_area',
    'Straßen [m²]': 'street_area',
    'Wohngebiete [m²]': 'living_area',
    'Mischgebiete [m²]': 'mixed_area',
    'Kerngebiete [m²]': 'central_area',
    'Gewerbe- und Industriegebiete [m²]': 'industrial_area',
    'Gemeinbedarfs- und Sondernutzungen [m²]': 'special_area',
    'Ver- und Entsorgungseinrichtungen [m²]': 'disposal_area',
    'Wochenendhausgebiete [m²]': 'datscha_area',
    'Verkehrsflächen (außer Straßenland) [m²]': 'traffic_area',
    'Wald [m²]': 'forest_area',
    'Grünland (Wiesen und Weiden) [m²]': 'greens_area',
    'Ackerland [m²]': 'farm_area',
    'Park, Grünflächen [m²]': 'park_area',
    'Stadtplätze/Promenaden [m²]': 'platz_area',
    'Friedhöfe [m²]': 'cemetery_area',
    'Kleingärten [m²]': 'garden_area',
    'Brachfläche, vegetationsfrei [m²]': 'fallow_no_veg',
    'Brachfläche, wiesenartiger Vegetationsbestand [m²]': 'fallow_veg',
    'Brachfläche, Mischbestand aus Wiesen, Gebüsch und Bäumen [m²]': 'fallow_mixed',
    'Baumschule / Gartenbau [m²]': 'nursery_area',
    'Gewässer [m²]': 'water_area',
}

parks_per_lor = parks_per_lor.rename(columns=AREA_COLUMN_NAMES)

# Fail loudly if a source column was not found, rather than letting a later
# cell stumble over a missing English column name.
missing_area_columns = sorted(set(AREA_COLUMN_NAMES.values()) - set(parks_per_lor.columns))
if missing_area_columns:
    raise KeyError(f'Columns missing after renaming: {missing_area_columns}')

In [8]:
# Leisure area per LOR = park area plus cemetery area.
park_sqm = parks_per_lor['park_area']
cemetery_sqm = parks_per_lor['cemetery_area']
parks_per_lor['leisure_area'] = park_sqm.add(cemetery_sqm)

In [9]:
# Keep only the key and the three area measures that go into the final frame.
area_output_columns = ['LOR', 'area', 'living_area', 'leisure_area']
parks_final = parks_per_lor.loc[:, area_output_columns]

### Residential quality (Wohnlage)

In [37]:
# Left-pad RAUMID with zeros to a width of 8 characters.
# (Presumably this restores leading zeros lost when the ID was read as a
# number - confirm against the raw file.)
padded_raumid = quality['RAUMID'].map(lambda raw_id: format(raw_id, '0>8'))
quality['RAUMID'] = padded_raumid

# The LOR key is the first six characters of the padded RAUMID.
quality['LOR'] = padded_raumid.str[:6]

In [38]:
# Aggregate the rows to LOR level by summing the numeric columns only.
# `numeric_only=True` is explicit so that string columns (e.g. the padded
# RAUMID) are not concatenated by newer pandas versions.
quality_per_lor = quality.groupby('LOR').sum(numeric_only=True).reset_index()

In [40]:
# Row-wise total over all seven WL* columns; used as the denominator for the
# quality shares. (Presumably each column is a count per residential-quality
# class - confirm against the data documentation.)
wl_columns = [
    'WLEINFoL', 'WLEINFmL',
    'WLMIToL', 'WLMITmL',
    'WLGUToL', 'WLGUTmL',
    'WLNZORD',
]
quality_per_lor['total'] = quality_per_lor[wl_columns].sum(axis=1)

In [43]:
# Each share is the sum of two WL* columns divided by the row total.
# 'WLNZORD' is part of the total but of none of the three shares, so the
# shares of a row do not have to add up to 1.
share_sources = {
    'lower_quality': ['WLEINFoL', 'WLEINFmL'],
    'average_quality': ['WLMIToL', 'WLMITmL'],
    'upper_quality': ['WLGUToL', 'WLGUTmL'],
}
for share_name, source_columns in share_sources.items():
    class_sum = quality_per_lor[source_columns].sum(axis=1)
    quality_per_lor[share_name] = class_sum.div(quality_per_lor['total'])

In [45]:
# Keep only the key and the three quality shares for the final frame.
quality_output_columns = ['LOR', 'lower_quality', 'average_quality', 'upper_quality']
quality_final = quality_per_lor.loc[:, quality_output_columns]

### Urbanisation style

In [48]:
# Build the LOR key: cast 'Nummer' to integer, then left-pad with zeros to a
# width of 6 characters.
region_number = bld_style['Nummer'].astype('int')
bld_style['LOR'] = region_number.map(lambda number: format(number, '0>6'))

In [50]:
# Keep the key plus the 'Hauptprägung' column and give the latter an
# English name.
urbanisation = (
    bld_style[['LOR', 'Hauptprägung']]
    .rename(columns={'Hauptprägung': 'urban_style'})
)

### Combining dataframes

In [52]:
# Join the three per-LOR frames on the LOR key. Both merges use the default
# inner join, so only LORs present in all three frames remain.
area = pd.merge(parks_final, quality_final, on='LOR')
area = pd.merge(area, urbanisation, on='LOR')

In [53]:
# Display the merged frame to inspect the combined result.
area

Unnamed: 0,LOR,area,living_area,leisure_area,lower_quality,average_quality,upper_quality,urban_style
0,010111,5165169,124902,2002658,0.046380,0.756490,0.196129,Verdichtete Blockrandbebauung
1,010112,2702649,178220,27498,0.000000,0.124392,0.870742,Großsiedlungen der 60er - 80er Jahre
2,010113,6228553,1008603,246654,0.000363,0.655512,0.341050,Verdichtete Blockrandbebauung
3,010114,1755444,620375,245531,0.000000,0.397393,0.602607,Verdichtete Blockrandbebauung
4,010221,4331156,965328,60415,0.755678,0.179795,0.064052,Verdichtete Blockrandbebauung
...,...,...,...,...,...,...,...,...
133,122311,13854910,7343199,1111912,0.000000,0.256306,0.743694,Einfamilienhausgebiete
134,123012,11825409,4098800,1630898,0.000412,0.999113,0.000000,Einfamilienhausgebiete
135,123021,3146413,1747281,123289,0.789693,0.210307,0.000000,Großsiedlungen der 60er - 80er Jahre
136,123022,1618108,748067,71730,0.947919,0.052081,0.000000,Großsiedlungen der 60er - 80er Jahre


In [54]:
import joblib

# Persist the combined frame; `joblib.dump` returns the list of written file
# names, which is shown as the cell output.
output_path = './dataframes/final_area.pkl'
joblib.dump(area, output_path)

['./dataframes/final_area.pkl']