# ETL
This project extracts real estate properties from an API, transforms them, and loads them into a CSV.

Extract    
Extract all properties by calling the Immo API.

Transform   
Transform (normalize) the nested JSON objects. Fill NaN values.
 
Load   
Load the finished CSV into an S3 bucket.

In [1]:
# Importing modules
import pandas as pd

# Display settings: None removes the row/column caps, so a DataFrame shown as a
# cell result is rendered in full instead of being truncated
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Switches off pandas' chained-assignment (SettingWithCopy) check for the whole session
pd.options.mode.chained_assignment = None

# NOTE(review): not referenced in the cells visible here; presumably a verbosity
# switch for later cells. Confirm it is still needed.
DEBUG = True

In [2]:
import sys
# Extend the module search path with the sibling directories (relative to the
# notebook's working directory); presumably `etl_staging` lives in ../pipeline
sys.path.insert(1, '../pipeline')
sys.path.insert(2, '../data')

from etl_staging.etl_staging import ETL
# This is how you start the pipeline: create the ETL object, then execute it for one category
etl = ETL()
# Alternative category, kept for reference: etl.execute("APARTMENTBUY")
etl.execute("HOUSEBUY")

INFO: Execute for category: HOUSEBUY
__________________________________________________
INFO: Start extracting data
INFO: Type: HOUSEBUY, size: 300, offset: 0
INFO: Property url: https://api.thinkimmo.com/immo?active=true&type=HOUSEBUY&sortBy=publishDate,desc&from=0&size=300&grossReturnAnd=false&allowUnknown=false&ownCapital=10&ownCapitalInPercent=true&ownCapitalAdditionalCost=false&managementCostInPercent=true&renovationCostInPercent=true&renovationCost=0&interestRate=1.5&repaymentRate=2&managementCost=35&additionalPurchaseCost=2&favorite=false&excludedFields=true&geoSearches=[]&averageAggregation=buyingPrice%3BpricePerSqm%3BsquareMeter%3BconstructionYear%3BrentPrice%3BrentPricePerSqm%3BrentPricePerSqm%3BrunningTime
INFO: Get API data batch 0-300 of total: 600
<Response [200]>
SUCCESS: Request OK
INFO: Response data 89015 found
INFO: Type: HOUSEBUY, size: 300, offset: 300
INFO: Property url: https://api.thinkimmo.com/immo?active=true&type=HOUSEBUY&sortBy=publishDate,desc&from=300&size

## Transformation 
We want to clean the nested JSON by normalizing it into flat columns. 

In [100]:
import json
# json_normalize flattens nested JSON into a flat DataFrame. Import it from the
# top-level pandas namespace: the pandas.io.json alias is deprecated since pandas 1.0.
from pandas import json_normalize

JSON_PATH = "../data/immo.json"

# JSON is UTF-8 by specification; being explicit keeps umlauts in titles and place
# names intact regardless of the platform's default encoding
with open(JSON_PATH, encoding="utf-8") as json_file:
    data = json.load(json_file)

def transform(json_raw, array_columns=('platforms', 'buyingPriceHistory')):
    """Flatten raw listing JSON into a table with one scalar value per cell.

    Nested objects become dotted columns (e.g. ``address.city``). Each column in
    ``array_columns`` holds a list of objects; those lists are exploded into rows
    and their objects are then flattened too (e.g. ``platforms.name``).
    Exploding several columns multiplies rows: a listing with 2 platforms and
    3 price-history entries yields 6 rows that all share the same ``id``.

    Args:
        json_raw: list of listing dicts (or a single dict).
        array_columns: names of the list-valued columns to explode, in order.

    Returns:
        pandas.DataFrame with a fresh RangeIndex; an empty frame if ``json_raw``
        contains no data.

    Raises:
        KeyError: if the data is non-empty and lacks one of ``array_columns``.
    """
    # first pass: nested dicts -> dotted columns; list-valued columns stay untouched
    df_raw = json_normalize(json_raw)
    if df_raw.empty:
        # nothing to explode; avoids a KeyError on the missing array columns
        return df_raw
    # one row per element of every nested array
    df_flat = df_raw
    for column in array_columns:
        df_flat = df_flat.explode(column)
    # second pass: the exploded cells now hold single dicts, flatten them as well
    return json_normalize(df_flat.to_dict('records'))

# Sanity check: list every column and dtype of the flattened result
transform(data).info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 163 columns):
 #    Column                                   Dtype  
---   ------                                   -----  
 0    id                                       object 
 1    title                                    object 
 2    zip                                      object 
 3    buyingPrice                              float64
 4    rooms                                    float64
 5    squareMeter                              float64
 6    comission                                float64
 7    rentPricePerSqm                          float64
 8    pricePerSqm                              float64
 9    rentPrice                                float64
 10   rentPriceCurrent                         float64
 11   rentPriceCurrentPerSqm                   float64
 12   region                                   object 
 13   foreClosure                              bool   
 14   grossRet

Here we describe the transformation process step by step. 

In [82]:
import json
# Package for flattening JSON into a pandas DataFrame. Imported from the top-level
# namespace because the pandas.io.json alias is deprecated since pandas 1.0.
from pandas import json_normalize

In [83]:

# Re-read the raw export; explicit UTF-8 keeps umlauts intact on every platform
with open(JSON_PATH, encoding="utf-8") as json_file:
    data = json.load(json_file)
# `data` is a list of records, so the plain constructor is the direct way to build
# the frame (nested objects and arrays stay as dict / list cells at this stage)
df = pd.DataFrame(data)
df.head(3)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,address,region,foreClosure,locationFactor,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,aggregations,leasehold,priceInMarket,oAddress,originalAddress,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages
0,457f0ced1570618f7355f104101899ef,Jetzt schnell sein und mitgestalten - Penthouse,60599,789000.0,3.0,129.0,5.81,"[{'name': 'ebk', 'id': 'ebk04dc6b3c7e1b6d06dcd...",16.046512,6116.27907,2070.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Hessen,False,"{'population': 763380, 'populationTrend': {'fr...",3.15,,2021.0,TERRACED_FLAT,FIRST_TIME_USE,,False,,,False,False,True,False,2021-02-19T17:01:25.142Z,False,"{'district': {'name': 'Süd', 'buyingPrice': 63...",False,85.297297,"{'ebk': {'street': None, 'postcode': '60599', ...","{'street': '', 'postcode': '60599', 'locationQ...",,[],"[{'buyingPrice': 789000, 'platformName': 'ebk'...",False,False,0,2021-02-19T17:01:25.431Z,0,,-1043.25,-15.86692,-1043.25,False
1,febc9045602be17b45272286419f9533,Fußläufig in die Innenstadt! Attraktive 71m² g...,59368,195000.0,2.0,71.08,2.99,"[{'name': 'is24', 'id': '126734904', 'url': 'h...",6.021384,2743.387732,428.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 29717, 'populationTrend': {'fro...",2.63,,1927.0,GROUND_FLOOR,WELL_KEPT,2012.0,False,,2.0,True,False,True,False,2021-02-19T17:01:05.000Z,False,"{'location': {'name': 'Werne', 'buyingPrice': ...",False,189.320388,"{'is24': {'street': None, 'postcode': '59368',...","{'postcode': '59368', 'location': 'Werne'}",195.0,"[{'id': '28262858476863481ee67dbc5ffb4df8', 'o...","[{'buyingPrice': 195000, 'platformName': 'is24...",False,False,0,2021-02-19T17:04:26.608Z,0,,-299.3,-18.418462,-299.3,True
2,f309110164a043d2db71b7027a40f673,Sonnige 2-Zimmer-Dachgeschosswohnung mit Westb...,85622,283000.0,2.0,42.25,2.38,"[{'name': 'is24', 'id': '126734881', 'url': 'h...",17.065089,6698.224852,721.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Bayern,False,"{'population': 0, 'populationTrend': {'from': ...",3.06,,1999.0,ROOF_STOREY,FULLY_RENOVATED,,True,3.0,3.0,True,False,True,False,2021-02-19T16:59:36.000Z,False,"{'location': {'name': 'Feldkirchen', 'buyingPr...",False,96.587031,"{'is24': {'street': None, 'postcode': '85622',...","{'postcode': '85622', 'location': 'Feldkirchen'}",157.0,"[{'id': 'bf954a438a27ced66e6df75067eb1972', 'o...","[{'buyingPrice': 283000, 'platformName': 'is24...",False,False,0,2021-02-19T17:04:30.274Z,0,,-339.27,-14.385866,-339.27,True


### Inspection of the data

3 Sub rows   
Goal: We have the data (no loss of data)   
Case 1 Duplicate expose ID (because of platforms)   --> Our current go to strategy   
Case 2 Single expose ID (nested platforms)          --> harder to parse and display  
Case 3 Duplicate Expose ID (duplicate platforms)    -->   
    
The columns `platforms` and `buyingPriceHistory` contain nested arrays.  

In [84]:
# Column overview with non-null counts, to see which fields have missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 50 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      600 non-null    object 
 1   title                   600 non-null    object 
 2   zip                     599 non-null    object 
 3   buyingPrice             571 non-null    float64
 4   rooms                   574 non-null    float64
 5   squareMeter             591 non-null    float64
 6   comission               303 non-null    float64
 7   platforms               600 non-null    object 
 8   rentPricePerSqm         600 non-null    float64
 9   pricePerSqm             568 non-null    float64
 10  rentPrice               591 non-null    float64
 11  rentPriceCurrent        71 non-null     float64
 12  rentPriceCurrentPerSqm  71 non-null     float64
 13  address                 600 non-null    object 
 14  region                  600 non-null    ob

### Normalize nested array structures and nested JSON objects


The fields `id` and `title` are always present; `zip` is missing for only one of the 600 listings.

We have to be careful with the purchase price (`buyingPrice`): 29 of the 600 values are missing and have to be filled or dropped. 

The JSON array contains nested objects and arrays. In the next step we flatten these nested columns into one flat table. 

The columns are:
- platforms
- buyingPriceHistory
- locationFactor
- aggregations
- oAddress
- originalAddress

In [85]:
# Expand nested dicts (address, locationFactor, aggregations, ...) into dotted columns;
# list-valued columns such as platforms and buyingPriceHistory are left as they are
df_raw = json_normalize(data)
df_raw.head(2)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.city,address.city_district,address.continent,address.country,address.country_code,address.political_union,address.postcode,address.state,address.state_code,address.suburb,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.locationQuery,address.county,address.town,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,originalAddress.location,address.village,oAddress.is24.district,originalAddress.district,address.house_number,address.landuse,address.local_administrative_ar
ea,address.borough,address.road,oAddress.ebk.location,address.municipality,address.residential,address.railway,address.neighbourhood,address.office,address.highway,address.road_type,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,oAddress.ivd24.district,oAddress.wohnungjetzt.street,oAddress.wohnungjetzt.postcode,oAddress.wohnungjetzt.location,address.building,address.community_centre,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.shop,address.retail,address.fuel,address.craft,address.hamlet,address.quarter,address.boundary,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.wohnungsboerse.street,oAddress.wohnungsboerse.postcode,oAddress.wohnungsboerse.location,oAddress.wohnungsboerse.district,oAddress.immobilio.street,oAddress.immobilio.postcode,oAddress.immobilio.location,address.place_of_worship,address.information,address.hospital,address.industrial,address.attraction,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address.city_block,address.hotel,address.waterway,address.leisure,address.cafe
0,457f0ced1570618f7355f104101899ef,Jetzt schnell sein und mitgestalten - Penthouse,60599,789000.0,3.0,129.0,5.81,"[{'name': 'ebk', 'id': 'ebk04dc6b3c7e1b6d06dcd...",16.046512,6116.27907,2070.0,,,Hessen,False,3.15,,2021.0,TERRACED_FLAT,FIRST_TIME_USE,,False,,,False,False,True,False,2021-02-19T17:01:25.142Z,False,False,85.297297,,[],"[{'buyingPrice': 789000, 'platformName': 'ebk'...",False,False,0,2021-02-19T17:01:25.431Z,0,,-1043.25,-15.86692,-1043.25,False,DE,DEU,postcode,postcode,Frankfurt am Main,Süd,Europe,Deutschland,de,European Union,60599,Hessen,HE,Sachsenhausen Süd,50.095928,8.710911,"60599 Frankfurt am Main, Deutschland",763380,-5,5,True,7.3,66400.0,77,10.479452,25,25.0,16.664,Süd,638550.0,4950.0,3.175758,Frankfurt am Main,748200.0,5800,2.97931,,925000.0,7170.542636,2.685405,,60599.0,"60599 Süd, Oberrad",,60599,"60599 Süd, Oberrad",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,febc9045602be17b45272286419f9533,Fußläufig in die Innenstadt! Attraktive 71m² g...,59368,195000.0,2.0,71.08,2.99,"[{'name': 'is24', 'id': '126734904', 'url': 'h...",6.021384,2743.387732,428.0,,,Nordrhein-Westfalen,False,2.63,,1927.0,GROUND_FLOOR,WELL_KEPT,2012.0,False,,2.0,True,False,True,False,2021-02-19T17:01:05.000Z,False,False,189.320388,195.0,"[{'id': '28262858476863481ee67dbc5ffb4df8', 'o...","[{'buyingPrice': 195000, 'platformName': 'is24...",False,False,0,2021-02-19T17:04:26.608Z,0,,-299.3,-18.418462,-299.3,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,59368,Nordrhein-Westfalen,NW,,51.667475,7.64057,"59368 Werne, Deutschland",29717,-5,5,False,8.0,0.0,43,9.5625,0,16.66,16.664,,,,,Werne,120836.0,1700,4.588235,,103000.0,1449.071469,4.986408,,,,,59368,,Kreis Unna,Werne,,59368.0,Werne,Werne,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [87]:
# Turn each element of the nested arrays into its own row: first one row per
# platform, then one row per price-history entry of that row
df_flat = (
    df_raw
    .explode('platforms')
    .explode('buyingPriceHistory')
)
df_flat.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 674 entries, 0 to 599
Data columns (total 155 columns):
 #    Column                                   Dtype  
---   ------                                   -----  
 0    id                                       object 
 1    title                                    object 
 2    zip                                      object 
 3    buyingPrice                              float64
 4    rooms                                    float64
 5    squareMeter                              float64
 6    comission                                float64
 7    platforms                                object 
 8    rentPricePerSqm                          float64
 9    pricePerSqm                              float64
 10   rentPrice                                float64
 11   rentPriceCurrent                         float64
 12   rentPriceCurrentPerSqm                   float64
 13   region                                   object 
 14   foreClos

In [89]:
# Second normalization pass: the exploded platforms / buyingPriceHistory cells now hold
# single dicts, which become dotted columns (platforms.name, buyingPriceHistory.buyingPrice, ...)
df_normalized = json_normalize(df_flat.to_dict('records'))
df_normalized.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 163 columns):
 #    Column                                   Dtype  
---   ------                                   -----  
 0    id                                       object 
 1    title                                    object 
 2    zip                                      object 
 3    buyingPrice                              float64
 4    rooms                                    float64
 5    squareMeter                              float64
 6    comission                                float64
 7    rentPricePerSqm                          float64
 8    pricePerSqm                              float64
 9    rentPrice                                float64
 10   rentPriceCurrent                         float64
 11   rentPriceCurrentPerSqm                   float64
 12   region                                   object 
 13   foreClosure                              bool   
 14   grossRet

In [90]:
# Preview the first ten fully flattened rows
df_normalized.head(10)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.city,address.city_district,address.continent,address.country,address.country_code,address.political_union,address.postcode,address.state,address.state_code,address.suburb,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.locationQuery,address.county,address.town,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,originalAddress.location,address.village,oAddress.is24.district,originalAddress.district,address.house_number,address.landuse,address.local_administrative_area,address.borough,address.ro
ad,oAddress.ebk.location,address.municipality,address.residential,address.railway,address.neighbourhood,address.office,address.highway,address.road_type,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,oAddress.ivd24.district,oAddress.wohnungjetzt.street,oAddress.wohnungjetzt.postcode,oAddress.wohnungjetzt.location,address.building,address.community_centre,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.shop,address.retail,address.fuel,address.craft,address.hamlet,address.quarter,address.boundary,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.wohnungsboerse.street,oAddress.wohnungsboerse.postcode,oAddress.wohnungsboerse.location,oAddress.wohnungsboerse.district,oAddress.immobilio.street,oAddress.immobilio.postcode,oAddress.immobilio.location,address.place_of_worship,address.information,address.hospital,address.industrial,address.attraction,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address.city_block,address.hotel,address.waterway,address.leisure,address.cafe,platforms.name,platforms.id,platforms.url,platforms.creationDate,platforms.publishDate,platforms.active,buyingPriceHistory.buyingPrice,buyingPriceHistory.platformName,buyingPriceHistory.creationDate,buyingPriceHistory
0,457f0ced1570618f7355f104101899ef,Jetzt schnell sein und mitgestalten - Penthouse,60599,789000.0,3.0,129.0,5.81,16.046512,6116.27907,2070.0,,,Hessen,False,3.15,,2021.0,TERRACED_FLAT,FIRST_TIME_USE,,False,,,False,False,True,False,2021-02-19T17:01:25.142Z,False,False,85.297297,,[],False,False,0,2021-02-19T17:01:25.431Z,0,,-1043.25,-15.86692,-1043.25,False,DE,DEU,postcode,postcode,Frankfurt am Main,Süd,Europe,Deutschland,de,European Union,60599,Hessen,HE,Sachsenhausen Süd,50.095928,8.710911,"60599 Frankfurt am Main, Deutschland",763380,-5,5,True,7.3,66400.0,77,10.479452,25,25.0,16.664,Süd,638550.0,4950.0,3.175758,Frankfurt am Main,748200.0,5800,2.97931,,925000.0,7170.542636,2.685405,,60599.0,"60599 Süd, Oberrad",,60599,"60599 Süd, Oberrad",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ebk,ebk04dc6b3c7e1b6d06dcd9ec5eaeb26410,https://www.ebay-kleinanzeigen.de/s-anzeige/je...,2021-02-19T17:01:25.142Z,2021-02-19T17:01:25.142Z,True,789000.0,ebk,2021-02-19T17:01:25.142Z,
1,febc9045602be17b45272286419f9533,Fußläufig in die Innenstadt! Attraktive 71m² g...,59368,195000.0,2.0,71.08,2.99,6.021384,2743.387732,428.0,,,Nordrhein-Westfalen,False,2.63,,1927.0,GROUND_FLOOR,WELL_KEPT,2012.0,False,,2.0,True,False,True,False,2021-02-19T17:01:05.000Z,False,False,189.320388,195.0,"[{'id': '28262858476863481ee67dbc5ffb4df8', 'o...",False,False,0,2021-02-19T17:04:26.608Z,0,,-299.3,-18.418462,-299.3,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,59368,Nordrhein-Westfalen,NW,,51.667475,7.64057,"59368 Werne, Deutschland",29717,-5,5,False,8.0,0.0,43,9.5625,0,16.66,16.664,,,,,Werne,120836.0,1700,4.588235,,103000.0,1449.071469,4.986408,,,,,59368,,Kreis Unna,Werne,,59368.0,Werne,Werne,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734904,https://www.immobilienscout24.de/expose/126734904,2021-02-19T17:01:05.000Z,2021-02-19T17:01:05.000Z,True,195000.0,is24,2021-02-19T17:01:05.000Z,
2,f309110164a043d2db71b7027a40f673,Sonnige 2-Zimmer-Dachgeschosswohnung mit Westb...,85622,283000.0,2.0,42.25,2.38,17.065089,6698.224852,721.0,,,Bayern,False,3.06,,1999.0,ROOF_STOREY,FULLY_RENOVATED,,True,3.0,3.0,True,False,True,False,2021-02-19T16:59:36.000Z,False,False,96.587031,157.0,"[{'id': 'bf954a438a27ced66e6df75067eb1972', 'o...",False,False,0,2021-02-19T17:04:30.274Z,0,,-339.27,-14.385866,-339.27,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,85622,Bayern,BY,,48.145558,11.736855,"85622 Feldkirchen, Deutschland",0,-5,5,False,5.3,0.0,31,14.433962,0,0.0,16.664,,,,,Feldkirchen,259837.5,6150,2.907317,,293000.0,6934.911243,2.952901,,,,,85622,,Landkreis München,,,85622.0,Feldkirchen,Feldkirchen,Feldkirchen,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734881,https://www.immobilienscout24.de/expose/126734881,2021-02-19T16:59:36.000Z,2021-02-19T16:59:36.000Z,True,283000.0,is24,2021-02-19T16:59:36.000Z,
3,e8d2b47c7586d7fba69769ea9fb77178,Aussicht genießen - Sofort einziehen,70378,310000.0,3.0,76.71,3.48,12.33216,4041.194108,946.0,,,Baden-Württemberg,False,3.66,,1961.0,APARTMENT,NO_INFORMATION,2013.0,True,3.0,,True,False,True,False,2021-02-19T16:57:13.000Z,False,False,105.084746,314.0,"[{'id': 'f74454b73bae86ae3d613286329c877f', 'o...",False,False,0,2021-02-19T16:58:51.729Z,0,,-292.18,-11.310323,-292.18,True,DE,DEU,place,neighbourhood,Stuttgart,Mühlhausen,Europe,Deutschland,de,European Union,70378,Baden-Württemberg,BW,Mönchfeld,48.842323,9.219188,"Mönchfeld, Stuttgart, Baden-Württemberg, Deuts...",635911,-5,5,True,5.6,89282.0,80,13.660714,25,25.0,16.664,Mühlhausen,306840.0,4000.0,3.87,Stuttgart,356701.5,4650,3.741935,,295000.0,3845.652457,3.848136,,,,,70378,,,,,70378.0,Stuttgart,Stuttgart,,Mönchfeld,Mönchfeld,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734861,https://www.immobilienscout24.de/expose/126734861,2021-02-19T16:57:13.000Z,2021-02-19T16:57:13.000Z,True,310000.0,is24,2021-02-19T16:57:13.000Z,
4,16311b825ea9722c7c7c87c9a1ec5ffe,Anlegerobjekt im beliebten Connewitz,4277,154000.0,2.0,57.49,3.57,7.357801,2678.726735,423.0,,,Sachsen,False,3.3,,1995.0,NO_INFORMATION,NO_INFORMATION,,False,1.0,,True,False,True,False,2021-02-19T16:54:46.000Z,False,False,117.557252,,"[{'id': '5fceb95f91f2f24d42dd75b4181b5a16', 'o...",False,False,0,2021-02-19T16:58:55.427Z,0,,-171.3,-13.348052,-171.3,True,DE,DEU,unknown,landuse,Leipzig,Süd,Europe,Deutschland,de,European Union,4277,Sachsen,SN,Connewitz,51.307054,12.386359,"Friedhof Connewitz, 80, 04277 Leipzig, Deutsch...",593145,-5,5,True,8.2,36985.0,76,9.329268,25,25.0,16.664,Süd,129352.5,2250.0,3.946667,Leipzig,123603.5,2150,4.018605,,131000.0,2278.657158,3.874809,,,,,4277,,,,,4277.0,Leipzig-Connewitz,Leipzig-Connewitz,,Connewitz,Connewitz,80.0,Friedhof Connewitz,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734833,https://www.immobilienscout24.de/expose/126734833,2021-02-19T16:54:46.000Z,2021-02-19T16:54:46.000Z,True,154000.0,is24,2021-02-19T16:54:46.000Z,
5,f60ae61d1bcf721c847fdac7c5546e10,"Top sanierte 2-Zimmer-Wohnung mit Garten, Balk...",60318,259000.0,2.0,31.0,1.95,20.16129,8354.83871,625.0,,,Hessen,False,2.9,,1964.0,NO_INFORMATION,WELL_KEPT,,False,1.0,4.0,True,True,True,False,2021-02-19T16:51:26.000Z,False,False,111.158798,150.0,"[{'id': '15429ff3c7ee251b7ff9803a194d6d01', 'o...",False,False,0,2021-02-19T16:55:09.799Z,0,,-349.17,-16.177606,-349.17,True,DE,DEU,place,neighbourhood,Frankfurt am Main,Innenstadt 3,Europe,Deutschland,de,European Union,60318,Hessen,HE,Nordend West,50.124914,8.67795,"Nordend West, Frankfurt am Main, Hessen, Deuts...",763380,-5,5,True,7.3,66400.0,77,10.479452,25,25.0,16.664,Innenstadt 3,153450.0,4950.0,3.175758,Frankfurt am Main,179800.0,5800,2.97931,,233000.0,7516.129032,3.218884,,,,,60318,,,,,60318.0,Frankfurt am Main,Frankfurt am Main,,Nordend-West,Nordend-West,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734531,https://www.immobilienscout24.de/expose/126734531,2021-02-19T16:51:26.000Z,2021-02-19T16:51:26.000Z,True,259000.0,is24,2021-02-19T16:51:26.000Z,
6,5face758d41ce7b2492826bad6a262af,Galeriewohnung im Garchinger Zentrum,85748,630000.0,3.0,90.0,3.57,15.766667,7000.0,1419.0,,,Bayern,False,2.7,,1989.0,MAISONETTE,WELL_KEPT,,False,,,True,False,True,False,2021-02-19T16:50:43.000Z,False,False,105.882353,387.0,"[{'id': 'f92cd79f089513a2752d15e99566a097', 'o...",False,False,0,2021-02-19T16:55:14.765Z,0,,-897.65,-17.098095,-897.65,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,85748,Bayern,BY,,48.249003,11.64595,"85748 Garching bei München, Deutschland",0,-5,5,True,5.3,116473.0,56,14.433962,25,0.0,16.664,,,,,Garching bei München,589500.0,6550,2.98626,,595000.0,6611.111111,2.861849,,,,,85748,,Landkreis München,Garching bei München,,85748.0,Garching,Garching,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126706011,https://www.immobilienscout24.de/expose/126706011,2021-02-19T16:50:43.000Z,2021-02-19T16:50:43.000Z,True,630000.0,is24,2021-02-19T16:50:43.000Z,
7,e14805d711358b44f69db8163a78c2bd,Frankfurt - Bergen-Enkheim: Exklusive 3-Zimmer...,60388,615000.0,3.0,98.67,2.9,13.894801,6232.897537,1371.0,,,Hessen,False,2.68,,2021.0,APARTMENT,FIRST_TIME_USE,,False,2.0,3.0,True,True,True,False,2021-02-19T16:49:28.000Z,False,False,106.770833,,"[{'id': '1ef971d6e3355edcf90dddade93a69f2', 'o...",False,False,0,2021-02-19T16:50:55.972Z,0,,-920.1,-17.953171,-920.1,True,DE,DEU,place,neighbourhood,Frankfurt am Main,Bergen-Enkheim,Europe,Deutschland,de,European Union,60388,Hessen,HE,Bergen-Enkheim,50.153524,8.755571,"60388 Frankfurt am Main, Deutschland",763380,-5,5,True,7.3,66400.0,77,10.479452,25,25.0,16.664,Bergen-Enkheim,572286.0,5800.0,2.97931,Frankfurt am Main,572286.0,5800,2.97931,,576000.0,5837.64062,2.85625,,,,,60388,,,,,60388.0,Frankfurt am Main / Bergen-Enkheim,Frankfurt am Main / Bergen-Enkheim,,Bergen-Enkheim,Bergen-Enkheim,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734677,https://www.immobilienscout24.de/expose/126734677,2021-02-19T16:49:28.000Z,2021-02-19T16:49:28.000Z,True,615000.0,is24,2021-02-19T16:49:28.000Z,
8,9d68c78fe8d69b8640f313dda3e78396,Frankfurt - Bergen-Enkheim: Exklusive 2-Zimmer...,60388,339000.0,2.0,54.34,2.9,14.225248,6238.498344,773.0,,,Hessen,False,2.74,,2021.0,HALF_BASEMENT,FIRST_TIME_USE,,False,,3.0,True,True,True,False,2021-02-19T16:48:31.000Z,False,False,109.708738,,"[{'id': '9c92adf5f462ccd68c5b3c5f0bf95c93', 'o...",False,False,0,2021-02-19T16:51:02.406Z,0,,-495.05,-17.523894,-495.05,True,DE,DEU,place,neighbourhood,Frankfurt am Main,Bergen-Enkheim,Europe,Deutschland,de,European Union,60388,Hessen,HE,Bergen-Enkheim,50.153524,8.755571,"60388 Frankfurt am Main, Deutschland",763380,-5,5,True,7.3,66400.0,77,10.479452,25,25.0,16.664,Bergen-Enkheim,315172.0,5800.0,2.97931,Frankfurt am Main,315172.0,5800,2.97931,,309000.0,5686.418844,3.001942,,,,,60388,,,,,60388.0,Frankfurt am Main / Bergen-Enkheim,Frankfurt am Main / Bergen-Enkheim,,Bergen-Enkheim,Bergen-Enkheim,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734637,https://www.immobilienscout24.de/expose/126734637,2021-02-19T16:48:31.000Z,2021-02-19T16:48:31.000Z,True,339000.0,is24,2021-02-19T16:48:31.000Z,
9,4e8957bc0044bd00dd92c466807112c4,"Wildeshausen in guter Lage, Neubau-ETW, KfW55 ...",27793,269000.0,3.0,86.04,2.98,9.437471,3126.452813,812.0,,,Niedersachsen,False,3.62,,2022.0,NO_INFORMATION,NO_INFORMATION,,False,,,True,False,True,False,2021-02-19T16:48:21.000Z,False,False,92.758621,,"[{'id': '088d8a266826f9710d49c264eac8f126', 'o...",False,False,0,2021-02-19T16:51:09.532Z,0,,-256.78,-11.455019,-256.78,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,27793,Niedersachsen,NI,,52.893266,8.4364,"27793 Wildeshausen, Deutschland",20129,-5,5,False,4.1,0.0,44,18.658537,0,8.333,16.664,,,,,Wildeshausen,163476.0,1900,4.357895,,290000.0,3370.525337,3.36,,,,,27793,,Landkreis Oldenburg,Wildeshausen,,27793.0,Wildeshausen,Wildeshausen,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,125221199,https://www.immobilienscout24.de/expose/125221199,2021-02-19T16:48:21.000Z,2021-02-19T16:48:21.000Z,True,269000.0,is24,2021-02-19T16:48:21.000Z,


In [49]:
# Spot-check a listing that is advertised on more than one platform: after exploding,
# it should appear once per platform
df_test = df_raw.explode('platforms').explode('buyingPriceHistory')
# Build the mask from df_test itself. A mask taken from the un-exploded `df` has a
# different index and only selects the right rows through implicit reindexing.
df_test[df_test['id'] == '3531fba10ec2058d1155a63a9e147c8d']


Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.city,address.city_district,address.continent,address.country,address.country_code,address.political_union,address.postcode,address.state,address.state_code,address.suburb,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.locationQuery,address.county,address.town,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,originalAddress.location,address.village,oAddress.is24.district,originalAddress.district,address.house_number,address.landuse,address.local_administrative_ar
ea,address.borough,address.road,oAddress.ebk.location,address.municipality,address.residential,address.railway,address.neighbourhood,address.office,address.highway,address.road_type,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,oAddress.ivd24.district,oAddress.wohnungjetzt.street,oAddress.wohnungjetzt.postcode,oAddress.wohnungjetzt.location,address.building,address.community_centre,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.shop,address.retail,address.fuel,address.craft,address.hamlet,address.quarter,address.boundary,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.wohnungsboerse.street,oAddress.wohnungsboerse.postcode,oAddress.wohnungsboerse.location,oAddress.wohnungsboerse.district,oAddress.immobilio.street,oAddress.immobilio.postcode,oAddress.immobilio.location,address.place_of_worship,address.information,address.hospital,address.industrial,address.attraction,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address.city_block,address.hotel,address.waterway,address.leisure,address.cafe
33,3531fba10ec2058d1155a63a9e147c8d,"Gartentraum in Bietigheim - große 3,5-Zimmerwo...",74321,380000.0,3.0,83.21,2.98,"{'name': 'ivd24', 'publishDate': '2021-02-19T1...",11.008292,4566.758803,916.0,,,Baden-Württemberg,False,2.89,,1970.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-19T16:35:08.000Z,False,False,122.977346,364.84,"[{'id': '570a38195aa28682596923141a30928c', 'o...","{'buyingPrice': 380000, 'platformName': 'is24'...",False,False,0,2021-02-19T16:37:25.804Z,0,,-512.93,-16.197895,-512.93,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,74321,Baden-Württemberg,BW,,48.951549,9.122468,"74321 Bietigheim-Bissingen, Deutschland",43227,-5,5,False,3.8,0.0,53,20.131579,0,16.66,16.664,,,,,Bietigheim-Bissingen,316198.0,3800,3.568421,,309000.0,3713.495974,3.557282,,,,,74321,,Landkreis Ludwigsburg,Bietigheim-Bissingen,,74321,Bietigheim-Bissingen,Bietigheim-Bissingen,,,,,,,,,,Vereinbarte Verwaltungsgemeinschaft der Stadt ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33,3531fba10ec2058d1155a63a9e147c8d,"Gartentraum in Bietigheim - große 3,5-Zimmerwo...",74321,380000.0,3.0,83.21,2.98,"{'name': 'is24', 'publishDate': '2021-02-19T16...",11.008292,4566.758803,916.0,,,Baden-Württemberg,False,2.89,,1970.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-19T16:35:08.000Z,False,False,122.977346,364.84,"[{'id': '570a38195aa28682596923141a30928c', 'o...","{'buyingPrice': 380000, 'platformName': 'is24'...",False,False,0,2021-02-19T16:37:25.804Z,0,,-512.93,-16.197895,-512.93,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,74321,Baden-Württemberg,BW,,48.951549,9.122468,"74321 Bietigheim-Bissingen, Deutschland",43227,-5,5,False,3.8,0.0,53,20.131579,0,16.66,16.664,,,,,Bietigheim-Bissingen,316198.0,3800,3.568421,,309000.0,3713.495974,3.557282,,,,,74321,,Landkreis Ludwigsburg,Bietigheim-Bissingen,,74321,Bietigheim-Bissingen,Bietigheim-Bissingen,,,,,,,,,,Vereinbarte Verwaltungsgemeinschaft der Stadt ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [65]:
# Peek at the listing id next to the two columns that still hold nested data
df_test[['id', 'platforms', 'buyingPriceHistory']].head(3)

Unnamed: 0,id,platforms,buyingPriceHistory
33,3531fba10ec2058d1155a63a9e147c8d,"{'name': 'ivd24', 'publishDate': '2021-02-19T1...","{'buyingPrice': 380000, 'platformName': 'is24'..."
33,3531fba10ec2058d1155a63a9e147c8d,"{'name': 'is24', 'publishDate': '2021-02-19T16...","{'buyingPrice': 380000, 'platformName': 'is24'..."


In [75]:
# Flatten dict-valued columns (e.g. platforms, buyingPriceHistory) into dotted
# top-level columns such as `platforms.name`.
# `pd.json_normalize` is the public entry point; the `pandas.io.json.json_normalize`
# alias has been deprecated since pandas 1.0.
df_normalized = pd.json_normalize(df_test.to_dict('records'))
df_normalized.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 163 columns):
 #    Column                                   Dtype  
---   ------                                   -----  
 0    id                                       object 
 1    title                                    object 
 2    zip                                      object 
 3    buyingPrice                              float64
 4    rooms                                    float64
 5    squareMeter                              float64
 6    comission                                float64
 7    rentPricePerSqm                          float64
 8    pricePerSqm                              float64
 9    rentPrice                                float64
 10   rentPriceCurrent                         float64
 11   rentPriceCurrentPerSqm                   float64
 12   region                                   object 
 13   foreClosure                              bool   
 14   grossRet

In [None]:
# First ten rows of the flattened frame
df_normalized.head(10)

In [91]:
# One listing that appears on several platforms while sharing the same price history
df_normalized.loc[df_normalized['id'].eq('3531fba10ec2058d1155a63a9e147c8d')]

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.city,address.city_district,address.continent,address.country,address.country_code,address.political_union,address.postcode,address.state,address.state_code,address.suburb,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.locationQuery,address.county,address.town,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,originalAddress.location,address.village,oAddress.is24.district,originalAddress.district,address.house_number,address.landuse,address.local_administrative_area,address.borough,address.ro
ad,oAddress.ebk.location,address.municipality,address.residential,address.railway,address.neighbourhood,address.office,address.highway,address.road_type,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,oAddress.ivd24.district,oAddress.wohnungjetzt.street,oAddress.wohnungjetzt.postcode,oAddress.wohnungjetzt.location,address.building,address.community_centre,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.shop,address.retail,address.fuel,address.craft,address.hamlet,address.quarter,address.boundary,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.wohnungsboerse.street,oAddress.wohnungsboerse.postcode,oAddress.wohnungsboerse.location,oAddress.wohnungsboerse.district,oAddress.immobilio.street,oAddress.immobilio.postcode,oAddress.immobilio.location,address.place_of_worship,address.information,address.hospital,address.industrial,address.attraction,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address.city_block,address.hotel,address.waterway,address.leisure,address.cafe,platforms.name,platforms.id,platforms.url,platforms.creationDate,platforms.publishDate,platforms.active,buyingPriceHistory.buyingPrice,buyingPriceHistory.platformName,buyingPriceHistory.creationDate,buyingPriceHistory
33,3531fba10ec2058d1155a63a9e147c8d,"Gartentraum in Bietigheim - große 3,5-Zimmerwo...",74321,380000.0,3.0,83.21,2.98,11.008292,4566.758803,916.0,,,Baden-Württemberg,False,2.89,,1970.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-19T16:35:08.000Z,False,False,122.977346,364.84,"[{'id': '570a38195aa28682596923141a30928c', 'o...",False,False,0,2021-02-19T16:37:25.804Z,0,,-512.93,-16.197895,-512.93,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,74321,Baden-Württemberg,BW,,48.951549,9.122468,"74321 Bietigheim-Bissingen, Deutschland",43227,-5,5,False,3.8,0.0,53,20.131579,0,16.66,16.664,,,,,Bietigheim-Bissingen,316198.0,3800,3.568421,,309000.0,3713.495974,3.557282,,,,,74321,,Landkreis Ludwigsburg,Bietigheim-Bissingen,,74321,Bietigheim-Bissingen,Bietigheim-Bissingen,,,,,,,,,,Vereinbarte Verwaltungsgemeinschaft der Stadt ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ivd24,ivd24184220956,https://ivd24immobilien.de/74321-bietigheim-bi...,2021-02-19T16:56:22.068Z,2021-02-19T16:56:22.068Z,True,380000.0,is24,2021-02-19T16:35:08.000Z,
34,3531fba10ec2058d1155a63a9e147c8d,"Gartentraum in Bietigheim - große 3,5-Zimmerwo...",74321,380000.0,3.0,83.21,2.98,11.008292,4566.758803,916.0,,,Baden-Württemberg,False,2.89,,1970.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-19T16:35:08.000Z,False,False,122.977346,364.84,"[{'id': '570a38195aa28682596923141a30928c', 'o...",False,False,0,2021-02-19T16:37:25.804Z,0,,-512.93,-16.197895,-512.93,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,74321,Baden-Württemberg,BW,,48.951549,9.122468,"74321 Bietigheim-Bissingen, Deutschland",43227,-5,5,False,3.8,0.0,53,20.131579,0,16.66,16.664,,,,,Bietigheim-Bissingen,316198.0,3800,3.568421,,309000.0,3713.495974,3.557282,,,,,74321,,Landkreis Ludwigsburg,Bietigheim-Bissingen,,74321,Bietigheim-Bissingen,Bietigheim-Bissingen,,,,,,,,,,Vereinbarte Verwaltungsgemeinschaft der Stadt ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126734352,https://www.immobilienscout24.de/expose/126734352,2021-02-19T16:35:08.000Z,2021-02-19T16:35:08.000Z,True,380000.0,is24,2021-02-19T16:35:08.000Z,


## Inspect null values

In [92]:
# Number of missing values in each column
df_normalized.isna().sum()

id                                           0
title                                        0
zip                                          1
buyingPrice                                 29
rooms                                       26
squareMeter                                  9
comission                                  344
rentPricePerSqm                              0
pricePerSqm                                 32
rentPrice                                    9
rentPriceCurrent                           598
rentPriceCurrentPerSqm                     598
region                                       0
foreClosure                                  0
grossReturn                                 33
grossReturnCurrent                         598
constructionYear                           113
apartmentType                                0
condition                                    0
lastRefurbishment                          588
lift                                         0
floor        

In [93]:
# Count the distinct values in the zip column (shape of the unique-values array)
df_normalized['zip'].unique().shape

(415,)

In [94]:
# Three random listings whose commission is missing
df_normalized.loc[df_normalized['comission'].isna()].sample(3)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.city,address.city_district,address.continent,address.country,address.country_code,address.political_union,address.postcode,address.state,address.state_code,address.suburb,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.locationQuery,address.county,address.town,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,originalAddress.location,address.village,oAddress.is24.district,originalAddress.district,address.house_number,address.landuse,address.local_administrative_area,address.borough,address.ro
ad,oAddress.ebk.location,address.municipality,address.residential,address.railway,address.neighbourhood,address.office,address.highway,address.road_type,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,oAddress.ivd24.district,oAddress.wohnungjetzt.street,oAddress.wohnungjetzt.postcode,oAddress.wohnungjetzt.location,address.building,address.community_centre,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.shop,address.retail,address.fuel,address.craft,address.hamlet,address.quarter,address.boundary,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.wohnungsboerse.street,oAddress.wohnungsboerse.postcode,oAddress.wohnungsboerse.location,oAddress.wohnungsboerse.district,oAddress.immobilio.street,oAddress.immobilio.postcode,oAddress.immobilio.location,address.place_of_worship,address.information,address.hospital,address.industrial,address.attraction,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address.city_block,address.hotel,address.waterway,address.leisure,address.cafe,platforms.name,platforms.id,platforms.url,platforms.creationDate,platforms.publishDate,platforms.active,buyingPriceHistory.buyingPrice,buyingPriceHistory.platformName,buyingPriceHistory.creationDate,buyingPriceHistory
654,22976e5602d28f81c5ace32006a558da,Helle 3-Zimmer-Wohnungen zum Kauf in Hagen,58089,79000.0,3.0,87.0,,5.091954,908.045977,443.0,,,Nordrhein-Westfalen,False,6.73,,1914.0,APARTMENT,NEED_OF_RENOVATION,,False,2.0,4.0,False,False,True,False,2021-02-19T13:13:05.000Z,True,False,102.597403,,"[{'id': '4107a7d8d27538a6ca5883f7a5d81fe2', 'o...",False,False,0,2021-02-19T15:30:30.924Z,0,,60.45,9.182278,60.45,True,DE,DEU,building,building,Hagen,Hagen-Mitte,Europe,Deutschland,de,European Union,58089,Nordrhein-Westfalen,NW,Kuhlerkamp,51.354385,7.453917,"Minervastraße 1, 58089 Hagen, Deutschland",188686,-10,-5,True,12.2,67958.0,62,0.0,25,25.0,12.498,Hagen-Mitte,117450.0,1350.0,5.6,Hagen,95700.0,1100,6.218182,,77000.0,885.057471,6.903896,,,,Minervastraße 1,58089,,,,Minervastraße 1,58089,Hagen,Hagen,,Wehringhausen,Wehringhausen,1.0,,,,Minervastraße,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126727771,https://www.immobilienscout24.de/expose/126727771,2021-02-19T13:13:05.000Z,2021-02-19T13:13:05.000Z,True,79000.0,is24,2021-02-19T15:30:30.923Z,
212,113f28cdb64fc1fcd92a7324e41382b5,NEUBAU - HOCH HINAUS - ETW IM STAFFELGESCHOSS,22045,414000.0,3.0,67.46,,13.726653,6136.970056,926.0,,,Hamburg,False,2.68,,2021.0,ROOF_STOREY,MINT_CONDITION,,True,,,True,False,True,False,2021-02-19T15:23:21.000Z,False,False,128.173375,,"[{'id': '5ee6f02bce09c72f733a349e32ace659', 'o...",False,False,0,2021-02-19T15:28:28.485Z,0,,-564.77,-16.370048,-564.77,True,DE,DEU,place,neighbourhood,Hamburg,Wandsbek,Europe,Deutschland,de,European Union,22045,Hamburg,HH,Jenfeld,53.575837,10.133736,"22045 Hamburg, Deutschland",1847253,-5,5,True,8.1,100880.0,76,9.444444,25,25.0,16.664,Wandsbek,293451.0,4350.0,3.117241,Hamburg,303570.0,4500,3.173333,,323000.0,4788.022532,3.440248,,,,,22045,,,,,22045,Hamburg,Hamburg,,Jenfeld,Jenfeld,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ivd24,ivd24184220898,https://ivd24immobilien.de/22045-hamburg-wohnu...,2021-02-19T15:42:03.945Z,2021-02-19T15:42:03.945Z,True,414000.0,is24,2021-02-19T15:23:21.000Z,
103,7aab38e9555eea7d433cfb4b4d05803a,2 Zi-DG Wohnung auf 2 Etagen --provisionsfrei-...,83301,150000.0,2.0,38.0,,10.578947,3947.368421,402.0,370.0,9.736842,Bayern,False,3.22,2.96,1997.0,MAISONETTE,WELL_KEPT,2005.0,True,2.0,2.0,False,False,True,True,2021-02-19T16:09:00.000Z,True,False,111.111111,,"[{'id': '62c14d63c2cd5e08baea50d8567b794c', 'o...",False,False,0,2021-02-19T16:11:53.835Z,0,,-176.58,-14.126667,-176.58,True,DE,DEU,postcode,postcode,,,Europe,Deutschland,de,European Union,83301,Bayern,BY,,47.950898,12.569133,"83301 Bayern, Deutschland",0,-5,5,False,0.0,0.0,17,0.0,0,0.0,16.664,,,,,,133000.0,3500,3.154286,,135000.0,3552.631579,3.573333,,,,Herrmannstädter Str. 1,83301,,,,Herrmannstädter Str. 1,83301,Traunreut,Traunreut,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,is24,126709519,https://www.immobilienscout24.de/expose/126709519,2021-02-19T16:09:00.000Z,2021-02-19T16:09:00.000Z,True,150000.0,is24,2021-02-19T16:09:00.000Z,


In [95]:
# Compact summary of the normalized frame: index range, column count, dtype counts and memory usage
df_normalized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Columns: 163 entries, id to buyingPriceHistory
dtypes: bool(12), float64(41), int64(8), object(102)
memory usage: 803.1+ KB


In [99]:
# Ten zip codes with the highest maximum cashFlow, best first.
# TODO: the original notes also mention grouping by city and by type.
(
    df_normalized
    .groupby("zip")['cashFlow']
    .max()
    .sort_values(ascending=False)
    .head(10)
)

zip
70192    1816.10
67061     404.48
47798     311.13
39288     290.63
42107     269.58
67307     217.05
45127     212.55
42551     162.68
68167     154.37
06249     130.57
Name: cashFlow, dtype: float64

# Database
Here we want to save the CSV in the newly created database schema.  

In [19]:
import psycopg2
import pandas as pd
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
load_dotenv()

# Database settings are read from the environment so no credentials live in the notebook.
DB_HOST, DB_NAME, DB_USER, DB_PASSWORD = map(
    os.getenv, ('DB_HOST', 'DB_NAME', 'DB_USER', 'DB_PASSWORD')
)

# Keyword arguments handed to `psycopg2.connect` by `connect()`.
connection_parameter = dict(
    host=DB_HOST,
    database=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
)

def connect(connection_parameter):
    """Open a connection to the PostgreSQL database server.

    Parameters
    ----------
    connection_parameter : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``
        (this notebook passes ``host``, ``database``, ``user`` and ``password``).

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    SystemExit
        With exit code 1 when the connection attempt raises; the error is
        printed before exiting.
    """
    # Imported locally so the function does not rely on an `import sys`
    # executed by some other notebook cell.
    import sys

    print('Connecting to the PostgreSQL database...')
    try:
        conn = psycopg2.connect(**connection_parameter)
    except Exception as error:  # psycopg2.DatabaseError is already an Exception subclass
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn

def postgresql_to_dataframe(conn, select_query):
    """Run a SELECT statement and return its result set as a pandas DataFrame.

    Parameters
    ----------
    conn
        Open database connection, passed to pandas as ``con``.
    select_query : str
        SQL text to execute.

    Returns
    -------
    pandas.DataFrame
        The frame ``pd.read_sql_query`` builds from the query result.
    """
    print("Read from table")
    # pandas handles the cursor and the column names itself
    return pd.read_sql_query(select_query, con=conn)

In [20]:
# Open the PostgreSQL connection
conn = connect(connection_parameter)
# Not used by the query below. NOTE(review): drop if no later cell reads it.
column_names = list()  # formerly ["id", "source", "datetime", "mean_temp"]
# Load every row of public.api_liveaiproperty and preview the first five
df = postgresql_to_dataframe(
    conn,
    "SELECT * FROM public.api_liveaiproperty",
)
df.head()

Connecting to the PostgreSQL database...
Connection successful
Read from table


Unnamed: 0,id,active,property_id,property_url,source_id,source_portal,property_type,image_urls,image_captions,city,title,address,rooms,living_space,floor,no_of_floors,construction_year,condition,latitude,longitude,zipcode,foreclosure,is_rented_out,created_at,updated_at,price_id
0,45857,True,6a867704a532a627f707d778d30d8439,https://www.immobilienscout24.de/expose/116894250,116894250,is24,flat,https://pictures.immobilienscout24.de/listings...,1AA1\n3ABD\n7\nLogopartner\nLogopartner,Berlin,HERRSCHAFTLICHE GARTENRESIDENZ mit Privatgarte...,"Tiergarten, Berlin, Deutschland",2.0,73.0,,4.0,1880.0,FIRST_TIME_USE,52.509778,13.35726,10785,False,False,2020-12-15 15:59:06.680017+00:00,2020-12-15 15:59:06.684562+00:00,46854.0
1,33503,True,f65bb574710bc2bb043bea8069d10131,https://www.immobilienscout24.de/expose/120330418,120330418,is24,flat,https://pictures.immobilienscout24.de/listings...,Gartenperspektive\nInnenraumperspektive\nVogel...,Obersontheim,Exklusives 5-Zimmer-Penthouse mit Dachterrasse...,"Frankenstraße, 74423 Obersontheim, Deutschland",5.0,127.79,,,2021.0,FIRST_TIME_USE,49.058834,9.887907,74423,False,False,2020-12-13 15:34:46.452927+00:00,2020-12-15 15:38:05.368774+00:00,33650.0
2,33504,True,5835cc06d03607cc80f642d982de4eff,https://www.immobilienscout24.de/expose/120331198,120331198,is24,flat,https://pictures.immobilienscout24.de/listings...,Gartenperspektive\nInnenraumperspektive\nVogel...,Obersontheim,Raum & Lebensfreunde! Repräsentatives 5-Zimmer...,"Frankenstraße, 74423 Obersontheim, Deutschland",4.0,138.44,,,2021.0,FIRST_TIME_USE,49.058834,9.887907,74423,False,False,2020-12-13 15:34:46.464072+00:00,2020-12-15 15:38:05.374792+00:00,33651.0
3,34809,True,68e621da2bb7a3b7bb8557f04b74f3bd,https://www.immobilienscout24.de/expose/122422682,122422682,is24,flat,https://pictures.immobilienscout24.de/listings...,Diele\nHausflur\nKüche\nBadezimmer\nGäste-WC\n...,Düsseldorf,Sanierte und barrierefreie 4-Zimmer-Wohnung mi...,"Holthausen, 40589 Düsseldorf, Deutschland",4.0,90.0,,,1967.0,NO_INFORMATION,51.175156,6.83236,40589,False,False,2020-12-13 15:37:15.498003+00:00,2020-12-15 15:40:34.740057+00:00,34956.0
4,35827,True,4813f6cbf82e17c704360dcad5aa5480,https://www.immobilienscout24.de/expose/122184987,122184987,is24,flat,https://pictures.immobilienscout24.de/listings...,Wohnzimmer\nAußenansicht\nBadezimmer Beispiel\...,Nieste,Wohnen im Herzen von Nieste,"34329 Nieste, Deutschland",3.0,93.9,,3.0,2020.0,FIRST_TIME_USE,51.312294,9.670994,34329,False,False,2020-12-13 15:39:42.735693+00:00,2020-12-15 15:43:02.089328+00:00,35974.0


In [21]:
# (rows, columns) of the table loaded from PostgreSQL
df.shape

(108045, 26)

In [22]:
# Names of the columns stored with object dtype
df.select_dtypes(include=object).columns

Index(['property_id', 'property_url', 'source_id', 'source_portal',
       'property_type', 'image_urls', 'image_captions', 'city', 'title',
       'address', 'condition', 'zipcode', 'foreclosure', 'is_rented_out'],
      dtype='object')

In [23]:
# Text columns to measure: the object-dtype columns listed by
# `df.columns[df.dtypes == object]`, leaving out 'foreclosure' and 'is_rented_out'.
columns_strings = [
    'property_id',
    'property_url',
    'source_id',
    'source_portal',
    'property_type',
    'image_urls',
    'image_captions',
    'city',
    'title',
    'address',
    'condition',
    'zipcode',
]

# Report the longest value found in each text column
# (presumably to size the text fields of the new schema — confirm).
for column in columns_strings:
    longest = df[column].str.len().max()
    print(f"Column: {column}: {longest}")

Column: property_id: 32.0
Column: property_url: 288.0
Column: source_id: 50.0
Column: source_portal: 18.0
Column: property_type: 5
Column: image_urls: 6995.0
Column: image_captions: 1727.0
Column: city: 48.0
Column: title: 199.0
Column: address: 176.0
Column: condition: 34.0
Column: zipcode: 5.0


# ROI Calculation
This is part of the transformation. We will add more columns for the ROI/cashflow calculation and later use those methods to update the ROI/cashflow on the fly for the API endpoints.

In [25]:
from transformer import get_roi_gross, get_cashflow_after_operating_expenses, get_cashflow_after_taxes, get_cashflow_after_reserves, get_tax_year


In [40]:
# Inputs and derived values for the ROI / cashflow key metrics.
# Every derived value is assigned before its first use, so the cell also runs
# on a fresh kernel when `cold_rent_given` or `house_allowance_given` is None.

# FROM EXPOSE
purchase_price = 150000         # required
living_space_sqm = 60           # required
cold_rent_given = 700           # can be given or not; DEFAULT: None
house_allowance_given = 300     # can be given or not; DEFAULT: None


# OUR DEFAULT VALUES
rent_price_index_per_sqm = 11               # varies from city and living space
vacancy_rate = 0.02                         # 2% standard = no rent per year
house_allowance_rate = 0.35                 # 35% of cold rent
interest_rate = 0.02                        # 2% as standard
mortgage_rate = 0.02                        # 2% as standard
equity_rate = 0.2                           # 20% of purchase price
maintenance_reserve_per_sqm_per_year = 10   # 10€ per sqm and per year as standard
tax_rate = 0.3                              # could be 20-30%
tax_write_off_rate = 0.02                   # 2% (has to be validated by professionals)

# VALUES FOR KEY METRICS CALCULATION
#   COLD RENT
# The estimate has to exist before the fallback below can refer to it.
cold_rent_estimated = living_space_sqm * rent_price_index_per_sqm
cold_rent_month = cold_rent_given if cold_rent_given else cold_rent_estimated
cold_rent_year = cold_rent_month * 12

#   EQUITY
equity = purchase_price * equity_rate
#   LOAN
loan = purchase_price - equity

# ESTIMATED VALUES
maintenance_reserve_private_year = living_space_sqm * maintenance_reserve_per_sqm_per_year
maintenance_reserve_private_month = maintenance_reserve_private_year / 12
vacancy_year = cold_rent_year * vacancy_rate
vacancy_month = vacancy_year / 12
tax_write_off_year = tax_write_off_rate * loan
tax_write_off_month = tax_write_off_year / 12

#   HOUSE ALLOWANCE (Hausgeld)
house_allowance_estimated = cold_rent_month * house_allowance_rate
house_allowance_month = house_allowance_given if house_allowance_given else house_allowance_estimated
house_allowance_year = house_allowance_month * 12
#   INTEREST
interest_year = loan * interest_rate
interest_month = interest_year / 12
#   MORTGAGE
mortgage_year = loan * mortgage_rate
mortgage_month = mortgage_year / 12
#   TAX
# NOTE(review): every other argument is a yearly figure, but the house allowance
# is passed as the monthly value — confirm against get_tax_year's signature
# whether `house_allowance_year` is expected here.
tax = get_tax_year(cold_rent_year, vacancy_year, house_allowance_month, interest_year, tax_write_off_year, maintenance_reserve_private_year)

NameError: name 'get_tax_year' is not defined

In [41]:

# Key metrics, each computed by a helper imported from `transformer`.
roi_gross = get_roi_gross(cold_rent_year, purchase_price)

cashflow_after_operating_expenses = get_cashflow_after_operating_expenses(
    cold_rent_month,
    vacancy_month,
    house_allowance_month,
    interest_month,
    mortgage_month,
)

cashflow_after_taxes = get_cashflow_after_taxes(
    cold_rent_month,
    vacancy_month,
    house_allowance_month,
    interest_month,
    mortgage_month,
    tax,
)

cashflow_after_reserves = get_cashflow_after_reserves(
    cold_rent_month,
    vacancy_month,
    house_allowance_month,
    interest_month,
    mortgage_month,
    tax,
    maintenance_reserve_private_month,
)

# One metric per output line, in the order they were computed.
print(
    roi_gross,
    cashflow_after_operating_expenses,
    cashflow_after_taxes,
    cashflow_after_reserves,
    sep="\n",
)

NameError: name 'get_roi_gross' is not defined

In [42]:
# Ten random rows of `df`.
# NOTE(review): the displayed columns (title, buyingPrice, platforms, ...) are the API listing
# fields, not the PostgreSQL table read into `df` earlier, so `df` was presumably rebound by a
# cell run out of order — confirm which frame is intended here.
df.sample(10)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,address,region,foreClosure,locationFactor,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,aggregations,leasehold,priceInMarket,oAddress,originalAddress,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages
478,5c2607457bdb53f2f05fb2eaed988743,Vermietete Einzimmerwohnung in zentraler Lage ...,68159,129000.0,1.0,36.0,1.95,[{'deactivationDate': '2021-02-19T09:01:08.690...,11.722222,3583.333333,422.0,310.0,8.611111,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Baden-Württemberg,False,"{'population': 310658, 'populationTrend': {'fr...",3.93,2.88,1988.0,NO_INFORMATION,WELL_KEPT,,True,7.0,7.0,False,False,True,True,2021-02-18T18:21:23.000Z,False,"{'district': {'name': 'Innenstadt/Jungbusch', ...",False,126.470588,"{'is24': {'street': None, 'postcode': '68159',...","{'postcode': '68159', 'location': 'Mannheim', ...",,"[{'id': '81d34d244d9d038c57140ed89b51e533', 'o...","[{'buyingPrice': 129000, 'platformName': 'is24...",False,False,1,2021-02-18T18:24:19.755Z,0,,-171.83,-15.984496,-171.83,True
107,ba8f834dc6d0c7deec3fd79bfe113bd7,Geräumige 3-Zimmer-Eigentumswohnung in MA-Waldhof,68305,85000.0,3.0,79.26,,"[{'name': 'is24', 'id': '126717302', 'url': 'h...",10.017663,1072.419884,794.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Baden-Württemberg,True,"{'population': 310658, 'populationTrend': {'fr...",11.21,,1900.0,APARTMENT,NEED_OF_RENOVATION,,False,,,False,False,True,False,2021-02-19T08:09:04.000Z,False,"{'district': {'name': 'Waldhof', 'buyingPrice'...",False,34.274194,"{'is24': {'street': None, 'postcode': '68305',...","{'postcode': '68305', 'location': 'Mannheim', ...",,"[{'id': 'd588b00303587dd4319af6fd4b650163', 'o...","[{'buyingPrice': 85000, 'platformName': 'is24'...",False,False,0,2021-02-19T08:12:33.302Z,0,,276.93,39.096471,276.93,True
159,68527ade9cbb5d85e2580bc27eb51388,"🏡🌳GRÜN, RUHIG; ZENTRAL🌳: Neubau-Maisonette mit...",13156,941500.0,4.0,201.0,,"[{'name': 'is24', 'id': '126714433', 'url': 'h...",15.522388,4684.079602,3120.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Berlin,False,"{'population': 3669491, 'populationTrend': {'f...",3.98,,2021.0,NO_INFORMATION,FIRST_TIME_USE,2021.0,True,,,True,True,True,False,2021-02-19T06:52:16.000Z,False,"{'district': {'name': 'Niederschönhausen', 'bu...",False,78.13278,"{'is24': {'street': None, 'postcode': '13156',...","{'postcode': '13156', 'location': 'Berlin - Pa...",,"[{'id': '71b1520678bdfd05aa3bdc74d465eafe', 'o...","[{'buyingPrice': 941500, 'platformName': 'is24...",False,False,0,2021-02-19T08:56:49.252Z,0,,-664.08,-8.464153,-664.08,True
148,5e5e9e7b211959aa794428fd73d7e820,Moderne 2-Zimmer-Wohnung auf ca. 60 m² mit gro...,12529,287800.0,2.0,59.41,,"[{'name': 'is24', 'id': '126668719', 'url': 'h...",12.943949,4844.302306,769.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Brandenburg,False,"{'population': 0, 'populationTrend': {'from': ...",3.21,,2023.0,APARTMENT,FIRST_TIME_USE,,True,6.0,,True,False,True,False,2021-02-19T07:13:48.000Z,False,"{'district': {'name': 'Schönefeld', 'buyingPri...",False,96.254181,"{'is24': {'street': 'Rathausgasse 3', 'postcod...","{'street': 'Rathausgasse 3', 'postcode': '1252...",,"[{'id': '3a28f1df5b796d153714bbf1cc7f04fc', 'o...","[{'buyingPrice': 287800, 'platformName': 'is24...",False,False,0,2021-02-19T09:17:24.653Z,0,,-325.57,-13.574705,-325.57,True
229,8f3b9ec2069a33c6fd363b29a5717d8d,Großzügige Wohnmöglichkeit über zwei Ebenen...,22529,2175000.0,7.0,250.0,6.12,"[{'name': 'ebk', 'id': 'ebkf295daea30d13c350dc...",15.252,8700.0,3813.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Hamburg,False,"{'population': 1847253, 'populationTrend': {'f...",2.1,,2011.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-18T22:19:54.454Z,False,"{'district': {'name': 'Eimsbüttel', 'buyingPri...",False,110.969388,"{'ebk': {'street': None, 'postcode': '22529', ...","{'street': '', 'postcode': '22529', 'location'...",,[],"[{'buyingPrice': 2175000, 'platformName': 'ebk...",False,False,0,2021-02-19T05:56:51.715Z,0,,-4031.55,-22.243034,-4031.55,False
411,21618ae01e987ba8a6cede3b60ce7c4a,"Zwangsversteigerung Wohnung, Bürgermeister-Kut...",67059,156000.0,3.0,91.0,,"[{'name': 'immobilien', 'publishDate': '2021-0...",8.703297,1714.285714,792.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Rheinland-Pfalz,True,"{'population': 172253, 'populationTrend': {'fr...",6.09,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-18T19:27:59.223Z,False,{'district': {'name': 'Ludwigshafen-Südliche-I...,False,68.122271,"{'ivd24': {'street': None, 'postcode': '67059'...","{'postcode': '67059', 'location': 'Ludwigshafen'}",,"[{'id': 'c16955ccf813ddf86dace4ecb7f9c9cb', 'o...","[{'buyingPrice': 156000, 'platformName': 'ivd2...",False,False,0,2021-02-19T08:37:25.164Z,0,,74.38,5.721795,74.38,True
82,0ec32c15b880eda6a7e19b9be990369e,Hochwertige Neubauwohnung - inkl. Erstvermietung,48167,291887.0,2.0,61.45,,"[{'name': 'is24', 'id': '126718144', 'url': 'h...",11.863303,4749.991863,729.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 315293, 'populationTrend': {'fr...",3.0,,2021.0,NO_INFORMATION,FIRST_TIME_USE,,True,2.0,4.0,True,False,True,True,2021-02-19T08:35:33.000Z,False,"{'district': {'name': 'Münster-Südost', 'buyin...",False,113.134496,"{'is24': {'street': 'Schlesienstraße 21', 'pos...","{'street': 'Schlesienstraße 21', 'postcode': '...",,"[{'id': 'bd79628d93da2c974b0e5b4a3307f34f', 'o...","[{'buyingPrice': 291887, 'platformName': 'is24...",False,False,0,2021-02-19T08:38:32.190Z,0,,-366.15,-15.053086,-366.15,True
193,c91b3f34f9a6fcf14892b77b38b50c6b,Kapitalanleger aufgepasst: DG-Whg. mit TOP-Re...,10551,399000.0,3.0,93.0,6.99,"[{'name': 'immobilienmarkt1a', 'id': 'immobili...",11.55914,4290.322581,1075.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Berlin,False,"{'population': 3669491, 'populationTrend': {'f...",3.23,,1918.0,ROOF_STOREY,NO_INFORMATION,,False,5.0,,False,False,True,False,2021-02-19T02:16:48.519Z,False,"{'location': {'name': 'Berlin', 'buyingPrice':...",False,101.269036,"{'immobilienmarkt1a': {'street': None, 'postco...","{'postcode': '10551', 'location': 'Berlin'}",,"[{'id': '9779fc1c2a9940a83ef2885980127866', 'o...","[{'buyingPrice': 399000, 'platformName': 'immo...",False,False,0,2021-02-19T02:16:50.189Z,0,,-523.33,-15.739348,-523.33,True
558,3f5a72754cb254fb98e9d79d110cac28,"Wiesbaden – Solide vermietete, pfiffige Wohnun...",65189,145000.0,1.0,51.0,3.57,"[{'name': 'ivd24', 'publishDate': '2021-02-18T...",11.45098,2843.137255,584.0,166.916667,3.272876,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Hessen,False,"{'population': 278474, 'populationTrend': {'fr...",4.83,1.38,1983.0,GROUND_FLOOR,NO_INFORMATION,,False,,,False,False,True,True,2021-02-18T17:24:44.502Z,False,"{'district': {'name': 'Südost', 'buyingPrice':...",False,81.460674,"{'immobilio': {'street': 'Friedenstraße 24b', ...","{'street': 'Friedenstraße 24b', 'postcode': '6...",182.0,"[{'id': '8d65156e36ea47a80ac8a12c67f2dd01', 'o...","[{'buyingPrice': 145000, 'platformName': 'ivd2...",False,False,0,2021-02-19T08:14:29.114Z,0,,-320.25,-26.503793,-320.25,True
456,4e7d3a5cedba1ac58b5a8690ccfd6303,Voll ausgestattete Zwei-Zimmer-Wohnung mit Bal...,85598,389000.0,2.0,57.0,1.95,"[{'name': 'is24', 'publishDate': '2021-02-18T1...",13.982456,6824.561404,797.0,764.0,13.403509,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Bayern,False,"{'population': 0, 'populationTrend': {'from': ...",2.46,2.36,1972.0,NO_INFORMATION,WELL_KEPT,,True,2.0,5.0,True,False,True,True,2021-02-18T18:51:15.000Z,False,"{'location': {'name': 'Vaterstetten', 'buyingP...",False,123.492063,"{'is24': {'street': None, 'postcode': '85598',...","{'postcode': '85598', 'location': 'Vaterstette...",220.0,"[{'id': 'f93bc1ef2776a0e92b30fa38e1d46159', 'o...","[{'buyingPrice': 389000, 'platformName': 'is24...",False,False,1,2021-02-19T08:51:23.374Z,0,,-608.82,-18.780977,-608.82,True


In [43]:
# Select the columns used to compare the existing `cashFlow` column with a
# value computed by get_cashflow_after_operating_expenses.
# .copy() makes this an independent frame, so adding a column below never writes
# onto a slice of `df` (the chained-assignment warning is switched off globally
# in the first cell, which would otherwise hide that problem).
df_cashflow_comparisson = df[["id", "title", "zip", "buyingPrice", "cashFlow"]].copy()

# NOTE(review): the recorded output of this cell is
# "NameError: name 'get_cashflow_after_operating_expenses' is not defined".
# The helper must be defined or imported in an earlier cell before this cell can
# run on a fresh kernel -- TODO confirm where it lives and what it returns.
df_cashflow_comparisson["liveAiCashflow"] = get_cashflow_after_operating_expenses(
    df_cashflow_comparisson["buyingPrice"]
)
df_cashflow_comparisson.head()

NameError: name 'get_cashflow_after_operating_expenses' is not defined

# Next steps

- Clean null values.
- Transform the data into a clean schema.
- Load the clean CSV into the S3 bucket.
