# ETL
This project extracts real estate properties from an API, transforms them, and loads them into a CSV.

Extract    
Extract all properties by calling the Immo API.

Transform   
Transform (normalize) the nested JSON objects. Fill NaN values.
 
Load   
Load the finished CSV into an S3 bucket.

In [1]:
# Importing modules
import pandas as pd

# Notebook-wide pandas settings: lift the row/column display limits and
# switch off the chained-assignment check.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.set_option("mode.chained_assignment", None)

In [2]:
DEBUG = True

In [3]:
import sys
sys.path.insert(1, '../pipeline')
sys.path.insert(2, '../data')

from etl import ETL

In [4]:
# This is how you start the pipeline
etl = ETL()
etl.execute(None)

## Transformation 
We want to clean the nested JSON objects by normalizing (flattening) them.

In [34]:
import json
# `json_normalize` flattens nested JSON into a DataFrame. Import it from the
# top-level package: the old `pandas.io.json` location has been deprecated
# since pandas 1.0 and is no longer importable in current releases.
from pandas import json_normalize

In [36]:
# Location of the raw API dump, relative to the notebook's working directory.
JSON_PATH = "../data/immo.json"

# Decode explicitly as UTF-8: the listing titles contain German umlauts, and
# without an explicit encoding `open` falls back to the platform default,
# which differs between machines.
# NOTE(review): assumes the dump was written as UTF-8 (the JSON interchange
# default) -- confirm against the extract step.
with open(JSON_PATH, encoding="utf-8") as json_file:
    data = json.load(json_file)

# One row per listing; nested objects and arrays stay as dict/list cells here.
df = pd.DataFrame.from_dict(data)
df.head(3)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,address,region,foreClosure,locationFactor,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,aggregations,leasehold,priceInMarket,oAddress,originalAddress,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages
0,a6403543c394e8f9dc0607d2d0b4d296,Moderne 1 ZKB Dachgeschosswohnung für Singles!,67122,90000.0,1.0,30.2,3.57,"[{'name': 'ebk', 'id': 'ebk9172d28a8c5c4e69b37...",10.761589,2980.13245,325.0,300.0,9.933775,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Rheinland-Pfalz,False,"{'population': 0, 'populationTrend': {'from': ...",4.33,4.0,,ROOF_STOREY,WELL_KEPT,,False,,,False,False,True,True,2021-02-19T09:39:48.811Z,False,"{'location': {'name': 'Altrip', 'buyingPrice':...",False,118.421053,"{'ebk': {'street': None, 'postcode': '67122', ...","{'street': '', 'postcode': '67122', 'location'...",,[],"[{'buyingPrice': 90000, 'platformName': 'ebk',...",False,False,0,2021-02-19T09:39:49.562Z,0,,-70.42,-9.388889,-70.42,False
1,a0fa2a2e9a03e1e51183319d389b9b8d,Gemütliches Zuhause zum Verweilen und Entspannen,33154,282425.0,3.0,78.41,,"[{'name': 'ebk', 'id': 'ebk34a7c7315353548e1fa...",7.039918,3601.900268,552.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 24956, 'populationTrend': {'fro...",2.35,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:39:39.691Z,False,"{'district': {'name': 'Salzkotten', 'buyingPri...",False,120.180851,"{'ebk': {'street': None, 'postcode': '33154', ...","{'street': '', 'postcode': '33154', 'locationQ...",,[],"[{'buyingPrice': 282425, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:40.122Z,0,,-452.03,-19.206515,-452.03,False
2,f36467918129e396a5997571c6765a65,Charmante Wohnung mit Wohlfühlatmosphäre in de...,63739,540000.0,4.0,110.0,,"[{'name': 'ebk', 'id': 'ebkd4472406fcbf00c60be...",10.054545,4909.090909,1106.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Bayern,False,"{'population': 0, 'populationTrend': {'from': ...",2.46,,1890.0,TERRACED_FLAT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:39:31.366Z,True,"{'location': {'buyingPrice': 385000, 'pricePer...",False,122.727273,"{'ebk': {'street': None, 'postcode': '63739', ...","{'street': '', 'postcode': '63739', 'location'...",250.0,[],"[{'buyingPrice': 540000, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:31.847Z,0,,-786.1,-17.468889,-786.1,False


In [72]:
df['platforms'][0]

[{'name': 'ebk',
  'id': 'ebk9172d28a8c5c4e69b37c7a65bb0ca6c9',
  'url': 'https://www.ebay-kleinanzeigen.de/s-anzeige/moderne-1-zkb-dachgeschosswohnung-fuer-singles-/1672091692-196-5386',
  'creationDate': '2021-02-19T09:39:48.811Z',
  'publishDate': '2021-02-19T09:39:48.811Z',
  'active': True}]

3 Sub rows   
Goal: We have the data (no loss of data)   
Case 1 Duplicate expose ID (because of platforms)   --> Our current go to strategy   
Case 2 Single expose ID (nested platforms)          --> harder to parse and display  
Case 3 Duplicate Expose ID (duplicate platforms)    -->   
    
`platforms` and `buyingPriceHistory` contain nested arrays.  

### Inspection of the data

In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 50 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      600 non-null    object 
 1   title                   600 non-null    object 
 2   zip                     600 non-null    object 
 3   buyingPrice             584 non-null    float64
 4   rooms                   581 non-null    float64
 5   squareMeter             597 non-null    float64
 6   comission               278 non-null    float64
 7   platforms               600 non-null    object 
 8   rentPricePerSqm         599 non-null    float64
 9   pricePerSqm             584 non-null    float64
 10  rentPrice               596 non-null    float64
 11  rentPriceCurrent        51 non-null     float64
 12  rentPriceCurrentPerSqm  51 non-null     float64
 13  address                 599 non-null    object 
 14  region                  599 non-null    ob

Things that are always given are id, title, zip.

We have to be careful with the purchase price. There are missing values that have to be filled or kicked out. 

The JSON array contains nested JSON objects and arrays. In the next step we try to flatten the JSON columns and aggregate them into one table. 

The columns are:
- platforms
- buyingPriceHistory
- locationFactor
- aggregations
- oAddress
- originalAddress

In [74]:
df_raw = json_normalize(data)
df_raw.head()

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,region,foreClosure,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,leasehold,priceInMarket,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages,address.ISO_3166-1_alpha-2,address.ISO_3166-1_alpha-3,address._category,address._type,address.continent,address.country,address.country_code,address.county,address.municipality,address.political_union,address.postcode,address.state,address.state_code,address.village,address.lat,address.lon,address.displayName,locationFactor.population,locationFactor.populationTrend.from,locationFactor.populationTrend.to,locationFactor.hasUniversity,locationFactor.unemploymentRate,locationFactor.numberOfStudents,locationFactor.score,locationFactor.unemploymentRateScore,locationFactor.universityScore,locationFactor.populationScore,locationFactor.populationTrendScore,aggregations.location.name,aggregations.location.buyingPrice,aggregations.location.pricePerSqm,aggregations.location.grossReturn,aggregations.similarListing.name,aggregations.similarListing.buyingPrice,aggregations.similarListing.pricePerSqm,aggregations.similarListing.grossReturn,oAddress.ebk.street,oAddress.ebk.postcode,oAddress.ebk.location,oAddress.ebk.locationQuery,originalAddress.street,originalAddress.postcode,originalAddress.location,originalAddress.locationQuery,address.city,address.suburb,address.town,aggregations.district.name,aggregations.district.buyingPrice,aggregations.district.pricePerSqm,aggregations.district.grossReturn,address.neighbourhood,address.road,address.road_type,address.city_district,oAddress.is24.street,oAddress.is24.postcode,oAddress.is24.location,oAddress.is24.distri
ct,originalAddress.district,address.house_number,address.residential,oAddress.ivd24.street,oAddress.ivd24.postcode,oAddress.ivd24.location,address.borough,address.hamlet,address.local_administrative_area,address.museum,oAddress.ivd24.district,address.industrial,address.leisure,address.shop,address.hotel,oAddress.immobilio.street,oAddress.immobilio.postcode,address.doctors,oAddress.immobilien.street,oAddress.immobilien.postcode,oAddress.immobilien.location,address.commercial,address.fuel,oAddress.immobilienmarkt1a.street,oAddress.immobilienmarkt1a.postcode,oAddress.immobilienmarkt1a.location,oAddress.sueddeutsche.street,oAddress.sueddeutsche.postcode,oAddress.sueddeutsche.location,oAddress.faz.street,oAddress.faz.postcode,oAddress.faz.location,oAddress.immobilio.location,oAddress.regionalimmobilien.street,oAddress.regionalimmobilien.postcode,oAddress.regionalimmobilien.location,oAddress.immoticket24.street,oAddress.immoticket24.postcode,oAddress.immoticket24.location,address.office,address.library,address.railway,address.townhall,address.fire_station,address.building,oAddress.ohnemakler.street,oAddress.ohnemakler.postcode,oAddress.ohnemakler.location,address,address.highway,oAddress.reedb.street,oAddress.reedb.postcode,oAddress.reedb.location,address.place_of_worship,address.parking,address.bank,address.quarter,aggregations.district,oAddress.meinestadt.street,oAddress.meinestadt.postcode,oAddress.meinestadt.location,oAddress.immostar.street,oAddress.immostar.postcode,oAddress.immostar.location,oAddress.immobilien.district,address.pharmacy,address.landuse,address.school,address.man_made,oAddress.vrwohnen.street,oAddress.vrwohnen.postcode,oAddress.vrwohnen.location,address.place,address.retail
0,a6403543c394e8f9dc0607d2d0b4d296,Moderne 1 ZKB Dachgeschosswohnung für Singles!,67122,90000.0,1.0,30.2,3.57,"[{'name': 'ebk', 'id': 'ebk9172d28a8c5c4e69b37...",10.761589,2980.13245,325.0,300.0,9.933775,Rheinland-Pfalz,False,4.33,4.0,,ROOF_STOREY,WELL_KEPT,,False,,,False,False,True,True,2021-02-19T09:39:48.811Z,False,False,118.421053,,[],"[{'buyingPrice': 90000, 'platformName': 'ebk',...",False,False,0,2021-02-19T09:39:49.562Z,0,,-70.42,-9.388889,-70.42,False,DE,DEU,postcode,postcode,Europe,Deutschland,de,Rhein-Pfalz-Kreis,Rheinauen,European Union,67122,Rheinland-Pfalz,RP,Altrip,49.433318,8.498292,"67122 Altrip, Deutschland",0.0,-5.0,5.0,False,4.6,0.0,33.0,16.630435,0.0,0.0,16.664,Altrip,75500.0,2500.0,3.936,,76000.0,2516.556291,5.131579,,67122,altrip,"67122 Rheinland-Pfalz, Altrip",,67122,altrip,"67122 Rheinland-Pfalz, Altrip",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,a0fa2a2e9a03e1e51183319d389b9b8d,Gemütliches Zuhause zum Verweilen und Entspannen,33154,282425.0,3.0,78.41,,"[{'name': 'ebk', 'id': 'ebk34a7c7315353548e1fa...",7.039918,3601.900268,552.0,,,Nordrhein-Westfalen,False,2.35,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:39:39.691Z,False,False,120.180851,,[],"[{'buyingPrice': 282425, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:40.122Z,0,,-452.03,-19.206515,-452.03,False,DE,DEU,postcode,postcode,Europe,Deutschland,de,Kreis Paderborn,,European Union,33154,Nordrhein-Westfalen,NW,,51.674682,8.603219,"33154 Salzkotten, Deutschland",24956.0,-5.0,5.0,False,5.8,0.0,38.0,13.189655,0.0,8.333,16.664,Salzkotten,188184.0,2400.0,3.8,,235000.0,2997.066701,2.818723,,33154,,"33154 Nordrhein-Westfalen, Salzkotten",,33154,,"33154 Nordrhein-Westfalen, Salzkotten",Kreis Paderborn,Salzkotten,Salzkotten,Salzkotten,141138.0,1800.0,5.066667,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,f36467918129e396a5997571c6765a65,Charmante Wohnung mit Wohlfühlatmosphäre in de...,63739,540000.0,4.0,110.0,,"[{'name': 'ebk', 'id': 'ebkd4472406fcbf00c60be...",10.054545,4909.090909,1106.0,,,Bayern,False,2.46,,1890.0,TERRACED_FLAT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:39:31.366Z,True,False,122.727273,250.0,[],"[{'buyingPrice': 540000, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:31.847Z,0,,-786.1,-17.468889,-786.1,False,DE,DEU,postcode,postcode,Europe,Deutschland,de,,,European Union,63739,Bayern,BY,,49.974426,9.166406,"63739 Bayern, Deutschland",0.0,-5.0,5.0,False,0.0,0.0,17.0,0.0,0.0,0.0,16.664,,385000.0,3500.0,3.154286,,440000.0,4000.0,3.016364,,63739,bayern,"63739 Bayern, Aschaffenburg",,63739,bayern,"63739 Bayern, Aschaffenburg",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,936c8b6e4a152063f90fb4e3dcb6ee48,Schrobenhausen - Stadtzentrum - 2-Zimmerwohnun...,86529,367000.0,2.0,86.25,,"[{'name': 'ebk', 'id': 'ebkea9ee7c8edb3a31bbcf...",10.353623,4255.072464,893.0,,,Bayern,False,2.92,,2019.0,APARTMENT,FIRST_TIME_USE_AFTER_REFURBISHMENT,,False,,,True,False,True,False,2021-02-19T09:39:18.404Z,False,False,94.587629,,[],"[{'buyingPrice': 367000, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:19.028Z,0,,-440.38,-14.399455,-440.38,False,DE,DEU,place,city,Europe,Deutschland,de,Landkreis Neuburg-Schrobenhausen,,European Union,86529,Bayern,BY,,48.561341,11.265449,"86529 Schrobenhausen, Deutschland",17226.0,5.0,10.0,False,3.0,0.0,54.0,25.0,0.0,8.333,20.83,Schrobenhausen,301875.0,3500.0,3.188571,,388000.0,4498.550725,2.761856,,86529,,"86529 Bayern, Schrobenhausen",,86529,,"86529 Bayern, Schrobenhausen",,,Schrobenhausen,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,4c2991448a537acb85a343f231b1e510,Neubauprojekt: Exklusive 4 Zimmer Wohnung mit ...,33607,348440.0,4.0,88.0,,"[{'name': 'ebk', 'id': 'ebkc33ce14a0b63f301d07...",7.636364,3959.545455,672.0,,,Nordrhein-Westfalen,False,2.31,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:38:13.865Z,False,False,163.586854,,[],"[{'buyingPrice': 348440, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:38:14.885Z,0,,-563.62,-19.410515,-563.62,False,DE,DEU,postcode,postcode,Europe,Deutschland,de,,,European Union,33607,Nordrhein-Westfalen,NW,,52.02232,8.565865,"33607 Bielefeld, Deutschland",334195.0,-5.0,5.0,True,8.9,40611.0,67.0,0.0,25.0,25.0,16.664,Bielefeld,184800.0,2100.0,4.228571,,213000.0,2420.454545,3.785915,,33607,bielefeld,"33607 Bielefeld, Mitte",,33607,bielefeld,"33607 Bielefeld, Mitte",Bielefeld,Mitte,,Mitte,162800.0,1850.0,4.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [76]:
df_raw.to_csv("../data/immo.csv")

In [71]:
df_raw.isnull().sum()

id                                           0
title                                        0
zip                                          0
buyingPrice                                 16
rooms                                       19
squareMeter                                  3
comission                                  322
platforms                                    0
rentPricePerSqm                              1
pricePerSqm                                 16
rentPrice                                    4
rentPriceCurrent                           549
rentPriceCurrentPerSqm                     549
region                                       1
foreClosure                                  0
grossReturn                                 19
grossReturnCurrent                         549
constructionYear                           119
apartmentType                                0
condition                                    0
lastRefurbishment                          514
lift         

In [63]:
df_raw['platforms'][0][0]

{'name': 'ebk',
 'id': 'ebk9172d28a8c5c4e69b37c7a65bb0ca6c9',
 'url': 'https://www.ebay-kleinanzeigen.de/s-anzeige/moderne-1-zkb-dachgeschosswohnung-fuer-singles-/1672091692-196-5386',
 'creationDate': '2021-02-19T09:39:48.811Z',
 'publishDate': '2021-02-19T09:39:48.811Z',
 'active': True}

In [67]:
df_raw['buyingPriceHistory'][4]

[{'buyingPrice': 348440,
  'platformName': 'ebk',
  'creationDate': '2021-02-19T09:38:13.865Z'}]

In [37]:
df_platforms = json_normalize(data=data, record_path=['platforms'],record_prefix="platform_", meta=['id'])
df_platforms.head(3)

Unnamed: 0,platform_name,platform_id,platform_url,platform_creationDate,platform_publishDate,platform_active,platform_deactivationDate,id
0,ebk,ebk9172d28a8c5c4e69b37c7a65bb0ca6c9,https://www.ebay-kleinanzeigen.de/s-anzeige/mo...,2021-02-19T09:39:48.811Z,2021-02-19T09:39:48.811Z,True,,a6403543c394e8f9dc0607d2d0b4d296
1,ebk,ebk34a7c7315353548e1fa72f14a863f2d2,https://www.ebay-kleinanzeigen.de/s-anzeige/ge...,2021-02-19T09:39:39.691Z,2021-02-19T09:39:39.691Z,True,,a0fa2a2e9a03e1e51183319d389b9b8d
2,ebk,ebkd4472406fcbf00c60bea81b6be57abce,https://www.ebay-kleinanzeigen.de/s-anzeige/ch...,2021-02-19T09:39:31.366Z,2021-02-19T09:39:31.366Z,True,,f36467918129e396a5997571c6765a65


In [38]:
df_buyingPriceHistory = json_normalize(data=data, record_path=['buyingPriceHistory'],record_prefix="history_", meta=['id'])
df_buyingPriceHistory.head(3)

Unnamed: 0,history_buyingPrice,history_platformName,history_creationDate,id
0,90000,ebk,2021-02-19T09:39:48.811Z,a6403543c394e8f9dc0607d2d0b4d296
1,282425,ebk,2021-02-19T09:39:39.691Z,a0fa2a2e9a03e1e51183319d389b9b8d
2,540000,ebk,2021-02-19T09:39:31.366Z,f36467918129e396a5997571c6765a65


In [40]:
json_normalize(df['aggregations']).head()

Unnamed: 0,location.name,location.buyingPrice,location.pricePerSqm,location.grossReturn,similarListing.name,similarListing.buyingPrice,similarListing.pricePerSqm,similarListing.grossReturn,district.name,district.buyingPrice,district.pricePerSqm,district.grossReturn,district
0,Altrip,75500.0,2500.0,3.936,,76000.0,2516.556291,5.131579,,,,,
1,Salzkotten,188184.0,2400.0,3.8,,235000.0,2997.066701,2.818723,Salzkotten,141138.0,1800.0,5.066667,
2,,385000.0,3500.0,3.154286,,440000.0,4000.0,3.016364,,,,,
3,Schrobenhausen,301875.0,3500.0,3.188571,,388000.0,4498.550725,2.761856,,,,,
4,Bielefeld,184800.0,2100.0,4.228571,,213000.0,2420.454545,3.785915,Mitte,162800.0,1850.0,4.8,


In [39]:
# TODO fix the normalization here for location factor
# NOTE(review): `record_path` is meant for a list of records per row, whereas
# the `locationFactor` values shown by `df.head(3)` are single dicts --
# presumably that is why the call below does not work. `json_normalize(data)`
# already produces flattened `locationFactor.*` columns. TODO confirm.
# df_locationFactor = json_normalize(data=data, record_path=['locationFactor'],record_prefix="locationFactor_", meta=['id'])
# df_locationFactor.head(3)

In [45]:
json_normalize(df['aggregations']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   location.name               590 non-null    object 
 1   location.buyingPrice        599 non-null    float64
 2   location.pricePerSqm        599 non-null    float64
 3   location.grossReturn        599 non-null    float64
 4   similarListing.name         0 non-null      float64
 5   similarListing.buyingPrice  599 non-null    float64
 6   similarListing.pricePerSqm  599 non-null    float64
 7   similarListing.grossReturn  599 non-null    float64
 8   district.name               346 non-null    object 
 9   district.buyingPrice        346 non-null    float64
 10  district.pricePerSqm        346 non-null    float64
 11  district.grossReturn        346 non-null    float64
 12  district                    0 non-null      float64
dtypes: float64(11), object(2)
memory us

In [41]:
json_normalize(df['oAddress']).head()

Unnamed: 0,ebk.street,ebk.postcode,ebk.location,ebk.locationQuery,is24.street,is24.postcode,is24.location,is24.district,ivd24.street,ivd24.postcode,ivd24.location,ivd24.district,immobilio.street,immobilio.postcode,immobilien.street,immobilien.postcode,immobilien.location,immobilienmarkt1a.street,immobilienmarkt1a.postcode,immobilienmarkt1a.location,sueddeutsche.street,sueddeutsche.postcode,sueddeutsche.location,faz.street,faz.postcode,faz.location,immobilio.location,regionalimmobilien.street,regionalimmobilien.postcode,regionalimmobilien.location,immoticket24.street,immoticket24.postcode,immoticket24.location,ohnemakler.street,ohnemakler.postcode,ohnemakler.location,reedb.street,reedb.postcode,reedb.location,meinestadt.street,meinestadt.postcode,meinestadt.location,immostar.street,immostar.postcode,immostar.location,immobilien.district,vrwohnen.street,vrwohnen.postcode,vrwohnen.location
0,,67122,altrip,"67122 Rheinland-Pfalz, Altrip",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,,33154,,"33154 Nordrhein-Westfalen, Salzkotten",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,63739,bayern,"63739 Bayern, Aschaffenburg",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,86529,,"86529 Bayern, Schrobenhausen",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,33607,bielefeld,"33607 Bielefeld, Mitte",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [48]:
df['address'].head()

0    {'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...
1    {'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...
2    {'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...
3    {'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...
4    {'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...
Name: address, dtype: object

In [44]:
json_normalize(df['oAddress']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 49 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ebk.street                   58 non-null     object 
 1   ebk.postcode                 231 non-null    object 
 2   ebk.location                 172 non-null    object 
 3   ebk.locationQuery            231 non-null    object 
 4   is24.street                  77 non-null     object 
 5   is24.postcode                263 non-null    object 
 6   is24.location                263 non-null    object 
 7   is24.district                143 non-null    object 
 8   ivd24.street                 14 non-null     object 
 9   ivd24.postcode               144 non-null    object 
 10  ivd24.location               144 non-null    object 
 11  ivd24.district               48 non-null     object 
 12  immobilio.street             8 non-null      object 
 13  immobilio.postcode  

In [42]:
json_normalize(df['originalAddress']).head()

Unnamed: 0,street,postcode,location,locationQuery,district
0,,67122,altrip,"67122 Rheinland-Pfalz, Altrip",
1,,33154,,"33154 Nordrhein-Westfalen, Salzkotten",
2,,63739,bayern,"63739 Bayern, Aschaffenburg",
3,,86529,,"86529 Bayern, Schrobenhausen",
4,,33607,bielefeld,"33607 Bielefeld, Mitte",


In [43]:
json_normalize(df['originalAddress']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   street         303 non-null    object
 1   postcode       600 non-null    object
 2   location       552 non-null    object
 3   locationQuery  208 non-null    object
 4   district       186 non-null    object
dtypes: object(5)
memory usage: 23.6+ KB


In [15]:
df['zip'].unique().shape

(453,)

In [16]:
df[df['comission'].isnull()].sample(3)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,address,region,foreClosure,locationFactor,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,aggregations,leasehold,priceInMarket,oAddress,originalAddress,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages
387,42ee722ca3d814072fc0fab531a9ccea,"Neuwertige 4-Zimmer-Wohnung mit Balkon, EBK & ...",40235,775000.0,4.0,119.0,,"[{'name': 'is24', 'id': '126688358', 'url': 'h...",14.462185,6512.605042,1721.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 621877, 'populationTrend': {'fr...",2.66,,2015.0,APARTMENT,MINT_CONDITION,,True,4.0,5.0,True,True,True,False,2021-02-18T19:51:14.000Z,True,"{'district': {'name': 'Stadtbezirk 2', 'buying...",False,106.456044,"{'is24': {'street': 'Röpkestraße 00', 'postcod...","{'street': 'Röpkestraße 00', 'postcode': '4023...",300.0,"[{'id': '126d34bc6367262f31990e487f3a61a2', 'o...","[{'buyingPrice': 775000, 'platformName': 'is24...",False,False,1,2021-02-19T08:35:18.174Z,0,,-1106.77,-17.137032,-1106.77,True
586,90c0c34576e1cb74e408991bc1d41c96,Schöner Wohnen in Rhöndorf - Ihres neues Domiz...,53604,624000.0,3.0,135.5,,"[{'name': 'ivd24', 'publishDate': '2021-02-18T...",11.601476,4605.166052,1572.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 25812, 'populationTrend': {'fro...",3.02,,2020.0,APARTMENT,FIRST_TIME_USE,,False,,,True,False,True,False,2021-02-18T17:04:24.682Z,False,"{'location': {'name': 'Bad Honnef', 'buyingPri...",False,96.89441,"{'ebk': {'street': None, 'postcode': '53604', ...","{'street': '', 'postcode': '53604', 'location'...",,[],"[{'buyingPrice': 624000, 'platformName': 'ivd2...",False,False,0,2021-02-19T07:52:39.612Z,0,,-771.95,-14.845192,-771.95,False
426,4082cfb446dbb9deba1a6b3d4207ecd2,NEUBAU -2ZKB- Barrierefreie Erdgeschosswohnung...,66119,200000.0,2.0,50.0,,"[{'name': 'ebk', 'id': 'ebk9587d3147f3654a8872...",11.02,4000.0,551.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Saarland,False,"{'population': 180374, 'populationTrend': {'fr...",3.31,,2021.0,GROUND_FLOOR,FIRST_TIME_USE,,False,,,False,True,True,False,2021-02-18T19:20:31.023Z,False,"{'district': {'name': 'Güdingen', 'buyingPrice...",False,192.307692,"{'ebk': {'street': 'irgentalweg', 'postcode': ...","{'street': 'irgentalweg', 'postcode': '66119',...",,[],"[{'buyingPrice': 200000, 'platformName': 'ebk'...",False,False,0,2021-02-19T05:13:44.380Z,0,,-216.43,-12.986,-216.43,False
1,a0fa2a2e9a03e1e51183319d389b9b8d,Gemütliches Zuhause zum Verweilen und Entspannen,33154,282425.0,3.0,78.41,,"[{'name': 'ebk', 'id': 'ebk34a7c7315353548e1fa...",7.039918,3601.900268,552.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 24956, 'populationTrend': {'fro...",2.35,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-19T09:39:39.691Z,False,"{'district': {'name': 'Salzkotten', 'buyingPri...",False,120.180851,"{'ebk': {'street': None, 'postcode': '33154', ...","{'street': '', 'postcode': '33154', 'locationQ...",,[],"[{'buyingPrice': 282425, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:39:40.122Z,0,,-452.03,-19.206515,-452.03,False
526,943d1af9cf5228febae796063d1ba85b,Eine Top-Investition im München muss nicht ans...,81549,345600.0,2.0,32.06,,"[{'name': 'immobilien', 'publishDate': '2021-0...",25.171553,10779.787898,807.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Bayern,False,"{'population': 1484226, 'populationTrend': {'f...",2.8,,2021.0,APARTMENT,FIRST_TIME_USE,,True,,,False,False,True,False,2021-02-18T17:56:54.994Z,False,{'district': {'name': 'Obergiesing-Fasangarten...,False,115.2,"{'ebk': {'street': 'stümpflingstraße 5', 'post...","{'street': 'stümpflingstraße 5', 'postcode': '...",,[],"[{'buyingPrice': 345600, 'platformName': 'ebk'...",False,False,0,2021-02-19T09:31:14.214Z,0,,-437.95,-15.206597,-437.95,False


In [17]:
# Check null values and unique values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 50 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      600 non-null    object 
 1   title                   600 non-null    object 
 2   zip                     600 non-null    object 
 3   buyingPrice             584 non-null    float64
 4   rooms                   581 non-null    float64
 5   squareMeter             597 non-null    float64
 6   comission               278 non-null    float64
 7   platforms               600 non-null    object 
 8   rentPricePerSqm         599 non-null    float64
 9   pricePerSqm             584 non-null    float64
 10  rentPrice               596 non-null    float64
 11  rentPriceCurrent        51 non-null     float64
 12  rentPriceCurrentPerSqm  51 non-null     float64
 13  address                 599 non-null    object 
 14  region                  599 non-null    ob

In [18]:
# Ideas noted for this cell: group by city, group by type.
# What is computed here: the maximum cash flow per zip code, sorted
# ascending and cut to the first ten entries.
(
    df.groupby("zip")["cashFlow"]
    .max()
    .sort_values()
    .head(10)
)

zip
22301   -8702.43
81245   -5402.95
80639   -5040.90
22529   -4031.55
10785   -3959.82
50933   -2775.55
82131   -2601.13
60598   -2539.73
14167   -2485.93
10719   -2206.43
Name: cashFlow, dtype: float64

# Database
Here we want to save the CSV in the newly created database schema.  

In [19]:
import psycopg2
import pandas as pd
import os
from dotenv import load_dotenv

# Load variables from a .env file so the database credentials are read from
# the environment instead of being written into the notebook.
load_dotenv()

# Each value is None when the corresponding variable is not set.
DB_HOST = os.environ.get('DB_HOST')
DB_NAME = os.environ.get('DB_NAME')
DB_USER = os.environ.get('DB_USER')
DB_PASSWORD = os.environ.get('DB_PASSWORD')

# Keyword arguments that `connect` unpacks into `psycopg2.connect`.
connection_parameter = dict(
    host=DB_HOST,
    database=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
)

def connect(connection_parameter):
    """Open a connection to the PostgreSQL database server.

    Parameters
    ----------
    connection_parameter : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    SystemExit
        With exit code 1 when the connection attempt fails. The underlying
        error is printed first and attached as the exception's cause.
    """
    print('Connecting to the PostgreSQL database...')
    try:
        conn = psycopg2.connect(**connection_parameter)
    except Exception as error:
        print(error)
        # Same exception and exit code as `sys.exit(1)`, but without relying
        # on `sys` having been imported by another cell, and with the
        # original error kept in the traceback chain.
        raise SystemExit(1) from error
    print("Connection successful")
    return conn

def postgresql_to_dataframe(conn, select_query):
    """
    Run a SELECT query and return its result as a pandas dataframe.

    `conn` is handed to pandas as the `con` argument of `read_sql_query`;
    `select_query` is the SQL text to execute. A short progress message is
    printed before the query runs.
    """
    print("Read from table")
    return pd.read_sql_query(select_query, con=conn)

In [20]:
# Open the database connection from the settings assembled earlier.
# NOTE(review): no visible cell closes `conn` -- confirm it is released later.
conn = connect(connection_parameter)
column_names = list()  # placeholder, e.g. ["id", "source", "datetime", "mean_temp"]
# Read the complete property table. This rebinds `df`, which earlier cells
# used for the frame built from the immo JSON file.
df = postgresql_to_dataframe(
    conn=conn,
    select_query="SELECT * FROM public.api_liveaiproperty",
)
df.head()

Connecting to the PostgreSQL database...
Connection successful
Read from table


Unnamed: 0,id,active,property_id,property_url,source_id,source_portal,property_type,image_urls,image_captions,city,title,address,rooms,living_space,floor,no_of_floors,construction_year,condition,latitude,longitude,zipcode,foreclosure,is_rented_out,created_at,updated_at,price_id
0,45857,True,6a867704a532a627f707d778d30d8439,https://www.immobilienscout24.de/expose/116894250,116894250,is24,flat,https://pictures.immobilienscout24.de/listings...,1AA1\n3ABD\n7\nLogopartner\nLogopartner,Berlin,HERRSCHAFTLICHE GARTENRESIDENZ mit Privatgarte...,"Tiergarten, Berlin, Deutschland",2.0,73.0,,4.0,1880.0,FIRST_TIME_USE,52.509778,13.35726,10785,False,False,2020-12-15 15:59:06.680017+00:00,2020-12-15 15:59:06.684562+00:00,46854.0
1,33503,True,f65bb574710bc2bb043bea8069d10131,https://www.immobilienscout24.de/expose/120330418,120330418,is24,flat,https://pictures.immobilienscout24.de/listings...,Gartenperspektive\nInnenraumperspektive\nVogel...,Obersontheim,Exklusives 5-Zimmer-Penthouse mit Dachterrasse...,"Frankenstraße, 74423 Obersontheim, Deutschland",5.0,127.79,,,2021.0,FIRST_TIME_USE,49.058834,9.887907,74423,False,False,2020-12-13 15:34:46.452927+00:00,2020-12-15 15:38:05.368774+00:00,33650.0
2,33504,True,5835cc06d03607cc80f642d982de4eff,https://www.immobilienscout24.de/expose/120331198,120331198,is24,flat,https://pictures.immobilienscout24.de/listings...,Gartenperspektive\nInnenraumperspektive\nVogel...,Obersontheim,Raum & Lebensfreunde! Repräsentatives 5-Zimmer...,"Frankenstraße, 74423 Obersontheim, Deutschland",4.0,138.44,,,2021.0,FIRST_TIME_USE,49.058834,9.887907,74423,False,False,2020-12-13 15:34:46.464072+00:00,2020-12-15 15:38:05.374792+00:00,33651.0
3,34809,True,68e621da2bb7a3b7bb8557f04b74f3bd,https://www.immobilienscout24.de/expose/122422682,122422682,is24,flat,https://pictures.immobilienscout24.de/listings...,Diele\nHausflur\nKüche\nBadezimmer\nGäste-WC\n...,Düsseldorf,Sanierte und barrierefreie 4-Zimmer-Wohnung mi...,"Holthausen, 40589 Düsseldorf, Deutschland",4.0,90.0,,,1967.0,NO_INFORMATION,51.175156,6.83236,40589,False,False,2020-12-13 15:37:15.498003+00:00,2020-12-15 15:40:34.740057+00:00,34956.0
4,35827,True,4813f6cbf82e17c704360dcad5aa5480,https://www.immobilienscout24.de/expose/122184987,122184987,is24,flat,https://pictures.immobilienscout24.de/listings...,Wohnzimmer\nAußenansicht\nBadezimmer Beispiel\...,Nieste,Wohnen im Herzen von Nieste,"34329 Nieste, Deutschland",3.0,93.9,,3.0,2020.0,FIRST_TIME_USE,51.312294,9.670994,34329,False,False,2020-12-13 15:39:42.735693+00:00,2020-12-15 15:43:02.089328+00:00,35974.0


In [21]:
df.shape

(108045, 26)

In [22]:
df.columns[df.dtypes == object]

Index(['property_id', 'property_url', 'source_id', 'source_portal',
       'property_type', 'image_urls', 'image_captions', 'city', 'title',
       'address', 'condition', 'zipcode', 'foreclosure', 'is_rented_out'],
      dtype='object')

In [23]:
# Hand-picked subset of the object-dtype columns: 'foreclosure' and
# 'is_rented_out' are left out (presumably because they hold booleans rather
# than text, so `.str.len()` would not be meaningful for them -- TODO confirm).
columns_strings = [
    'property_id', 'property_url', 'source_id', 'source_portal',
    'property_type', 'image_urls', 'image_captions', 'city', 'title',
    'address', 'condition', 'zipcode',
]

# Report the longest value per text column.
for text_column in columns_strings:
    longest_value = df[text_column].str.len().max()
    print(f"Column: {text_column}: {longest_value}")

Column: property_id: 32.0
Column: property_url: 288.0
Column: source_id: 50.0
Column: source_portal: 18.0
Column: property_type: 5
Column: image_urls: 6995.0
Column: image_captions: 1727.0
Column: city: 48.0
Column: title: 199.0
Column: address: 176.0
Column: condition: 34.0
Column: zipcode: 5.0


# ROI Calculation
This is part of the transformation step. We add further columns for the ROI/cashflow calculation; the same helper functions will later be used to update the ROI/cashflow on the fly for the API endpoints.

In [25]:
from transformer import get_roi_gross, get_cashflow_after_operating_expenses, get_cashflow_after_taxes, get_cashflow_after_reserves, get_tax_year


In [40]:
# Worked example: derive all monthly/yearly inputs for the ROI and cashflow
# helpers from one listing plus our default assumptions.

# FROM EXPOSE
purchase_price = 150000         # required
living_space_sqm = 60           # required
cold_rent_given = 700           # can be given or not; DEFAULT: None
house_allowance_given = 300     # can be given or not; DEFAULT: None


# OUR DEFAULT VALUES
rent_price_index_per_sqm = 11               # varies from city and living space
vacancy_rate = 0.02                         # 2% standard = no rent per year
house_allowance_rate = 0.35                 # 35% of cold rent
interest_rate = 0.02                        # 2% as standard
mortgage_rate = 0.02                        # 2% as standard
equity_rate = 0.2                           # 20% of purchase price 
maintenance_reserve_per_sqm_per_year = 10   # 10€ per sqm and per year as standard
tax_rate = 0.3                              # could be 20-30%
tax_write_off_rate = 0.02                   # 2% (has to be validated by professionals)

# ESTIMATED COLD RENT
# Computed before its first use: the fallback below reads it whenever
# cold_rent_given is missing, which would otherwise raise a NameError.
cold_rent_estimated = living_space_sqm * rent_price_index_per_sqm

# VALUES FOR KEY METRICS CALCULATION
#   COLD RENT
cold_rent_month = cold_rent_given if cold_rent_given else cold_rent_estimated
cold_rent_year = cold_rent_month * 12

#   EQUITY
equity = purchase_price * equity_rate
#   LOAN
loan = purchase_price - equity

# REMAINING ESTIMATED VALUES (depend on cold rent and loan above)
house_allowance_estimated = cold_rent_month * house_allowance_rate
maintenance_reserve_private_year = living_space_sqm * maintenance_reserve_per_sqm_per_year
maintenance_reserve_private_month = maintenance_reserve_private_year / 12
vacancy_year = cold_rent_year * vacancy_rate
vacancy_month = vacancy_year / 12
tax_write_off_year = tax_write_off_rate * loan
tax_write_off_month = tax_write_off_year / 12

#   HOUSE ALLOWANCE (Hausgeld)
house_allowance_month = house_allowance_given if house_allowance_given else house_allowance_estimated
house_allowance_year = house_allowance_month * 12
#   INTEREST
interest_year = loan * interest_rate
interest_month = interest_year / 12
#   MORTGAGE
mortgage_year = loan * mortgage_rate
mortgage_month = mortgage_year / 12
#   TAX
# NOTE(review): every other argument is a yearly amount, but the house
# allowance is passed as the monthly value. Confirm against get_tax_year's
# signature whether house_allowance_year is what it expects.
tax = get_tax_year(cold_rent_year, vacancy_year, house_allowance_month, interest_year, tax_write_off_year, maintenance_reserve_private_year)

NameError: name 'get_tax_year' is not defined

In [41]:

# Key metrics for the example listing. The three cashflow helpers share the
# same leading positional arguments, so they are collected once.
monthly_flows = (cold_rent_month, vacancy_month, house_allowance_month, interest_month, mortgage_month)

roi_gross = get_roi_gross(cold_rent_year, purchase_price)
cashflow_after_operating_expenses = get_cashflow_after_operating_expenses(*monthly_flows)
cashflow_after_taxes = get_cashflow_after_taxes(*monthly_flows, tax)
cashflow_after_reserves = get_cashflow_after_reserves(*monthly_flows, tax, maintenance_reserve_private_month)

# Print the metrics one per line, in the order they were computed.
for metric in (
    roi_gross,
    cashflow_after_operating_expenses,
    cashflow_after_taxes,
    cashflow_after_reserves,
):
    print(metric)

NameError: name 'get_roi_gross' is not defined

In [42]:
# Inspect 10 random listings. The fixed seed keeps the displayed rows the same
# on every Restart & Run All, so the prose can refer to them.
df.sample(10, random_state=42)

Unnamed: 0,id,title,zip,buyingPrice,rooms,squareMeter,comission,platforms,rentPricePerSqm,pricePerSqm,rentPrice,rentPriceCurrent,rentPriceCurrentPerSqm,address,region,foreClosure,locationFactor,grossReturn,grossReturnCurrent,constructionYear,apartmentType,condition,lastRefurbishment,lift,floor,numberOfFloors,balcony,garden,active,rented,publishDate,privateOffer,aggregations,leasehold,priceInMarket,oAddress,originalAddress,houseMoney,images,buyingPriceHistory,priceReduced,priceIncreased,runningTime,lastUpdatedAt,favorite,favoriteDate,cashFlow,ownCapitalReturn,cashFlowPerLivingUnit,hasImages
478,5c2607457bdb53f2f05fb2eaed988743,Vermietete Einzimmerwohnung in zentraler Lage ...,68159,129000.0,1.0,36.0,1.95,[{'deactivationDate': '2021-02-19T09:01:08.690...,11.722222,3583.333333,422.0,310.0,8.611111,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Baden-Württemberg,False,"{'population': 310658, 'populationTrend': {'fr...",3.93,2.88,1988.0,NO_INFORMATION,WELL_KEPT,,True,7.0,7.0,False,False,True,True,2021-02-18T18:21:23.000Z,False,"{'district': {'name': 'Innenstadt/Jungbusch', ...",False,126.470588,"{'is24': {'street': None, 'postcode': '68159',...","{'postcode': '68159', 'location': 'Mannheim', ...",,"[{'id': '81d34d244d9d038c57140ed89b51e533', 'o...","[{'buyingPrice': 129000, 'platformName': 'is24...",False,False,1,2021-02-18T18:24:19.755Z,0,,-171.83,-15.984496,-171.83,True
107,ba8f834dc6d0c7deec3fd79bfe113bd7,Geräumige 3-Zimmer-Eigentumswohnung in MA-Waldhof,68305,85000.0,3.0,79.26,,"[{'name': 'is24', 'id': '126717302', 'url': 'h...",10.017663,1072.419884,794.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Baden-Württemberg,True,"{'population': 310658, 'populationTrend': {'fr...",11.21,,1900.0,APARTMENT,NEED_OF_RENOVATION,,False,,,False,False,True,False,2021-02-19T08:09:04.000Z,False,"{'district': {'name': 'Waldhof', 'buyingPrice'...",False,34.274194,"{'is24': {'street': None, 'postcode': '68305',...","{'postcode': '68305', 'location': 'Mannheim', ...",,"[{'id': 'd588b00303587dd4319af6fd4b650163', 'o...","[{'buyingPrice': 85000, 'platformName': 'is24'...",False,False,0,2021-02-19T08:12:33.302Z,0,,276.93,39.096471,276.93,True
159,68527ade9cbb5d85e2580bc27eb51388,"🏡🌳GRÜN, RUHIG; ZENTRAL🌳: Neubau-Maisonette mit...",13156,941500.0,4.0,201.0,,"[{'name': 'is24', 'id': '126714433', 'url': 'h...",15.522388,4684.079602,3120.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Berlin,False,"{'population': 3669491, 'populationTrend': {'f...",3.98,,2021.0,NO_INFORMATION,FIRST_TIME_USE,2021.0,True,,,True,True,True,False,2021-02-19T06:52:16.000Z,False,"{'district': {'name': 'Niederschönhausen', 'bu...",False,78.13278,"{'is24': {'street': None, 'postcode': '13156',...","{'postcode': '13156', 'location': 'Berlin - Pa...",,"[{'id': '71b1520678bdfd05aa3bdc74d465eafe', 'o...","[{'buyingPrice': 941500, 'platformName': 'is24...",False,False,0,2021-02-19T08:56:49.252Z,0,,-664.08,-8.464153,-664.08,True
148,5e5e9e7b211959aa794428fd73d7e820,Moderne 2-Zimmer-Wohnung auf ca. 60 m² mit gro...,12529,287800.0,2.0,59.41,,"[{'name': 'is24', 'id': '126668719', 'url': 'h...",12.943949,4844.302306,769.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Brandenburg,False,"{'population': 0, 'populationTrend': {'from': ...",3.21,,2023.0,APARTMENT,FIRST_TIME_USE,,True,6.0,,True,False,True,False,2021-02-19T07:13:48.000Z,False,"{'district': {'name': 'Schönefeld', 'buyingPri...",False,96.254181,"{'is24': {'street': 'Rathausgasse 3', 'postcod...","{'street': 'Rathausgasse 3', 'postcode': '1252...",,"[{'id': '3a28f1df5b796d153714bbf1cc7f04fc', 'o...","[{'buyingPrice': 287800, 'platformName': 'is24...",False,False,0,2021-02-19T09:17:24.653Z,0,,-325.57,-13.574705,-325.57,True
229,8f3b9ec2069a33c6fd363b29a5717d8d,Großzügige Wohnmöglichkeit über zwei Ebenen...,22529,2175000.0,7.0,250.0,6.12,"[{'name': 'ebk', 'id': 'ebkf295daea30d13c350dc...",15.252,8700.0,3813.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Hamburg,False,"{'population': 1847253, 'populationTrend': {'f...",2.1,,2011.0,GROUND_FLOOR,WELL_KEPT,,False,,,True,False,True,False,2021-02-18T22:19:54.454Z,False,"{'district': {'name': 'Eimsbüttel', 'buyingPri...",False,110.969388,"{'ebk': {'street': None, 'postcode': '22529', ...","{'street': '', 'postcode': '22529', 'location'...",,[],"[{'buyingPrice': 2175000, 'platformName': 'ebk...",False,False,0,2021-02-19T05:56:51.715Z,0,,-4031.55,-22.243034,-4031.55,False
411,21618ae01e987ba8a6cede3b60ce7c4a,"Zwangsversteigerung Wohnung, Bürgermeister-Kut...",67059,156000.0,3.0,91.0,,"[{'name': 'immobilien', 'publishDate': '2021-0...",8.703297,1714.285714,792.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Rheinland-Pfalz,True,"{'population': 172253, 'populationTrend': {'fr...",6.09,,,APARTMENT,NO_INFORMATION,,False,,,False,False,True,False,2021-02-18T19:27:59.223Z,False,{'district': {'name': 'Ludwigshafen-Südliche-I...,False,68.122271,"{'ivd24': {'street': None, 'postcode': '67059'...","{'postcode': '67059', 'location': 'Ludwigshafen'}",,"[{'id': 'c16955ccf813ddf86dace4ecb7f9c9cb', 'o...","[{'buyingPrice': 156000, 'platformName': 'ivd2...",False,False,0,2021-02-19T08:37:25.164Z,0,,74.38,5.721795,74.38,True
82,0ec32c15b880eda6a7e19b9be990369e,Hochwertige Neubauwohnung - inkl. Erstvermietung,48167,291887.0,2.0,61.45,,"[{'name': 'is24', 'id': '126718144', 'url': 'h...",11.863303,4749.991863,729.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Nordrhein-Westfalen,False,"{'population': 315293, 'populationTrend': {'fr...",3.0,,2021.0,NO_INFORMATION,FIRST_TIME_USE,,True,2.0,4.0,True,False,True,True,2021-02-19T08:35:33.000Z,False,"{'district': {'name': 'Münster-Südost', 'buyin...",False,113.134496,"{'is24': {'street': 'Schlesienstraße 21', 'pos...","{'street': 'Schlesienstraße 21', 'postcode': '...",,"[{'id': 'bd79628d93da2c974b0e5b4a3307f34f', 'o...","[{'buyingPrice': 291887, 'platformName': 'is24...",False,False,0,2021-02-19T08:38:32.190Z,0,,-366.15,-15.053086,-366.15,True
193,c91b3f34f9a6fcf14892b77b38b50c6b,Kapitalanleger aufgepasst: DG-Whg. mit TOP-Re...,10551,399000.0,3.0,93.0,6.99,"[{'name': 'immobilienmarkt1a', 'id': 'immobili...",11.55914,4290.322581,1075.0,,,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Berlin,False,"{'population': 3669491, 'populationTrend': {'f...",3.23,,1918.0,ROOF_STOREY,NO_INFORMATION,,False,5.0,,False,False,True,False,2021-02-19T02:16:48.519Z,False,"{'location': {'name': 'Berlin', 'buyingPrice':...",False,101.269036,"{'immobilienmarkt1a': {'street': None, 'postco...","{'postcode': '10551', 'location': 'Berlin'}",,"[{'id': '9779fc1c2a9940a83ef2885980127866', 'o...","[{'buyingPrice': 399000, 'platformName': 'immo...",False,False,0,2021-02-19T02:16:50.189Z,0,,-523.33,-15.739348,-523.33,True
558,3f5a72754cb254fb98e9d79d110cac28,"Wiesbaden – Solide vermietete, pfiffige Wohnun...",65189,145000.0,1.0,51.0,3.57,"[{'name': 'ivd24', 'publishDate': '2021-02-18T...",11.45098,2843.137255,584.0,166.916667,3.272876,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Hessen,False,"{'population': 278474, 'populationTrend': {'fr...",4.83,1.38,1983.0,GROUND_FLOOR,NO_INFORMATION,,False,,,False,False,True,True,2021-02-18T17:24:44.502Z,False,"{'district': {'name': 'Südost', 'buyingPrice':...",False,81.460674,"{'immobilio': {'street': 'Friedenstraße 24b', ...","{'street': 'Friedenstraße 24b', 'postcode': '6...",182.0,"[{'id': '8d65156e36ea47a80ac8a12c67f2dd01', 'o...","[{'buyingPrice': 145000, 'platformName': 'ivd2...",False,False,0,2021-02-19T08:14:29.114Z,0,,-320.25,-26.503793,-320.25,True
456,4e7d3a5cedba1ac58b5a8690ccfd6303,Voll ausgestattete Zwei-Zimmer-Wohnung mit Bal...,85598,389000.0,2.0,57.0,1.95,"[{'name': 'is24', 'publishDate': '2021-02-18T1...",13.982456,6824.561404,797.0,764.0,13.403509,"{'ISO_3166-1_alpha-2': 'DE', 'ISO_3166-1_alpha...",Bayern,False,"{'population': 0, 'populationTrend': {'from': ...",2.46,2.36,1972.0,NO_INFORMATION,WELL_KEPT,,True,2.0,5.0,True,False,True,True,2021-02-18T18:51:15.000Z,False,"{'location': {'name': 'Vaterstetten', 'buyingP...",False,123.492063,"{'is24': {'street': None, 'postcode': '85598',...","{'postcode': '85598', 'location': 'Vaterstette...",220.0,"[{'id': 'f93bc1ef2776a0e92b30fa38e1d46159', 'o...","[{'buyingPrice': 389000, 'platformName': 'is24...",False,False,1,2021-02-19T08:51:23.374Z,0,,-608.82,-18.780977,-608.82,True


In [43]:
# Compare the cashFlow delivered by the API with our own calculation.
# .copy() makes this an independent frame, so adding a column below does not
# write into a slice of `df`.
df_cashflow_comparisson = df[["id", "title", "zip", "buyingPrice", "cashFlow"]].copy()

# Build the five monthly inputs per listing the same way the single-listing
# example does, using the default rates defined in that cell. The `listing_`
# prefix avoids overwriting the scalar example variables.
# NOTE(review): assumes rentPriceCurrent is the rent stated in the listing,
# rentPrice the estimated rent and houseMoney a monthly amount -- confirm
# against the API documentation.
listing_cold_rent_month = df["rentPriceCurrent"].fillna(df["rentPrice"])
listing_vacancy_month = listing_cold_rent_month * vacancy_rate
listing_house_allowance_month = df["houseMoney"].fillna(listing_cold_rent_month * house_allowance_rate)
listing_loan = df["buyingPrice"] - df["buyingPrice"] * equity_rate
listing_interest_month = listing_loan * interest_rate / 12
listing_mortgage_month = listing_loan * mortgage_rate / 12

# Pass all five monthly amounts in the same order as the scalar call; the
# purchase price alone is not a valid input.
# NOTE(review): assumes the helper is plain arithmetic and therefore works
# element-wise on pandas Series -- TODO confirm in transformer.py.
df_cashflow_comparisson["liveAiCashflow"] = get_cashflow_after_operating_expenses(
    listing_cold_rent_month,
    listing_vacancy_month,
    listing_house_allowance_month,
    listing_interest_month,
    listing_mortgage_month,
)
df_cashflow_comparisson.head()

NameError: name 'get_cashflow_after_operating_expenses' is not defined

# Next steps

- Clean null values. 
- Transform into clean schema.
- Load clean csv into S3 bucket.
