# Getting data

Includes scraping, renaming of variables and data cleaning from:
- https://www.boligsiden.dk/
- http://hvorlangterder.dk
- http://dingeo.dk/

### Import packages

In [1]:
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import urllib.request
import re
import pandas as pd
import math
import requests 
import json
import numpy as np
from numpy import NaN
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn import datasets
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from bs4 import BeautifulSoup
from requests import get
from pandas import DataFrame
from tabulate import tabulate

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium import webdriver

import itertools
from itertools import chain
from IPython.core.display import HTML
import xlrd

import time
import sys
from tqdm import tqdm
from functools import wraps

from decimal import getcontext, Decimal
getcontext().prec = 3

### Scraping boligsiden.dk

#### Helper Functions

In [2]:
def get_url_boligsiden(kommune, startdate, enddate, p):
    url = 'http://www.boligsiden.dk/salgspris/solgt/alle/{p}?periode.from={startdate}&periode.to={enddate}&displaytab=mergedtab&sort=salgsdato&salgstype=%5Bobject%20Object%5D&kommune={kommune}'
    return url.format(kommune = kommune, startdate = startdate, enddate = enddate, p = p)

#print(get_url_boligsiden('københavn', '2017-01-01', '2017-01-10', 1))

In [3]:
def get_max_pages(url):

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    
    driver = webdriver.Chrome('/Applications/chromedriver', options=options) 
    driver.get(url)
    tekst = driver.find_element_by_class_name("salesprice-result").text

    boliger_fundet = tekst.split("\n")[5]
    sidsteside = (tekst.split("af ")[1]).split("\n")[0]
    return sidsteside

#get_max_pages('https://www.boligsiden.dk/salgspris/solgt/villa/3?periode.from=2011-01-01&periode.to=2017-01-10&displaytab=mergedtab&sort=salgsdato&salgstype=%5Bobject%20Object%5D&kommune=k%C3%B8benhavn')


In [4]:
def get_all_urls_on_page(url):
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    
    driver = webdriver.Chrome('/Applications/chromedriver', options=options) 
    driver.get(url)
    
    all_https = [elem.get_attribute("href") for elem in driver.find_elements_by_tag_name('a')]
    dobbelte_https =[all_https[i] for i in range(len(all_https)) if all_https[i] in all_https[:i]][1:]
    
    forkortet = list(dict.fromkeys(dobbelte_https[:]))
    
    krav = 'https://www.boligsiden.dk/boligen'
    boliger_https = [i for i in forkortet if krav in i] 
    
    return boliger_https


In [5]:
def get_all_links(kommune, startdate, enddate): 
        
    # Returns first https-page with given variables
    first_page = get_url_boligsiden(kommune, startdate, enddate, 1)

    # Getting number of total pages
    total_pages = get_max_pages(first_page)

    # Empty lists 
    link_to_all_pages=[]
    list_of_all_pages=[]

    # Collects a list with all the pages that we want to collect
    for x in tqdm(range(0, int(total_pages))):  
        all_pages = get_url_boligsiden(kommune, startdate, enddate, x+1)
        link_to_all_pages.append(all_pages)

        page_list = get_all_urls_on_page(link_to_all_pages[x])
        list_of_all_pages.extend(page_list)
        
    # Returns list with all the wanted url's
    return (list_of_all_pages)

In [6]:
def get_simple_single_page(url):
    
    url = url
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html5lib')
    
    # Dictionary 
    data = {}
    
    # Find Type and Adresse
    try:
        data['Boligtype'] = [soup.find("div", {"class": "pre"}).get_text(strip=True)]
        data['Adresse'] = [soup.find('h1', {'class': 'title'}).contents[0].strip()]
        data['Post_nr'] = [(soup.find("h1", {"class": "title"})).find("span").text]

        # Table about "Prisudvikling og historik"
        udvikling_table = soup.find_all('table')[0] 
        table = pd.read_html(str(udvikling_table))[0]
        table.set_axis(['Tekst', 'Værdi'], axis=1, inplace=True)

        salg_dato = table.iloc[7, 0]
        salg_pris = table.iloc[7, 1]
        reg_udbud = table.iloc[11, 0]

        table = pd.concat([table[0:7], table[8:]])
        table.dropna(inplace = True) 


        new_data = {'Tekst': [ 'Seneste_salgspris', 'Seneste_salgsdato'], 
                    'Værdi': [salg_pris, salg_dato]}

        new_table = pd.DataFrame(new_data, columns = ['Tekst','Værdi'])

        table = pd.concat([table, new_table])

        mydict = dict(zip(table.Tekst, list(table.Værdi)))

        data.update(mydict)

        if 'Ingen registrerede udbud på Boligsiden' in reg_udbud:
            data['Udbud_Start'] = 'Ingen registrerede udbud'
            data['Udbud_Slut'] = 'Ingen registrerede udbud'
        else:
            try:
                data['Udbud_Start'] = data['Periode'][0:10]
            except:
                data['Udbud_Start'] = np.nan

            try:
                data['Udbud_Slut'] = data['Periode'][12:]
            except:
                data['Udbud_Slut'] = np.nan

        # Table about "Boligfakta"
        fakta_table = soup.find_all('table')[1] 
        table = pd.read_html(str(fakta_table))[0]
        table.dropna(inplace = True) 
        table = table.set_axis(['Tekst', 'Værdi'], axis=1, inplace=False)

        mydict = dict(zip(table.Tekst, list(table.Værdi)))
        data.update(mydict)

        data['Link'] = str(url[:])

        # Geography
        try:
            oururl= urllib.request.urlopen(url).read()
            oursoup = BeautifulSoup(oururl, "lxml")
            info = oursoup.find_all('script', type='application/ld+json')[1]
            geo = json.loads(info.text)['geo']
            data['Breddegrad'] = geo['latitude']
            data['Længdegrad'] = geo['longitude']
        except:
            data['Breddegrad'] = [np.nan]
            data['Længdegrad'] = [np.nan]
        
        
    except Exception: 
        pass
    
    # Make dataframe
    df_page = pd.DataFrame(data)

    return df_page

#### Main function

In [7]:
def get_data(links):
    
    df = pd.DataFrame()

    for x in tqdm(range(0, len(links)), mininterval=30.0):  
        df_pages = get_simple_single_page(links[x])
        df = pd.concat([df, df_pages])

    return df

#### Collect data

In [3]:
##############################################
############## Frederiksberg #################
##############################################

################# Get links ##################
#print(get_url_boligsiden('Frederiksberg', '2014-01-01', '2019-12-31', 1))
#links_frb_14_19 = get_all_links('Frederiksberg', '2014-01-01', '2019-12-31')       #6866 links takes 25 min

################ Save links ##################
#with open('links_frb_14_19.txt', 'w') as file:
#        file.write(str(links_frb_14_19))

################ Load links ##################
with open("Endeligt data/links_frb_14_19.txt", "r") as file:
    links_frb_14_19 = eval(file.readline())
#print(len(links_frb_14_19), (228*30 + 26))         # Number of houses = 6866

################## Get data ##################
#df_frb_14_19 = get_data(links_frb_14_19) # takes 2 hours

################# Save data ##################
#df_frb_14_19.to_csv('Frederiksberg_14_19.csv', index=False)

################# Load data ##################
df_frb_14_19 = pd.read_csv("Endeligt data/Frederiksberg_14_19.csv") 

############### Display data #################
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_frb_14_19.head(10).to_html()))

print(df_frb_14_19.shape, len(links_frb_14_19))

(6866, 25) 6866


In [4]:
##############################################
################ København ###################
##############################################

################# Get links ##################
#print(get_url_boligsiden('København', '2014-01-01', '2019-12-31', 1))
#links_kbh_14_19 = get_all_links('København', '2014-01-01', '2019-12-31')

################ Save links ##################
#with open('links_kbh_14_19.txt', 'w') as file:
#        file.write(str(links_kbh_14_19))

################ Load links ##################
with open("Endeligt data/links_kbh_14_19.txt", "r") as file:
    links_kbh_14_19 = eval(file.readline())
#print(len(links_kbh_14_19), (1305*30+1))            # Number of houses = 39151

############## Get and save data #############
#df_kbh_14_19_11 = get_data(links_kbh_14_19[0:2500])
#df_kbh_14_19_11.to_csv('Endeligt data/København_14_19_11.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_12 = get_data(links_kbh_14_19[2500:5000])
#df_kbh_14_19_12.to_csv('Endeligt data/København_14_19_12.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_2 = get_data(links_kbh_14_19[5000:10000])
#df_kbh_14_19_2.to_csv('Endeligt data/København_14_19_2.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_3 frg= get_data(links_kbh_14_19[10000:15000])
#df_kbh_14_19_3.to_csv('Endeligt data/København_14_19_3.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_4 = get_data(links_kbh_14_19[15000:20000])
#df_kbh_14_19_4.to_csv('Endeligt data/København_14_19_4.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_5 = get_data(links_kbh_14_19[20000:25000])
#df_kbh_14_19_5.to_csv('Endeligt data/København_14_19_5.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_6 = get_data(links_kbh_14_19[25000:30000])
#df_kbh_14_19_6.to_csv('Endeligt data/København_14_19_6.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_7 = get_data(links_kbh_14_19[30000:35000])
#df_kbh_14_19_7.to_csv('Endeligt data/København_14_19_7.csv', index=False)
#time.sleep(60)
#df_kbh_14_19_8 = get_data(links_kbh_14_19[35000:])
#df_kbh_14_19_8.to_csv('Endeligt data/København_14_19_8.csv', index=False)

############# Load and merge data ############
df_kbh_14_19_11 = pd.read_csv("Endeligt data/København_14_19_11.csv") 
df_kbh_14_19_12 = pd.read_csv("Endeligt data/København_14_19_12.csv") 
df_kbh_14_19_2 = pd.read_csv("Endeligt data/København_14_19_2.csv") 
df_kbh_14_19_3 = pd.read_csv("Endeligt data/København_14_19_3.csv") 
df_kbh_14_19_4 = pd.read_csv("Endeligt data/København_14_19_4.csv") 
df_kbh_14_19_5 = pd.read_csv("Endeligt data/København_14_19_5.csv") 
df_kbh_14_19_6 = pd.read_csv("Endeligt data/København_14_19_6.csv") 
df_kbh_14_19_7 = pd.read_csv("Endeligt data/København_14_19_7.csv") 
df_kbh_14_19_8 = pd.read_csv("Endeligt data/København_14_19_8.csv") 

df_kbh_14_19 = pd.concat([df_kbh_14_19_11, df_kbh_14_19_12, df_kbh_14_19_2, 
                          df_kbh_14_19_3, df_kbh_14_19_4, df_kbh_14_19_5,
                          df_kbh_14_19_6, df_kbh_14_19_7, df_kbh_14_19_8], sort=False)

############## Save merged data ############## 
#df_kbh_14_19.to_csv('Endeligt data/København_14_19.csv', index=False)

################# Load data ##################
#df_kbh_14_19 = pd.read_csv("Endeligt data/København_14_19.csv") 

############### Display data #################
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_kbh_14_19.head(10).to_html()))

print(df_kbh_14_19.shape, len(links_kbh_14_19))

(39151, 25) 39151


In [15]:
##############################################
################# Together ###################
##############################################

############ Merge Kbh + Frb data ############
#df_14_19 = pd.concat([df_frb_14_19, df_kbh_14_19], sort=False)

################# Save data ##################
#df_14_19.to_csv('Endeligt data/Boligsiden_Kbh+Frb_14_19_raw.csv', index=False)

################# Load data ##################
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_raw.csv") 

print(df_14_19.isnull().sum(axis=0))

len(df_14_19)

Boligtype             1996
Adresse                  0
Post_nr               1996
Vurderingsdato        1996
Ejendomsværdi         1996
Grundværdi            1996
Periode               8252
Udbudspris            8252
Seneste_salgspris     1996
Seneste_salgsdato     1996
Udbud_Start           8252
Udbud_Slut            8252
Vægtet areal          1996
Samlet Boligareal     1996
Kælderareal           1996
Grund                 1996
Antal værelser        1996
Antal plan            1996
Byggeår               1996
Om-/tilbygningsår     1996
Varmekilde            1996
Link                  1996
Breddegrad            1998
Længdegrad            1998
Ydervægsmateriale    40633
dtype: int64


46017

In [3]:
df_14_19[df_14_19.Boligtype.isnull()]


Unnamed: 0,Boligtype,Adresse,Post_nr,Vurderingsdato,Ejendomsværdi,Grundværdi,Periode,Udbudspris,Seneste_salgspris,Seneste_salgsdato,...,Grund,Antal værelser,Antal plan,Byggeår,Om-/tilbygningsår,Varmekilde,Link,Breddegrad,Længdegrad,Ydervægsmateriale
210,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
318,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
410,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
411,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
855,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
1087,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
1088,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
1089,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
1372,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,
1407,,Adressen er ikke tilgængelig,,,,,,,,,...,,,,,,,,,,


### Renaming variables from boligsiden.dk

In [3]:
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_raw.csv") 

df_14_19.rename(columns={'Boligtype':'Type',
                         'Adresse':'Address',
                         'Post_nr': 'ZipCity',
                         'Vurderingsdato':'ValDate_x',
                         'Ejendomsværdi':'TaxableValue',
                         'Grundværdi':'LandValue',
                         'Periode':'Time',
                         'Udbudspris':'AskingPrice',
                         'Seneste_salgspris':'SalePrice',
                         'Seneste_salgsdato':'SaleDate',
                         'Udbud_Start':'OfferingStart',
                         'Udbud_Slut':'OfferingEnd',
                         'Vægtet areal':'WeightedFloorArea_x',
                         'Samlet Boligareal': 'TotalFloorArea_x',
                         'Kælderareal':'BasementArea_x',
                         'Grund':'Land',
                         'Antal værelser':'Rooms_x',
                         'Antal plan':'Levels',
                         'Byggeår':'YearBuilt_x',
                         'Om-/tilbygningsår':'YearRemodAdd_x',
                         'Varmekilde':'HeatSource_x',
                         'Link':'LinkBoligsiden',
                         'Breddegrad':'Latitude',
                         'Længdegrad': 'Longitude',
                         'Ydervægsmateriale':'OuterWallMat_x'}, inplace=True)
    

### Cleaning boligsiden.dk

In [5]:
##############################################
############### Data cleaning ################
##############################################

########## Check for empty values  ###########
print(df_14_19.isnull().sum(axis=0))
# The number 1996 recurs i.e. for "Boligtype"

###### Look where Boligtype is empty  ########
val_columns = (df_14_19.columns.values)
df_udvalgt = df_14_19[df_14_19['Type'].isnull()][val_columns]
print(df_udvalgt['Address'].value_counts())
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_udvalgt.head(10).to_html()))

# We have 1996 cases where the address is named "Adressen er ikke tilgængelig"
# -> We remove these lines
df_14_19_ny = (df_14_19[df_14_19['Type'].notnull()][val_columns])
print(df_14_19_ny.isnull().sum(axis=0))

################# Save data ##################
#df_14_19_ny.to_csv('Endeligt data/Boligsiden_Kbh+Frb_14_19.csv', index=False)

# LOAD DATA
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19.csv") 
#print(df_14_19.shape)

#display(HTML(df_14_19.head(100).to_html()))  
len(df_14_19)

Type                    1996
Address                    0
ZipCity                 1996
ValDate_x               1996
TaxableValue            1996
LandValue               1996
Time                    8252
AskingPrice             8252
SalePrice               1996
SaleDate                1996
OfferingStart           8252
OfferingEnd             8252
WeightedFloorArea_x     1996
TotalFloorArea_x        1996
BasementArea_x          1996
Land                    1996
Rooms_x                 1996
Levels                  1996
YearBuilt_x             1996
YearRemodAdd_x          1996
HeatSource_x            1996
LinkBoligsiden          1996
Latitude                1998
Longitude               1998
OuterWallMat_x         40633
dtype: int64
Adressen er ikke tilgængelig    1996
Name: Address, dtype: int64
Type                       0
Address                    0
ZipCity                    0
ValDate_x                  0
TaxableValue               0
LandValue                  0
Time                   

44021

In [14]:
#df_14_19[df_14_19['Address'].duplicated()]
#df_14_19[df_14_19.Address.duplicated()]

df_14_19[df_14_19.duplicated(['Address'])]
df_14_19[df_14_19.Address == 'Randersgade 19, st.']

Unnamed: 0,Type,Address,ZipCity,ValDate_x,TaxableValue,LandValue,Time,AskingPrice,SalePrice,SaleDate,...,Land,Rooms_x,Levels,YearBuilt_x,YearRemodAdd_x,HeatSource_x,LinkBoligsiden,Latitude,Longitude,OuterWallMat_x
22682,Ejerlejlighed,"Randersgade 19, st.",2100 København Ø,01-10-2018,5.250.000 kr.,334.600 kr.,09-06-2015 - 30-11-2015,4.248.000 kr.,3.998.000 kr.,13-11-2015,...,-,4,-,-,-,-,https://www.boligsiden.dk/boligen/101-139930,55.703304,12.581133,
37573,Ejerlejlighed,"Randersgade 19, st.",2100 København Ø,01-10-2018,48.000 kr.,14.300 kr.,,,3.325.000 kr.,14-05-2019,...,-,-,-,-,-,-,https://www.boligsiden.dk/boligen/101-844234,55.703304,12.581133,


In [8]:
##############################################
######## Add offering dates cleaning #########
##############################################

df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19.csv") 

val_columns = (df_14_19.columns.values)
df_opdater = (df_14_19[df_14_19["OfferingEnd"].isnull()][val_columns])
df_uden = (df_14_19[df_14_19["OfferingEnd"].notnull()][val_columns])

print('We want to update ' + str(len(df_opdater) ) + ' houses out of ' + str(len(df_14_19)) )

# List with links we want to update 
#opdater_link = df_opdater.Link.tolist() 

#with open('opdater_link.txt', 'w') as file:
#    file.write(str(opdater_link))

# Run function
#df_opdateret = get_data(opdater_link)

#df_samlet = pd.concat([df_uden, df_opdateret])

#print('The size of the updated dataset is ' + str(len(df_samlet) ) + ' (det gamle: ' + str(len(df_14_19)) + ')')

# GEM DATA
#df_samlet.to_csv('Endeligt data/Boligsiden_Kbh+Frb_14_19_opdateret.csv', index=False)

# LOAD DATA
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_opdateret.csv") 

print(df_14_19.shape)


We want to update 6256 houses out of 44021
(44021, 25)


In [11]:
##############################################
######### Remove houses for sale  ############
##############################################

df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_opdateret.csv") 
#print(df_14_19_ny.isnull().sum(axis=0))
print(df_14_19.shape)

val_columns = (df_14_19.columns.values)
df_udvalgt = df_14_19[df_14_19['Time'].isnull()][val_columns]
len(df_udvalgt)
print(df_udvalgt['OfferingEnd'].value_counts())

#df_14_19 = df_14_19[df_14_19.Udbud_Slut != ' Nu']
#df_14_19[df_14_19.Udbud_Slut == ' Nu']

# Save data
#df_14_19.to_csv('Endeligt data/Boligsiden_Kbh+Frb_14_19_klar.csv', index=False)

# LOAD DATA
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_klar.csv") 

print(df_14_19.isnull().sum(axis=0))
print(df_14_19.shape)

(44021, 25)
Ingen registrerede udbud    6199
Name: OfferingEnd, dtype: int64
Address                    0
Levels                     0
Rooms_x                    0
Type                       0
Latitude                   3
YearBuilt_x                0
TaxableValue               0
Land                       0
LandValue                  0
BasementArea_x             0
LinkBoligsiden             0
Longitude                  3
YearRemodAdd_x             0
Time                    6199
ZipCity                    0
TotalFloorArea_x           0
SaleDate                   0
SalePrice                  2
OfferingEnd                0
OfferingStart              0
AskingPrice             6199
HeatSource_x               0
ValDate_x                  0
WeightedFloorArea_x        0
OuterWallMat_x         37405
dtype: int64
(42708, 25)


### Formatting

In [13]:

df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_klar.csv") 

df_14_19 = df_14_19.replace('-', np.nan)

print(df_14_19.shape)
table = (df_14_19.isnull().sum(axis=0))
print(table.to_latex(index=True))


(42708, 25)
\begin{tabular}{lr}
\toprule
{} &      0 \\
\midrule
Address             &      0 \\
Levels              &  37406 \\
Rooms\_x             &   5392 \\
Type                &      0 \\
Latitude            &      3 \\
YearBuilt\_x         &  37405 \\
TaxableValue        &    140 \\
Land                &  42708 \\
LandValue           &    140 \\
BasementArea\_x      &  42708 \\
LinkBoligsiden      &      0 \\
Longitude           &      3 \\
YearRemodAdd\_x      &  41436 \\
Time                &   6199 \\
ZipCity             &      0 \\
TotalFloorArea\_x    &     93 \\
SaleDate            &      0 \\
SalePrice           &      2 \\
OfferingEnd         &      0 \\
OfferingStart       &      0 \\
AskingPrice         &   6199 \\
HeatSource\_x        &  37406 \\
ValDate\_x           &    140 \\
WeightedFloorArea\_x &  42708 \\
OuterWallMat\_x      &  37405 \\
\bottomrule
\end{tabular}



In [15]:
df_14_19 = pd.read_csv("Endeligt data/Boligsiden_Kbh+Frb_14_19_klar.csv") 

df_14_19 = df_14_19.replace('-', np.nan)

df_14_19['Levels'] = df_14_19['Levels'].astype(float)
df_14_19['Rooms_x'] = df_14_19['Rooms_x'].astype(float)
df_14_19['Type'] = df_14_19['Type'].astype(str)
df_14_19['Latitude'] = df_14_19['Latitude'].astype(float)
df_14_19['YearBuilt_x'] = df_14_19['YearBuilt_x'].astype(float)
df_14_19['TaxableValue'] = df_14_19['TaxableValue'].str.replace('.','').str.replace(' kr','').astype(float)
df_14_19['Land'] = df_14_19['Land'].astype(float)
df_14_19['LandValue'] = df_14_19['LandValue'].str.replace('.','').str.replace(' kr','').astype(float)
df_14_19['BasementArea_x'] = df_14_19['BasementArea_x'].astype(float)
df_14_19['Longitude'] = df_14_19['Longitude'].astype(float)
df_14_19['YearRemodAdd_x'] = df_14_19['YearRemodAdd_x'].astype(float)
df_14_19['TotalFloorArea_x'] = df_14_19['TotalFloorArea_x'].str.extract('(\d+)', expand=True).astype(float)
df_14_19['SaleDate'] = pd.to_datetime(df_14_19['SaleDate'], format='%d-%m-%Y', errors='coerce')
df_14_19['SalePrice'] = df_14_19['SalePrice'].str.replace('.','').str.replace(' kr','').astype(float)
df_14_19['OfferingStart'] = pd.to_datetime(df_14_19['OfferingStart'], format='%d-%m-%Y', errors='coerce')
df_14_19['OfferingEnd'] = pd.to_datetime(df_14_19['OfferingEnd'].str.strip(), format='%d-%m-%Y', errors='coerce')
df_14_19['AskingPrice'] = df_14_19['AskingPrice'].str.replace('.','').str.replace(' kr','').astype(float)
df_14_19['HeatSource_x'] = df_14_19['HeatSource_x']
df_14_19['ValDate_x'] = pd.to_datetime(df_14_19['ValDate_x'], errors='coerce')
df_14_19['WeightedFloorArea_x'] = df_14_19['WeightedFloorArea_x'].astype(float)
df_14_19['OuterWallMat_x'] = df_14_19['OuterWallMat_x']


table = (df_14_19.isnull().sum(axis=0))
table
# Save data
#df_14_19.to_csv('Endeligt data/Boligsiden_data.csv', index=False)

# Load data
#df_14_19 = pd.read_csv("Endeligt data/Boligsiden_data.csv") 

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_14_19.head(10).to_html()))    

Address                    0
Levels                 37406
Rooms_x                 5392
Type                       0
Latitude                   3
YearBuilt_x            37405
TaxableValue             140
Land                   42708
LandValue                140
BasementArea_x         42708
LinkBoligsiden             0
Longitude                  3
YearRemodAdd_x         41436
Time                    6199
ZipCity                    0
TotalFloorArea_x          93
SaleDate                   2
SalePrice                  2
OfferingEnd             6199
OfferingStart           6199
AskingPrice             6199
HeatSource_x           37406
ValDate_x                140
WeightedFloorArea_x    42708
OuterWallMat_x         37405
dtype: int64

In [17]:
df_14_19_ny = (df_14_19[df_14_19['Type'].notnull()][val_columns])
print(df_14_19_ny.isnull().sum(axis=0))

df_14_19_ny.shape

Address                    0
Levels                 37406
Rooms_x                 5392
Type                       0
Latitude                   3
YearBuilt_x            37405
TaxableValue             140
Land                   42708
LandValue                140
BasementArea_x         42708
LinkBoligsiden             0
Longitude                  3
YearRemodAdd_x         41436
Time                    6199
ZipCity                    0
TotalFloorArea_x          93
SaleDate                   2
SalePrice                  2
OfferingEnd             6199
OfferingStart           6199
AskingPrice             6199
HeatSource_x           37406
ValDate_x                140
WeightedFloorArea_x    42708
OuterWallMat_x         37405
dtype: int64


(42708, 25)

### Scraping dingeo.dk

#### Helper functions

In [17]:
def get_geolink(x):
    if ',' in (df['Address'][x]):
        ad_part = df['ZipCity'][x].replace(" ", "-") + '/' + df['Address'].str.split(',').str[0][x].replace(" ", "-") + '/' + df['Address'].str.split(', ').str[1][x].replace(".", "").replace(" ", "-")
        url = 'https://www.dingeo.dk/adresse/' + ad_part
    elif 'Adressen er ikke tilgængelig' in (df['Address'][x]):
        url = 'Utilgængelig'
    else:
        ad_part = df['ZipCity'][x].replace(" ", "-") + '/' + df['Address'].str.split(',').str[0][x].replace(" ", "-")
        url = 'https://www.dingeo.dk/adresse/' + ad_part
    return url


In [18]:
def dingeo_page(url):
    url = url
    
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html5lib')

    # Dictionary 
    data = {}
    data['geolink'] = url
    try: 
        data['Radonrisiko'] = [soup.find_all("div", {"id": 'radon'})[0].find_all("strong")[0].get_text()]
    except:
        pass

    if 'ikke registreret trafikstøj' in soup.find_all("div", {"id": 'trafikstoej'})[0].get_text():
        data['Støjmåling'] = ['Ingen trafikstøj']
    elif 'mangler desværre at indsamle trafikstøj' in soup.find_all("div", {"id": 'trafikstoej'})[0].get_text():
        data['Støjmåling'] = ['Mangler']
    else:
        data['Støjmåling'] = [soup.find_all("div", {"id": 'trafikstoej'})[0].find_all("b")[1].get_text()]

    data['Oversvømmelsesrisiko_skybrud'] = [soup.find_all("div", {"id": 'skybrud'})[0].find_all("b")[0].get_text()]
    data['Meter_over_havet'] = [soup.find_all("div", {"id": 'stormflod'})[0].find_all("b")[0].get_text()]

    table_0 = pd.read_html(str(soup.find_all('table')))[0].iloc[:,0:2]
    table_0 = table_0.set_axis(['Tekst', 'Værdi'], axis=1, inplace=False)

    table_1 = pd.read_html(str(soup.find_all('table')))[1].iloc[:,0:2]
    table_1 = table_1.set_axis(['Tekst', 'Værdi'], axis=1, inplace=False)

    table_2 = pd.read_html(str(soup.find_all('table')))[2].iloc[:,0:2]
    table_2 = table_2.set_axis(['Tekst', 'Værdi'], axis=1, inplace=False)

    table_3 = pd.read_html(str(soup.find_all('table')))[3:-2]
    table_3 = pd.concat(table_3).iloc[:,0:2]
    table_3 = table_3.set_axis(['Tekst', 'Værdi'], axis=1, inplace=False)
    
    table = pd.concat([table_0, table_1, table_2, table_3])

    table = table.loc[table['Tekst'].isin(['Anvendelse','Opførselsesår', 'Ombygningsår','Fredning', 
                                   'Køkkenforhold', 'Antal Etager', 'Antal toiletter', 'Antal badeværelser', 'Antal værelser',
                                   'Ydervægsmateriale', 'Tagmateriale', 'Varmeinstallation', 
                                   'Bygning, Samlet areal', 'Boligstørrelse', 'Kælder', 'Vægtet Areal'])]
    mydict = dict(zip(table.Tekst, list(table.Værdi)))
    data.update(mydict)

    if 'ikke finde energimærke' in soup.find_all("div", {"id": 'energimaerke'})[0].get_text():
        data['Energimærke'] = ['Mangler']
    else:
        data['Energimærke'] = [soup.find_all("div", {"id": 'energimaerke'})[0].find_all("p")[0].get_text()[-3:-2]]
    data['Indbrudsrisiko'] = [soup.find_all("div", {"id": 'indbrud'})[0].find_all("u")[0].get_text()]

    if 'ikke fredet eller bevaringsværdig' in str(soup.find_all("div", {"id": 'fbb'})[0].find_all("h2")[0]):
        data['Bevaringsværdig'] = [0]
    elif 'Bygningen er Bevaringsværdig' in str(soup.find_all("div", {"id": 'fbb'})[0].find_all("h2")[0]):
        data['Bevaringsværdig'] = re.findall(r'\d+', str(soup.find_all("div", {"id": 'fbb'})[0].find_all("p")[4]))
    elif 'Fejl ved opslag af Frededede' in str(soup.find_all("div", {"id": 'fbb'})[0].find_all("h2")[0]):
        data['Bevaringsværdig'] = 'Mangler'
    else:
        data['Bevaringsværdig'] = 'Ukendt'

    try:
        data['Største_parti'] = re.findall(r'valg/(.*?)(?<!\\).png', str(soup.find_all("div", {"id": 'valgdata'})[0].find_all('h2')[0]))
        data['Valgdeltagelse'] = re.findall("\d+.\d+", str(soup.find_all("div", {"id": 'valgdata'})[0].find_all('p')[1]))[1]
        data['Afstemningsområde'] = [soup.find_all("div", {"id": 'valgdata'})[0].find_all("strong")[0].get_text()]
    except:
        pass

    try:
        url_vurdering = url + '/vurdering'
        resp_vurdering = requests.get(url_vurdering)
        soup_vurdering = BeautifulSoup(resp_vurdering.text, 'html5lib')
        data['AVM_pris'] = soup_vurdering.find_all("div", {"class": 'col-md-10 col-md-offset-1 bg-info'})[0].find_all('h1')[0].get_text()
    except:
        pass

        # Make dataframe
    df_page = pd.DataFrame(data)

    return df_page


#### Collect data

In [18]:
################# Load data ##################
df_Boligsiden = pd.read_csv("Endeligt data/Boligsiden_data.csv") 

############## Choose lines ################## #1000 lines takes 2,5 hour)
#df = df_Boligsiden.iloc[0:] 
#df = df.reset_index(drop=True)

# Add geolink-variable to df
#df['geolink'] = pd.concat([pd.DataFrame([(get_geolink(i))], columns=['geolink']) for i in range(0, len(df))], ignore_index=True)

#df_geo = pd.DataFrame()

#for x in tqdm(range(0, len(df))):  
#    df_pages = dingeo_page(get_geolink(x))
#    df_geo = pd.concat([df_geo, df_pages])
#    time.sleep(1)

#df
# Print samlet merged datasæt baseret på 'geolink'-variabel = 
#df['geolink'] = pd.concat([pd.DataFrame([(get_geolink(i))], columns=['geolink']) for i in range(0, len(df))], ignore_index=True)
#df_samlet = pd.merge(df, df_geo, how='inner', on='geolink', right_index=False).drop_duplicates()

#df_samlet = df_samlet.reindex(sorted(df_samlet.columns), axis=1)

# Gem datafil
#df_samlet.to_csv('df_30000_raw.csv', index = False) 

df_10000_raw = pd.read_csv("Endeligt data/df_10000_raw.csv")
df_20000_raw = (pd.read_csv("Endeligt data/df_20000_raw.csv"))
df_30000_raw = (pd.read_csv("Endeligt data/df_30000_raw.csv"))
df_35000_raw = (pd.read_csv("Endeligt data/df_35000_raw.csv"))
df_40000_raw = (pd.read_csv("Endeligt data/df_40000_raw.csv"))
df_BoligsidenDinGeo_raw = pd.concat([df_10000_raw, df_20000_raw, df_30000_raw, df_35000_raw, df_40000_raw])
df_BoligsidenDinGeo_raw.to_csv('Endeligt data/df_BoligsidenDinGeo_raw.csv', index = False)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [19]:
df_BoligsidenDinGeo = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_raw.csv")

df_BoligsidenDinGeo.rename(columns={'Boligtype':'Type',
                         'Adresse':'Address',
                         'Post_nr': 'ZipCity',
                         'Vurderingsdato':'ValDate',
                         'Ejendomsværdi':'TaxableValue',
                         'Grundværdi':'LandValue',
                         'Periode':'Time',
                         'Udbudspris':'AskingPrice',
                         'Seneste_salgspris':'SalePrice',
                         'Seneste_salgsdato':'SaleDate',
                         'Udbud_Start':'OfferingStart',
                         'Udbud_Slut':'OfferingEnd',
                         'Vægtet areal':'WeightedFloorArea_x',
                         'Samlet Boligareal': 'TotalFloorArea_x',
                         'Kælderareal':'BasementArea_x',
                         'Grund':'Land',
                         'Antal værelser_x':'Rooms_x',
                         'Antal plan':'Levels',
                         'Byggeår':'YearBuilt_x',
                         'Om-/tilbygningsår':'YearRemodAdd_x',
                         'Varmekilde':'HeatSource_x',
                         'Link':'LinkBoligsiden',
                         'Breddegrad':'Latitude',
                         'Længdegrad': 'Longitude',
                         'Ydervægsmateriale_x':'OuterWallMat_x'}, inplace=True)
    
df_BoligsidenDinGeo.rename(columns={'AVM_pris':'GeomaticAVMPrice',
                         'Afstemningsområde':'VoteDistrict',
                         'Antal Etager': 'FloorsTotal',
                         'Antal badeværelser':'Bathrooms',
                         'Antal toiletter':'Toilets',
                         'Antal værelser_y':'Rooms_y',
                         'Anvendelse':'TypeHouse',
                         'Bevaringsværdig':'WorthPreserving',
                         'Boligstørrelse':'TotalFloorArea_y',
                         'Bygning, Samlet areal':'FloorAreaBuilding',
                         'Energimærke':'EnergyLabel',
                         'Fredning':'Listing',
                         'Indbrudsrisiko':'BurglaryRisk',
                         'Kælder': 'BasementArea_y',
                         'Køkkenforhold':'Kitchen',
                         'Meter_over_havet':'MASL',
                         'Ombygningsår':'YearRemodAdd_y',
                         'Opførselsesår':'YearBuilt_y',
                         'Oversvømmelsesrisiko_skybrud':'CloudburstRisk',
                         'Radonrisiko':'RadonRisk',
                         'Støjmåling':'NoiseMeasurement',
                         'Største_parti':'LargestParty',
                         'Tagmateriale':'Roofing',
                         'Valgdeltagelse': 'VoterTurnout',
                         'Varmeinstallation':'HeatSource_y',
                         'Vægtet Areal':'WeightedFloorArea_y',
                         'Ydervægsmateriale_y':'OuterWallMat_y',
                         'geolink':'LinkGeo'}, inplace=True)
        
df_BoligsidenDinGeo = df_BoligsidenDinGeo.reindex(sorted(df_BoligsidenDinGeo.columns), axis=1)

#df_BoligsidenDinGeo.to_csv('Endeligt data/df_BoligsidenDinGeo.csv', index = False)


#### Compare values from boligsiden and dingeo

In [20]:
##############################################
############# Compare lost data ##############
##############################################
df_Boligsiden = pd.read_csv("Endeligt data/Boligsiden_data.csv") 
df_BoligsidenDinGeo = pd.read_csv("Endeligt data/df_BoligsidenDinGeo.csv")

print('We have lost ' +  str(len(df_Boligsiden) - len(df_BoligsidenDinGeo)) + ' observations out of ' + str(len(df_Boligsiden)))

list_mangler = (list(set(df_Boligsiden.Address.tolist()) - set(df_BoligsidenDinGeo.Address.tolist())))

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_manlger.head(50).to_html()))

#df_tabt = pd.DataFrame()

#for i in tqdm(range(0, len(list_mangler))):
#    df_side = df_Boligsiden[df_Boligsiden.Address == list_mangler[i]]
#    df_tabt = pd.concat([df_tabt, df_side])

# Gem datafil
#df_tabt.to_csv('df_BoligsidenDinGeo_tabt.csv', index = False) 

We have lost 221 observations out of 42708


In [21]:
##########################################
############# TILRET URL'ER ##############
##########################################
df_tabt = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_tabt.csv")

#df = df_tabt
#df = df.reset_index(drop=True)

#df['geolink'] = pd.concat([pd.DataFrame([(get_geolink(i))], columns=['geolink']) for i in range(0, len(df))], ignore_index=True)

#df.at[1, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/'
#df.at[6, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-th/'
#df.at[7, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/4-th/'
#df.at[8, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/3-th'
#df.at[12, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/st-th/'
#df.at[18, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-23/1-tv/'
#df.at[20, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-19/st-tv/'
#df.at[24, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-tv/'
#df.at[26, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-frederiksberg/troels--lunds-vej-20/'
#df.at[27, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/4-tv/'
#df.at[29, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-27/2-th/'
#df.at[31, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/st-tv'
#df.at[33, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/2-th'
#df.at[35, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/3-th'
#df.at[39, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/1-tv'
#df.at[40, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/1-tv'
#df.at[43, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/3-th'
#df.at[48, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/4-th'
#df.at[49, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/st-tv'
#df.at[50, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/4-tv'
#df.at[52, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/st-th'
#df.at[53, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/2-th'
#df.at[61, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/st-tv'
#df.at[62, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/2-th'
#df.at[64, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/2-tv'
#df.at[66, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/4-tv'
#df.at[71, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/1-th'
#df.at[75, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/st-th'
#df.at[81, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/4-tv'
#df.at[82, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34a/2-tv/'
#df.at[85, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/4-tv'
#df.at[87, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/2-tv'
#df.at[89, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-frederiksberg/troels--lunds-vej-18/'
#df.at[99, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/4-th'
#df.at[108, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/3-th'
#df.at[109, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-Frederiksberg/Troels--Lunds-Vej-34'
#df.at[112, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/4-tv'
#df.at[114, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/1-tv/'
#df.at[118, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/3-mf/'
#df.at[119, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/3-tv'
#df.at[120, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/1-tv'
#df.at[125, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/2-tv'
#df.at[126, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/st-th'
#df.at[129, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/st-th'
#df.at[130, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/3-th'
#df.at[136, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/1-tv'
#df.at[137, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/2-tv'
#df.at[138, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/3-tv'
#df.at[139, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/2-tv'
#df.at[140, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34a/1-th/'
#df.at[143, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/4-tv'
#df.at[145, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/4-th'
#df.at[147, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/3-tv'
#df.at[148, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/2-tv/'
#df.at[154, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/3-th'
#df.at[158, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/4-th'
#df.at[164, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/st-th'
#df.at[165, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/1-th'
#df.at[168, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/1-th'
#df.at[171, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/2-th/'
#df.at[174, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/4-tv'
#df.at[175, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/4-th/'
#df.at[176, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/4-th'
#df.at[178, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/4-th'
#df.at[185, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/2-th'
#df.at[188, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/1-th'
#df.at[192, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/st'
#df.at[208, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-København-K/Danneskiold--Samsøes-Allé-34B/3-tv'
#df.at[208, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/2-tv'
#df.at[209, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/3-tv'
#df.at[212, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/1-th'
#df.at[214, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-Frederiksberg/Troels--Lunds-Vej-32'
#df.at[219, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/3-tv'
#df = df.drop(df.index[[14, 36, 55, 111, 115, 191]])

#df = df.reset_index(drop=True)
#df_ny = df

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_ny.to_html()))


##########################################
######### TO ADRESSER MANUELT ############
##########################################

# Lombardigade 3, 4. tv, 2300 København S
# Steenbergsvej 14, 2500 Valby
#df_ny = df_ny.drop(df_ny.index[[181, 199]])
#df_ny = df_ny.reset_index(drop=True)


In [22]:
##########################################
######### HENT MANGLENDE DATA ############
##########################################

#df_n = df_ny
#n_min = 0
#n_max = len(df_ny)

#df = df_n.iloc[n_min:n_max] 
#df = df.reset_index(drop=True)
#df_gem = pd.DataFrame()

#for n in tqdm(range(0, (n_max-n_min))):  
#    df_pages = dingeo_page(df.geolink[n])
#    df_gem = pd.concat([df_gem, df_pages])
#    time.sleep(1)

#df_samlet = pd.merge(df, df_gem, how='inner', on='geolink', right_index=False).drop_duplicates()
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_samlet.to_html()))  
    
# Gem datafil
#df_samlet.to_csv('df_BoligsidenDinGeo_mangler.csv', index = False) 

##########################################
############### MERGE DATA ###############
##########################################

df_BoligsidenDinGeo_mangler = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_mangler.csv")
df_BoligsidenDinGeo = pd.read_csv("Endeligt data/df_BoligsidenDinGeo.csv")
df_BoligsidenDinGeo_alle = pd.concat([df_BoligsidenDinGeo_mangler, df_BoligsidenDinGeo])

# Gem datafil
#df_BoligsidenDinGeo_alle.to_csv('df_BoligsidenDinGeo_alle.csv', index = False) 

#df_BoligsidenDinGeo_alle = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_alle.csv")

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_BoligsidenDinGeo_alle.head(20).to_html()))  


In [23]:
#df_BoligsidenDinGeo_alle = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_alle.csv")

df_BoligsidenDinGeo_alle = df_BoligsidenDinGeo_alle.replace('-', np.nan)

# Boligsiden
df_BoligsidenDinGeo_alle['Levels'] = df_BoligsidenDinGeo_alle['Levels'].astype(float)
df_BoligsidenDinGeo_alle['Rooms_x'] = df_BoligsidenDinGeo_alle['Rooms_x'].astype(float)
df_BoligsidenDinGeo_alle['Type'] = df_BoligsidenDinGeo_alle['Type'].astype(str)
df_BoligsidenDinGeo_alle['Latitude'] = df_BoligsidenDinGeo_alle['Latitude'].astype(float)
df_BoligsidenDinGeo_alle['YearBuilt_x'] = df_BoligsidenDinGeo_alle['YearBuilt_x'].astype(float)
df_BoligsidenDinGeo_alle['TaxableValue'] = df_BoligsidenDinGeo_alle['TaxableValue'].str.replace('.','').str.replace(' kr','').astype(float)
df_BoligsidenDinGeo_alle['Land'] = df_BoligsidenDinGeo_alle['Land'].astype(float)
df_BoligsidenDinGeo_alle['LandValue'] = df_BoligsidenDinGeo_alle['LandValue'].str.replace('.','').str.replace(' kr','').astype(float)
df_BoligsidenDinGeo_alle['BasementArea_x'] = df_BoligsidenDinGeo_alle['BasementArea_x'].astype(float)
df_BoligsidenDinGeo_alle['Longitude'] = df_BoligsidenDinGeo_alle['Longitude'].astype(float)
df_BoligsidenDinGeo_alle['YearRemodAdd_x'] = df_BoligsidenDinGeo_alle['YearRemodAdd_x'].astype(float)
df_BoligsidenDinGeo_alle['TotalFloorArea_x'] = df_BoligsidenDinGeo_alle['TotalFloorArea_x'].str.extract('(\d+)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['SaleDate'] = pd.to_datetime(df_BoligsidenDinGeo_alle['SaleDate'], format='%d-%m-%Y', errors='coerce')
df_BoligsidenDinGeo_alle['SalePrice'] = df_BoligsidenDinGeo_alle['SalePrice'].str.replace('.','').str.replace(' kr','').astype(float)
df_BoligsidenDinGeo_alle['OfferingStart'] = pd.to_datetime(df_BoligsidenDinGeo_alle['OfferingStart'], format='%d-%m-%Y', errors='coerce')
df_BoligsidenDinGeo_alle['OfferingEnd'] = pd.to_datetime(df_BoligsidenDinGeo_alle['OfferingEnd'].str.strip(), format='%d-%m-%Y', errors='coerce')
df_BoligsidenDinGeo_alle['AskingPrice'] = df_BoligsidenDinGeo_alle['AskingPrice'].str.replace('.','').str.replace(' kr','').astype(float)
df_BoligsidenDinGeo_alle['HeatSource_x'] = df_BoligsidenDinGeo_alle['HeatSource_x']
df_BoligsidenDinGeo_alle['ValDate'] = pd.to_datetime(df_BoligsidenDinGeo_alle['ValDate'], errors='coerce')
df_BoligsidenDinGeo_alle['WeightedFloorArea_x'] = df_BoligsidenDinGeo_alle['WeightedFloorArea_x'].astype(float)
df_BoligsidenDinGeo_alle['OuterWallMat_x'] = df_BoligsidenDinGeo_alle['OuterWallMat_x']

# DinGeo
df_BoligsidenDinGeo_alle['GeomaticAVMPrice'] = df_BoligsidenDinGeo_alle['GeomaticAVMPrice'].str.replace('.','').str.replace(' kr','').astype(float)
df_BoligsidenDinGeo_alle['FloorsTotal'] = df_BoligsidenDinGeo_alle['FloorsTotal'].astype(float)
df_BoligsidenDinGeo_alle['Bathrooms'] = df_BoligsidenDinGeo_alle['Bathrooms'].astype(float)
df_BoligsidenDinGeo_alle['Toilets'] = df_BoligsidenDinGeo_alle['Toilets'].astype(float)
df_BoligsidenDinGeo_alle['Rooms_y'] =  df_BoligsidenDinGeo_alle['Rooms_y'].astype(float)
df_BoligsidenDinGeo_alle['TypeHouse'] = df_BoligsidenDinGeo_alle['TypeHouse'].astype(str)
df_BoligsidenDinGeo_alle['WorthPreserving'] = df_BoligsidenDinGeo_alle['WorthPreserving'].astype(str)
df_BoligsidenDinGeo_alle['TotalFloorArea_y'] = df_BoligsidenDinGeo_alle['TotalFloorArea_y'].str.extract('(\d+)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['FloorAreaBuilding'] = df_BoligsidenDinGeo_alle['FloorAreaBuilding'].str.extract('(\d+)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['EnergyLabel'] = df_BoligsidenDinGeo_alle['EnergyLabel'].astype(str)
df_BoligsidenDinGeo_alle['Listing'] = df_BoligsidenDinGeo_alle['Listing'].astype(str)
df_BoligsidenDinGeo_alle['BurglaryRisk'] = df_BoligsidenDinGeo_alle['BurglaryRisk'].astype(str)
df_BoligsidenDinGeo_alle['BasementArea_y'] = df_BoligsidenDinGeo_alle['BasementArea_y'].str.extract('(\d+)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['Kitchen'] = df_BoligsidenDinGeo_alle['Kitchen'].astype(str)
df_BoligsidenDinGeo_alle['MASL'] = df_BoligsidenDinGeo_alle['MASL'].str.extract('(\d+\.*\d*)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['YearBuilt_y'] = df_BoligsidenDinGeo_alle['YearBuilt_y'].astype(float)
df_BoligsidenDinGeo_alle['YearRemodAdd_y'] = df_BoligsidenDinGeo_alle['YearRemodAdd_y'].astype(float)
df_BoligsidenDinGeo_alle['CloudburstRisk'] = df_BoligsidenDinGeo_alle['CloudburstRisk'].astype(str)
df_BoligsidenDinGeo_alle['RadonRisk'] = df_BoligsidenDinGeo_alle['RadonRisk'].astype(str)
df_BoligsidenDinGeo_alle['NoiseMeasurement'] = df_BoligsidenDinGeo_alle['NoiseMeasurement'].astype(str)
df_BoligsidenDinGeo_alle['Roofing'] = df_BoligsidenDinGeo_alle['Roofing'].astype(str)
df_BoligsidenDinGeo_alle['HeatSource_y'] = df_BoligsidenDinGeo_alle['HeatSource_y']
df_BoligsidenDinGeo_alle['WeightedFloorArea_y'] = df_BoligsidenDinGeo_alle['WeightedFloorArea_y'].str.extract('(\d+)', expand=True).astype(float)
df_BoligsidenDinGeo_alle['OuterWallMat_y'] = df_BoligsidenDinGeo_alle['OuterWallMat_y']


# Save data
df_BoligsidenDinGeo_alle.to_csv('Endeligt data/df_BoligsidenDinGeo_alle.csv', index = False) 



In [4]:
df_BoligsidenDinGeo_alle = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_alle.csv")
print(df_BoligsidenDinGeo_alle.shape)

with pd.option_context('display.max_colwidth', -1): 
    display(HTML(df_BoligsidenDinGeo_alle.head(10).to_html()))    

(42700, 53)


Unnamed: 0,Address,AskingPrice,BasementArea_x,BasementArea_y,Bathrooms,BurglaryRisk,CloudburstRisk,EnergyLabel,FloorAreaBuilding,FloorsTotal,GeomaticAVMPrice,HeatSource_x,HeatSource_y,Kitchen,Land,LandValue,LargestParty,Latitude,Levels,LinkBoligsiden,LinkGeo,Listing,Longitude,MASL,NoiseMeasurement,OfferingEnd,OfferingStart,OuterWallMat_x,OuterWallMat_y,RadonRisk,Roofing,Rooms_x,Rooms_y,SaleDate,SalePrice,TaxableValue,Time,Toilets,TotalFloorArea_x,TotalFloorArea_y,Type,TypeHouse,ValDate,VoteDistrict,VoterTurnout,WeightedFloorArea_x,WeightedFloorArea_y,WorthPreserving,YearBuilt_x,YearBuilt_y,YearRemodAdd_x,YearRemodAdd_y,ZipCity
0,"Bentzonsvej 37, 2. tv",3395000.0,,,1.0,lav,kan være risiko,D,685.0,5.0,3402740.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,157200.0,venstre,55.685782,,https://www.boligsiden.dk/boligen/147-8570,https://www.dingeo.dk/adresse/2000-Frederiksberg/Bentzonsvej-37/2-tv,,12.529234,12.1,Ingen trafikstøj,2017-04-06,2017-03-20,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,3.0,2017-03-25,3395000.0,1550000.0,20-03-2017 - 06-04-2017,1.0,67.0,67.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,,67.0,Mangler,,1902.0,,,2000 Frederiksberg
1,"Lange-Müllers Gade 27, 1. th",2395000.0,,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2927600.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,106900.0,radikale,55.712541,,https://www.boligsiden.dk/boligen/101-336123,https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/,,12.566848,11.4,55-60 dB,2016-08-29,2016-08-01,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2.0,2016-08-25,2250000.0,1300000.0,01-08-2016 - 29-08-2016,1.0,65.0,65.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,,65.0,Mangler,,1932.0,,,2100 København Ø
2,"Amsterdamvej 3, 3.",3495000.0,,,1.0,lav,kan være risiko,Mangler,1316.0,3.0,4775417.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,144300.0,socialdemokratiet,55.64873,,https://www.boligsiden.dk/boligen/101-24059,https://www.dingeo.dk/adresse/2300-København-S/Amsterdamvej-3/3,,12.61399,4.8,Mangler,2016-11-01,2016-07-10,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,6.0,6.0,2016-10-07,3400000.0,2050000.0,10-07-2016 - 01-11-2016,1.0,123.0,123.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,4. Syd,83.0,,123.0,Mangler,,1929.0,,,2300 København S
3,"Langelandsvej 20B, 1. tv",4295000.0,,,1.0,lav,kan være risiko,D,1791.0,5.0,4292535.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,169300.0,venstre,55.684792,,https://www.boligsiden.dk/boligen/147-71132,https://www.dingeo.dk/adresse/2000-Frederiksberg/Langelandsvej-20B/1-tv,,12.530195,12.5,60-65 dB,2018-08-29,2018-08-01,,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Fibercement, herunder asbest (bølge- eller skifer-eternit)",4.0,4.0,2018-08-27,4295000.0,1950000.0,01-08-2018 - 29-08-2018,1.0,83.0,83.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,,83.0,Mangler,,1908.0,,,2000 Frederiksberg
4,"Porcelænshaven 5G, st. tv",5395000.0,,,2.0,lav,kan være risiko,C,5760.0,5.0,4187106.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,405500.0,venstre,55.677771,,https://www.boligsiden.dk/boligen/147-257946,https://www.dingeo.dk/adresse/2000-Frederiksberg/Porcelænshaven-5G/st-tv,,12.523563,14.5,55-60 dB,2014-07-12,2014-05-22,,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Metalplader (bølgeblik, aluminium, o.lign.)",3.0,3.0,2014-08-26,5395000.0,2500000.0,22-05-2014 - 12-07-2014,2.0,99.0,99.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Rådhuset",84.4,,99.0,Mangler,,2006.0,,,2000 Frederiksberg
5,"Nordre Digevej 54, st. tv",,,,1.0,høj,kan være risiko,C,594.0,3.0,3594297.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,326500.0,,55.650154,,https://www.boligsiden.dk/boligen/101-734790,https://www.dingeo.dk/adresse/2300-København-S/Nordre-Digevej-54/st-tv,,12.588153,2.4,Mangler,,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,3.0,2014-08-28,1125000.0,1500000.0,,1.0,85.0,85.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,,,,85.0,Mangler,,2004.0,,,2300 København S
6,"Lange-Müllers Gade 20, st. th",3245000.0,,,1.0,lav,kan være risiko,Mangler,4537.0,5.0,3229777.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,120400.0,radikale,55.712375,,https://www.boligsiden.dk/boligen/101-335534,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-th/,,12.567279,11.6,Ingen trafikstøj,2018-07-08,2018-07-05,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,3.0,3.0,2018-07-08,3200000.0,1600000.0,05-07-2018 - 08-07-2018,1.0,88.0,88.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,,88.0,Mangler,,1926.0,,,2100 København Ø
7,"Lange-Müllers Gade 21, 4. th",,,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2418264.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,85300.0,radikale,55.712206,,https://www.boligsiden.dk/boligen/101-335720,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/4-th/,,12.566929,11.8,Ingen trafikstøj,,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2.0,2014-06-19,1745000.0,1150000.0,,1.0,56.0,56.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,,56.0,Mangler,,1932.0,,,2100 København Ø
8,"Lange-Müllers Gade 22, 3. th",3420000.0,,,1.0,lav,kan være risiko,Mangler,4537.0,5.0,3929067.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,120400.0,radikale,55.712548,,https://www.boligsiden.dk/boligen/101-335828,https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/3-th,,12.567231,11.5,Ingen trafikstøj,2016-06-02,2016-01-20,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,3.0,3.0,2016-03-26,3400000.0,1750000.0,20-01-2016 - 02-06-2016,1.0,87.0,87.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,,87.0,Mangler,,1926.0,,,2100 København Ø
9,"Rumæniensgade 12, 5. th",1595000.0,,,1.0,lav,kan være risiko,C,3263.0,5.0,2163203.0,,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,112600.0,socialdemokratiet,55.659712,,https://www.boligsiden.dk/boligen/101-472701,https://www.dingeo.dk/adresse/2300-København-S/Rumæniensgade-12/5-th,,12.612983,4.0,Ingen trafikstøj,2014-03-04,2014-02-24,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2.0,2014-03-02,1600000.0,940000.0,24-02-2014 - 04-03-2014,1.0,53.0,53.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,4. Sundbyøster,81.4,,53.0,Mangler,,1935.0,,,2300 København S


In [6]:
########################################################################
#################### Comparison of 8 duplicates ########################
########################################################################
df_BoligsidenDinGeo_alle = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_alle.csv")

df_ny = df_BoligsidenDinGeo_alle
# Remember: _x is  fromboligsiden, _y is from dingeo.dk

########################################################################
################## Comparison of Rooms_x and Rooms_y  ##################
########################################################################
x = 'Rooms_x'
y = 'Rooms_y'
df_ny['is_equal'] = (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
######### Comparison of TotalFloorArea_x and TotalFloorArea_y  #########
########################################################################
x = 'TotalFloorArea_x'
y = 'TotalFloorArea_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
########### Comparison of BasementArea_x and BasementArea_y  ###########
########################################################################
x = 'BasementArea_x'
y = 'BasementArea_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
############# Comparison of YearBuilt_x and YearBuilt_y  ###############
########################################################################
x = 'YearBuilt_x'
y = 'YearBuilt_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
########## Comparison of YearRemodAdd_x and YearRemodAdd_y  ##########
########################################################################
x = 'YearRemodAdd_x'
y = 'YearRemodAdd_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
############ Comparison of HeatSource_x and HeatSource_y  ##############
########################################################################
x = 'HeatSource_x'
y = 'HeatSource_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
###### Comparison of WeightedFloorArea_x and WeightedFloorArea_y  ######
########################################################################
x = 'WeightedFloorArea_x'
y = 'WeightedFloorArea_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


########################################################################
########### Comparison of OuterWallMat_x and OuterWallMat_y  ###########
########################################################################
x = 'OuterWallMat_x'
y = 'OuterWallMat_y'
df_ny['is_equal']= (df_ny[x]==df_ny[y])
df_selected = df_ny[df_ny['is_equal'] ==False]
dff = df_selected.loc[:, [x, y, 'LinkBoligsiden', 'LinkGeo']]
df_x_nan = dff.dropna(subset=[y]) 
df_x_nan = df_x_nan[df_x_nan[x].isnull()]
df_y_nan = dff.dropna(subset=[x]) 
df_y_nan = df_y_nan[df_y_nan[y].isnull()]
df_no_nan = dff.dropna(subset=[x,y]) 
df_all_nan = dff[dff[y].isnull()][dff[x].isnull()]                                                   
print(len(df_ny[df_ny['is_equal'] == True]), len(df_no_nan), len(df_x_nan), len(df_y_nan), len(df_all_nan))


##############################################################
# Printer der hvor Levels og FloorsTotal er forskellige #
##############################################################

df_ny['is_equal']= (df_ny['Levels']==df_ny['FloorsTotal'])
df_plan = df_ny[df_ny['is_equal'] ==False]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_plan.loc[:, ['Levels', 'FloorsTotal', 'LinkBoligsiden', 'LinkGeo']].to_html()))





36789 520 5360 5 26
41840 203 64 564 29
0 0 3575 0 39125
5288 8 37373 1 30




1268 1 4385 3 37043
5286 3 37311 7 93
0 0 42108 0 592
5240 50 37312 7 91




In [26]:
#############################################
############# REMOVE DUBLICATES #############
#############################################
df_BoligsidenDinGeo_alle = pd.read_csv("Endeligt data/df_BoligsidenDinGeo_alle.csv")

# Note that data from boligsiden - if it exits - always exceeds data from dingeo.dk

df = df_BoligsidenDinGeo_alle

df['Rooms'] = df.Rooms_x.combine_first(df.Rooms_y) 
df.drop('Rooms_x', axis=1, inplace=True)
df.drop('Rooms_y', axis=1, inplace=True)
df['TotalFloorArea'] = df.TotalFloorArea_x.combine_first(df.TotalFloorArea_y) 
df.drop('TotalFloorArea_x', axis=1, inplace=True)
df.drop('TotalFloorArea_y', axis=1, inplace=True)
df['BasementArea'] = df.BasementArea_x.combine_first(df.BasementArea_y) 
df.drop('BasementArea_x', axis=1, inplace=True)
df.drop('BasementArea_y', axis=1, inplace=True)
df['YearBuilt'] = df.YearBuilt_x.combine_first(df.YearBuilt_y) 
df.drop('YearBuilt_x', axis=1, inplace=True)
df.drop('YearBuilt_y', axis=1, inplace=True)
df['YearRemodAdd'] = df.YearRemodAdd_x.combine_first(df.YearRemodAdd_y) 
df.drop('YearRemodAdd_x', axis=1, inplace=True)
df.drop('YearRemodAdd_y', axis=1, inplace=True)
df['HeatSource'] = df.HeatSource_x.combine_first(df.HeatSource_y) 
df.drop('HeatSource_x', axis=1, inplace=True)
df.drop('HeatSource_y', axis=1, inplace=True)
df['WeightedFloorArea'] = df.WeightedFloorArea_x.combine_first(df.WeightedFloorArea_y) 
df.drop('WeightedFloorArea_x', axis=1, inplace=True)
df.drop('WeightedFloorArea_y', axis=1, inplace=True)
df['OuterWallMat'] = df.OuterWallMat_x.combine_first(df.OuterWallMat_y) 
df.drop('OuterWallMat_x', axis=1, inplace=True)
df.drop('OuterWallMat_y', axis=1, inplace=True)

df = df.reindex(sorted(df.columns), axis=1)

with pd.option_context('display.max_colwidth', -1): 
    display(HTML(df.head(5).to_html()))

df.isnull().sum(axis=0)
#df.to_csv('Endeligt data/Boligsiden_DinGeo_data.csv', index = False) 

df.shape

Unnamed: 0,Address,AskingPrice,BasementArea,Bathrooms,BurglaryRisk,CloudburstRisk,EnergyLabel,FloorAreaBuilding,FloorsTotal,GeomaticAVMPrice,HeatSource,Kitchen,Land,LandValue,LargestParty,Latitude,Levels,LinkBoligsiden,LinkGeo,Listing,Longitude,MASL,NoiseMeasurement,OfferingEnd,OfferingStart,OuterWallMat,RadonRisk,Roofing,Rooms,SaleDate,SalePrice,TaxableValue,Time,Toilets,TotalFloorArea,Type,TypeHouse,ValDate,VoteDistrict,VoterTurnout,WeightedFloorArea,WorthPreserving,YearBuilt,YearRemodAdd,ZipCity
0,"Bentzonsvej 37, 2. tv",3395000.0,,1.0,lav,kan være risiko,D,685.0,5.0,3402740.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,157200.0,venstre,55.685782,,https://www.boligsiden.dk/boligen/147-8570,https://www.dingeo.dk/adresse/2000-Frederiksberg/Bentzonsvej-37/2-tv,,12.529234,12.1,Ingen trafikstøj,2017-04-06,2017-03-20,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,2017-03-25,3395000.0,1550000.0,20-03-2017 - 06-04-2017,1.0,67.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,67.0,Mangler,1902.0,,2000 Frederiksberg
1,"Lange-Müllers Gade 27, 1. th",2395000.0,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2927600.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,106900.0,radikale,55.712541,,https://www.boligsiden.dk/boligen/101-336123,https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/,,12.566848,11.4,55-60 dB,2016-08-29,2016-08-01,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2016-08-25,2250000.0,1300000.0,01-08-2016 - 29-08-2016,1.0,65.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,65.0,Mangler,1932.0,,2100 København Ø
2,"Amsterdamvej 3, 3.",3495000.0,,1.0,lav,kan være risiko,Mangler,1316.0,3.0,4775417.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,144300.0,socialdemokratiet,55.64873,,https://www.boligsiden.dk/boligen/101-24059,https://www.dingeo.dk/adresse/2300-København-S/Amsterdamvej-3/3,,12.61399,4.8,Mangler,2016-11-01,2016-07-10,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,6.0,2016-10-07,3400000.0,2050000.0,10-07-2016 - 01-11-2016,1.0,123.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,4. Syd,83.0,123.0,Mangler,1929.0,,2300 København S
3,"Langelandsvej 20B, 1. tv",4295000.0,,1.0,lav,kan være risiko,D,1791.0,5.0,4292535.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,169300.0,venstre,55.684792,,https://www.boligsiden.dk/boligen/147-71132,https://www.dingeo.dk/adresse/2000-Frederiksberg/Langelandsvej-20B/1-tv,,12.530195,12.5,60-65 dB,2018-08-29,2018-08-01,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Fibercement, herunder asbest (bølge- eller skifer-eternit)",4.0,2018-08-27,4295000.0,1950000.0,01-08-2018 - 29-08-2018,1.0,83.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,83.0,Mangler,1908.0,,2000 Frederiksberg
4,"Porcelænshaven 5G, st. tv",5395000.0,,2.0,lav,kan være risiko,C,5760.0,5.0,4187106.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,405500.0,venstre,55.677771,,https://www.boligsiden.dk/boligen/147-257946,https://www.dingeo.dk/adresse/2000-Frederiksberg/Porcelænshaven-5G/st-tv,,12.523563,14.5,55-60 dB,2014-07-12,2014-05-22,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Metalplader (bølgeblik, aluminium, o.lign.)",3.0,2014-08-26,5395000.0,2500000.0,22-05-2014 - 12-07-2014,2.0,99.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Rådhuset",84.4,99.0,Mangler,2006.0,,2000 Frederiksberg


(42700, 45)

In [5]:
Boligsiden_DinGeo_data = pd.read_csv("Endeligt data/Boligsiden_DinGeo_data.csv")

print(Boligsiden_DinGeo_data.isnull().sum(axis=0))
Boligsiden_DinGeo_data.shape

Address                  0
AskingPrice           6198
BasementArea         39125
Bathrooms               31
BurglaryRisk             0
CloudburstRisk           0
EnergyLabel              0
FloorAreaBuilding      137
FloorsTotal             31
GeomaticAVMPrice       396
HeatSource              93
Kitchen                590
Land                 42700
LandValue              140
LargestParty          2751
Latitude                 3
Levels               37404
LinkBoligsiden           0
LinkGeo                  0
Listing              42700
Longitude                3
MASL                     0
NoiseMeasurement         0
OfferingEnd           6198
OfferingStart         6198
OuterWallMat            91
RadonRisk                0
Roofing                 98
Rooms                   26
SaleDate                 2
SalePrice                2
TaxableValue           140
Time                  6198
Toilets                 31
TotalFloorArea          29
Type                     0
TypeHouse               31
V

(42700, 45)

In [7]:
Boligsiden_DinGeo_data = pd.read_csv("Endeligt data/Boligsiden_DinGeo_data.csv")

df = Boligsiden_DinGeo_data.copy()

def missing_percentage(df):
    """This function takes a DataFrame(df) as input and returns two columns, total missing values and total missing values percentage"""
    ## the two following line may seem complicated but its actually very simple. 
    total = df.isnull().sum().sort_values(ascending = False)[df.isnull().sum().sort_values(ascending = False) != 0]
    percent = round(df.isnull().sum().sort_values(ascending = False)/len(df)*100,2)[round(df.isnull().sum().sort_values(ascending = False)/len(df)*100,2) != 0]
    return pd.concat([total, percent], axis=1, keys=['Total','Percent'])

# Check missing values
#df.info()
print(missing_percentage(df))

df.shape

                   Total  Percent
AskingPrice         6198    14.52
BasementArea       39125    91.63
Bathrooms             31     0.07
FloorAreaBuilding    137     0.32
FloorsTotal           31     0.07
GeomaticAVMPrice     396     0.93
HeatSource            93     0.22
Kitchen              590     1.38
Land               42700   100.00
LandValue            140     0.33
LargestParty        2751     6.44
Latitude               3     0.01
Levels             37404    87.60
Listing            42700   100.00
Longitude              3     0.01
OfferingEnd         6198    14.52
OfferingStart       6198    14.52
OuterWallMat          91     0.21
Roofing               98     0.23
Rooms                 26     0.06
SaleDate               2      NaN
SalePrice              2      NaN
TaxableValue         140     0.33
Time                6198    14.52
Toilets               31     0.07
TotalFloorArea        29     0.07
TypeHouse             31     0.07
ValDate              140     0.33
VoteDistrict  

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  # Remove the CWD from sys.path while we load stuff.


(42700, 45)

In [8]:
df = Boligsiden_DinGeo_data.copy()

df_selected = df[df["Time"].isnull()][df.columns.values]
with pd.option_context('display.max_colwidth', -1): 
    display(HTML(df_selected.head(250).to_html()))

Unnamed: 0,Address,AskingPrice,BasementArea,Bathrooms,BurglaryRisk,CloudburstRisk,EnergyLabel,FloorAreaBuilding,FloorsTotal,GeomaticAVMPrice,HeatSource,Kitchen,Land,LandValue,LargestParty,Latitude,Levels,LinkBoligsiden,LinkGeo,Listing,Longitude,MASL,NoiseMeasurement,OfferingEnd,OfferingStart,OuterWallMat,RadonRisk,Roofing,Rooms,SaleDate,SalePrice,TaxableValue,Time,Toilets,TotalFloorArea,Type,TypeHouse,ValDate,VoteDistrict,VoterTurnout,WeightedFloorArea,WorthPreserving,YearBuilt,YearRemodAdd,ZipCity
5,"Nordre Digevej 54, st. tv",,,1.0,høj,kan være risiko,C,594.0,3.0,3594297.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,326500.0,,55.650154,,https://www.boligsiden.dk/boligen/101-734790,https://www.dingeo.dk/adresse/2300-København-S/Nordre-Digevej-54/st-tv,,12.588153,2.4,Mangler,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,2014-08-28,1125000.0,1500000.0,,1.0,85.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,,,85.0,Mangler,2004.0,,2300 København S
7,"Lange-Müllers Gade 21, 4. th",,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2418264.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,85300.0,radikale,55.712206,,https://www.boligsiden.dk/boligen/101-335720,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/4-th/,,12.566929,11.8,Ingen trafikstøj,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2014-06-19,1745000.0,1150000.0,,1.0,56.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,56.0,Mangler,1932.0,,2100 København Ø
14,Thomas Koppels Allé 16A,,,,lav,kan være risiko,Mangler,,,,,,,,enhedslisten,55.644259,,https://www.boligsiden.dk/boligen/101-29082,https://www.dingeo.dk/adresse/2450-København-SV/Thomas-Koppels-Allé-16A,,12.530455,2.7,Ingen trafikstøj,,,,lav,,,2019-08-23,2873000.0,,,,,Rækkehus,,,9. Syd,76.4,,0,,,2450 København SV
18,"Weidekampsgade 43, 2. th",,,1.0,lav,kan være risiko,C,8607.0,6.0,4565628.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,447800.0,venstre,55.666689,,https://www.boligsiden.dk/boligen/101-1014,https://www.dingeo.dk/adresse/2300-København-S/Weidekampsgade-43/2-th,,12.582064,2.4,Ingen trafikstøj,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,2014-10-07,3107600.0,2300000.0,,1.0,84.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,2. Vest,88.3,84.0,Mangler,2007.0,,2300 København S
20,Hyltebjerg Allé 16,,,,lav,kan være risiko,Mangler,,,,,,,620700.0,socialdemokratiet,55.682821,,https://www.boligsiden.dk/boligen/101-249832,https://www.dingeo.dk/adresse/2720-Vanløse/Hyltebjerg-Allé-16,,12.486873,12.3,55-60 dB,,,,medium,,3.0,2017-02-08,2495000.0,1950000.0,,,90.0,Ejerlejlighed,,2018-01-10,7. Vanløse,84.3,,Mangler,,,2720 Vanløse
24,Thomas Koppels Allé 24B,,,,lav,kan være risiko,Mangler,,,,,,,,enhedslisten,55.643327,,https://www.boligsiden.dk/boligen/101-29088,https://www.dingeo.dk/adresse/2450-København-SV/Thomas-Koppels-Allé-24B,,12.531294,2.7,Ingen trafikstøj,,,,lav,,,2019-08-20,2873000.0,,,,,Rækkehus,,,9. Syd,76.4,,Mangler,,,2450 København SV
29,"Ingerslevsgade 124, st. th",,,1.0,lav,kan være risiko,D,2705.0,5.0,4386657.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,411700.0,enhedslisten,55.664737,,https://www.boligsiden.dk/boligen/101-263045,https://www.dingeo.dk/adresse/1705-København-V/Ingerslevsgade-124/st-th,,12.553028,3.0,Mangler,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Andet materiale,4.0,2018-07-30,850000.0,1000000.0,,1.0,106.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,9. Nord,86.5,106.0,Mangler,1907.0,,1705 København V
30,"Lange-Müllers Gade 21, st. tv",,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2459173.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,85300.0,radikale,55.712206,,https://www.boligsiden.dk/boligen/101-335674,https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/st-tv,,12.566929,11.8,Ingen trafikstøj,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2016-07-14,2100000.0,1050000.0,,1.0,57.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,57.0,Mangler,1932.0,,2100 København Ø
34,"Lange-Müllers Gade 23, 3. th",,,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2485074.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),,106900.0,radikale,55.712315,,https://www.boligsiden.dk/boligen/101-335941,https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/3-th,,12.566891,11.7,Ingen trafikstøj,,,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2016-09-03,1408000.0,1300000.0,,1.0,65.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,65.0,Mangler,1932.0,,2100 København Ø
43,Thomas Koppels Allé 24A,,,,lav,kan være risiko,Mangler,,,,,,,,enhedslisten,55.643334,,https://www.boligsiden.dk/boligen/101-29089,https://www.dingeo.dk/adresse/2450-København-SV/Thomas-Koppels-Allé-24A,,12.531358,2.6,Ingen trafikstøj,,,,lav,,,2019-08-19,2873000.0,,,,,Rækkehus,,,9. Syd,76.4,,0,,,2450 København SV


In [29]:
#######################################################################
################### DECISIONS ABOUT MISSING VALUES ####################
#######################################################################
df = Boligsiden_DinGeo_data.copy()

df.drop('Land', axis=1, inplace=True)
df.drop('Listing', axis=1, inplace=True)
df.isnull().sum(axis=0)

###################################################################
# Update TotalFloorArea with WeightedFloorArea (=592) #
###################################################################
# Boligarealet vurderes at være så væsentligt for en boligs pris, at det ikke kan undværes.
#df_selected = (df[df["WeightedFloorArea"].isnull()][df.columns.values])
#df_selected.shape
df.WeightedFloorArea.fillna(df.TotalFloorArea, inplace=True)
df.isnull().sum(axis=0)

###################################################################
# Update WeightedFloorArea with TotalFloorArea (=29) #
###################################################################
#df_selected = (df[df["TotalFloorArea"].isnull()][df.columns.values])
#df_selected.shape
df.TotalFloorArea.fillna(df.WeightedFloorArea, inplace=True)
df.isnull().sum(axis=0)

##################################################
## Remove houses without WeightedFloorArea(=29) ##
##################################################
#df_udvalgt = (df[df["WeightedFloorArea"].isnull()][df.columns.values])
df = df[df["WeightedFloorArea"].notnull()][df.columns.values]
df.isnull().sum(axis=0)


##################################################
###### Remove houses without SalePrice (=2) ######
##################################################
#df_udvalgt = (df[df["WeightedFloorArea"].isnull()][df.columns.values])
df = df[df["SalePrice"].notnull()][df.columns.values]
df.isnull().sum(axis=0)

##################################################
#############      BasementArea      #############
##################################################
#df_selected = df[df["BasementArea"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.BasementArea.fillna(0, inplace=True)
df.isnull().sum(axis=0)

##################################################
### 3 obs, where geo-coordinated are missing   ###
##################################################
#df_selected = df[df["Latitude"].isnull()][df.columns.values]
#display(HTML(df_selected.to_html()))  
# Source: https://www.latlong.net/
df.at[6951, 'Latitude'] = 55.671980
df.at[6951, 'Longitude'] = 12.556570
df.at[13613, 'Latitude'] = 55.662810
df.at[13613, 'Longitude'] = 12.614750
df.at[38975, 'Latitude'] = 55.658710
df.at[38975, 'Longitude'] = 12.611070
df.isnull().sum(axis=0)

##################################################
#############        Bathrooms       #############
##################################################
#df_selected = df[df["Bathrooms"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.at[20, 'Bathrooms'] = 2
df.at[33, 'Bathrooms'] = 2
df.at[70, 'Bathrooms'] = 1
df.at[80, 'Bathrooms'] = 1
df.at[181, 'Bathrooms'] = 1
df.at[212, 'Bathrooms'] = 1

##################################################
#############         Toilets        #############
##################################################
df.at[20, 'Toilets'] = 2
df.at[33, 'Toilets'] = 3
df.at[70, 'Toilets'] = 1
df.at[80, 'Toilets'] = 1
df.at[181, 'Toilets'] = 1
df.at[212, 'Toilets'] = 1
(df.isnull().sum(axis=0))

##################################################
############### FloorAreaBuilding ################
##################################################
#df_selected = df[df["FloorAreaBuilding"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.FloorAreaBuilding.fillna(df.TotalFloorArea, inplace=True)
(df.isnull().sum(axis=0)) 

##################################################
#############       FloorsTotal      #############
##################################################
#df_selected = df[df["FloorsTotal"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.at[20, 'FloorsTotal'] = 1
df.at[33, 'FloorsTotal'] = 2
df.at[70, 'FloorsTotal'] = 3
df.at[80, 'FloorsTotal'] = 5
df.at[181, 'FloorsTotal'] = 8
df.at[212, 'FloorsTotal'] = 8
df.isnull().sum(axis=0)

##################################################
#############        YearBuilt       #############
##################################################
#df_selected = df[df["YearBuilt"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.at[20, 'YearBuilt'] = 1940
df.at[70, 'YearBuilt'] = 1964
df.at[80, 'YearBuilt'] = 1902
df.at[181, 'YearBuilt'] = 1956
df.at[212, 'YearBuilt'] = 1956
df.isnull().sum(axis=0)

##################################################
#############        YearRemodAdd       #############
##################################################
#df_selected = (df[df["YearRemodAdd"].isnull()][df.columns.values])
#df_selected.shape
df.YearRemodAdd.fillna(df.YearBuilt, inplace=True)
df.isnull().sum(axis=0)

##################################################
#############        HeatSource      #############
##################################################
#df_selected = df[df["HeatSource"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.HeatSource.loc[df.HeatSource.isna()] = 'Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg)'
df['HeatSource'] = df.HeatSource.astype('category')
df['HeatSource'].value_counts()
df.isnull().sum(axis=0)

##################################################
#############          Kitchen       #############
##################################################
df.loc[df.Kitchen.isnull(), 'Kitchen'] = df.groupby("ZipCity").Kitchen.transform(lambda x: x.fillna(x.value_counts().index[0]))
df['Kitchen'] = df.Kitchen.astype('category')
df['Kitchen'].value_counts()
df.isnull().sum(axis=0)


##################################################
#############          Levels        #############
##################################################
#df_selected = df[df["Levels"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.Levels.fillna(1, inplace=True)
df['HasMultipleLevels'] = df['Levels'].apply(lambda x: 1 if x > 1 else 0)
df.isnull().sum(axis=0)


##################################################
#############       TypeHouse        #############
##################################################
#df_selected = df[df["TypeHouse"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.at[20, 'TypeHouse'] = 'Bolig i etageejendom, flerfamiliehus eller to-familiehus'
df.at[33, 'TypeHouse'] = 'Række-, kæde- eller dobbelthus (lodret adskillelse mellem enhederne).'
df.at[70, 'TypeHouse'] = 'Bolig i etageejendom, flerfamiliehus eller to-familiehus'
df.at[80, 'TypeHouse'] = 'Bolig i etageejendom, flerfamiliehus eller to-familiehus'
df.at[181, 'TypeHouse'] = 'Bolig i etageejendom, flerfamiliehus eller to-familiehus'
df.at[212, 'TypeHouse'] = 'Bolig i etageejendom, flerfamiliehus eller to-familiehus'
df['TypeHouse'] = df.TypeHouse.astype('category')
df['TypeHouse'].value_counts()
df.isnull().sum(axis=0)

##################################################
#############          Rooms         #############
##################################################
#df_selected = df[df["Rooms"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df.at[33, 'Rooms'] = 6
df.isnull().sum(axis=0)


##################################################
#############       OuterWallMat     #############
##################################################
df.loc[df.OuterWallMat.isnull(), 'OuterWallMat'] = df.groupby("ZipCity").OuterWallMat.transform(lambda x: x.fillna(x.value_counts().index[0]))
df['OuterWallMat'] = df.OuterWallMat.astype('category')
df['OuterWallMat'].value_counts()
df.isnull().sum(axis=0)

##################################################
#############          Roofing       #############
##################################################
df.loc[df.Roofing.isnull(), 'Roofing'] = df.groupby("ZipCity").Roofing.transform(lambda x: x.fillna(x.value_counts().index[0]))
df['Roofing'] = df.Roofing.astype('category')
df['Roofing'].value_counts()
df.isnull().sum(axis=0)


# Delete houses that are not "Udbudt til salg på Boligsiden" #
##################################################
#############          Time          #############
##################################################
#df_selected = df[df["Time"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected.head(250).to_html()))
df = df[df["Time"].notnull()][df.columns.values]
df.isnull().sum(axis=0)

##################################################
#############      VoteDistrict      #############
##################################################
#df['VoteDistrict'].value_counts()
#df_selected = (df[df["VoteDistrict"].isnull()][df.columns.values])
#df_selected['ZipCity'] = df_selected.ZipCity.astype('category')
#df_selected['ZipCity'].value_counts()

# By looking at http://kbhkort.kk.dk/cbkort? we conclude
#df_2100 = df_selected.loc[df['ZipCity'] == '2100 København Ø']
#df_2150 = df_selected.loc[df['ZipCity'] == '2150 Nordhavn']
#df_2200 = df_selected.loc[df['ZipCity'] == '2200 København N']
#df_2300 = df_selected.loc[df['ZipCity'] == '2300 København S']
#df_2400 = df_selected.loc[df['ZipCity'] == '2400 København NV']
#df_2770 = df_selected.loc[df['ZipCity'] == '2770 Kastrup']

#plt.rcParams["figure.figsize"] = (16,16)
#plt.style.use('ggplot')
#sns.lmplot( x="Longitude", y="Latitude", data=df_2770, fit_reg=False, 
#           legend=False,  height=10, scatter_kws={'alpha':0.2})
#plt.legend(loc='lower right')
#plt.show()

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_2770.head(50).to_html()))
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2100 København Ø'), 'VoteDistrict'] = '1. Øst'
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2150 Nordhavn'), 'VoteDistrict'] = '1. Øst'
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2200 København N'), 'VoteDistrict'] = '5. Nørrebrohallen'
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2300 København S'), 'VoteDistrict'] = '2. Øst'
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2400 København NV'), 'VoteDistrict'] = '6. Vest'
df.loc[(df['VoteDistrict'].isnull()) & (df['ZipCity'] == '2770 Kastrup'), 'VoteDistrict'] = '2. Øst'


##################################################
#############      VoterTurnout      #############
##################################################
df['VoterTurnout'].value_counts()
#df_selected = (df[df["VoterTurnout"].isnull()][df.columns.values])
#df_selected['ZipCity'] = df_selected.ZipCity.astype('category')
#df_selected['ZipCity'].value_counts()

# We use https://www.kmdvalg.dk/fv/2019/KMDValgFV.html to conclude
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2100 København Ø'), 'VoterTurnout'] = 88.7
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2150 Nordhavn'), 'VoterTurnout'] = 88.7
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2200 København N'), 'VoterTurnout'] = 83.4
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2300 København S'), 'VoterTurnout'] = 86.0
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2400 København NV'), 'VoterTurnout'] = 75.6
df.loc[(df['VoterTurnout'].isnull()) & (df['ZipCity'] == '2770 Kastrup'), 'VoterTurnout'] = 86.0

##################################################
#############      LargestParty      #############
##################################################
df['LargestParty'].value_counts()
# We use https://www.kmdvalg.dk/fv/2019/KMDValgFV.html to conclude
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2100 København Ø'), 'LargestParty'] = 'venstre'
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2150 Nordhavn'), 'LargestParty'] = 'venstre'
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2200 København N'), 'LargestParty'] = 'enhedslisten'
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2300 København S'), 'LargestParty'] = 'socialdemokratiet'
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2400 København NV'), 'LargestParty'] = 'enhedslisten'
df.loc[(df['LargestParty'].isnull()) & (df['ZipCity'] == '2770 Kastrup'), 'LargestParty'] = 'socialdemokratiet'



##################################################
#############    GeomaticAVMPrice    #############
##################################################

#df_selected = df[df["GeomaticAVMPrice"].isnull()][df.columns.values]
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_selected[['Address', 'ZipCity', 'GeomaticAVMPrice', 'LinkGeo']].head(250).to_html()))

df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Lykkebovej 3'), 'GeomaticAVMPrice'] = 5.62 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Halgreensgade 14'), 'GeomaticAVMPrice'] = 5.20 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Ægirsgade 37, 5.'), 'GeomaticAVMPrice'] = 4.81  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Dronningens Tværgade 50, 3. 3'), 'GeomaticAVMPrice'] = 3.90 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Emil Slomanns Vej 1A, 2. th'), 'GeomaticAVMPrice'] = 4.70 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Vesterbrogade 192, 2. 8'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Emil Slomanns Vej 1A, 1. th'), 'GeomaticAVMPrice'] = 4.65 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Roskildevej 53, 1. 105'), 'GeomaticAVMPrice'] = 2.26 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Falkoner Alle 61, 4. tv'), 'GeomaticAVMPrice'] = 3.80 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Falkoner Alle 57, 4. tv'), 'GeomaticAVMPrice'] = 5.35 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bülowsvej 40, 3. th'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Alhambravej 5, 2.'), 'GeomaticAVMPrice'] =  9.29 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Gammel Kongevej 139, 1.'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Alhambravej 5, 1. tv'), 'GeomaticAVMPrice'] = 10.88 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Ewaldsensvej 8, st. th'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Venøgade 28, 2. tv'), 'GeomaticAVMPrice'] = 4.92 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Wildersgade 64'), 'GeomaticAVMPrice'] = 12.48 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bodenhoffs Plads 8, 4. th'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Dronningens Tværgade 6B, 5.'), 'GeomaticAVMPrice'] = 3.20 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Moselgade 34, 3. tv'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Hessensgade 15, st. 14'), 'GeomaticAVMPrice'] = 2.85 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Puggaardsgade 10, 3. th'), 'GeomaticAVMPrice'] = 4.60 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Teglholmsgade 32, st. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amerika Plads 3A, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Gammel Mønt 41, 2. tv'), 'GeomaticAVMPrice'] = 2.73 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Jens Otto Krags Gade 15, 4. 1'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amerika Plads 3E, 1. th'), 'GeomaticAVMPrice'] = 4.42 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amerika Plads 3B, 4. th'), 'GeomaticAVMPrice'] = 5.02 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sallingvej 69, 2. th'), 'GeomaticAVMPrice'] = 1.94  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Adelgade 1, 3. 36'), 'GeomaticAVMPrice'] = 3.06  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bredgade 35A, 2.'), 'GeomaticAVMPrice'] = 17.47 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Hyltebjerg Allé 43, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Venøgade 26, 2. th'), 'GeomaticAVMPrice'] = 5.50 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Venøgade 26, 1. tv'), 'GeomaticAVMPrice'] = 4.02 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Venøgade 28, st. th'), 'GeomaticAVMPrice'] = 4.67 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 18, 2. 2'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 5. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 2. 2'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 8. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amerika Plads 3D, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Lille Kongensgade 4, 2. tv'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Adelgade 1, 3. 34'), 'GeomaticAVMPrice'] = 3.35 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Klaksvigsgade 14, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Dronningens Tværgade 6B, 4.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 7. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Vigerslevvej 35, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 18, 9. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Reventlowsgade 16, 3. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Victor Bendix Gade 20, 2. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Valby Langgade 39, 1. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Store Mølle Vej 17, st. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Horsensgade 6, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Robert Jacobsens Vej 6D'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Studiestræde 21, st.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Thoravej 14, 1. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Kristianiagade 22, 1. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Studiestræde 21, 4.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Hamletsgade 4, 2. 159'), 'GeomaticAVMPrice'] =  * 1000000
##df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 11. 1'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helga Larsens Plads 14, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 7. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Hessensgade 15, 2. 212'), 'GeomaticAVMPrice'] = 1.70 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 18, 4. 1'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Doris Lessings Vej 35, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Rådmandsgade 40C, 3. 113'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Thorshavnsgade 23, st. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Trelleborggade 14, st. mf'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Malmøgade 7, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 6. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'C.F. Møllers Allé 30, 7. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Kenny Drews Vej 75, st.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Klerkegade 2F, st. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 7. 1'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Kildevældsgade 56, 1.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Livjægergade 41, 3. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 18, 10. 3'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Hyltebjerg Allé 43, 1. th'), 'GeomaticAVMPrice'] = 2.60  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Doris Lessings Vej 35, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Edvard Thomsens Vej 87, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 8. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 9. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Doris Lessings Vej 35, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Doris Lessings Vej 31, 1. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Helsinkigade 20, 5. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Robert Jacobsens Vej 22, 4. mf'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Doris Lessings Vej 33, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bjelkes Allé 34, 1. 25'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sankt Petersborg Plads 4, 4. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Aggersborggade 9, 4. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sortedam Dossering 65B, 1.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Nordre Frihavnsgade 20, 5.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 61, st.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 47'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 63, 1.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 69, 3.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 37'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 69, 1.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 65, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Gothersgade 149, 2.'), 'GeomaticAVMPrice'] = 5.93 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Islands Brygge 75B, 3. 1'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sonnerupvej 78B'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sønder Boulevard 53, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Lily Brobergs Vej 55, 3. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Ørestads Boulevard 57A, 6. 602'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Rigensgade 9E, 1. th'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Lundedalsvej 36, 1. 2'), 'GeomaticAVMPrice'] = 4.11 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 65'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Trelleborggade 15, 2. 7'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 81D'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 79D'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amagerfælledvej 35, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 83C'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bohrsgade 2, 20. 2'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Bohrsgade 2, 29. 3'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Lundsfrydvej 9B, 1. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Sundkaj 85, st. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amagerfælledvej 166, 6. 4'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 28, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Rigensgade 9B, 2. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Stockholmsgade 59, st. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Stockholmsgade 61, 1. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Stockholmsgade 59, 1. mf'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Krudtløbsvej 83A'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Stockholmsgade 59, st. mf'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Trelleborggade 15, 4. 1'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Korsørgade 38, 5. th'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Skovbogårds Allé 11A, 1.'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Venøgade 26, 4. tv'), 'GeomaticAVMPrice'] = 3.81 * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Amerika Plads 3C, 3. tv'), 'GeomaticAVMPrice'] = 4.56 * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Fenrisgade 10, 5.'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Nørregade 37B, 3. tv'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Strandpromenaden 43'), 'GeomaticAVMPrice'] =  * 1000000
#df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Ørestads Boulevard 61A, 5. tv'), 'GeomaticAVMPrice'] =  * 1000000
df.loc[(df['GeomaticAVMPrice'].isnull()) & (df['Address'] == 'Mozartsvej 24, st. th'), 'GeomaticAVMPrice'] = 2.20 * 1000000


##################################################
#############           New          #############
##################################################
df['SquareMetrePrice'] = df.SalePrice / df.WeightedFloorArea
bins = [0, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 75000, 200000]
names = ['<15000','15000-20000','20000-25000','25000-30000', '30000-35000','35000-40000','40000-45000','45000-50000','50000-75000', '75000-100000']
d = dict(enumerate(names,1))
df['SquareMetrePrice_range'] = np.vectorize(d.get)(np.digitize(df['SquareMetrePrice'],bins))
df['GeomaticAVMPricePerMetre'] = df.GeomaticAVMPrice / df.WeightedFloorArea
df['GeomaticAVMPricePerMetre_range'] = np.vectorize(d.get)(np.digitize(df['GeomaticAVMPricePerMetre'],bins))
df['SaleDate'] = pd.to_datetime(df['SaleDate'],format='%Y-%m-%d', errors='coerce')
df['OfferingEnd'] = pd.to_datetime(df['OfferingEnd'],format='%Y-%m-%d', errors='coerce')
df['OfferingStart'] = pd.to_datetime(df['OfferingStart'],format='%Y-%m-%d', errors='coerce')
df['Quarter'] = df['SaleDate'].dt.quarter
df['Year'] = df['SaleDate'].dt.year
df['YearQuarter'] = df['Year'].map(str)+ 'Q' +df['Quarter'].map(str)
df['TurnoverTime'] = (df.OfferingEnd - df.OfferingStart)
df['PriceReduction'] = ((df.AskingPrice / df.SalePrice)-1)*100


print(missing_percentage(df))

df.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


                          Total  Percent
GeomaticAVMPricePerMetre    102     0.28
GeomaticAVMPrice            102     0.28
LandValue                    63     0.17
TaxableValue                 63     0.17
ValDate                      63     0.17


(36492, 53)

In [30]:
# Evaluating the missing values
print(tabulate(missing_percentage(df), tablefmt="latex", floatfmt=".2f"))

\begin{tabular}{lrr}
\hline
 GeomaticAVMPricePerMetre & 102.00 & 0.28 \\
 GeomaticAVMPrice         & 102.00 & 0.28 \\
 LandValue                &  63.00 & 0.17 \\
 TaxableValue             &  63.00 & 0.17 \\
 ValDate                  &  63.00 & 0.17 \\
\hline
\end{tabular}


In [120]:
##########################################
#### Save data without missing values ####
##########################################

# GEM DATA
#df.to_csv('Endeligt data/Boligsiden&DinGeo_full.csv', index=False)

# LOAD DATA
df = pd.read_csv("Endeligt data/Boligsiden&DinGeo_full.csv") 
df.isnull().sum(axis=0)
print(df.shape)

with pd.option_context('display.max_colwidth', -1): 
    display(HTML(df.head(250).to_html()))

(36492, 53)


Unnamed: 0,Address,AskingPrice,BasementArea,Bathrooms,BurglaryRisk,CloudburstRisk,EnergyLabel,FloorAreaBuilding,FloorsTotal,GeomaticAVMPrice,HeatSource,Kitchen,LandValue,LargestParty,Latitude,Levels,LinkBoligsiden,LinkGeo,Longitude,MASL,NoiseMeasurement,OfferingEnd,OfferingStart,OuterWallMat,RadonRisk,Roofing,Rooms,SaleDate,SalePrice,TaxableValue,Time,Toilets,TotalFloorArea,Type,TypeHouse,ValDate,VoteDistrict,VoterTurnout,WeightedFloorArea,WorthPreserving,YearBuilt,YearRemodAdd,ZipCity,PriceReduction,HasMultipleLevels,SquareMetrePrice,SquareMetrePrice_range,GeomaticAVMPricePerMetre,GeomaticAVMPricePerMetre_range,Quarter,Year,YearQuarter,TurnoverTime
0,"Bentzonsvej 37, 2. tv",3395000.0,0.0,1.0,lav,kan være risiko,D,685.0,5.0,3402740.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),157200.0,venstre,55.685782,1.0,https://www.boligsiden.dk/boligen/147-8570,https://www.dingeo.dk/adresse/2000-Frederiksberg/Bentzonsvej-37/2-tv,12.529234,12.1,Ingen trafikstøj,2017-04-06,2017-03-20,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,3.0,2017-03-25,3395000.0,1550000.0,20-03-2017 - 06-04-2017,1.0,67.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,67.0,Mangler,1902.0,1902.0,2000 Frederiksberg,0.0,0,50671.641791,50000-75000,50787.164179,50000-75000,1,2017,2017Q1,17 days 00:00:00.000000000
1,"Lange-Müllers Gade 27, 1. th",2395000.0,0.0,1.0,lav,kan være risiko,Mangler,3366.0,5.0,2927600.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),106900.0,radikale,55.712541,1.0,https://www.boligsiden.dk/boligen/101-336123,https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/,12.566848,11.4,55-60 dB,2016-08-29,2016-08-01,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2016-08-25,2250000.0,1300000.0,01-08-2016 - 29-08-2016,1.0,65.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,65.0,Mangler,1932.0,1932.0,2100 København Ø,6.444444,0,34615.384615,30000-35000,45040.0,45000-50000,3,2016,2016Q3,28 days 00:00:00.000000000
2,"Amsterdamvej 3, 3.",3495000.0,0.0,1.0,lav,kan være risiko,Mangler,1316.0,3.0,4775417.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),144300.0,socialdemokratiet,55.64873,1.0,https://www.boligsiden.dk/boligen/101-24059,https://www.dingeo.dk/adresse/2300-København-S/Amsterdamvej-3/3,12.61399,4.8,Mangler,2016-11-01,2016-07-10,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,6.0,2016-10-07,3400000.0,2050000.0,10-07-2016 - 01-11-2016,1.0,123.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,4. Syd,83.0,123.0,Mangler,1929.0,1929.0,2300 København S,2.794118,0,27642.276423,25000-30000,38824.528455,35000-40000,4,2016,2016Q4,114 days 00:00:00.000000000
3,"Langelandsvej 20B, 1. tv",4295000.0,0.0,1.0,lav,kan være risiko,D,1791.0,5.0,4292535.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),169300.0,venstre,55.684792,1.0,https://www.boligsiden.dk/boligen/147-71132,https://www.dingeo.dk/adresse/2000-Frederiksberg/Langelandsvej-20B/1-tv,12.530195,12.5,60-65 dB,2018-08-29,2018-08-01,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Fibercement, herunder asbest (bølge- eller skifer-eternit)",4.0,2018-08-27,4295000.0,1950000.0,01-08-2018 - 29-08-2018,1.0,83.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Nyelandsvej",89.2,83.0,Mangler,1908.0,1908.0,2000 Frederiksberg,0.0,0,51746.987952,50000-75000,51717.289157,50000-75000,3,2018,2018Q3,28 days 00:00:00.000000000
4,"Porcelænshaven 5G, st. tv",5395000.0,0.0,2.0,lav,kan være risiko,C,5760.0,5.0,4187106.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),405500.0,venstre,55.677771,1.0,https://www.boligsiden.dk/boligen/147-257946,https://www.dingeo.dk/adresse/2000-Frederiksberg/Porcelænshaven-5G/st-tv,12.523563,14.5,55-60 dB,2014-07-12,2014-05-22,"Mursten (tegl, kalksten, cementsten)",Meget lav,"Metalplader (bølgeblik, aluminium, o.lign.)",3.0,2014-08-26,5395000.0,2500000.0,22-05-2014 - 12-07-2014,2.0,99.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"11. Kreds, Rådhuset",84.4,99.0,Mangler,2006.0,2006.0,2000 Frederiksberg,0.0,0,54494.949495,50000-75000,42294.0,40000-45000,3,2014,2014Q3,51 days 00:00:00.000000000
5,"Lange-Müllers Gade 20, st. th",3245000.0,0.0,1.0,lav,kan være risiko,Mangler,4537.0,5.0,3229777.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),120400.0,radikale,55.712375,1.0,https://www.boligsiden.dk/boligen/101-335534,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-th/,12.567279,11.6,Ingen trafikstøj,2018-07-08,2018-07-05,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,3.0,2018-07-08,3200000.0,1600000.0,05-07-2018 - 08-07-2018,1.0,88.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,88.0,Mangler,1926.0,1926.0,2100 København Ø,1.40625,0,36363.636364,35000-40000,36702.011364,35000-40000,3,2018,2018Q3,3 days 00:00:00.000000000
6,"Lange-Müllers Gade 22, 3. th",3420000.0,0.0,1.0,lav,kan være risiko,Mangler,4537.0,5.0,3929067.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),120400.0,radikale,55.712548,1.0,https://www.boligsiden.dk/boligen/101-335828,https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/3-th,12.567231,11.5,Ingen trafikstøj,2016-06-02,2016-01-20,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,3.0,2016-03-26,3400000.0,1750000.0,20-01-2016 - 02-06-2016,1.0,87.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,1. Nordvest,83.0,87.0,Mangler,1926.0,1926.0,2100 København Ø,0.588235,0,39080.45977,35000-40000,45161.689655,45000-50000,1,2016,2016Q1,134 days 00:00:00.000000000
7,"Rumæniensgade 12, 5. th",1595000.0,0.0,1.0,lav,kan være risiko,C,3263.0,5.0,2163203.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),112600.0,socialdemokratiet,55.659712,1.0,https://www.boligsiden.dk/boligen/101-472701,https://www.dingeo.dk/adresse/2300-København-S/Rumæniensgade-12/5-th,12.612983,4.0,Ingen trafikstøj,2014-03-04,2014-02-24,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,2.0,2014-03-02,1600000.0,940000.0,24-02-2014 - 04-03-2014,1.0,53.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,4. Sundbyøster,81.4,53.0,Mangler,1935.0,1935.0,2300 København S,-0.3125,0,30188.679245,30000-35000,40815.150943,40000-45000,1,2014,2014Q1,8 days 00:00:00.000000000
8,"Vesterbrogade 188, 1. th",4580000.0,0.0,1.0,lav,kan være risiko,B,1421.0,7.0,4197590.0,Fjernvarme/blokvarme (radiatorsystemer el. varmluftanlæg),Eget køkken (med afløb og kogeinstallation),669900.0,venstre,55.670691,1.0,https://www.boligsiden.dk/boligen/147-259072,https://www.dingeo.dk/adresse/1800-Frederiksberg-C/Vesterbrogade-188/1-th,12.534636,12.4,65-70 dB,2018-06-11,2018-06-07,"Mursten (tegl, kalksten, cementsten)",Meget lav,Built-up,2.0,2018-06-05,4200000.0,2700000.0,07-06-2018 - 11-06-2018,1.0,91.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,"10. Kreds, Ny Hollænder",89.9,91.0,Mangler,2014.0,2014.0,1800 Frederiksberg C,9.047619,0,46153.846154,45000-50000,46127.362637,45000-50000,2,2018,2018Q2,4 days 00:00:00.000000000
9,"Sankt Peders Stræde 18, 2. th",2695000.0,0.0,1.0,lav,kan være risiko,Mangler,684.0,4.0,4213507.0,"Elovne, elpaneler",Eget køkken (med afløb og kogeinstallation),264700.0,radikale,55.679088,1.0,https://www.boligsiden.dk/boligen/101-488020,https://www.dingeo.dk/adresse/1453-København-K/Sankt-Peders-Stræde-18/2-th,12.568494,8.6,60-65 dB,2009-04-21,2009-01-18,"Mursten (tegl, kalksten, cementsten)",Meget lav,Tegl,3.0,2014-03-02,3100000.0,1800000.0,18-01-2009 - 21-04-2009,1.0,79.0,Ejerlejlighed,"Bolig i etageejendom, flerfamiliehus eller to-familiehus",2018-01-10,3. Indre By,76.9,79.0,Mangler,1798.0,1798.0,1453 København K,-13.064516,0,39240.506329,35000-40000,53335.531646,50000-75000,1,2014,2014Q1,93 days 00:00:00.000000000


### Categorical variables - inspection

In [34]:
##################################################
#### Inspection of the categorical variables #####
##################################################

df = pd.read_csv("Endeligt data/Boligsiden&DinGeo_full.csv") 

##################################################
#####               Bathrooms                #####
##################################################
#print(df['Bathrooms'].value_counts())
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.Bathrooms == -1].to_html()))

# 5 cases where df.Bathrooms == -1
df.loc[(df.Bathrooms == -1) & (df['Address'] == 'Sortedam Dossering 65B, st.'), 'Bathrooms'] = 2
df.loc[(df.Bathrooms == -1) & (df['Address'] == 'Nordre Digevej 43'), 'Bathrooms'] = 2
df.loc[(df.Bathrooms == -1) & (df['Address'] == 'Kløverbladsgade 46, 2.'), 'Bathrooms'] = 2
df.loc[(df.Bathrooms == -1) & (df['Address'] == 'Trondhjemsgade 3, 1. th'), 'Bathrooms'] = 3
df.loc[(df.Bathrooms == -1) & (df['Address'] == 'Rodosvej 60, 1.'), 'Bathrooms'] = 1
#df.Bathrooms.astype('category')
ordered_Bathrooms = list(range(0, 5))
df['Bathrooms'] = df.Bathrooms.astype('category').cat.set_categories(ordered_Bathrooms, ordered = True)
df['Bathrooms'].value_counts()
# NB: many houses with 0 bathrooms
#df.loc[(df.Bathrooms == 0) & (df['TotalFloorArea'] > 80)] 


##################################################
#####            BurglaryRisk                #####
##################################################
#df['BurglaryRisk'].value_counts()
ordered_BurglaryRisk = ['lav','mellem','høj','meget høj']
df['BurglaryRisk'] = df.BurglaryRisk.astype('category').cat.set_categories(ordered_BurglaryRisk, ordered = True)
df['BurglaryRisk'].value_counts()


##################################################
#####             CloudburstRisk             #####
##################################################
df['CloudburstRisk'].value_counts()
ordered_CloudburstRisk = ['er lav risiko','kan være risiko','er høj risiko']
df['CloudburstRisk'] = df.CloudburstRisk.astype('category').cat.set_categories(ordered_CloudburstRisk, ordered = True)
df['CloudburstRisk'].value_counts()

##################################################
#####               EnergyLabel              #####
##################################################
#df.EnergyLabel.loc[df.EnergyLabel == 'Mangler'] = np.nan
#ordered_EnergyLabel = ['0','2','5','A','B','C','D','E','F','G']
#df['EnergyLabel'] = df.EnergyLabel.astype('category').cat.set_categories(ordered_EnergyLabel, ordered = True)
#df['EnergyLabel'].value_counts()


##################################################
#####               FloorsTotal              #####
##################################################
df['FloorsTotal'].value_counts()
df.loc[df.FloorsTotal == 65, 'FloorsTotal'] = 5
df.loc[df.FloorsTotal == 23, 'FloorsTotal'] = 10
df.loc[df.FloorsTotal == 0, 'FloorsTotal'] = 1
df['FloorsTotal'].value_counts()
ordered_FloorsTotal = list(range(1, 31))
df['FloorsTotal'] = df.FloorsTotal.astype('category').cat.set_categories(ordered_FloorsTotal, ordered = True)
df['FloorsTotal'].value_counts()

##################################################
#####                HeatSource              #####
##################################################
df['HeatSource'].value_counts()
df['HeatSource'] = df.HeatSource.astype('category')

##################################################
#####           HasMultipleLevels            #####
##################################################
df['HasMultipleLevels'] = df.HasMultipleLevels.astype('category')


##################################################
#####                 Kitchen                #####
##################################################
df['Kitchen'].value_counts()
ordered_Kitchen= ['Eget køkken (med afløb og kogeinstallation)',
                  'Fast kogeinstallation i værelse eller på gang',
                  'Adgang til fælles køkken',
                  'Ingen fast kogeinstallation']
df['Kitchen'] = df.Kitchen.astype('category').cat.set_categories(ordered_Kitchen, ordered = True)
df['Kitchen'].value_counts()


##################################################
#####             LargestParty               #####
##################################################
df['LargestParty'].value_counts()
ordered_LargestParty = ['enhedslisten', 'socialdemokratiet', 'radikale', 'venstre']
df['LargestParty'] = df.LargestParty.astype('category').cat.set_categories(ordered_LargestParty, ordered = True)
df['LargestParty'].value_counts()


##################################################
#####                 Levels                 #####
##################################################
df['Levels'].value_counts()
ordered_Levels = [1,2,3]
df['Levels'] = df.Levels.astype('category').cat.set_categories(ordered_Levels, ordered = True)
df['Levels'].value_counts()


##################################################
#####             NoiseMeasurement           #####
##################################################
df['NoiseMeasurement'].value_counts()
# 40 x "Vi mangler desværre at indsamle trafikstøj på adressen."
# -> antag: ingen støj
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.NoiseMeasurement == 'Mangler'].to_html()))
df.NoiseMeasurement.loc[df.NoiseMeasurement == 'Mangler'] = 'Ingen trafikstøj'
ordered_NoiseMeasurement = ['Ingen trafikstøj','55-60 dB','60-65 dB','65-70 dB','70-75 dB','over 75 dB']
df['NoiseMeasurement'] = df.NoiseMeasurement.astype('category').cat.set_categories(ordered_NoiseMeasurement, ordered = True)
df['NoiseMeasurement'].value_counts()


##################################################
#####               OuterWallMat             #####
##################################################
df['OuterWallMat'].value_counts()
df['OuterWallMat'] = df.OuterWallMat.astype('category')
df['OuterWallMat'].value_counts()

##################################################
#####               RadonRisk                #####
##################################################
df['RadonRisk'].value_counts()
ordered_RadonRisk = ['Meget lav','lav','medium','høj','meget høj']
df['RadonRisk'] = df.RadonRisk.astype('category').cat.set_categories(ordered_RadonRisk, ordered = True)
df['RadonRisk'].value_counts()


##################################################
#####                 Roofing                #####
##################################################
df['Roofing'].value_counts()
df['Roofing'] = df.Roofing.astype('category')


##################################################
#####                 Rooms                  #####
##################################################
df['Rooms'].value_counts()
df.loc[(df.Rooms == -1) & (df['Address'] == 'Astrupvej 40'), 'Rooms'] = 4
df.loc[(df.Rooms == -2) & (df['Address'] == 'Kildeløbet 6'), 'Rooms'] = 7
df.loc[(df.Rooms == -4) & (df['Address'] == 'Italiensvej 88'), 'Rooms'] = 11
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.Rooms == 0].to_html()))
df.loc[(df.Rooms == 0) & (df['Address'] == 'Tybjergvej 8'), 'Rooms'] = 7
df.loc[(df.Rooms == 0) & (df['Address'] == 'Risvangen 3'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Dronning Elisabeths Allé 5'), 'Rooms'] = 9
df.loc[(df.Rooms == 0) & (df['Address'] == 'Krogebjerg 39'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bækkeskovvej 18'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bjørnsonsvej 17'), 'Rooms'] = 3
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bulbjergvej 23'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Mosesvinget 70'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Skensvedvej 41'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Kongovej 9'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bavnevangen 13'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Skolevangen 14'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bakkekammen 21'), 'Rooms'] = 7
df.loc[(df.Rooms == 0) & (df['Address'] == 'Tryggevældevej 14'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Brunevang 8'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Nøddehaven 48'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Per Døvers Vej 5'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Olufsvej 38'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Clematisvej 5'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Wiedeweltsgade 45'), 'Rooms'] = 6
df.loc[(df.Rooms == 0) & (df['Address'] == 'Küchlersgade 14'), 'Rooms'] = 10
df.loc[(df.Rooms == 0) & (df['Address'] == 'Grøndalsvænge Allé 5'), 'Rooms'] = 8
df.loc[(df.Rooms == 0) & (df['Address'] == 'Bækkeskovvej 46'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Portlandsvej 19'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Svend Dyrings Vej 8'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Olufsvej 12'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Vejlands Allé 17'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Risvangen 29'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Æginavej 21'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Langagervej 28'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Hellelidenvej 10'), 'Rooms'] = 3
df.loc[(df.Rooms == 0) & (df['Address'] == 'Rødtjørnevej 7'), 'Rooms'] = 5
df.loc[(df.Rooms == 0) & (df['Address'] == 'Iranvej 12B'), 'Rooms'] = 3
df.loc[(df.Rooms == 0) & (df['Address'] == 'Vestervang 65'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Højlandsvangen 51'), 'Rooms'] = 4
df.loc[(df.Rooms == 0) & (df['Address'] == 'Selsøvej 14'), 'Rooms'] = 6
ordered_Rooms = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,17]
df['Rooms'] = df.Rooms.astype('category').cat.set_categories(ordered_Rooms, ordered = True)
df['Rooms'].value_counts()


##################################################
#####                 Toilets                #####
##################################################
df['Toilets'].value_counts()
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.Toilets == -1].to_html()))
df.loc[(df.Toilets == -1) & (df['Address'] == 'Kristian Zahrtmanns Plads 81, 3. tv'), 'Toilets'] = 2
df.loc[(df.Toilets == -1) & (df['Address'] == 'Nordre Digevej 43'), 'Toilets'] = 2
df.loc[(df.Toilets == -1) & (df['Address'] == 'Kildebrøndevej 37'), 'Toilets'] = 2
df.loc[(df.Toilets == -1) & (df['Address'] == 'Kløverbladsgade 46, 2.'), 'Toilets'] = 2
df.loc[(df.Toilets == -1) & (df['Address'] == 'Rodosvej 60, 1.'), 'Toilets'] = 1
df['Toilets'].value_counts()
len(df.loc[(df.Toilets == -0) & (df['Bathrooms'] == 0), 'Bathrooms'])
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.Toilets == 22].to_html()))
ordered_Toilets = [0,1,2,3,4,5,22]
df['Toilets'] = df.Toilets.astype('category').cat.set_categories(ordered_Toilets, ordered = True)
df['Toilets'].value_counts()

    
##################################################
#####                  Type                  #####
##################################################
df.loc[df.Type == 'Andelsbolig', 'Type'] = 'Ejerlejlighed'
df['Type'] = df.Type.astype('category')
df['Type'].value_counts()


##################################################
#####               TypeHouse                #####
##################################################
df['TypeHouse'].value_counts()
# NB: 0 stands for "Uoplyst"
df.loc[df.TypeHouse == '0', 'TypeHouse'] = 'Uoplyst'
df['TypeHouse'] = df.TypeHouse.astype('category')
df['TypeHouse'].value_counts()


##################################################
#####               VoteDistrict             #####
##################################################
df['VoteDistrict'] = df['VoteDistrict'].astype('category')
df['VoteDistrict'].value_counts()


##################################################
#####            WorthPreserving             #####
##################################################
df['WorthPreserving'].value_counts()
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.loc[df.WorthPreserving == '0'].head(20).to_html()))
# Obs: Manglende bevaringsværdi indikerer naturligvis ikke noget om husets brugsværdi. Den manglende bevaringsværdi kan desuden skyldes at kommunen kun i mindre omfang har registreret bygningers bevaringsværdi.
df.WorthPreserving.loc[df.WorthPreserving == 'Mangler'] = '0'
df.WorthPreserving.loc[df.WorthPreserving == 'Ukendt'] = '0'
df['WorthPreserving'] = pd.to_numeric(df['WorthPreserving'])
#df['WorthPreserving'].value_counts()
ordered_WorthPreserving = [0, 9, 8, 7, 6, 5, 4, 3, 2, 1]
df['WorthPreserving'] = df.WorthPreserving.astype('category').cat.set_categories(ordered_WorthPreserving, ordered = True)
df['WorthPreserving'].value_counts()


##################################################
#####                ZipCity                 #####
##################################################
df['ZipCode'] = df['ZipCity'].astype(str).str[0:4]
df['City'] = df['ZipCity'].astype(str).str[5:]

df['City'].value_counts()
# Delete Rødovre (=5), Hellerup (=252), Kastrup (=13)
df = (df[df['City'] != 'Rødovre'])
df = (df[df['City'] != 'Hellerup'])
df = (df[df['City'] != 'Kastrup'])
df['City'] = df['City'].astype('category')
df['City'].value_counts()


df.isnull().sum(axis=0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Address                             0
AskingPrice                         0
BasementArea                        0
Bathrooms                           0
BurglaryRisk                        0
CloudburstRisk                      0
EnergyLabel                         0
FloorAreaBuilding                   0
FloorsTotal                         0
GeomaticAVMPrice                  102
HeatSource                          0
Kitchen                             0
LandValue                          63
LargestParty                        0
Latitude                            0
Levels                              0
LinkBoligsiden                      0
LinkGeo                             0
Longitude                           0
MASL                                0
NoiseMeasurement                    0
OfferingEnd                         0
OfferingStart                       0
OuterWallMat                        0
RadonRisk                           0
Roofing                             0
Rooms       

In [35]:
data_crosstab = pd.crosstab(df['TypeHouse'], 
                            df['Type'],  
                            margins = False) 
(data_crosstab) 

print(data_crosstab.to_latex(index=True))  
data_crosstab

\begin{tabular}{lrrr}
\toprule
Type &  Ejerlejlighed &  Rækkehus &  Villa \\
TypeHouse                                          &                &           &        \\
\midrule
Bolig i etageejendom, flerfamiliehus eller to-f... &          30992 &         0 &      0 \\
Dobbelthus                                         &              1 &        18 &      0 \\
Fritliggende enfamilieshus (parcelhus).            &             22 &         3 &   2892 \\
Række- og kædehus                                  &             28 &        72 &      0 \\
Række-, kæde- eller dobbelthus (lodret adskille... &            332 &      1374 &      1 \\
Uoplyst                                            &            412 &         8 &     67 \\
\bottomrule
\end{tabular}



Type,Ejerlejlighed,Rækkehus,Villa
TypeHouse,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bolig i etageejendom, flerfamiliehus eller to-familiehus",30992,0,0
Dobbelthus,1,18,0
Fritliggende enfamilieshus (parcelhus).,22,3,2892
Række- og kædehus,28,72,0
"Række-, kæde- eller dobbelthus (lodret adskillelse mellem enhederne).",332,1374,1
Uoplyst,412,8,67


In [36]:
print(df['EnergyLabel'].value_counts())


D          13342
C           7112
Mangler     6499
E           4017
B           2317
A           1277
F            991
G            350
5            169
0            140
2              8
Name: EnergyLabel, dtype: int64


In [37]:
# Missing Energylabel: 
# Det kan skyldes at ejendommen ikke endnu er energimærket, eller, at det tidligere energimærke ikke længere er gyldigt.
# Can also indicate 

df.sort_values(by=['Address'], inplace=True)

df.loc[(df['Address'].str.contains("Lange-Müllers Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Abildvang")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Admiralgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Agerbo 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Aggersvoldvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ahornskellet")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Ahrenkildes Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Aksel Møllers Have")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Albaniensgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Allegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Allersgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Amager Boulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] > 2010), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Amager Boulevard")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Amager Boulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2012), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Amager Boulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2013), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Amager Strandvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorsTotal'] == 14), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Amager Strandvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorsTotal'] == 3), 'EnergyLabel'] = '2'
df.loc[(df['Address'].str.contains("Amager Strandvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Amagerbrogade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Amagerfælledvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1915), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Amagerfælledvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2018), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Amagerfælledvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2019), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Amagergade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Amalie Skrams Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Amaliegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Amicisvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Amalievej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Amsterdamvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1929), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Amsterdamvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Andreas Bjørns Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1901), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ansgars Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Antwerpengade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Bispeengen")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bissensgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Bjelkes Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Askeskellet")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Asminderødgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Astrupvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Backersvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Badstuestræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Bardenflethsgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Bedfordvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Belfastvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Berggreensgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bernhard Bangs Alle")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Betty Nansens Alle")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Biens Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Billedvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Birkedommervej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Birkegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Birkeskellet")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bispebjerg Parkallé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Blankavej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bogholder Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Borgmester Christiansens Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Borups Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1913), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Borups Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1991), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Borups Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2014), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Borups Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1956), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Borups Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1921), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bjernedevej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Blegdamsvej 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Bodenhoffs Plads")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Boeslundevej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Bohlendachvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Boldhusgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Boltonvej 21B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Borgergade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Borghaven")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Borgmester Fischers Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Borgmester Jensens Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Borrebyvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Borups Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bramslykkevej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bratskovvej 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bredgade 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Bredgade 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Bredgade 51")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Breidablik Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Breidablik Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Breidablik Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Breidablik Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Brigadevej 20A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bristol Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bristol Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bristol Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Bristol Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bristol Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Brobergsgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Brolæggerstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Brydes Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bryggervangen")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 49C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Brønshøjholms Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Burmeistersgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Buster Larsens Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Byvangen 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Bådsmandsstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Bülowsvej 1,")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Bülowsvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Bülowsvej 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bülowsvej 5C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Bülowsvej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Bülowsvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 12,")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 2A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '2'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 2C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '2'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé 2D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '2'
df.loc[(df['Address'].str.contains("C.F. Møllers Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("C.F. Richs Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("C.J. Frandsens Vej 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Carit Etlars Vej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Carl Feilbergs Vej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Carl Langes Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2012), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Carl Langes Vej 55")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Carl Nielsens Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Carl Plougs Vej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Carl Th. Zahles Gade 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Carstensgade 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Christen Bergs Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Christian II's Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Christian Richardts Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 35")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 37")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Constantin Hansens Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Cæciliavej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dag Hammarskjölds Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dag Hammarskjölds Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dag Hammarskjölds Allé 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dag Hammarskjölds Allé 42D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dag Hammarskjölds Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dalføret")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dalmosevej 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Danas Plads 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Danasvej 2A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Danhaven 22A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Danneskiold-Samsøes Allé 34A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Danshøjvej 49")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 14B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 27B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Degnemose Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 2A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Degnemose Allé 69")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Degnemose Allé 72")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Degneæblevej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Delosvej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dirch Passers Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dirch Passers Allé 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dirch Passers Allé 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dirch Passers Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dirch Passers Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Doris Lessings Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Dronning Dagmars Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Dronning Dagmars Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronning Dagmars Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Dronning Dagmars Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Dronning Elisabeths Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Dronning Elisabeths Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronning Olgas Vej 18A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Dronningens Tværgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1956), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronningens Tværgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1957), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronningens Tværgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] < 1850), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dronningens Tværgade 37")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronningens Tværgade 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronningens Tværgade 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dronningens Tværgade 61")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dronningensgade 61")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Dronningensvej 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Drosselvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dublinvej 2A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Duevej 111A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Duevej 114")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Duevej 116")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Duevej 118")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Duevej 120A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Duevej 120B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dybbølsgade 43")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dybbølsgade 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dybendalsvej 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Dybendalsvej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Dybensgade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dybensgade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dybensgade 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dyrkøb 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Dyssevænget 29A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Dyssevænget 34B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Dyssevænget 61")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ebbe Rodes Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Edvard Glæsels Vej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Edvard Thomsens Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Egernvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Egernvej 75")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Egeskellet")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Eliasgade 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Ellebjergvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Enemærket 4A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Engdals Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Engdals Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Engdals Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Engdraget 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Englandsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1936), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Englodden 18B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Engskiftevej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Engsvinget 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Engvej 48A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Enigheds Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Enigheds Allé 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Enigheds Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Enigheds Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Enigheds Allé 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Enigheds Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Enigheds Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Eriksholmvej 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Esbern Snares Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1904), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Esbern Snares Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1900), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Esbern Snares Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1904), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Esplanaden 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Finsensvej 81")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Fiolstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Firkløvervej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fladstjernevej 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Flakholmen")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Flensborggade 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Flinterenden 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Floras Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Floras Allé 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Floras Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Florensvej 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Florensvej 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Flyhangargade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Forchhammersvej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frankrigsgade 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frankrigsgade 31")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frankrigshusene")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1936), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fredensborggade 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fredericiagade 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Fredericiagade 12A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Fredericiagade 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Frederiksberg Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2005), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Frederiksberg Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1888), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Frederiksborgvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Frederiksgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Frederiksgårds Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1934), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederikssundsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederikssundsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1952), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederiksgårds Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederiksgårds Allé 16B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederiksvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1904), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederiksvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1902), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Frilands Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frilands Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frilands Allé 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frilands Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Frilands Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Frilands Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Frimestervej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1937), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Frimestervej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1935), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frydendalsvej 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frederiksvej 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Frydendalsvej 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fuglsang Allé 110")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 112")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Fuglsang Allé 119")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fuglsang Allé 122")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 130")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 144")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 71")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fuglsang Allé 75")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fuglsang Allé 76")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Fuglsang Allé 79")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 80")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 85")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fuglsang Allé 87")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Fælleddiget")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1978), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Galionsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1774), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gammel Mønt")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gammel Strand 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gasværksvej 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Gaunøvej 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gerbrandsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Gernersgade 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gernersgade 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gimles Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1975), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Glentevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2014), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Godthåbsvej 102")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Godthåbsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938 ), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Godthåbsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1943 ), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Godthåbsvej 79B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gothersgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1883 ), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Grevingevej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Grundtvigsvej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gråbrødrestræde 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gråbrødretorv")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Gråbynkevej 1B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Grøndals Parkvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Grøndalsvej 57B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Grøndalsvænge Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Grøndalsvænge Allé 70")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Grønnegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Grønnehave Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Grønnehave Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Grønnehave Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Grønnehave Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Grønnehave Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Grønnehave Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Grønnemose Allé 13B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Grønnemose Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Grønningen 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Grønningen 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gudenåvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Guldborgvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gyldenløvesgade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Gærdebred 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Gærdebred 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Göteborg Plads")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2017), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 20A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 54A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 60A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 60B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.C. Ørsteds Vej 62")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.V. Nyholms Vej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.V. Nyholms Vej 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("H.V. Nyholms Vej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hagbardvej 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hagbardvej 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Halfdansgade 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Halfdansgade 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hallandsgade 10A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Halmtorvet")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2008), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Halmtorvet 29C,")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Hamletsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1947), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hamletsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1975 ), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hammelstrupvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2006), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hammelstrupvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1951), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hammerensgade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hannemanns Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2016), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Harespringet 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Harrestrup Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Harrestrup Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Harrestrup Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Harrestrup Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hauser Plads")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Havdrupvej 104")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Havdrupvej 112B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Havdrupvej 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Haydnsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Heimdalsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Heklas Allé 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Heklas Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Helgesensgade 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Helikonsvej 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hellasvej 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hellelidenvej 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Helsingborggade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1999), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Helsinkigade 6A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Henrik Steffens Vej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Henrik Steffens Vej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Herluf Trolles Gade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Herman Triers Plads 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hessensgade 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hessensgade 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hessensgade 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hilmar Baunsgaards Boulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2014), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Hilmar Baunsgaards Boulevard 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Himmerlandsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1936 ), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hindegade 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hindegade 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hjortholms Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hjortholms Allé 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hjortholms Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hjortholms Allé 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Holger Danskes Vej 106A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hollands Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hollands Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hollands Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hollands Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hollands Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Hollænderdybet")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1918), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Holmbladsgade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Holsteinsgade 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Horsekildevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1973), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Horsekildevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Horsekildevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1958), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hostrups Vænge 5A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Houmanns Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Howitzvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1941), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hulgårdsvej 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Husumvej 65")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hvedevej 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hvedevej 43")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Hvidkildevej 95")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Hyltebjerg Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hyltebjerg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1952), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Hyltebjerg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1976), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hyltebjerg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1967), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Hyskenstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Händelsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Håbets Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Håbets Allé 31A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Håbets Allé 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 58")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Håbets Allé 62")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Håbets Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Højbro Plads 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Højdevangs Allé 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Højdevangs Allé 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Højstrupvej 154")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ib Schønbergs Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2009), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Ilfordvej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Indiakaj")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1999), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Indiakaj")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1994), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ingolfs Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Ingolfs Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ingolfs Allé 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Ingolfs Allé 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ingolfs Allé 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Ib Schønbergs Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Islands Brygge")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2002), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Italiensvej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Italiensvej 83")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Islands Brygge")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2002), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("J.P.E. Hartmanns Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jagtvej 57")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jagtvej 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jakob Dannefærds Vej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jansvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jansvej 38B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Jeppes Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1974), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1905), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1907), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1975), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1929), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jernbane Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Johannes V. Jensens Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1979), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Johannes V. Jensens Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1971), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Julius Bloms Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1906), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Julius Bloms Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1908), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Julius Thomsens Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jydeholmen 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Jyllingevej 100")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Jyllingevej 108")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kabbelejevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1938), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kabbelejevej 14B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kabbelejevej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kagsvang 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kaktusvej 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Karensgade 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Karlstads Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kastanie Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kastanie Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kastanie Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kastelsvej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Kastrupvej 82")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1936), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kattesundet 14A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Kenny Drews Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2008), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kielgade 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Kildeløbet 59")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Kildevældsgade 74")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kastrupvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1936), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kilevej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 37")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 39B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 43")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kirkebjerg Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Kirkegårdsvej 44")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kirkegårdsvej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kjeldsgårdsvej 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Klerkegade 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Klingseyvej 30C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Klosterstræde")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] < 1830), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Knabrostræde")) & (df['EnergyLabel'] == 'Mangler') & (df['YearBuilt'] < 1840), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Knud Kristensens Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Kochsvej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kompagnistræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Kongedybs Allé 23A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Kongelundsvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Kongemarksvej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kongshaven 62")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kongshaven 78B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Kongshaven 9A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Korsager Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Korsager Allé 50")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Korsager Allé 61")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Korsager Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Korsager Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Korsørgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 6198), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Korsørgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 11317), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kretavej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Krimsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearBuilt'] == 2016), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Kronprinsessegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Kronprinsensgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Kronprinsensvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Krudtløbsvej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Krystalgade 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Kuhlausgade 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kuhlausgade 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kvintus Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearBuilt'] == 2006), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kvæsthusgade 3D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Kålagervej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kærholmen")) & (df['EnergyLabel'] == 'Mangler') & (df['YearBuilt'] == 1939), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kærskiftevej 139")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Kærskiftevej 187")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Købmagergade 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Küchlersgade 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Küchlersgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Laksegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Landemærket 53")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Langagervej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Kvintus Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Langagervej 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Langdraget")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1935), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Langelinie Allé 25A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Langelinie Allé 27A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Langelinie Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1997), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Langvaddam 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Larsbjørnsstræde 7A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Larsbjørnsstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Lauravej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1958), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lauravej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1932), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lavendelstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Lejrevej 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lemnosvej 28A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lersø Parkallé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1926), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lille Kirkestræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Lille Kongensgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Lille Strandstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Lily Brobergs Vej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Linde Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Linde Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lindebugten 53A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lindehaven 22B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lindeskellet 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lindeskellet 16, 2.")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lindevangs Alle 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lindholmsvej 4B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Lipkesgade 5B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Livjægergade 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Luftmarinegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Lundedalsvej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Lundedalsvej 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Lundedalsvej 35")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1870), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1872), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1989), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1935), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1903), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1850), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1904), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lykkesholms Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1890), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lyngbyvej 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lyngbyvej 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lyngbyvej 32E")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lyngbyvej 35")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lyngholmvej 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lyngvigvej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lyshøj Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lyshøjgårdsvej 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Lyøvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1979), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Læderstræde 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Læderstræde 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lærkeskellet 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Lærkeskellet 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lærkevej 15D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Læstedet 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Løgstørgade 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Løgstørgade 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Løgstørgade 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Løgstørgade 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Lønborg Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Lønborg Allé 3B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Lønborg Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Løngangstræde 21F")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Løngangstræde 21K")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Løvetandsvej 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Madvigs Alle 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Magstræde 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Magstræde 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Magstræde 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Magstræde 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Magstræde 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Margretheholmsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] > 2014), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Maribovej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Marielystvej 4B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Markskellet")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3017), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Markskellet")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 8784), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Markskiftevej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Marstalsgade 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Martensens Alle 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Martensens Alle 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Marthagade 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Marthagade 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Marthagade 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Marthagade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Mathildevej 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Mellemforts Allé")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Merløsevej 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Mikkel Skovs Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Mindstrupvej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Mindstrupvej 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Monrads Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Monrads Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Monrads Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Monrads Allé 17A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Monrads Allé 17B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Monrads Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Monrads Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Monrads Allé 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Monrads Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Monrads Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Mosesvinget 64")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Mozartsvej 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Muldager 56")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Munkensvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 13999), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Myggenæsgade 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Mølle Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1003), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Mølle Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("N.J. Fjords Alle 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Naboløs 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Naboløs 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Naboløs 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nakskovvej 99")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Nakskovvej")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nebbegårdsbakken 49")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nebraskavej 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Nedertoften")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1692), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Niels Hemmingsens Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Niels Neergaards Gade 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Nikolaj Plads 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nikolajgade 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nimbusparken")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2155), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Mølle Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nordfeldvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nordre Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nordre Frihavnsgade 19A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Nordre Frihavnsgade 57")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Norsvej 1A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Norsvej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nordre Fasanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1670), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nordre Fasanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 13999), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nordre Fasanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 700), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nordre Fasanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 705), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nordre Fasanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3060), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Ny Carlsberg Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2389), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Ny Carlsberg Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 154), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Ny Kongensgade 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nybrogade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nybrogade 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nyelandsvej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nyelandsvej 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nyelandsvej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nyelandsvej 77A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nyhavn")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nysøvej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nyvej 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Næsbyholmvej 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nøddehaven 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nørre Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2575), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Nørre Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1730), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nørre Sideallé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1989), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nørre Voldgade 102")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Nørrebrogade 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Nørregade 37B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nørretofte Allé 11A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Nørretofte Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nørretofte Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Nørretofte Allé 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Nørretofte Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Nørretofte Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Nørretofte Allé 6A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Obdams Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Obdams Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Obdams Allé 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Odensegade 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Orla Lehmanns Vej 12B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Oscar Ellingers Vej 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Oscar Pettifords Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 10258), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ove Billes Vej 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Overbys Allé 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Overbys Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Overbys Allé 1A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Overbys Allé 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Overbys Allé 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Overbys Allé 43")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Overbys Allé 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Overgaden Neden Vandet 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Overgaden Neden Vandet 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Neden Vandet 49A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Neden Vandet 51A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Oven Vandet 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Oven Vandet 52")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Oven Vandet 54")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Overgaden Oven Vandet 56")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Oxford Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 3B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Oxford Allé 4A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 58")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Oxford Allé 59")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Oxford Allé 68")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Oxford Allé 69")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 74")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 81A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Oxford Allé 83A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Oxford Allé 84")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Oxford Allé 91")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Oxford Allé 94")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Oxford Allé 95A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Allé 95B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Oxford Have 219")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("P.D. Løvs Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1280), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("P.D. Løvs Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1861), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Paludan Müllers Vej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Panumsvej 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Paradisæblevej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 9244), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Parmagade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2085), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Parmagade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5443), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Parmagade 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Peder Hvitfeldts Stræde 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Persiensvej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Persillevej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2015), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Peter Bangs Vej 276")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Peter Fabers Gade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1158), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Peter Ipsens Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1995), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Pile Alle")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5676), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Platanskellet 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Platanskellet 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Platanskellet 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Platanskellet 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Platanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2289), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Platanvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4420), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Poppelgade 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Poppelgade 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Poppelgade 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Peter Ipsens Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Poppelgade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Poppelskellet 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Porcelænshaven 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Portlandsvej 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Portlandsvej 31A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Poul Bundgaards Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 6124), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Poul Reumerts Vej 39")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Prags Boulevard 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Pragtstjernevej 10B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Prinsessegade 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Prisholmvej 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Præstegårds Allé 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Præstegårds Allé 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Præstegårds Allé 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Præstegårds Allé 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 54")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 55A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Præstegårds Allé 58")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 61")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Præstegårds Allé 63B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Prøvestens Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Rahbeks Alle 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Rahbeks Alle 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Rahbeks Alle 2C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rahbeks Alle 2D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rahbeks Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Rathsacksvej 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Reberbanegade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4185), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rentemestervej 38C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Retortvej 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Reventlowsgade 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Reventlowsgade 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Richard Mortensens Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2011), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Richard Mortensens Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2018), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Richard Mortensens Vej 6A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Rigensgade 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Robert Jacobsens Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2016), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Rolfsvej 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rosbæksvej 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Roselillevej 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Roselillevej 84A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Rosenborggade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Rosenholms Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenholms Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Rosenholms Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenholms Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenholms Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenholms Allé 50")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenholms Allé 54")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rosenlunds Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 37A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Allé 5B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Hovedvej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Rosenvængets Sideallé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenvængets Sideallé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rosenørns Alle 64")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Roshagevej 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Rossinisvej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rubinolavej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2018), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Rudolph Berghs Gade 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rued Langgaards Vej 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rundholtsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2015), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Rundholtsvej 69")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Rydsletten")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ryesgade 32B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rysensteensgade 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rådhusstræde 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rådhusstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Røddinggade 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rømersgade 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rønnegade 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rønnegade 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Rørholmsgade 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Rørholmsgade 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Sankt Annæ Gade 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Annæ Gade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Annæ Plads 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Sankt Gertruds Stræde 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Gertruds Stræde 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Gertruds Stræde 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Jørgens Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Sankt Kjelds Plads 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Sankt Knuds Vej 23B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Sankt Knuds Vej 27B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Sankt Knuds Vej 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Sankt Knuds Vej 8B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Sankt Pauls Gade 66")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sankt Peders Stræde 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Peders Stræde 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Peders Stræde 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sankt Thomas Alle 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Sankt Thomas Alle 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Saxhøjvej 37")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Schubertsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1974), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Schønbergsgade 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Selsøvej 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Selveje Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Sigynsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 11810), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Selveje Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Silkeborggade 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Silkeborggade 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sjælør Boulevard 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skaffervej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Skindergade 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Skindergade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Skindergade 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Skindergade 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Skindergade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Skipper Clements Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 22A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 43B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 49")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skjulhøj Allé 77")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skovbogårds Allé 11A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Skovbogårds Allé 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Skovløbervej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Skovløbervej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Skyttegårdvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2295), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Slotsherrensvej 114")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Slotsherrensvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1856), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Smallegade 26B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Snaregade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Snertingevej 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Snorresgade 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Snorresgade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Solsortvej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Soltoftevej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Sorgenfrigade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2285), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sorrentovej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Sortedam Dossering 41D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Spanagervej 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Spartavej 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sprogøvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 6490), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sprogøvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5203), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Steen Blichers Vej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Steen Blichers Vej 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Steenbergsvej 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Steenwinkelsvej 4B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Stenhuggervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2745), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Stenlandsvej 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Stenlandsvej 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Stenlandsvej 3A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Stockflethsvej 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Store Kannikestræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Store Kongensgade 116")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Store Kongensgade 53")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Store Kongensgade 67A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Store Kongensgade 69")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Store Kongensgade 75D")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Store Kongensgade 79")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Store Kongensgade 90A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Store Kongensgade 96")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Store Strandstræde 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Stormgade 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Stormgade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Stradellasvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2560), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandgade 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 24A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 44")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandgade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Strandlodsvej 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Strandlodsvej 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandpromenaden 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Strandboulevarden")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 848), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandboulevarden")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1070), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandboulevarden")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3408), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandboulevarden")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1070), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandboulevarden")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2175), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Strandvejen 59")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Strandvejen 69")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Stratfordvej 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Stubmøllevej 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Studiestræde 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Studiestræde 35")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sturlasgade 12G")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Støvnæs Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Sumatravej 57")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Sundevedsgade 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sundholmsvej 67")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sundkaj")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1484), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Sundkaj")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 135), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Sundkaj")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5526), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Sundkaj")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1194), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Svanholmsvej 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Svankærvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Svend Dyrings Vej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Svend Trøsts Vej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Sværtegade 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sylviavej 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Syriensvej 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Søflygade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] > 2015 ), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Søllerødgade 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sølvgade 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Sølvgade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Sølvgade 92A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 12B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Søndervangs Allé 13")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Søndervangs Allé 16A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Søndervangs Allé 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Søndervangs Allé 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Søndervangs Allé 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Søndervangs Allé 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 44")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Søndervangs Allé 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 52B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Søndervangs Allé 57")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 60")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Søndervangs Allé 62")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndervangs Allé 63")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Søndervangs Allé 70")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Søndervangs Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndre Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndre Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Søndre Allé 28")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndre Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Søndre Fasanvej 87")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Teglbrændervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2745), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Teglbrændervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1763), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Teglbrændervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1350), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Teglbrændervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1368), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Teglgårdstræde")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Teglholm Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 9864), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Teglholm Tværvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3224), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Teglholm Tværvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4335), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Sundkaj 65")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Telemarksgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tesdorpfsvej 31A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tesdorpfsvej 31B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Theklavej 49")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Thingvalla Allé 10")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Thingvalla Allé 1")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Thingvalla Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Thingvalla Allé 24")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Thingvalla Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Thingvalla Allé 34")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Thorsgade 56B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Thorsmindevej 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Thorupgård Allé 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Thorupgård Allé 31")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Thyrasgade 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tingskrivervej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5039), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tirsbækvej 18A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tjørnelunds Allé 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tjørnelunds Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tjørneskellet 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tjørneskellet 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tjørneskellet 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tjørneskellet 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Toftebakkevej 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Toftegårds Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 16110), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Toldbodgade 36")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Toldbodgade 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Toldbodgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Tom Kristensens Vej 157")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Tom Kristensens Vej 165")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Tom Kristensens Vej 171")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Tomatvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 2015), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Torben Oxes Allé 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Torvegade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Tove Maës Vej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 6124), 'EnergyLabel'] = '5'
df.loc[(df['Address'].str.contains("Tovelillevej 35B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Traps Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Traps Allé 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Traps Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Trekronergade")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1924), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Trelleborggade 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Trelleborggade 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Troels-Lunds Vej 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Troels-Lunds Vej 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tschernings Allé 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Tschernings Allé 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Tschernings Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tybjergvej 81")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Tybjergvej 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Tyborøn Allé 10B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Tyborøn Allé 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tyborøn Allé 66")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tyborøn Allé 74")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tyborøn Allé 86B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Tyborøn Allé 88")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tyborøn Allé 89B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Tyborøn Allé 94")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tycho Brahes Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 970), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tycho Brahes Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 965), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Tycho Brahes Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 968), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tycho Brahes Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1184), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Tycho Brahes Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 580), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Tønnesvej 47")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ulrik Birchs Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Ulrik Birchs Allé 41")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Ulrik Birchs Allé 42")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Ulrik Birchs Allé 56")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Ulrik Birchs Allé 58")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Under Elmene 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Valborg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1498), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Valborg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 6463), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Valborg Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1943), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Valby Langgade 48")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Valbygårdsvej 40")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Valkendorfsgade 32")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Vandkunsten 13A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vangs Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vangs Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vangs Allé 26")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vangs Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vanløse Allé 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 897), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vanløse Allé 38")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 125), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 166), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 634), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1770), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vanløse Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1692), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ved Amagerport 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ved Andebakken")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 5729), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ved Kløvermarken")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2962), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ved Linden 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vejlands Allé 109")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Vejlands Allé 110")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vejlands Allé 113")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vejlands Allé 117")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 130")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 131")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vejlands Allé 146")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 148")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 151B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vejlands Allé 152")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vejlands Allé 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vejlands Allé 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vejlands Allé 23")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vejlands Allé 27")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 62")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 65")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vejlands Allé 73")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vejlands Allé 78")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'G'
df.loc[(df['Address'].str.contains("Vejlands Allé 7")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vejlands Allé 81")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vejlands Allé 87A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vejlands Allé 97")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vejlands Allé 99")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vejlands Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vendsysselvej 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vennely Allé 3A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Vennely Allé 3B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Vennely Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Veras Allé 13A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Veras Allé 15C")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Veras Allé 17")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 662), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Veronavej 17")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vester Søgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 11617), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vester Søgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 16124), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vester Voldgade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vesterbrogade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 7030), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vesterløkken 15")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vestermarksvej 19B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = '0'
df.loc[(df['Address'].str.contains("Vestermarksvej 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vestre Allé 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vibekegade 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslev Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1939), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vigerslev Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1912), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vigerslev Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1930), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslev Allé")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1935), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1945), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1940), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1975), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1988), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1989), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1950), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1929), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vigerslevvej")) & (df['EnergyLabel'] == 'Mangler') & (df['YearRemodAdd'] == 1931), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Viktoriagade 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Viktoriagade 8")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vindingevej 35")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vinkelager 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vodroffsvej 2")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Vognborgvej 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vognborgvej 14")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vogtervej 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Vordingborggade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2873), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vordingborggade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3408), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Vordingborggade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1367), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Vølundsgade 10A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Wagnersvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3348), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Wesselsgade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Wesselsgade 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Wibrandtsvej 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Wildersgade")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Wilkensvej 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Yrsavej 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = ''
df.loc[(df['Address'].str.contains("Wilkensvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3407), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Åbenrå 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Åbenrå 25")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Åboulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4030), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Åboulevard 12")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Åboulevard")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2105), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Åboulevard 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Åboulevard 18")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Ågerupvej 46")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ågerupvej 51B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Åhaven 85")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ålandsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3970), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ålandsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4185), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Ålekistevej 222")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Århusgade 142")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'A'
df.loc[(df['Address'].str.contains("Ålholm Plads")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 1548), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Århus Plads")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 11317), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Århusgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 11317), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Æblehaven 15B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Æblestien")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 10284), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Ægirsgade 19")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Øresund Parkvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3661), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Øresundsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 4048), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Øresundsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3816), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Øresundsvej")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 2863), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østbanegade 11")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Østbanegade 21")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Øster Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Øster Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Øster Farimagsgade 16B")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'E'
df.loc[(df['Address'].str.contains("Øster Søgade 8A")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'Fredet'
df.loc[(df['Address'].str.contains("Østerbrogade 33")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østerbrogade 45")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østerdalsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3575), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 16")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 20")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 22")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 29")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 30")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 3")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'F'
df.loc[(df['Address'].str.contains("Østergårds Allé 4")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østergårds Allé 5")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'C'
df.loc[(df['Address'].str.contains("Østergårds Allé 6")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'B'
df.loc[(df['Address'].str.contains("Østergårds Allé 9")) & (df['EnergyLabel'] == 'Mangler'), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Østrigsgade")) & (df['EnergyLabel'] == 'Mangler') & (df['FloorAreaBuilding'] == 3080), 'EnergyLabel'] = 'D'
df.loc[(df['Address'].str.contains("Yrsavej")) & (df['EnergyLabel'] == '') & (df['FloorAreaBuilding'] == 540), 'EnergyLabel'] = 'D'


In [38]:
##################################################
#####               EnergyLabel              #####
##################################################

#print(df['EnergyLabel'].value_counts())
df.EnergyLabel.loc[df.EnergyLabel == '0'] = 'A'
df.EnergyLabel.loc[df.EnergyLabel == '2'] = 'A'
df.EnergyLabel.loc[df.EnergyLabel == '5'] = 'A'

ordered_EnergyLabel = ['A','B','C','D','E','F','G','Fredet']
df['EnergyLabel'] = df.EnergyLabel.astype('category').cat.set_categories(ordered_EnergyLabel, ordered = True)
df['EnergyLabel'].value_counts()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


D         16125
C          8232
E          4847
B          2670
A          2271
F          1102
Fredet      594
G           381
Name: EnergyLabel, dtype: int64

In [39]:
df.isnull().sum(axis=0)

df.shape

(36222, 55)

In [40]:
df.describe(include=['category']).T

Unnamed: 0,count,unique,top,freq
Bathrooms,36222,5,1,32501
BurglaryRisk,36222,4,lav,32661
CloudburstRisk,36222,3,kan være risiko,32142
EnergyLabel,36222,8,D,16125
FloorsTotal,36222,17,5,14348
HeatSource,36222,7,Fjernvarme/blokvarme (radiatorsystemer el. var...,35743
Kitchen,36222,4,Eget køkken (med afløb og kogeinstallation),36147
LargestParty,36222,4,socialdemokratiet,12593
Levels,36222,3,1,34783
NoiseMeasurement,36222,6,Ingen trafikstøj,17721


### Numeric variables - inspection

In [41]:
numeric_var = df.describe().T[['count','mean', 'min', 'max']]
#print(numeric_var.to_latex(index=True))  
numeric_var

Unnamed: 0,count,mean,min,max
AskingPrice,36222.0,3406877.0,195000.0,27500000.0
BasementArea,36222.0,6.4235,0.0,4807.0
FloorAreaBuilding,36222.0,4269.895,35.0,76211.0
GeomaticAVMPrice,36120.0,3810918.0,-2989154.0,29416090.0
LandValue,36159.0,378883.9,0.0,11692300.0
Latitude,36222.0,55.67863,55.61631,55.72676
Longitude,36222.0,12.55278,12.45386,12.63956
MASL,36222.0,7.970697,0.0,37.3
SalePrice,36222.0,3270323.0,12325.0,27000000.0
TaxableValue,36159.0,1912210.0,110000.0,94000000.0


In [43]:
##################################################
#####  Inspection of the numerical variables #####
##################################################

##################################################
#####               AskingPrice              #####
##################################################

##################################################
#####             BasementArea               #####
##################################################

##################################################
#####           FloorAreaBuilding            #####
##################################################

##################################################
#####            GeomaticAVMPrice            #####
##################################################
print(len(df.loc[(df.GeomaticAVMPrice < 0), 'GeomaticAVMPrice']))
df.loc[(df.GeomaticAVMPrice < 0), 'GeomaticAVMPrice'] = np.NaN
print(len(df.loc[(df.GeomaticAVMPrice == 0), 'GeomaticAVMPrice']))
df.loc[(df.GeomaticAVMPrice == 0), 'GeomaticAVMPrice'] = np.NaN

##################################################
#####               LandValue                #####
##################################################
#len(df.loc[(df.LandValue == 0), 'LandValue'])

##################################################
#####                Latitude                #####
##################################################

##################################################
#####               Longitude                #####
##################################################

##################################################
#####                  MASL                  #####
##################################################

##################################################
#####               SalePrice                #####
##################################################
#len(df.loc[(df.SalePrice < 250000)])

##################################################
#####              TaxableValue              #####
##################################################

##################################################
#####             TotalFloorArea             #####
##################################################

##################################################
#####             VoterTurnout               #####
##################################################

##################################################
#####            WeightedFloorArea           #####
##################################################
#len(df.loc[(df.WeightedFloorArea < 15)])
#(df.loc[(df.WeightedFloorArea < 15)])
#(df.loc[(df.WeightedFloorArea > 500), 'TotalFloorArea'])


##################################################
#####             PriceReduction             #####
##################################################

##################################################
#####             PriceReduction             #####
##################################################

##################################################
#####             PriceReduction             #####
##################################################

##################################################
#####             PriceReduction             #####
##################################################


##################################################
#######  Inspection of the date variables  #######
##################################################

##################################################
#####               OfferingEnd              #####
##################################################

##################################################
#####               OfferingEnd              #####
##################################################

##################################################
#####               OfferingStart            #####
##################################################

##################################################
#####                 SaleDate               #####
##################################################

##################################################
#####                   Time                 #####
##################################################

##################################################
#####                  ValDate               #####
##################################################

##################################################
#####                YearBuilt               #####
##################################################

##################################################
#####               YearRemodAdd             #####
##################################################

##################################################
#####                  Quarter               #####
##################################################

##################################################
#####                    Year                #####
##################################################

##################################################
#####                YearQuarter             #####
##################################################

##################################################
#####               TurnoverTime             #####
##################################################

##################################################
#####               YearRemodAdd             #####
##################################################

##################################################
#####               YearRemodAdd             #####
##################################################


##################################################
####### Inspection of the heler variables  #######
##################################################

##################################################
#####                  Address               #####
##################################################

##################################################
#####              LinkBoligsiden            #####
##################################################

##################################################
#####                  LinkGeo               #####
##################################################

##################################################
#####                  ZipCode               #####
##################################################

df.drop(columns=['SquareMetrePrice', 'SquareMetrePrice_range', 'GeomaticAVMPricePerMetre', 'GeomaticAVMPricePerMetre_range'])

# GEM DATA
#df.to_csv('Endeligt data/Boligsiden&DinGeo_type.csv', index=False)


4
29


Unnamed: 0,Address,AskingPrice,BasementArea,Bathrooms,BurglaryRisk,CloudburstRisk,EnergyLabel,FloorAreaBuilding,FloorsTotal,GeomaticAVMPrice,...,YearRemodAdd,ZipCity,PriceReduction,HasMultipleLevels,Quarter,Year,YearQuarter,TurnoverTime,ZipCode,City
7018,"A.C. Meyers Vænge 1, 2.",3795000.0,0.0,1,lav,kan være risiko,B,12759.0,7,3869488.0,...,2006.0,2450 København SV,0.000000,0,4,2017,2017Q4,60 days 00:00:00.000000000,2450,København SV
34268,"A.C. Meyers Vænge 1, 4.",2545000.0,0.0,1,lav,kan være risiko,B,12759.0,7,3893277.0,...,2006.0,2450 København SV,-34.659820,0,4,2018,2018Q4,108 days 00:00:00.000000000,2450,København SV
24387,"A.C. Meyers Vænge 1, 5. th",1895000.0,0.0,1,lav,kan være risiko,B,12759.0,7,3341446.0,...,2006.0,2450 København SV,-34.655172,0,3,2016,2016Q3,145 days 00:00:00.000000000,2450,København SV
26897,"A.C. Meyers Vænge 11A, 1. th",3395000.0,0.0,1,lav,kan være risiko,B,7719.0,6,3808328.0,...,2008.0,2450 København SV,-0.029446,0,1,2017,2017Q1,25 days 00:00:00.000000000,2450,København SV
17880,"A.C. Meyers Vænge 11A, 4. th",3999000.0,0.0,1,lav,kan være risiko,B,7719.0,6,4248120.0,...,2008.0,2450 København SV,-5.905882,0,2,2018,2018Q2,42 days 00:00:00.000000000,2450,København SV
17787,"A.C. Meyers Vænge 11A, 6. th",2995000.0,0.0,1,lav,kan være risiko,B,7719.0,6,4016475.0,...,2008.0,2450 København SV,-2.601626,0,3,2015,2015Q3,5 days 00:00:00.000000000,2450,København SV
21169,"A.C. Meyers Vænge 11B, 1. th",3845000.0,0.0,1,lav,kan være risiko,B,7719.0,6,3745000.0,...,2008.0,2450 København SV,2.670227,0,1,2019,2019Q1,60 days 00:00:00.000000000,2450,København SV
32384,"A.C. Meyers Vænge 11B, 1. tv",3998000.0,0.0,1,lav,kan være risiko,B,7719.0,6,3966569.0,...,2008.0,2450 København SV,1.730280,0,2,2018,2018Q2,78 days 00:00:00.000000000,2450,København SV
34304,"A.C. Meyers Vænge 11B, 2. tv",3975000.0,0.0,1,lav,kan være risiko,B,7719.0,6,3973242.0,...,2008.0,2450 København SV,0.000000,0,4,2018,2018Q4,28 days 00:00:00.000000000,2450,København SV
9233,"A.C. Meyers Vænge 11B, 4. tv",4225000.0,0.0,1,lav,kan være risiko,B,7719.0,6,4198349.0,...,2008.0,2450 København SV,0.595238,0,4,2018,2018Q4,15 days 00:00:00.000000000,2450,København SV


### Scraping hvorlangterder.dk

#### Main function

In [2]:
def hvorlangterder(Address):
    url = 'https://hvorlangterder.poi.viamap.net/v1/nearestpoi/?poitypes=daycare,doctor,hospital,junction,metro,school,stop,strain,supermarket,train,library,pharmacy,coast,forest,lake,airport,sportshall,publicbath,soccerfield,roadtrain&fromaddress=' + adresse + '&mot=foot&token=eyJkcGZ4IjogImh2b3JsYW5ndGVyZGVyIiwgInByaXZzIjogInIxWjByMEYwazZCdFdxUWNPVXlrQi95NlNVcEp2MlFiZ3lYZXRxNEhZNFhPLzNZclcwK0s5dz09In0.fP4JWis69HmaSg5jVHiK8nemiCu6VaMULSGGJyK4D4PkWq4iA1+nSHWMaHxepKwJ83sEiy9nMNZhv7BcktRNrA'
    resp = requests.get(url)
    cont = resp.json()
    df =  pd.DataFrame(cont).loc[['routedmeters']]
    df['Location'] = adresse
    return(df)

#hvorlangterder(df['Lokation'][0])

Loop that takes the adress from boligsiden and gives the distance (in a dataframe).

In [31]:
# Getting data 
df_14_19 = pd.read_csv("Endeligt data/Boligsiden&DinGeo_type.csv")
# Load data

# Choose the wanted data part
df = df_14_19.iloc[0:10000] 
df['Location'] = df['Address'].str.split(',').str[0] + ', ' + df['ZipCity']

df_hvorlangt = pd.DataFrame()

# Looop
for i in tqdm(df['Location']):
    try:
        dist = hvorlangterder(i)
        df_hvorlangt = pd.concat([df_hvorlangt,dist])
    except Exception: 
        pass
    time.sleep(0.2)

len(df_hvorlangt)
    
# Save csv-file
df_hvorlangt.to_csv('Endeligt data/distance_10000.csv', index = False) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
100%|██████████| 10/10 [00:02<00:00,  4.89it/s]


### Merge data  from Boligsiden, DinGeo and hvorlangerder

In [46]:
# Data from boligsiden.dk and dingeo.dk = 36222
boligsiden_dingeo = pd.read_csv("Endeligt data/Boligsiden&DinGeo_type.csv") 
boligsiden_dingeo['Lokation'] = boligsiden_dingeo['Address'].str.split(',').str[0] + ', ' + boligsiden_dingeo['ZipCity']


# Data from hvorlangterder.dk = 43976
df_1 = pd.read_csv("Endeligt data/distance_10000.csv")
df_2 = pd.read_csv("Endeligt data/distance_20000.csv")
df_3 = pd.read_csv("Endeligt data/distance_30000.csv")
df_4 = pd.read_csv("Endeligt data/distance_40000.csv")
df_5 = pd.read_csv("Endeligt data/distance_50000.csv")
hvorlangterder = pd.concat([df_1, df_2, df_3, df_4, df_5])

hvorlangterder = hvorlangterder.rename(columns={"Skole": "School", 
                               "Modulvogntog": "RoadTrain",
                               "Motorvej": "Motorway",
                               "Daginstitution": "Daycare",
                               "Metro": "Metro",
                               "Læge": "Doctor",
                               "Fodboldbane": "Soccerfield",
                               "Hospital": "Hospital",
                               "Stoppested": "BusStop",
                               "Sø": "Lake",
                               "Supermarked": "Supermarket",
                               "Apotek": "Pharmacy",
                               "S-Tog": "STrain",
                               "Lufthavn": "Airport",
                               "Regionaltog": "Train",
                               "Bibliotek": "Library",
                               "Svømmehal": "PublicBath",
                               "Kyst": "Coast",
                               "Idrætshal": "SportsHall",
                               "Skov": "Forest"})


# Merged data = 36183
df = pd.merge(boligsiden_dingeo, hvorlangterder, how='inner', on='Lokation', right_index=False).drop_duplicates()
df.drop(columns=['Lokation'])

print(df.shape)

df.to_csv('Endeligt data/df_cleaned.csv', index=False)



(36183, 76)


In [47]:
# Distance variable

df = pd.read_csv("Endeligt dataa/df_cleaned.csv") 
distance_var = df.iloc[:, 56:].describe().T[['count','mean', 'min', 'max']]

distance_var

print(distance_var.to_latex(index=True))  


\begin{tabular}{lrrrr}
\toprule
{} &    count &         mean &      min &       max \\
\midrule
School      &  36183.0 &   505.817279 &    30.23 &   2320.13 \\
RoadTrain   &  36183.0 &  5093.902265 &   267.59 &   8944.08 \\
Motorway    &  36183.0 &  3056.314082 &   210.09 &   5912.88 \\
Daycare     &  36183.0 &   316.048231 &    17.54 &   1489.15 \\
Metro       &  36183.0 &  1131.791728 &    31.23 &   4830.45 \\
Doctor      &  36183.0 &   526.736329 &    10.09 &   2603.05 \\
Soccerfield &  36183.0 &   970.592632 &    52.79 &   2947.06 \\
Hospital    &  36183.0 &  2304.296768 &    87.07 &   6451.71 \\
BusStop     &  36183.0 &   220.956763 &    12.35 &   1226.21 \\
Lake        &  36183.0 &  1521.552948 &    35.65 &   5626.16 \\
Supermarket &  36183.0 &   373.945738 &     6.13 &   1901.15 \\
Pharmacy    &  36183.0 &   755.982655 &    15.13 &   2776.94 \\
STrain      &  36183.0 &  1601.709348 &    57.36 &   6177.97 \\
Airport     &  36183.0 &  9910.385969 &  2481.29 &  17199.77 \\
Train   

In [19]:
##########################################
################ LOST DATA ###############
##########################################

# Data from hvorlangterder.dk = 41981
df_hvorlangt = pd.read_csv("df_BoligsidenDinGeoHvorLangt.csv")
df_hvorlangt = df_hvorlangt.iloc[:, -21:]
print(df_hvorlangt.shape)

# Data from EDA = 36222
#df_no_missing = pd.read_csv("Endeligt data/Boligsiden&DinGeo_type.csv")
#df_no_missing['Lokation'] = df_no_missing['Address'].str.split(',').str[0] + ', ' + df_no_missing['ZipCode'] + ' ' + Df_no_missing['City'] 
#print(df_no_missing.shape)

#print('Der er gået ' +  str(len(data_from_data_cleaning) - len(df_hvorlangt)) + ' observationer tabt ud af ' + str(len(data_from_data_cleaning)))
#list_mangler = (list(set(data_from_data_cleaning.Lokation.tolist()) - set(df_hvorlangt.Lokation.tolist())))
#(list_mangler)

##########################################
############# MERGE THE DATA #############
##########################################

#df_samlet = pd.merge(data_from_data_cleaning, df_hvorlangt, how='inner', on='Lokation', right_index=False).drop_duplicates()
#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_samlet.head(10).to_html()))  
    
# Gem datafil
#df_samlet.to_csv('Endeligt data/df_BoligsidenDinGeoHvorLangt.csv', index = False) 


#df_BoligsidenDinGeoHvorLangt = pd.read_csv("Endeligt data/df_BoligsidenDinGeoHvorLangt.csv")
#df_BoligsidenDinGeoHvorLangt.shape

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_hvorlangt.head(10).to_html()))  

#df_no_missing.columns
#df_hvorlangt['Lokation']

(41981, 21)


In [237]:
df = df_tabt
df = df.reset_index(drop=True)

df['geolink'] = pd.concat([pd.DataFrame([(get_geolink(i))], columns=['geolink']) for i in range(0, len(df))], ignore_index=True)

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df.head(25).to_html()))   

df.at[1, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/'
df.at[6, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-th/'
df.at[7, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/4-th/'
df.at[8, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/3-th'
df.at[12, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/st-th/'
df = df.drop(df.index[14])
df.at[18, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-23/1-tv/'
df.at[20, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-19/st-tv/'
df.at[24, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-tv/'
df.at[26, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-frederiksberg/troels--lunds-vej-20/'
df.at[27, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/4-tv/'
df.at[29, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-27/2-th/'
df.at[31, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/st-tv'
df.at[33, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/2-th'
df.at[35, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/3-th'
df = df.drop(df.index[35])
df.at[39, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/1-tv'
df.at[40, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/1-tv'
df.at[43, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/3-th'
df.at[48, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/4-th'
df.at[49, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/st-tv'
df.at[50, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/4-tv'
df.at[52, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/st-th'
#df.at[53, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/2-th'
#df = df.drop(df.index[53])
df.at[61, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/st-tv'
df.at[62, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/2-th'
df.at[64, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/2-tv'
df.at[66, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/4-tv'
df.at[71, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/1-th'
df.at[75, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/st-th'
df.at[81, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/4-tv'
df.at[82, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34a/2-tv/'
df.at[85, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/4-tv'
df.at[87, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange-Müllers-Gade-23/2-tv'
df.at[89, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-frederiksberg/troels--lunds-vej-18/'
df.at[99, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/4-th'
df.at[108, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/3-th'
df.at[109, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-Frederiksberg/Troels--Lunds-Vej-34'
df = df.drop(df.index[111])
df.at[112, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/4-tv'
df.at[114, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/1-tv/'
df = df.drop(df.index[115])
df.at[118, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/3-mf/'
df.at[119, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/3-tv'
df.at[120, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/1-tv'
df.at[125, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/2-tv'
df.at[126, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/st-th'
df.at[129, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-23/st-th'
df.at[130, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/3-th'
df.at[136, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/1-tv'
df.at[137, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/2-tv'
df.at[138, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/3-tv'
df.at[139, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-17/2-tv'
df.at[140, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34a/1-th/'
df.at[143, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/4-tv'
df.at[145, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-20/4-th'
df.at[147, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/3-tv'
df.at[148, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/2-tv/'
df.at[154, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/3-th'
df.at[158, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/4-th'
df.at[164, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/st-th'
df.at[165, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/1-th'
df.at[168, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/1-th'
df.at[171, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/2-th/'
df.at[174, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/4-tv'
df.at[175, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-k%C3%B8benhavn-k/danneskiold--sams%C3%B8es-all%C3%A9-34b/4-th/'
df.at[176, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/4-th'
df.at[178, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-15/4-th'
df.at[185, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/2-th'
df.at[188, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-21/1-th'
df = df.drop(df.index[191])
df.at[192, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-27/st'
df.at[208, 'geolink'] = 'https://www.dingeo.dk/adresse/1434-København-K/Danneskiold--Samsøes-Allé-34B/3-tv'
df.at[208, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/2-tv'
df.at[209, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-25/3-tv'
df.at[212, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/1-th'
df.at[214, 'geolink'] = 'https://www.dingeo.dk/adresse/2000-Frederiksberg/Troels--Lunds-Vej-32'
df.at[219, 'geolink'] = 'https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-19/3-tv'

df = df.reset_index(drop=True)
df_ny = df

with pd.option_context('display.max_colwidth', -1): 
    display(HTML(df_ny.to_html()))
    

Unnamed: 0,Adresse,Antal plan,Antal værelser,Boligtype,Breddegrad,Byggeår,Ejendomsværdi,Grund,Grundværdi,Kælderareal,Link,Længdegrad,Om-/tilbygningsår,Periode,Post_nr,Samlet Boligareal,Seneste_salgsdato,Seneste_salgspris,Udbud_Slut,Udbud_Start,Udbudspris,Varmekilde,Vurderingsdato,Vægtet areal,Ydervægsmateriale,geolink
0,"Bentzonsvej 37, 2. tv",-,3,Ejerlejlighed,55.685782,-,1.550.000 kr.,-,157.200 kr.,-,https://www.boligsiden.dk/boligen/147-8570,12.529234,-,20-03-2017 - 06-04-2017,2000 Frederiksberg,67 m²,25-03-2017,3.395.000 kr.,06-04-2017,20-03-2017,3.395.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2000-Frederiksberg/Bentzonsvej-37/2-tv
1,"Lange-Müllers Gade 27, 1. th",-,2,Ejerlejlighed,55.712541,-,1.300.000 kr.,-,106.900 kr.,-,https://www.boligsiden.dk/boligen/101-336123,12.566848,-,01-08-2016 - 29-08-2016,2100 København Ø,65 m²,25-08-2016,2.250.000 kr.,29-08-2016,01-08-2016,2.395.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2100-københavn-ø/lange--müllers-gade-27/1-th/
2,"Amsterdamvej 3, 3.",-,6,Ejerlejlighed,55.64873,-,2.050.000 kr.,-,144.300 kr.,-,https://www.boligsiden.dk/boligen/101-24059,12.61399,-,10-07-2016 - 01-11-2016,2300 København S,123 m²,07-10-2016,3.400.000 kr.,01-11-2016,10-07-2016,3.495.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2300-København-S/Amsterdamvej-3/3
3,"Langelandsvej 20B, 1. tv",-,4,Ejerlejlighed,55.684792,-,1.950.000 kr.,-,169.300 kr.,-,https://www.boligsiden.dk/boligen/147-71132,12.530195,-,01-08-2018 - 29-08-2018,2000 Frederiksberg,83 m²,27-08-2018,4.295.000 kr.,29-08-2018,01-08-2018,4.295.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2000-Frederiksberg/Langelandsvej-20B/1-tv
4,"Porcelænshaven 5G, st. tv",-,3,Ejerlejlighed,55.677771,-,2.500.000 kr.,-,405.500 kr.,-,https://www.boligsiden.dk/boligen/147-257946,12.523563,-,22-05-2014 - 12-07-2014,2000 Frederiksberg,99 m²,26-08-2014,5.395.000 kr.,12-07-2014,22-05-2014,5.395.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2000-Frederiksberg/Porcelænshaven-5G/st-tv
5,"Nordre Digevej 54, st. tv",-,3,Ejerlejlighed,55.650154,-,1.500.000 kr.,-,326.500 kr.,-,https://www.boligsiden.dk/boligen/101-734790,12.588153,-,,2300 København S,85 m²,28-08-2014,1.125.000 kr.,Ingen registrerede udbud,Ingen registrerede udbud,,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2300-København-S/Nordre-Digevej-54/st-tv
6,"Lange-Müllers Gade 20, st. th",-,3,Ejerlejlighed,55.712375,-,1.600.000 kr.,-,120.400 kr.,-,https://www.boligsiden.dk/boligen/101-335534,12.567279,-,05-07-2018 - 08-07-2018,2100 København Ø,88 m²,08-07-2018,3.200.000 kr.,08-07-2018,05-07-2018,3.245.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-20/st-th/
7,"Lange-Müllers Gade 21, 4. th",-,2,Ejerlejlighed,55.712206,-,1.150.000 kr.,-,85.300 kr.,-,https://www.boligsiden.dk/boligen/101-335720,12.566929,-,,2100 København Ø,56 m²,19-06-2014,1.745.000 kr.,Ingen registrerede udbud,Ingen registrerede udbud,,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2100-k%C3%B8benhavn-%C3%B8/lange--m%C3%BCllers-gade-21/4-th/
8,"Lange-Müllers Gade 22, 3. th",-,3,Ejerlejlighed,55.712548,-,1.750.000 kr.,-,120.400 kr.,-,https://www.boligsiden.dk/boligen/101-335828,12.567231,-,20-01-2016 - 02-06-2016,2100 København Ø,87 m²,26-03-2016,3.400.000 kr.,02-06-2016,20-01-2016,3.420.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2100-København-Ø/Lange--Müllers-Gade-22/3-th
9,"Rumæniensgade 12, 5. th",-,2,Ejerlejlighed,55.659712,-,940.000 kr.,-,112.600 kr.,-,https://www.boligsiden.dk/boligen/101-472701,12.612983,-,24-02-2014 - 04-03-2014,2300 København S,53 m²,02-03-2014,1.600.000 kr.,04-03-2014,24-02-2014,1.595.000 kr.,-,01-10-2018,-,,https://www.dingeo.dk/adresse/2300-København-S/Rumæniensgade-12/5-th


## Economic indicators

### House price index

In [11]:
df = pd.read_csv("Endeligt data/df_cleaned.csv")

# Load price index for houses and flats
price_index = pd.read_csv('Endeligt data/price_index3.csv',delimiter=';',header=1,skiprows=1)
price_index=price_index.drop([0, 1]).T

# Make replacement dicts houses and flats
ss1={index.replace('M','-'):value for index,value in price_index[2].iteritems()}
ss2={index.replace('M','-'):value for index,value in price_index[3].iteritems()}

# Create 2 new columns called "PriceHouse" and "PriceFlat", which is the price index for houses and flats.
df['SaleDateMonth'] = df['SaleDate'].str[:7]
df['PriceHouse']=df['SaleDateMonth'].replace(ss1)
df['PriceFlat']=df['SaleDateMonth'].replace(ss2) 

# Change index for PriceIndex
HouseIndexZero = 100/ 80.7
FlatIndexZero = 100/ 85.7
df['PriceHouse'] = (df['PriceHouse'] * HouseIndexZero) /100
df['PriceFlat'] = (df['PriceFlat'] * FlatIndexZero) /100
def f(X):
    if X['Type'] == 'Ejerlejlighed':
        PriceIndex = X['PriceFlat']
    else:
        PriceIndex = X['PriceHouse']
    return PriceIndex
df['PriceIndex'] = df.apply(f, axis=1)

df = df.drop(columns=['PriceFlat', 'PriceHouse'])

### OMXC20, Mortage rate and Unemployment

In [13]:
# Load OMXC20 - note: 1 month delay manuelt
MPK13 = pd.read_csv('Endeligt data/MPK13-OMXC20CAP.csv',delimiter=';',header=0,skiprows=1)
MPK13 = MPK13.T

# Load Mortgage - note: 1 quarter delay manuelt
MPK18 = pd.read_csv('Endeligt data/MPK18-MortgageCredit.csv',delimiter=';',header=1,skiprows=1)
MPK18 = MPK18.iloc[1:,2:].T

# Load Unemplotment - note: 1 month delay manuelt
AULK04 = pd.read_csv('Endeligt data/AULK04.csv',delimiter=';',header=0,skiprows=1)
AULK04 = AULK04.iloc[0:,1:].T

FODDAG = pd.read_csv('Endeligt data/FODDAG.csv',delimiter=';',header=0,skiprows=1)
FODDAG = FODDAG.iloc[0:,1:].T


# Make replacement dicts 
df['SaleDate'] = pd.to_datetime(df['SaleDate'], format='%Y-%m-%d', errors='coerce')
ss1 = {index.replace('M','-'):value for index,value in MPK13[0].iteritems()}
ss2 = {index.replace('Q','Q'):value for index,value in MPK18[1].iteritems()}
ss3 = {index.replace('M','-'):value for index,value in AULK04[0].iteritems()}
ss4 = {index.replace('M','-'):value for index,value in AULK04[1].iteritems()}
ss5 = {index.replace('M','-'):value for index,value in FODDAG[0].iteritems()}


df['SaleDateMonth'] = df['SaleDate'].astype(str).str[:7]
df['SaleDateQuarter'] = pd.PeriodIndex(pd.to_datetime(df['SaleDate']), freq='Q').astype(str)

df['OMXC20']=df['SaleDateMonth'].replace(ss1)
df['Mortgage'] = df['SaleDateQuarter'].replace(ss2)
df['Unemployed']=df['SaleDateMonth'].replace(ss3)
df['UnemploymentRelativeToLabourForce']=df['SaleDateMonth'].replace(ss4)
df['Births']=df['SaleDateMonth'].replace(ss5)

# Final dataset
#df.to_csv('Endeligt data/df_final.csv', index=False)

# Overview

In [14]:
df_final = pd.read_csv("Endeligt data/df_final.csv")
df_final = df_final.drop(columns=['Time', 'ZipCity', 'PriceReduction', 'HasMultipleLevels', 'SquareMetrePrice', 'SquareMetrePrice_range', 'GeomaticAVMPricePerMetre', 'GeomaticAVMPricePerMetre_range', 'Quarter', 'Year', 'YearQuarter', 'Lokation', 'SaleDateMonth', 'SaleDateQuarter'])

#with pd.option_context('display.max_colwidth', -1): 
#    display(HTML(df_final.head(20).to_html()))  

#print(df_final.isnull().sum().to_latex(index=True))  
print(df_final.shape)

la = df_final.info()
pd.DataFrame(la)

(36183, 70)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36183 entries, 0 to 36182
Data columns (total 70 columns):
Address                              36183 non-null object
AskingPrice                          36183 non-null float64
BasementArea                         36183 non-null float64
Bathrooms                            36183 non-null int64
BurglaryRisk                         36183 non-null object
CloudburstRisk                       36183 non-null object
EnergyLabel                          36183 non-null object
FloorAreaBuilding                    36183 non-null float64
FloorsTotal                          36183 non-null int64
GeomaticAVMPrice                     36048 non-null float64
HeatSource                           36183 non-null object
Kitchen                              36183 non-null object
LandValue                            36120 non-null float64
LargestParty                         36183 non-null object
Latitude                             36183 non-nul

In [None]:
##################################################
#############           New          #############
##################################################
df['SquareMetrePrice'] = df.SalePrice / df.WeightedFloorArea
bins = [0, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 75000, 200000]
names = ['<15000','15000-20000','20000-25000','25000-30000', '30000-35000','35000-40000','40000-45000','45000-50000','50000-75000', '75000-100000']
d = dict(enumerate(names,1))
df['SquareMetrePrice_range'] = np.vectorize(d.get)(np.digitize(df['SquareMetrePrice'],bins))
df['GeomaticAVMPricePerMetre'] = df.GeomaticAVMPrice / df.WeightedFloorArea
df['GeomaticAVMPricePerMetre_range'] = np.vectorize(d.get)(np.digitize(df['GeomaticAVMPricePerMetre'],bins))
df['SaleDate'] = pd.to_datetime(df['SaleDate'],format='%Y-%m-%d', errors='coerce')
df['OfferingEnd'] = pd.to_datetime(df['OfferingEnd'],format='%Y-%m-%d', errors='coerce')
df['OfferingStart'] = pd.to_datetime(df['OfferingStart'],format='%Y-%m-%d', errors='coerce')
df['Quarter'] = df['SaleDate'].dt.quarter
df['Year'] = df['SaleDate'].dt.year
df['YearQuarter'] = df['Year'].map(str)+ 'Q' +df['Quarter'].map(str)
df['TurnoverTime'] = (df.OfferingEnd - df.OfferingStart)
df['PriceReduction'] = ((df.AskingPrice / df.SalePrice)-1)*100
df['HasMultipleLevels'] = df['Levels'].apply(lambda x: 1 if x > 1 else 0)
