In [131]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from geopy.distance import geodesic
import tqdm.notebook as tqdm
from collections import Counter
import lemmy

### Scraping Boliga website for data

In [173]:
import requests
from bs4 import BeautifulSoup
import re
import json

def houses_boliga(number_houses):
    """
    Returns a list of all ids for houses on boliga
    """
    house_id = list()
    url = "https://www.boliga.dk/resultat"
    
    for i in range(int(number_houses/50)):
        new_url = url + f"?page={i}"
        response = requests.get(new_url)
        html = response.text
        soup = BeautifulSoup(html,"html.parser")
        ids = soup.find_all("a",{"class":"house-list-item"})
        link_houses = list()

        for link in ids:
            link_houses.append(re.findall("(/\d{4,}/)",link["href"])[0].replace("/",""))
        
        house_id.extend(link_houses)
        
    return house_id

def get_info(id_list):
    
    """
    Take an list with ids of houses on boliga and gets specific data about these ids
    
    """
    all_df = list()
    new_keys = ["registeredArea","downPayment","estateUrl","currentArchiveId","forSaleNowId",
                "foreclosureId","selfsaleEstateId","cleanStreet","estateId","latitude","longitude",
               "propertyType","priceChangePercentTotal","energyClass","price","rooms","size","lotSize",
               "floor","buildYear","city","isActive","municipality","zipCode","street",
                "squaremeterPrice","daysForSale","createdDate","basementSize","views"]
    
    for house_id in id_list:
        response = requests.get(f'https://api.boliga.dk/api/v2/estate/{house_id}')
        response = response.json()
        df_dict = {key: response[key] for key in new_keys}
        df = pd.DataFrame(df_dict,index=[0])
        all_df.append(df)

    df = pd.concat(all_df,axis=0,ignore_index=True)
    
    return df

def get_reviews(df):
    bodys = list()
    i=0
    #Finder alle ejendomsmæglere, som har mere end 100 huse til salg
    for value in df["estateUrl"].values:
        estates.append(value[8:15])
    numbers = dict(Counter(estates))
    over_100 = dict() 
    for key, value in numbers.items():
        if value > 100:
            over_100[key] = value
    
    for link in tqdm.tqdm(df["estateUrl"].values[30000:30050]):
        try:
            i += 1
            response = requests.get(link)
            html = response.text
            soup = BeautifulSoup(html,"html.parser")
            
            if link[8:15] =="home.dk": #Home
                ids = soup.find_all("div",{"class":"text"},"p")
                bodys.extend([x.p.text.replace("\n","").strip().lower() for x in ids[0:1] if len(x)>1])
            elif link[8:15] =="ww.skbo": #skbolig
                ids = soup.find_all("div",{"class":"listing-text"})
                bodys.extend([sk.text.replace("\n","").replace("\r","").strip().lower() for sk in ids[0:1] if len(sk)>1])
            elif link[8:15] == "www.nyb": #Nybolig
                ids = soup.find_all("div",{"class":"foldable-spot__container"})
                bodys.extend([ny.text.replace("\n","").strip().lower() for ny in ids[0:1] if len(ny)>1])
            elif link [8:15] == "ww.elto": #Eltoft Nielsen
                ids = soup.find_all("br")
                bodys.extend([elto.text.replace("\n","").strip().lower() for elto in ids[0:1] if len(elto)>1])
            elif link[8:15] == "www.cla": #Claus Borg
                ids = soup.find_all("div",{"id":"case_content"})
                bodys.extend([cla.text.replace("\n","").strip().lower() for cla in ids[0:1] if len(cla)>1])
            elif link[8:15] == "www.lok": #Lokalbolig
                ids = soup.find_all("p")
                loka = [lok.text.replace("\n","").strip().lower() for lok in ids if len(lok.text)>100]
                bodys.extend([''.join(loka)])
            elif link[8:15] == "www.edc": #EDC Bolig
                ids = soup.find_all("div",{"class":"description"})
                bodys.extend([edc.text.replace("\n","").strip().lower() for edc in ids[0:1] if len(edc)>1])
            elif link[8:15] == "adamsch": #Adam Schnack
                ids = soup.find_all("div",{"class":"listing-text"})
                bodys.extend([adam.text.replace("\n","").strip().lower() for adam in ids[0:1] if len(adam)>1])
            elif link[8:20] == "www.estate.d": #Estate
                ids = soup.find_all("div",{"class":"property-description"})
                bodys.extend([est.text.replace("\n","").strip().lower() for est in ids[0:1] if len(est)>1])
            elif link[8:15] == "www.bri": #Brikk Ejendomme
                ids = soup.find_all("div",{"class":"prop-user-content"})
                bodys.extend([bri.text.replace("\n","").strip().lower() for bri in ids[0:1] if len(bri)>1])
            elif link[8:15] == "www.rea": #Realmæglerne
                ids = soup.find_all("div",{"class":"text-full"})
                bodys.extend([rea.text.replace("\n","").strip().lower() for rea in ids[0:1] if len(rea)>1])
            elif link[8:15] == "danboli": #Danbolig
                ids = soup.find_all("div",{"class":"db-description-block"})
                bodys.extend([dan.text.replace("\n","").strip().lower() for dan in ids[0:1] if len(dan)>1])
            elif link[8:15] == "ww.lili": #Lillenhof
                ids = soup.find_all("div",{"class":"inner"})
                bodys.extend([dan.text.replace("\n","").strip().lower() for dan in ids[0:1] if len(dan)>10])
            elif link[8:15] == "bjornby":
                ids = soup.find_all("div",{"class":"content d-md-block d-none wrap-content"})
                bodys.extend([bjor.text.replace("\n","").strip() for bjor in ids[0:1] if len(bjor)>10])
            elif link[8:15] == 'www.hov': #Hovmand
                ids = soup.find_all("div",{"class":"column"})
                bodys.extend([hov.text.replace("\n","").strip() for hov in ids[0:1] if len(hov)>1])
            elif link[8:15] == 'ww.jesp': #Jesper Nielsen
                ids = soup.find_all("div",{"class":"case-description"})
                bodys.extend([jesp.text.replace("\n","").strip() for jesp in ids[0:1] if len(jesp)>1])
            elif link[8:15] == "www.sel": #Selvsalg
                ids = soup.find_all("div",{"class":"tab-pane active fade in"})
                bodys.extend([selv.text.replace("\n","").strip() for selv in ids[0:1] if len(selv)>1])
            elif link[8:15] == "www.bol": #Bolig
                ids = soup.find_all("div",{"class":"description col-md-16"})
                bodys.extend([bol.text.replace("\n","").strip() for bol in ids[0:1] if len(bol)>1])
            elif link[8:15] == 'www.joh': #Johns
                ids = soup.find_all("div",{"class":"column"})
                bodys.extend([john.text.replace("\n","").strip() for john in ids[0:1] if len(john)>1])
            elif link[8:15] == "racking": #Robinhus
                ids = soup.find_all("div",{"class":"text-container"})
                bodys.extend([robin.text.replace("\n","").strip() for robin in ids[0:1] if len(robin)>1])
            elif link[8:15] == "www.min": #minbolighandel
                ids = soup.find_all("div",{"class":"description col-md-16"})
                bodys.extend([minb.text.replace("\n","").strip() for minb in ids[0:1] if len(minb)>1])
            #elif link[8:15] == "www.nor":#NordFynBo
            #    None
            #elif link[8:15] == "ww.boli": #BoligtilBolig
            #    None
            elif link[8:15] == "ww.unni": #Unnibolig
                ids = soup.find_all("div",{"class":"column"})
                bodys.extend([un.text.replace("\n","").strip() for un in ids[0:1] if len(un)>1])
            elif link[8:15] == "www.sdb": #Sdb bolig
                ids = soup.find_all("div",{"class":"column"})
                bodys.extend([un.text.replace("\n","").strip() for un in ids[0:1] if len(un)>1])
            elif link[8:15] == "ww.land":#Landobolig
                ids = soup.find_all("div",{"class":"col-md-8"})
                bodys.extend([land.text.replace("\n","").strip() for land in ids[0:1] if len(land)>1])
            elif link[8:15] == "www.ber": #Bermistof
                ids = soup.find_all("div",{"class":"column"})
                bodys.extend([ber.text.replace("\n","").strip() for ber in ids[0:1] if len(ber)>1])
            elif link[8:20] == "www.estaldo.":#Estaldo
                None
            elif link[8:15] == "www.car": #Carsten Nordbo
                ids = soup.find_all("div",{"class":"description col-md-16"})
                bodys.extend([car.text.replace("\n","").strip() for car in ids[0:1] if len(car)>1])
            elif link[8:15] == 'ww.agri':
                ids = soup.find_all("div",{"class":"col-md-8 col-sm-7 hidden-xs text-box desktop"})
                bodys.extend([agr.text.replace("\n","").strip() for agr in ids[0:1] if len(agr)>1])
            elif link[8:15] in over_100.keys():
                print(link,"not in loop")
                print(link[8:15])
                
                
            if len(bodys) != i:
                bodys.append(np.nan)
        except:
            print(link,"virkede ikke")
            continue
    
    return bodys

def find_realtors(df):
    realtors_link = list()
    #Finder alle ejendomsmæglere, som har mere end 100 huse til salg
    estates = list()
    for value in df["estateUrl"].values:
        estates.append(value[8:15])
    numbers = dict(Counter(estates))

    over_100 = dict() 
    for key, value in numbers.items():
        if value > 100:
            over_100[key] = value
            
    for link in tqdm.tqdm(df["estateUrl"].values[40000:50000]):        
        if link[8:15] in over_100.keys():
            print(link,"not in loop")
            print(link[8:15])
            realtors_link.append(link[8:15])

    return realtors_link
    
def preprocess_csv(csv):
    """
    This function loads the dataset from boliga annd preproccesses it.
    """
    df = pd.read_csv(csv)
    y = np.array(df["price"])
    
    return df

def preprocess_text(string):
    lemmatizer = lemmy.load("da")
    string = lemmatizer.lemmatize("",string)[0].split()

    return string

def words_count(list_of_strings):
    sentences = list_of_strings
    counts = dict(Counter(sentences))
    return counts

def add_lonlat(df,df_station):
    min_dist = list()

    for location in tqdm(df["location"]):
        distance = list()
        for lon,lat in zip(df_station["lon"],df_station["lat"]):
            distance.append(geodesic((lat,lon), location).km)
    
        min_dist.append(min(distance))
        
    df["dist_station"] = min_dist
    return df

In [3]:
df = preprocess_csv("house_data.csv")
len(np.array(df["estateUrl"].values))

52093

In [7]:
df["dist_station"] = min_dist
df.to_csv("house_data_staion.csv")

In [174]:
find_realtors(df)

HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))

http://www.lilienhoff.dk/redirect.htm?sag=30000017&mgl=2389&DID=135&udbudsform=salg not in loop
ww.lili
https://www.nybolig.dk/villa/8660/blishoenedalen/102248/29722 not in loop
www.nyb
https://www.nybolig.dk/ejerlejlighed/8660/adelgade/102248/r990801 not in loop
www.nyb
https://www.nybolig.dk/ejerlejlighed/8660/adelgade/102248/990642 not in loop
www.nyb
https://www.johnfrandsen.dk/sag/337684/raadyrvej-19-8660-skanderborg/ not in loop
www.joh
https://danbolig.dk/bolig/skanderborg/8660/villa/299-19-1644-299 not in loop
danboli
https://home.dk/boligkatalog/skanderborg/8660/huse-villaer/borgergade_19_6070000275.aspx not in loop
home.dk
https://www.johnfrandsen.dk/sag/264167/stadionvej-3-8660-skanderborg/ not in loop
www.joh
https://home.dk/boligkatalog/skanderborg/8660/grunde/baastrupvej_15_607-01623.aspx not in loop
home.dk
http://www.lilienhoff.dk/redirect.htm?sag=8660A895&mgl=2389&DID=135&udbudsform=salg not in loop
ww.lili
https://home.dk/boligkatalog/skanderborg/8660/landejendomme/ly

https://www.edc.dk/alle-boliger/horsens/8700/sundg%C3%A5rdsvej-68/?sagsnr=87003472 not in loop
www.edc
https://www.edc.dk/alle-boliger/horsens/8700/f%C3%B8lfoden-3/?sagsnr=87104720 not in loop
www.edc
https://www.edc.dk/alle-boliger/horsens/8700/dalvej-5/?sagsnr=87104368 not in loop
www.edc
https://www.edc.dk/alle-boliger/horsens/8700/b%C3%B8geh%C3%B8jvej-29/?sagsnr=87104357 not in loop
www.edc
https://www.nybolig.dk/ejerlejlighed/8700/jennykammersgaardsvej/270182/10306 not in loop
www.nyb
https://www.edc.dk/alle-boliger/horsens/8700/hanstedvej-59/?sagsnr=87104166 not in loop
www.edc
http://www.agriteam.dk/sag.asp?sagsnr=H-2861&mgl=1419&DID=135&udbudsform=salg not in loop
ww.agri
http://www.agriteam.dk/sag.asp?sagsnr=H-2802&mgl=1419&DID=135&udbudsform=salg not in loop
ww.agri
https://www.edc.dk/alle-boliger/horsens/8700/dalagervej-62/?sagsnr=87104868 not in loop
www.edc
https://www.edc.dk/alle-boliger/horsens/8700/babettes-vej-5/?sagsnr=87104919 not in loop
www.edc
https://www.nybolig.

www.edc
https://www.nybolig.dk/villa/8781/laerkevej/250034/210802 not in loop
www.nyb
https://www.edc.dk/alle-boliger/stenderup/8781/hedenstedvej-11/?sagsnr=71202967 not in loop
www.edc
https://www.edc.dk/alle-boliger/hornsyld/8783/s%C3%B8ndergade-72/?sagsnr=87202742 not in loop
www.edc
https://www.estate.dk/grund/8783/snebaervej/270191/2017g425 not in loop
www.est
https://www.estate.dk/villa/8783/nederbjerrevej/270191/2018v252 not in loop
www.est
https://home.dk/boligkatalog/hedensted/8783/huse-villaer/bjerrevej_321_bjerre_7170000062.aspx not in loop
home.dk
https://danbolig.dk/bolig/hedensted/8783/villa/082v1262188-082 not in loop
danboli
https://home.dk/boligkatalog/hedensted/8783/huse-villaer/bjoernkaervej_6_7210000277.aspx not in loop
home.dk
https://www.realmaeglerne.dk/bolig/6960124-nebsager-kirkevej-26 not in loop
www.rea
https://home.dk/boligkatalog/hedensted/8783/huse-villaer/skovparken_7_7210000219.aspx not in loop
home.dk
https://www.realmaeglerne.dk/bolig/6960182-braaskovv

https://www.johnfrandsen.dk/sag/155354/skovlunden-66-8831-loegstrup/ not in loop
www.joh
https://www.johnfrandsen.dk/sag/369360/laerkebakken-12-8831-loegstrup/ not in loop
www.joh
https://danbolig.dk/bolig/viborg/8831/fritidsbolig/096-00-8929-096 not in loop
danboli
https://www.nybolig.dk/villa/8831/bavnevej/230042/20200029 not in loop
www.nyb
https://www.nybolig.dk/fritidshus/8831/isfuglebakken/230042/20200379 not in loop
www.nyb
https://www.nybolig.dk/villa/8831/snabevej/230042/20200157 not in loop
www.nyb
https://www.nybolig.dk/villa/8831/snabevej/230042/20200156 not in loop
www.nyb
https://www.nybolig.dk/villa/8831/snabevej/230042/20200155 not in loop
www.nyb
https://www.nybolig.dk/fritidshus/8831/isfuglebakken/230042/20200381 not in loop
www.nyb
https://danbolig.dk/bolig/viborg/8831/villa/096-00-8976-096 not in loop
danboli
https://www.nybolig.dk/fritidshus/8831/isfuglebakken/230042/20200380 not in loop
www.nyb
https://www.nybolig.dk/fritidshus/8831/isfuglebakken/230042/20200377 n

https://danbolig.dk/bolig/randers/8900/andelsbolig/2742019a004-274 not in loop
danboli
https://home.dk/boligkatalog/randers/8900/andelsboliger/hospitalsgade_7_st_tv_601f00875.aspx not in loop
home.dk
https://www.edc.dk/alle-boliger/randers-c/8900/hospitalsgade-7-2-tv/?sagsnr=89303840 not in loop
www.edc
https://www.estate.dk/andelsbolig/8900/sennelsgade/270221/0075 not in loop
www.est
https://www.johnfrandsen.dk/sag/327533/steen-blichers-gade-1b-8900-randers-c/ not in loop
www.joh
https://www.realmaeglerne.dk/bolig/472288-sennelsgade-17-1-th not in loop
www.rea
https://www.nybolig.dk/ejerlejlighed/8900/oestervold/102147/9579 not in loop
www.nyb
https://www.edc.dk/alle-boliger/randers-c/8900/hospitalsgade-3-1-th/?sagsnr=85503892 not in loop
www.edc
https://www.edc.dk/alle-boliger/randers-c/8900/jernbanegade-16-1-tv/?sagsnr=89303929 not in loop
www.edc
https://www.nybolig.dk/andelsbolig/8900/steenblichersgade/270141/2563 not in loop
www.nyb
https://www.edc.dk/alle-boliger/randers-c/8900/

https://www.edc.dk/alle-boliger/randers-n%C3%B8/8930/hadsundvej-76/?sagsnr=85503408 not in loop
www.edc
https://danbolig.dk/bolig/randers/8930/villa/2742020v140-274 not in loop
danboli
https://www.nybolig.dk/villa/8930/noerrebrogade/270141/2420 not in loop
www.nyb
https://www.nybolig.dk/villa/8930/lundsbjergvej/102147/8450 not in loop
www.nyb
https://danbolig.dk/bolig/randers/8930/villa/2742018v157-274 not in loop
danboli
http://www.landbogruppen.dk/sag/?sagsnr=2051919A&mgl=1706&DID=135&udbudsform=salg not in loop
ww.land
https://www.johnfrandsen.dk/sag/319299/birkevej-16-8930-randers-noe/ not in loop
www.joh
https://www.nybolig.dk/villa/8930/fjordbakken/270141/2538 not in loop
www.nyb
https://www.edc.dk/alle-boliger/randers-n%C3%B8/8930/lindevej-9/?sagsnr=85503893 not in loop
www.edc
https://www.realmaeglerne.dk/bolig/4721993-skovlyvej-2b not in loop
www.rea
https://www.edc.dk/alle-boliger/randers-n%C3%B8/8930/valmuevej-3/?sagsnr=89303685 not in loop
www.edc
https://www.estate.dk/vill

https://www.edc.dk/alle-boliger/alling%C3%A5bro/8961/byparken-62/?sagsnr=89002187 not in loop
www.edc
https://www.nybolig.dk/villa/8961/fyrreparken/250080/2310467 not in loop
www.nyb
https://www.edc.dk/alle-boliger/alling%C3%A5bro/8961/%C3%A5lykkevej-1/?sagsnr=89002055 not in loop
www.edc
https://www.edc.dk/alle-boliger/alling%C3%A5bro/8961/t%C3%B8jstrupvej-8/?sagsnr=89001922 not in loop
www.edc
https://www.johnfrandsen.dk/sag/120343/hovedvejen-102-8963-auning/ not in loop
www.joh
https://danbolig.dk/bolig/norddjurs/8963/villa/288-2018209-288 not in loop
danboli
https://www.johnfrandsen.dk/sag/131070/skovlunden-1-8963-auning/ not in loop
www.joh
https://www.johnfrandsen.dk/sag/353014/mortensensvej-20a-8963-auning/ not in loop
www.joh
https://www.johnfrandsen.dk/sag/178369/syrenvej-10-8963-auning/ not in loop
www.joh
https://www.johnfrandsen.dk/sag/120187/tjoernevej-2-8963-auning/ not in loop
www.joh
https://www.boligmaegleren.dk/redirect/?caseno=SVE-00003 not in loop
www.bol
https://ww

https://www.realmaeglerne.dk/bolig/47420140-heimdalsgade-21 not in loop
www.rea
https://www.edc.dk/alle-boliger/aalborg/9000/vejg%C3%A5rd-m%C3%B8llevej-3-1-tv/?sagsnr=90004912 not in loop
www.edc
https://home.dk/boligkatalog/aalborg/9000/raekkehuse/gammel_aavej_10a_moelholm_8060000255.aspx not in loop
home.dk
https://www.htbolig.dk/r/?Sagsnr=52-00317&MglID=2552&DID=135&udbudsform=salg not in loop
www.htb
https://www.htbolig.dk/r/?Sagsnr=52-00325&MglID=2552&DID=135&udbudsform=salg not in loop
www.htb
https://www.nybolig.dk/ejerlejlighed/9000/egeparken/240100/550049 not in loop
www.nyb
https://danbolig.dk/bolig/aalborg/9000/ejerlejlighed/310tk000273-310 not in loop
danboli
https://www.htbolig.dk/r/?Sagsnr=52-00051&MglID=2552&DID=135&udbudsform=salg not in loop
www.htb
https://www.realmaeglerne.dk/bolig/47420064-vester-faelledvej-1 not in loop
www.rea
https://www.edc.dk/alle-boliger/aalborg/9000/hadsundvej-126a/?sagsnr=91501074 not in loop
www.edc
https://home.dk/boligkatalog/aalborg/9000

https://www.edc.dk/alle-boliger/svenstrup-j/9230/sj%C3%A6llandsg%C3%A5rden-11/?sagsnr=92302056 not in loop
www.edc
https://www.htbolig.dk/r/?Sagsnr=51-00230&MglID=2551&DID=135&udbudsform=salg not in loop
www.htb
https://www.edc.dk/alle-boliger/svenstrup-j/9230/lobovej-9/?sagsnr=92301967 not in loop
www.edc
https://www.edc.dk/alle-boliger/svenstrup-j/9230/ellidsh%C3%B8jvej-82/?sagsnr=92302046 not in loop
www.edc
https://home.dk/boligkatalog/aalborg/9230/raekkehuse/tostrupvej_47a_8230000134.aspx not in loop
home.dk
https://www.edc.dk/alle-boliger/svenstrup-j/9230/doktorparken-74/?sagsnr=92301565 not in loop
www.edc
https://www.edc.dk/alle-boliger/svenstrup-j/9230/%C3%B8rnevej-57/?sagsnr=92302103 not in loop
www.edc
https://www.edc.dk/alle-boliger/svenstrup-j/9230/klokkevej-39/?sagsnr=92301860 not in loop
www.edc
https://www.edc.dk/alle-boliger/svenstrup-j/9230/vesterhedenvej-70/?sagsnr=92302011 not in loop
www.edc
https://www.edc.dk/alle-boliger/svenstrup-j/9230/snedronningens-vej-18/?sa

https://www.nybolig.dk/villa/9300/skovalleen/105085/933119 not in loop
www.nyb
https://www.realmaeglerne.dk/bolig/hans-dyres-vej-11 not in loop
www.rea
https://www.nybolig.dk/villa/9300/elisabethbillesvej/105085/933424 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/solsbaekvej/105085/933280 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/vinkelvej/105085/933341 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/hansbuchsvej/105085/933419 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/virkelyst/105085/933092 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/strandgade/105085/933269 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/virkelyst/105085/933415 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/gasvaerksvej/105085/933423 not in loop
www.nyb
https://www.nybolig.dk/ejerlejlighed/9300/soendergade/105085/933308 not in loop
www.nyb
https://www.nybolig.dk/villa/9300/boelsmindevej/105085/933289 not in loop
www.nyb
https://www.nybolig.dk/ejerlejlighed

www.rea
https://www.realmaeglerne.dk/bolig/soehesten-15 not in loop
www.rea
https://www.realmaeglerne.dk/bolig/soehesten-11 not in loop
www.rea
https://www.nybolig.dk/fritidshus/9370/rasmusjensensvej/105912/160444 not in loop
www.nyb
https://www.nybolig.dk/fritidshus/9370/houvest/105912/160541 not in loop
www.nyb
https://www.edc.dk/alle-boliger/hals/9370/vestvej-23/?sagsnr=93802157 not in loop
www.edc
https://www.nybolig.dk/villa/9370/lyngby/105912/hv7815 not in loop
www.nyb
https://www.nybolig.dk/grund/9370/strandvaenget/105912/hfg6740 not in loop
www.nyb
https://www.edc.dk/alle-boliger/hals/9370/saturnvej-8/?sagsnr=93800630 not in loop
www.edc
https://www.nybolig.dk/fritidshus/9370/svanevej/105352/2531 not in loop
www.nyb
https://www.edc.dk/alle-boliger/hals/9370/damvej-6/?sagsnr=93802003 not in loop
www.edc
https://www.realmaeglerne.dk/bolig/strandvejen-78 not in loop
www.rea
https://home.dk/boligkatalog/aalborg/9370/sommerhuse/ritavej_6_828-00463.aspx not in loop
home.dk
https://ho

https://www.edc.dk/alle-boliger/n%C3%B8rresundby/9400/lindholmsvej-131/?sagsnr=94002111 not in loop
www.edc
https://www.edc.dk/alle-boliger/n%C3%B8rresundby/9400/carl-klitgaards-vej-33-st-th/?sagsnr=94002149 not in loop
www.edc
https://www.edc.dk/alle-boliger/n%C3%B8rresundby/9400/gyldenlakvej-25/?sagsnr=94002103 not in loop
www.edc
https://www.nybolig.dk/andelsbolig/9430/sneumvej/240008/280503 not in loop
www.nyb
https://www.nybolig.dk/grund/9430/vesterhalnevej/240008/281209 not in loop
www.nyb
https://danbolig.dk/bolig/jammerbugt/9430/villa/14918003003-149 not in loop
danboli
https://maeglerhuset.dk/redirect.php?sagsnr=003997&mgl=2336 not in loop
maegler
https://www.htbolig.dk/r/?Sagsnr=51-00491&MglID=2551&DID=135&udbudsform=salg not in loop
www.htb
https://www.nybolig.dk/grund/9430/havrevaenget/240109/10051 not in loop
www.nyb
https://www.johnfrandsen.dk/sag/211451/gammel-thistedvej-15-9430-vadum/ not in loop
www.joh
https://maeglerhuset.dk/redirect.php?sagsnr=004096&mgl=158 not in 

https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/ingeborgvej-2-2406/?sagsnr=94802445 not in loop
www.edc
https://www.villadsenbolig.dk/redirect/?caseno=VE0000085 not in loop
www.vil
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/ingeborgvej-2-706/?sagsnr=94802432 not in loop
www.edc
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/stationsvej-20/?sagsnr=94801970 not in loop
www.edc
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/ingeborgvej-2-907/?sagsnr=94802215 not in loop
www.edc
https://www.nybolig.dk/villa/9480/loekkensvej/240064/25012628 not in loop
www.nyb
http://www.calundan.dk/Default.aspx?id=46&sagsnr=HJ7604B&mgl=1694&DID=135&udbudsform=salg not in loop
ww.calu
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/solkrogen-9/?sagsnr=94802679 not in loop
www.edc
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/soltoften-13/?sagsnr=94802690 not in loop
www.edc
https://www.edc.dk/alle-boliger/l%C3%B8kken/9480/ravnebakken-12/?sagsnr=98402025 not in loop
www.edc
https://www.edc.dk/

https://www.edc.dk/alle-boliger/hobro/9500/m%C3%B8llebakken-66/?sagsnr=95207227 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/randersvej-35/?sagsnr=95207087 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/b%C3%B8gevej-3/?sagsnr=95207104 not in loop
www.edc
https://danbolig.dk/bolig/viborg/9500/villa/47520200012-475 not in loop
danboli
https://www.edc.dk/alle-boliger/hobro/9500/koldb%C3%A6ksvej-18/?sagsnr=95207077 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/r%C3%B8jdrup-hede-2/?sagsnr=95206745 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/birkegade-6/?sagsnr=95207210 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/vibevej-17/?sagsnr=95205533 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/ranunkelvej-14/?sagsnr=95206716 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/d%C3%B8strupvej-175/?sagsnr=95207177 not in loop
www.edc
https://www.edc.dk/alle-boliger/hobro/9500/karlsvej-2

www.joh
https://www.johnfrandsen.dk/sag/288836/vestergade-10-9560-hadsund/ not in loop
www.joh
https://www.johnfrandsen.dk/sag/142234/gyvelvej-5-9560-hadsund/ not in loop
www.joh
https://www.edc.dk/alle-boliger/hadsund/9560/vestergade-30/?sagsnr=95602887 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/br%C3%B8ndb%C3%A6kken-72/?sagsnr=95802478 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/plantagevej-18b/?sagsnr=95602664 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/fjordbakken-9/?sagsnr=95602386 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/storegade-7/?sagsnr=95602886 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/stenkisten-31/?sagsnr=95801021 not in loop
www.edc
https://www.edc.dk/alle-boliger/hadsund/9560/%C3%B8ster-hurupvej-38/?sagsnr=95602822 not in loop
www.edc
https://www.johnfrandsen.dk/sag/372872/samsoegade-14-9560-hadsund/ not in loop
www.joh
https://www.nybolig.dk/villa/9560/oddevej/

https://www.johnfrandsen.dk/sag/246439/baunebjergvej-5-9640-farsoe/ not in loop
www.joh
https://www.realmaeglerne.dk/bolig/46600741-postbakken-11 not in loop
www.rea
https://www.realmaeglerne.dk/bolig/46600650-graaboelle-24 not in loop
www.rea
https://danbolig.dk/bolig/vesthimmerlands/9640/fritidsbolig/258v2000147-258 not in loop
danboli
https://www.realmaeglerne.dk/bolig/46600644-noerregade-8 not in loop
www.rea
https://www.realmaeglerne.dk/bolig/46600408-sundvej-48-hvalpsund not in loop
www.rea
https://www.nybolig.dk/villa/9640/loegstoervej/100343/3160 not in loop
www.nyb
https://www.maegleren.dk/sag.aspx?sagsnr=2917&mgl=2563&DID=135&udbudsform=salg not in loop
www.mae
https://www.johnfrandsen.dk/sag/356177/strandvejen-35a-9640-farsoe/ not in loop
www.joh
https://www.nybolig.dk/villa/9640/golfvaenget/240101/20165 not in loop
www.nyb
https://www.realmaeglerne.dk/bolig/46600527-pilevej-4-hvalpsund not in loop
www.rea
http://www.boligone.dk/sag.aspx?sagsnr=843-1335&mgl=2732&DID=135&udbu

https://www.edc.dk/alle-boliger/ranum/9681/s%C3%B8lvm%C3%A5gen-616/?sagsnr=96704641 not in loop
www.edc
https://www.nybolig.dk/grund/9690/boegevej/105145/5030 not in loop
www.nyb
https://www.nybolig.dk/grund/9690/kollerupklitvej/105145/5106 not in loop
www.nyb
https://www.nybolig.dk/grund/9690/egevej/105145/5152 not in loop
www.nyb
https://www.nybolig.dk/grund/9690/egevej/105145/5101 not in loop
www.nyb
https://www.johnfrandsen.dk/sag/344617/sidevaenget-4-9690-fjerritslev/ not in loop
www.joh
https://www.maegleren.dk/sag.aspx?sagsnr=4463&mgl=2563&DID=135&udbudsform=salg not in loop
www.mae
https://www.maegleren.dk/sag.aspx?sagsnr=3399&mgl=2563&DID=135&udbudsform=salg not in loop
www.mae
https://www.nybolig.dk/grund/9690/hjortdalvej/105145/5089 not in loop
www.nyb
https://www.nybolig.dk/grund/9690/hjortevej/105145/5038 not in loop
www.nyb
https://www.maegleren.dk/sag.aspx?sagsnr=4204&mgl=2563&DID=135&udbudsform=salg not in loop
www.mae
https://www.selvsalg.dk/bolig/4705/ahornvej_56-broe

['ww.lili',
 'www.nyb',
 'www.nyb',
 'www.nyb',
 'www.joh',
 'danboli',
 'home.dk',
 'www.joh',
 'home.dk',
 'ww.lili',
 'home.dk',
 'home.dk',
 'danboli',
 'www.joh',
 'home.dk',
 'home.dk',
 'danboli',
 'www.lok',
 'home.dk',
 'home.dk',
 'home.dk',
 'ww.lili',
 'www.joh',
 'home.dk',
 'home.dk',
 'www.nyb',
 'www.nyb',
 'www.nyb',
 'home.dk',
 'www.nyb',
 'ww.lili',
 'www.nyb',
 'home.dk',
 'www.nyb',
 'www.nyb',
 'www.nyb',
 'home.dk',
 'ww.lili',
 'www.nyb',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.nyb',
 'www.edc',
 'www.nyb',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'www.edc',
 'ww

In [170]:
over_100

{'https://home.dk': 7282,
 'https://www.nyb': 9459,
 'https://www.lok': 1203,
 'http://www.elto': 180,
 'https://www.edc': 10535,
 'https://www.est': 2481,
 'https://www.bri': 121,
 'https://www.rea': 2560,
 'https://danboli': 6492,
 'http://www.lili': 295,
 'http://www.paul': 119,
 'https://www.sel': 315,
 'https://www.car': 338,
 'https://www.bol': 116,
 'http://tracking': 611,
 'http://www.boli': 347,
 'http://www.itva': 159,
 'https://www.min': 207,
 'http://www.vest': 189,
 'https://www.tho': 271,
 'http://www.thom': 105,
 'https://www.pla': 113,
 'http://www.land': 177,
 'https://www.joh': 1548,
 'https://www.mæg': 119,
 'https://www.mik': 221,
 'http://www.agri': 125,
 'https://grundsa': 184,
 'https://www.mae': 189,
 'http://grundsal': 131,
 'https://byggegr': 139,
 'https://www.htb': 141,
 'https://maegler': 155,
 'https://www.vil': 179,
 'http://www.calu': 110}

In [117]:
"https://www.estate.dk/villa/5683/kildevej/270085/115819"[8:20]

'www.estate.d'

In [118]:
test ="https://www.estaldo.com/listing/c528a1bcf9b947a29fec7415d1087451"
test[8:20]

'www.estaldo.'

In [156]:
bodys = get_reviews(df)
bodys

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




['i det smukke grejsdalen finder i denne skønne villa, på højtbeliggende grund med god udsigt til skoven som omkranser hele området. grejsdalen byder bl.a. på skole, sportsforening, og så er der ikke langt til vejle centrum. motorvejsnettet ligger heller ikke langt væk, man skal blot køre op igennem grejs, og så er man ved motorvejen.\xa0villaen byder på hele 197 kvadratmeter bolig, fordelt på to plan. i stueplan træder i ind i et stort og lækkert bryggers med god plads til vaskemaskine og tørretumbler, samt god garderobeplads.\xa0herfra træder man ind i husets egentlige entre, hvorfra der er adgang til husets dejligt store badeværelse. fra entreen går man videre ind i fordelingsgangen hvor der er gæstetoilet, og trappe til første sal.\xa0forbi trappen træder man ind i køkkenet, som er i fantastisk god åben forbindelse til alrum, og også til den store lyse stue som ligger i forlængelse af alrummet.\xa0på første sal kommer i op til endnu en stue, og her er der også udgang til altan. her

In [157]:
all_text = list()
for string in bodys[0:3]:
    all_text.extend(preprocess_text(string))
    
test_counts = words_count(all_text)

In [158]:
test_counts

{'i': 31,
 'det': 5,
 'smukke': 1,
 'grejsdalen': 2,
 'finder': 3,
 'denne': 4,
 'skønne': 2,
 'villa,': 2,
 'på': 12,
 'højtbeliggende': 1,
 'grund': 1,
 'med': 15,
 'god': 7,
 'udsigt': 1,
 'til': 18,
 'skoven': 2,
 'som': 6,
 'omkranser': 1,
 'hele': 4,
 'området.': 2,
 'byder': 2,
 'bl.a.': 1,
 'skole,': 1,
 'sportsforening,': 1,
 'og': 31,
 'så': 5,
 'er': 25,
 'der': 9,
 'ikke': 5,
 'langt': 3,
 'vejle': 5,
 'centrum.': 1,
 'motorvejsnettet': 1,
 'ligger': 4,
 'heller': 1,
 'væk,': 1,
 'man': 15,
 'skal': 2,
 'blot': 1,
 'køre': 1,
 'op': 3,
 'igennem': 2,
 'grejs,': 1,
 'ved': 2,
 'motorvejen.': 1,
 'villaen': 1,
 '197': 1,
 'kvadratmeter': 1,
 'bolig,': 1,
 'fordelt': 2,
 'to': 3,
 'plan.': 3,
 'stueplan': 4,
 'træder': 3,
 'ind': 4,
 'et': 11,
 'stort': 1,
 'lækkert': 1,
 'bryggers': 1,
 'plads': 2,
 'vaskemaskine': 1,
 'tørretumbler,': 1,
 'samt': 8,
 'garderobeplads.': 1,
 'herfra': 1,
 'husets': 4,
 'egentlige': 1,
 'entre,': 1,
 'hvorfra': 1,
 'adgang': 3,
 'dejligt': 1,
 

In [52]:
df["estateUrl"].values[201]

'https://home.dk/boligkatalog/koebenhavn/1428/ejerlejligheder/andreas_bjoerns_gade_8_1_th_1160000295.aspx'

In [175]:

link = "http://www.agriteam.dk/sag.asp?sagsnr=H-2802&mgl=1419&DID=135&udbudsform=salg"

response = requests.get(link)
html = response.text
soup = BeautifulSoup(html,"html.parser")
ids = soup.find_all("div",{"class":"col-md-8 col-sm-7 hidden-xs text-box desktop"})
print(len(ids))
for p in ids[0:1]:
    print(p.text)

1
Lystejendom med ca. 1,6 ha få km fra Lund nabo til Vinten SkovRødstensejendom beliggende med kort afstand til Lund og ca. 4 km til motorvejs af-/tilkørsel. Fra ejendommen er der kun ca. 9 km til Horsens midtby. Dejlig lystejendom, pæn og velholdt, bestående af et stort rødstensstuehus samt en rødstens udhusbygning, der omkranser gårdspladsen, der har to ind-/udkørsler. Bag udhusbygningen er der en maskinhal, dybstrøelsesstald, som ligger placeret med direkte udgang til ejendommens jordareal på ca. 1,6 ha. Stuehuset har et samlet beboelsesareal på 259 m2 fordelt på 1 1/2 plan, som løbende gennem årene har gennemgået modernisering og tilbygning. Udhusbygningen er udnyttet med henholdsvis et fyrrum installeret med et stokerfyr, værksted, depotrum samt garage. Maskinhallen ligger lidt for sig selv, placeret således der er direkte udgang til jordarealet, og vil være velegnet at anvende som evt. løsdriftsstald. På taget af maskinhalen er ejendommens solcelleanlæg. På hver side af bygningsp

In [176]:
link[8:15]

'ww.agri'