# Scraping the bottom of the barrel

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import ast
import os
import pickle
import re
import time
import urllib
import webbrowser

import pandas as pd

from providers import ParariusProvider
from utils import Entry

In [3]:
# Search locations per listing site. The Funda entry is an absolute URL; the
# Pararius entry is a site-relative path that is handed to ParariusProvider.
urls = dict(
  Funda="https://www.funda.nl/zoeken/huur?selected_area=%5B%22den-haag%22%5D&price=%22-1750%22",
  Pararius="/apartments/den-haag/0-1750/2-bedrooms",
)

# "Too good to be true" cut-off: a listing whose price / area ratio falls
# below this value is treated as a probable fake.
tgtbt_area_price_threshold = 13
# "Tiny room" cut-off: a listing whose area / rooms ratio falls below this
# value is treated as having rooms that are too small.
tiny_room_threshold = 15

In [4]:
import requests
from bs4 import BeautifulSoup

# Probe request: fetch Funda's rental search landing page and parse its HTML.
# The explicit timeout keeps the cell from blocking forever when the server
# never answers; requests applies no timeout unless one is given.
response = requests.get("https://www.funda.nl/zoeken/huur", timeout=10)
soup = BeautifulSoup(response.content, 'html.parser')
# NOTE(review): the recorded output is titled "Je bent bijna op de pagina die
# je zoekt", which looks like an interstitial/bot-check page rather than
# search results - confirm before building a Funda scraper on this response.
soup

<!DOCTYPE html>

<html lang="nl">
<head>
<meta charset="utf-8"/>
<title>Je bent bijna op de pagina die je zoekt [funda]</title>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<script>
    (function(){
        /*! grunt-grunticon Stylesheet Loader - v2.1.6 | https://github.com/filamentgroup/grunticon | (c) 2015 Scott Jehl, Filament Group, Inc. | MIT license. */

!function(){function e(e,t){function n(){!o&&t&&(o=!0,t.call(e))}var o;e.addEventListener&&e.addEventListener("load",n),e.attachEvent&&e.attachEvent("onload",n),"isApplicationInstalled"in navigator&&"onloadcssdefined"in e&&e.onloadcssdefined(n)}!function(e){"use strict";var t=function(t,n,o){function r(e){if(d.body)return e();setTimeout(function(){r(e)})}function a(){c.addEventListener&&c.removeEventListener("load",a),c.media=o||"all"}var i,d=e.document,c=d.createElement("link");if(n)i=n;else{var s=(d.body||d.getElementsByTagName("head")[0]).childNodes;i=s[s.length-1]}var u=d.styleSheets;c.rel="stylesheet"

In [5]:
# Run the Pararius scraper for the configured search path. Judging by the
# method names, the first call collects the search-result entries and the
# second enriches them from the detail pages (TODO confirm in providers.py).
# NOTE(review): in the recorded tables below, titles do not match the
# address/description of the same row (e.g. row 0: title "Flat
# Bezuidenhoutseweg 365" vs. street "Frederik Hendriklaan 225 I") - verify
# that the detail results are attached to the matching entry.
pararius_search_path = urls["Pararius"]
pp = ParariusProvider(pararius_search_path)
pp.query_entries()
pp.get_detailed_results()

100%|██████████| 3/3 [00:01<00:00,  2.20it/s]
100%|██████████| 86/86 [00:04<00:00, 20.53it/s]


  self.entries[i].description = BeautifulSoup(soup.find("div", class_="listing-detail-description__additional listing-detail-description__additional--collapsed").get_text().strip()).get_text()


In [6]:
# Tabulate the scraped entries and derive the three ratios that the
# inspection and filter cells further down rely on.
df = pd.DataFrame(pp.entries).assign(**{
  'Price per area': lambda frame: frame['price'] / frame['area'],
  'Price per room': lambda frame: frame['price'] / frame['rooms'],
  'Area per room': lambda frame: frame['area'] / frame['rooms'],
})

# Row count before any filtering; reported later next to the eligible count.
initial_rows = len(df)

df.head()

Unnamed: 0,title,link,location,price,area,rooms,furnished,description,address,rented,Price per area,Price per room,Area per room
0,Flat Bezuidenhoutseweg 365,https://www.pararius.com/apartment-for-rent/de...,2594 AR Den Haag (Bezuidenhout-Oost),1500,82,4,Part-furnished,Description\nFREDERIK HENDRIKLAAN 225-I THE HA...,"{'street': 'Frederik Hendriklaan 225 I', 'city...",,18.292683,375.0,20.5
1,Flat Van Leeuwenhoekstraat,https://www.pararius.com/apartment-for-rent/de...,2516 GV Den Haag (Laakhaven-Oost),1395,84,4,Part-furnished,Description\nPlease note: the displayed photos...,"{'street': 'Brouwersgracht', 'city': 'Den Haag...",,16.607143,348.75,21.0
2,Flat Loosduinse Uitleg,https://www.pararius.com/apartment-for-rent/de...,2553 AV Den Haag (Kom Loosduinen),1255,88,3,,Description\nFully renovated and spacious 2 be...,"{'street': 'Beeklaan', 'city': 'Den Haag', 'po...",,14.261364,418.333333,29.333333
3,Flat Joan Maetsuyckerstraat,https://www.pararius.com/apartment-for-rent/de...,2593 ZP Den Haag (Bezuidenhout-Oost),1375,82,3,Furnished,Description\nDelightful 3/4-room apartment on ...,"{'street': 'Bezuidenhoutseweg 365', 'city': 'D...",Rented under option,16.768293,458.333333,27.333333
4,Flat Korte Houtstraat,https://www.pararius.com/apartment-for-rent/de...,2511 DA Den Haag (Uilebomen),1395,66,3,Part-furnished,Description\nLoosduinse Uitleg 52 tm 206 te De...,"{'street': 'Loosduinse Uitleg', 'city': 'Den H...",,21.136364,465.0,22.0


In [7]:
# Listings that look too good to be true: price per unit of area below the cut-off
df.loc[df['Price per area'].lt(tgtbt_area_price_threshold)]

Unnamed: 0,title,link,location,price,area,rooms,furnished,description,address,rented,Price per area,Price per room,Area per room
30,Flat Ruimzicht,https://www.pararius.com/apartment-for-rent/de...,"2543 RP Den Haag (Zijden, Steden en Zichten)",425,59,3,Shell,Description\nLovely furnished apartment with p...,"{'street': 'Stadhoudersplantsoen', 'city': 'De...",Rented under option,7.20339,141.666667,19.666667


In [8]:
# Listings with tiny rooms: average area per room below the cut-off
df.loc[df['Area per room'].lt(tiny_room_threshold)]

Unnamed: 0,title,link,location,price,area,rooms,furnished,description,address,rented,Price per area,Price per room,Area per room
9,Flat Brouwersgracht,https://www.pararius.com/apartment-for-rent/de...,2512 ER Den Haag (Zuidwal),1381,40,3,Part-furnished,Description\nThis upholstered three bedroom ap...,"{'street': 'Schoolstraat', 'city': 'Den Haag',...",,34.525,460.333333,13.333333
53,Flat Brouwersgracht,https://www.pararius.com/apartment-for-rent/de...,2512 ER Den Haag (Zuidwal),1281,30,3,Furnished,Description\nPlease note: the displayed photos...,"{'street': 'Brouwersgracht', 'city': 'Den Haag...",,42.7,427.0,10.0


In [9]:
# Needs a Permit: listings whose description mentions "permit" (any casing).
# na=False makes a missing description count as "no match"; without it the
# mask contains NaN and pandas refuses to use it for boolean indexing.
df[df["description"].str.contains('permit', case=False, na=False)]

Unnamed: 0,title,link,location,price,area,rooms,furnished,description,address,rented,Price per area,Price per room,Area per room
0,Flat Bezuidenhoutseweg 365,https://www.pararius.com/apartment-for-rent/de...,2594 AR Den Haag (Bezuidenhout-Oost),1500,82,4,Part-furnished,Description\nFREDERIK HENDRIKLAAN 225-I THE HA...,"{'street': 'Frederik Hendriklaan 225 I', 'city...",,18.292683,375.0,20.5
3,Flat Joan Maetsuyckerstraat,https://www.pararius.com/apartment-for-rent/de...,2593 ZP Den Haag (Bezuidenhout-Oost),1375,82,3,Furnished,Description\nDelightful 3/4-room apartment on ...,"{'street': 'Bezuidenhoutseweg 365', 'city': 'D...",Rented under option,16.768293,458.333333,27.333333
5,Flat Korte Houtstraat,https://www.pararius.com/apartment-for-rent/de...,2511 DA Den Haag (Uilebomen),1395,67,3,Part-furnished,Description\nFully furnished apartment in Bezu...,"{'street': 'Joan Maetsuyckerstraat', 'city': '...",,20.820896,465.0,22.333333
7,Flat Schoolstraat,https://www.pararius.com/apartment-for-rent/de...,2511 AX Den Haag (Kortenbos),1700,90,4,Part-furnished,Description\nSituated in a very populair neigh...,"{'street': 'Berberisstraat', 'city': 'Den Haag...",,18.888889,425.0,22.5
10,Flat Copernicusstraat,https://www.pararius.com/apartment-for-rent/de...,2561 VT Den Haag (Valkenboskwartier),1450,60,3,Furnished,Description\nWell-maintained 3-room apartment ...,"{'street': 'Vrouw Avenweg 12 C', 'city': 'Den ...",Under option,24.166667,483.333333,20.0
15,Flat Parkweg,https://www.pararius.com/apartment-for-rent/de...,2585 JK Den Haag (Van Stolkpark en Schevenings...,1695,89,3,Part-furnished,"Description\nKORTE HOUTSTRAAT, CENTRE, THE HAG...","{'street': 'Korte Houtstraat', 'city': 'Den Ha...",Rented under option,19.044944,565.0,29.666667
16,Flat Stadhoudersplantsoen,https://www.pararius.com/apartment-for-rent/de...,2517 JL Den Haag (Zorgvliet),1695,67,3,Furnished,Description\nSpacious and Bright 2-Bedroom Apa...,"{'street': '3e Eeldepad 40', 'city': 'Den Haag...",Under option,25.298507,565.0,22.333333
22,Flat 3e Eeldepad 40,https://www.pararius.com/apartment-for-rent/de...,2541 JJ Den Haag (Morgenstond-Zuid),1350,59,3,,Description\n**English text below\nGreat 3-roo...,"{'street': 'Frederik Hendriklaan 66 B', 'city'...",,22.881356,450.0,19.666667
25,Flat Weissenbruchstraat,https://www.pararius.com/apartment-for-rent/de...,2596 GC Den Haag (Nassaubuurt),1395,65,3,Furnished,Description\n**English text below\nAre you int...,"{'street': 'Okkernootstraat 109', 'city': 'Den...",,21.461538,465.0,21.666667
26,Flat Nicolaïstraat 80 A,https://www.pararius.com/apartment-for-rent/de...,2517 TD Den Haag (Stadhoudersplantsoen),1395,70,3,Furnished,Description\nSuper nice house available locate...,"{'street': 'Abrikozenstraat 35', 'city': 'Den ...",,19.928571,465.0,23.333333


In [10]:
# Listings that already carry a rental status (e.g. "Rented under option")
df[df["rented"].notna()]

Unnamed: 0,title,link,location,price,area,rooms,furnished,description,address,rented,Price per area,Price per room,Area per room
3,Flat Joan Maetsuyckerstraat,https://www.pararius.com/apartment-for-rent/de...,2593 ZP Den Haag (Bezuidenhout-Oost),1375,82,3,Furnished,Description\nDelightful 3/4-room apartment on ...,"{'street': 'Bezuidenhoutseweg 365', 'city': 'D...",Rented under option,16.768293,458.333333,27.333333
10,Flat Copernicusstraat,https://www.pararius.com/apartment-for-rent/de...,2561 VT Den Haag (Valkenboskwartier),1450,60,3,Furnished,Description\nWell-maintained 3-room apartment ...,"{'street': 'Vrouw Avenweg 12 C', 'city': 'Den ...",Under option,24.166667,483.333333,20.0
15,Flat Parkweg,https://www.pararius.com/apartment-for-rent/de...,2585 JK Den Haag (Van Stolkpark en Schevenings...,1695,89,3,Part-furnished,"Description\nKORTE HOUTSTRAAT, CENTRE, THE HAG...","{'street': 'Korte Houtstraat', 'city': 'Den Ha...",Rented under option,19.044944,565.0,29.666667
16,Flat Stadhoudersplantsoen,https://www.pararius.com/apartment-for-rent/de...,2517 JL Den Haag (Zorgvliet),1695,67,3,Furnished,Description\nSpacious and Bright 2-Bedroom Apa...,"{'street': '3e Eeldepad 40', 'city': 'Den Haag...",Under option,25.298507,565.0,22.333333
20,Flat Noordwal,https://www.pararius.com/apartment-for-rent/de...,2513 DS Den Haag (Kortenbos),1100,80,3,Furnished,Description\nBeautiful apartment in 's-Gravenh...,"{'street': 'Stuyvesantplein', 'city': 'Den Haa...",Rented under option,13.75,366.666667,26.666667
21,Flat Vrouw Avenweg 12 C,https://www.pararius.com/apartment-for-rent/de...,2493 WM Den Haag (De Lanen),1500,77,3,Part-furnished,Description\nFOR RENT: This beautiful and brig...,"{'street': 'Van Alkemadelaan 966', 'city': 'De...",Rented under option,19.480519,500.0,25.666667
30,Flat Ruimzicht,https://www.pararius.com/apartment-for-rent/de...,"2543 RP Den Haag (Zijden, Steden en Zichten)",425,59,3,Shell,Description\nLovely furnished apartment with p...,"{'street': 'Stadhoudersplantsoen', 'city': 'De...",Rented under option,7.20339,141.666667,19.666667
33,House Larensestraat,https://www.pararius.com/house-for-rent/den-ha...,2574 VL Den Haag (Rustenburg),1050,70,3,,"Description\nLARENSESTRAAT, RUSTENBURG, € 1050...","{'street': 'Larensestraat', 'city': 'Den Haag'...",Rented under option,15.0,350.0,23.333333
35,Flat De Savornin Lohmanlaan 29,https://www.pararius.com/apartment-for-rent/de...,2566 AH Den Haag (Bosjes van Pex),1495,93,4,Part-furnished,Description\nThis spacious and bright three-be...,"{'street': 'Groot Hertoginnelaan 173 A', 'city...",Rented under option,16.075269,373.75,23.25
43,Flat Altingstraat 140,https://www.pararius.com/apartment-for-rent/de...,2593 TA Den Haag (Bezuidenhout-Oost),1450,75,3,Furnished,Description\nLovely furnished 2 bedroom corner...,"{'street': 'Harstenhoekweg', 'city': 'Den Haag...",Rented under option,19.333333,483.333333,25.0


In [11]:
# Keep only eligible listings: the exact complement of the four inspection
# views above. The comparisons are inclusive (>=) because those views flag
# rows strictly below a threshold; a strict ">" here would silently drop rows
# sitting exactly on a threshold without them ever being shown as rejected.
eligible = (
  (df['Price per area'] >= tgtbt_area_price_threshold)
  & (df['Area per room'] >= tiny_room_threshold)
  # na=False: a missing description counts as "does not mention a permit"
  # instead of putting NaN into the mask, which cannot be negated or indexed.
  & ~df["description"].str.contains('permit', case=False, na=False)
  & df["rented"].isnull()
)
df = df[eligible]

print(f"{df.shape[0]}/{initial_rows} rows eligible")

32/86 rows eligible


In [12]:

# Optionally hide listings whose titles were recorded by a previous run, and
# record the titles shown this time. The result is always left in `df2`.
proceed = input("Are you sure you want to filter by already looked at?(y/n): ")

if proceed.strip().lower() == "y":
  output_file_name = "output.dat"

  # Titles recorded by earlier runs; stays empty on the very first run.
  array = []
  if os.path.isfile(output_file_name):
    with open(output_file_name, "r") as f:
      stored = f.read().strip()
    # The file holds the repr of a list of strings (see the write below), so
    # parse it as a Python literal. Stripping quotes and splitting on ", "
    # by hand mangles titles containing an apostrophe or a comma, and turns
    # an empty list into [''].
    if stored:
      array = list(ast.literal_eval(stored))

  # Computed outside the file check so that df2 also exists on the first run,
  # when there is no history file yet (previously a NameError at the write).
  # NOTE(review): titles are not unique (the tables above show two
  # "Flat Brouwersgracht" rows), so a new listing on an already-seen street
  # is hidden too - the link column may be a better key.
  df2 = df[~df["title"].isin(array)]
  print(f"{df2.shape[0]} new Properties!")

  with open(output_file_name, "w") as f:
    array += list(df2["title"])
    f.write(str(array))
else:
  df2 = df

# Must be the cell's last top-level expression to be rendered; nested inside
# the if-branch it produced no output.
df2.head()

In [None]:
# Open every remaining listing in its own browser tab (one tab per row of df2).
links = df2["link"].tolist()
for link in links:
  webbrowser.open_new_tab(link)