# Huurhuis Webscraper Tests

Dit is een testfile die alle tests voor de verschillende componenten bevat.

## Imports:

In [1]:
import logging
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urljoin

import psycopg2
import requests
from bs4 import BeautifulSoup
from psycopg2 import sql

# Import configuration
from config import DATABASE, EMAIL
# Import data access
from data_access import BrokerAgency, DataAccess, Property
# Import logging service
from log_service import LogService, get_logger
# Import mail service
from mail_service import MailService

2025-05-09 12:02:10,816 - [MainThread] - root - INFO - ===== HuurhuisWebscraper Started =====
2025-05-09 12:02:10,817 - [MainThread] - root - INFO - Start time: 2025-05-09 12:02:10
2025-05-09 12:02:10,817 - [MainThread] - root - INFO - Start time: 2025-05-09 12:02:10


## Logging Service Tests

In [None]:
# Create the notebook-wide logger via the project's get_logger helper
logger = get_logger("HuurhuisWebscraper")

# test_logging() below logs through this module-level logger; the other test
# functions create their own named logger, which shadows this one locally.
# The LogService class manages writing to both console and file


def test_logging():
    """Smoke-test the logging stack from start to end.

    Drives a fresh ``LogService`` through its start, broker-processing,
    e-mail-status and end calls, and emits one info, one warning and one
    error record through the module-level ``logger`` in between.
    """
    service = LogService()
    service.log_app_start()

    # One plain record per severity, emitted in this order.
    plain_records = (
        (logger.info, "This is a test info message"),
        (logger.warning, "This is a test warning message"),
        (logger.error, "This is a test error message"),
    )
    for emit, text in plain_records:
        emit(text)

    service.log_broker_processing("Test Broker", 5, 2)
    service.log_email_sent(True, ["test@example.com"])
    service.log_app_end(5, 2)

    print("Check the logs directory for the log file")


# Run the logging smoke test (comment this call out to skip it)
test_logging()

## Data Access Tests

In [None]:
# Example of using the DataAccess class
def test_data_access():
    """Check that a database connection can be opened and queried.

    Runs ``SELECT 1`` on a connection obtained from ``DataAccess`` and logs
    the fetched row. Any exception is logged as an error instead of being
    raised.
    """
    data_access = DataAccess()
    log = get_logger("DataAccess")

    try:
        # Both context managers are entered left to right and exited in
        # reverse order, exactly like two nested ``with`` statements.
        with data_access.get_connection() as conn, conn.cursor() as cur:
            cur.execute("SELECT 1")
            row = cur.fetchone()
            log.info(f"Database connection successful: {row}")
    except Exception as exc:
        log.error(f"Database connection failed: {exc}")


# Run the database connection check (comment this call out to skip it)
test_data_access()

## Mail Service Tests

In [None]:
# Example of using the MailService
def test_mail_service():
    """Send a notification e-mail for one sample property and log the result.

    The value returned by ``send_new_properties_email`` is handed to
    ``LogService.log_email_sent`` together with ``EMAIL["recipients"]``.
    """
    mailer = MailService()

    # A single hand-written listing used as the e-mail payload.
    sample_property = {
        "adres": "Teststraat 123",
        "naam_dorp_stad": "Amsterdam",
        "huurprijs": "€ 1.500 per maand",
        "oppervlakte": "80m²",
        "link": "https://example.com/property/1",
    }

    reporter = LogService()
    sent_ok = mailer.send_new_properties_email(
        nieuwe_properties=[sample_property]
    )
    reporter.log_email_sent(sent_ok, EMAIL["recipients"])


# Run the mail service test (comment this call out to skip it)
test_mail_service()

## Webscraper Tests

In [None]:
# Import the scrapers from the separate modules
from scrapers.base_scraper import BaseScraper
from scrapers.pararius_scraper import ParariusScraper
from scrapers.scraper_factory import ScraperFactory
from scrapers.vdbunt_scraper import VdBuntScraper


# Example usage
def test_scraper(website: str):
    """Smoke-test a scraper: fetch its listings and log the first few.

    Args:
        website: Name of the website to scrape, passed to
            ``ScraperFactory.get_scraper``.

    Returns:
        The listings returned by the scraper, or an empty list when any step
        raised an exception (the error is logged, not re-raised).
    """
    logger = get_logger("ScraperTest")
    try:
        scraper = ScraperFactory.get_scraper(website)
        logger.info(f"Testing {website} scraper...")

        # Get the first page with properties
        listings = scraper.get_property_listings()
        logger.info(f"Found: {len(listings)} properties")

        # Show the first 3 properties
        for number, listing in enumerate(listings[:3], start=1):
            logger.info(f"Property {number}: {listing.get('adres')}")
            logger.debug(f"Full details: {listing}")

            # Look the link up with .get(), like 'adres' above: a listing
            # without a "link" key must not raise KeyError, which the broad
            # handler below would turn into an empty result for the whole run.
            link = listing.get("link")
            if link:
                logger.info("Getting detailed information...")
                details = scraper.get_property_details(link)
                logger.debug(f"Details: {details}")

        return listings
    except Exception as e:
        logger.error(f"Error testing {website} scraper: {e}")
        return []


# Run the generic scraper test against the "vdbunt" scraper
vdbunt_listings = test_scraper("vdbunt")

## Zonnenberg Scraper Tests

In [None]:
# Test the Zonnenberg scraper
import requests
from bs4 import BeautifulSoup

from scrapers.zonnenberg_scraper import ZonnenbergScraper


def test_zonnenberg_scraper():
    """Fetch listings with ``ZonnenbergScraper`` and log a short preview.

    Returns:
        The value returned by ``get_property_listings()``.
    """
    log = get_logger("ZonnenbergTest")
    zonnenberg = ZonnenbergScraper()
    log.info("Testing Zonnenberg scraper...")

    found = zonnenberg.get_property_listings()
    log.info(f"Found: {len(found)} properties")

    # Preview at most three listings, numbered from 1.
    preview = found[:3]
    for number, entry in enumerate(preview, start=1):
        log.info(f"Property {number}: {entry.get('adres')}")
        log.info(f"  - Plaats: {entry.get('naam_dorp_stad')}")
        log.info(f"  - Prijs: {entry.get('huurprijs')}")
        log.info(f"  - Oppervlakte: {entry.get('oppervlakte')}")
        log.info(f"  - Link: {entry.get('link')}")

    return found


# Run the Zonnenberg scraper test and keep its listings (comment out to skip)
zonnenberg_listings = test_zonnenberg_scraper()

## Pararius Scraper Tests

In [None]:
# Test the Pararius scraper and fix the postal code issue
import re

from scrapers.pararius_scraper import ParariusScraper


def test_pararius_scraper():
    """Exercise a ``ParariusScraper`` subclass that cleans up city names.

    Defines a local subclass that removes the postal code and any
    parenthesised suffix from ``naam_dorp_stad``, logs the cleaning result
    for a few sample strings, then fetches listings and logs the first three.

    Returns:
        The enhanced scraper instance that was used.
    """
    log = get_logger("ParariusTest")

    class EnhancedParariusScraper(ParariusScraper):
        """``ParariusScraper`` whose listings carry a cleaned city name."""

        def clean_city_name(self, text):
            """Return *text* without postal code or parenthesised suffix."""
            # Start from the scraper's regular text clean-up.
            normalised = self.clean_text(text)

            # Drop postal-code patterns such as '3512 AG ' or '1234AB '.
            without_postcode = re.sub(r"\d{4}\s*[A-Z]{2}\s*", "", normalised)

            # Keep only what precedes the first opening parenthesis.
            leading_part = re.match(r"([^(]+)", without_postcode)
            if leading_part is None:
                return without_postcode
            return leading_part.group(1).strip()

        def get_property_listings(self, page_num=1):
            """Return the parent's listings with cleaned city names."""
            listings = super().get_property_listings(page_num)

            for item in listings:
                raw_city = item["naam_dorp_stad"]
                # "N/A" is left untouched.
                if raw_city == "N/A":
                    continue
                item["naam_dorp_stad"] = self.clean_city_name(raw_city)

            return listings

    enhanced = EnhancedParariusScraper()
    log.info("Testing Pararius scraper with postal code removal...")

    # Sample strings with and without postal code / parenthesised suffix.
    samples = (
        "3512 AG Utrecht (Domplein, Neude, Janskerkhof)",
        "1234AB Amsterdam (Centrum)",
        "9876 ZX Groningen",
        "Utrecht (Binnenstad)",
    )
    for sample in samples:
        result = enhanced.clean_city_name(sample)
        log.info(f"Original: '{sample}' -> Cleaned: '{result}'")

    log.info("Fetching actual property listings...")
    found = enhanced.get_property_listings()
    log.info(f"Found: {len(found)} properties")

    # Preview at most three listings, numbered from 1.
    for number, entry in enumerate(found[:3], start=1):
        log.info(f"Property {number}: {entry.get('adres')}")
        log.info(f"  - Plaats: {entry.get('naam_dorp_stad')}")
        log.info(f"  - Prijs: {entry.get('huurprijs')}")
        log.info(f"  - Oppervlakte: {entry.get('oppervlakte')}")
        log.info(f"  - Link: {entry.get('link')}")

    return enhanced


# Run the Pararius test and keep the enhanced scraper it returns (comment out to skip)
enhanced_pararius_scraper = test_pararius_scraper()

## Ditters Scraper Tests

In [None]:
from scrapers.ditters_scraper import DittersScraper


# Test the Ditters scraper
def test_ditters_scraper():
    """Fetch listings with ``DittersScraper`` and log a short preview.

    Returns:
        The value returned by ``get_property_listings()``.
    """
    log = get_logger("DittersTest")
    ditters = DittersScraper()
    log.info("Testing Ditters scraper...")

    found = ditters.get_property_listings()
    log.info(f"Found: {len(found)} properties")

    # Preview at most three listings, numbered from 1.
    preview = found[:3]
    for number, entry in enumerate(preview, start=1):
        log.info(f"Property {number}: {entry.get('adres')}")
        log.info(f"  - Plaats: {entry.get('naam_dorp_stad')}")
        log.info(f"  - Prijs: {entry.get('huurprijs')}")
        log.info(f"  - Oppervlakte: {entry.get('oppervlakte')}")
        log.info(f"  - Link: {entry.get('link')}")

    return found


# Run the Ditters scraper test and keep its listings
ditters_listings = test_ditters_scraper()

## InterHouse Scraper Tests

In [2]:
from scrapers.interhouse_scraper import InterHouseScraper


# Test the InterHouse scraper
def test_interhouse_scraper():
    """Fetch listings with ``InterHouseScraper`` and log a short preview.

    Before scraping, logs whether the ``playwright`` package can be imported.
    A missing package only produces a warning; the scraper is called either
    way.

    Returns:
        The value returned by ``get_property_listings()``.
    """
    logger = get_logger("InterHouseTest")
    scraper = InterHouseScraper()
    logger.info("Testing InterHouse scraper...")

    # Check if Playwright is available and warn if not
    try:
        # Imported only as an availability probe; the name itself is unused.
        from playwright.sync_api import sync_playwright  # noqa: F401
        logger.info("Playwright is installed and will be used for JavaScript rendering")
    except ImportError:
        # Adjacent string literals are concatenated verbatim, so the
        # separating space must be part of one of them.
        logger.warning(
            "Playwright is not installed. Run 'pip install playwright && playwright install' for better results. "
            "Falling back to basic requests which may result in incomplete data."
        )

    # Get property listings
    listings = scraper.get_property_listings()
    logger.info(f"Found: {len(listings)} properties")

    # Show the first 3 properties
    for i, listing in enumerate(listings[:3]):
        logger.info(f"Property {i+1}: {listing.get('adres')}")
        logger.info(f"  - Plaats: {listing.get('naam_dorp_stad')}")
        logger.info(f"  - Prijs: {listing.get('huurprijs')}")
        logger.info(f"  - Oppervlakte: {listing.get('oppervlakte')}")
        logger.info(f"  - Link: {listing.get('link')}")

    return listings


# Run the InterHouse scraper test and keep its listings (comment out to skip)
interhouse_listings = test_interhouse_scraper()

2025-05-09 12:02:15,591 - [MainThread] - InterHouseTest - INFO - Testing InterHouse scraper...
2025-05-09 12:02:15,665 - [MainThread] - InterHouseTest - INFO - Playwright is installed and will be used for JavaScript rendering
2025-05-09 12:02:15,666 - [MainThread] - InterHouseScraper - INFO - Retrieving page with Selenium: https://interhouse.nl/huurwoningen/?location_id=Utrecht_Algemeen&number_of_results=20&sort=date-desc&display=list
2025-05-09 12:02:15,665 - [MainThread] - InterHouseTest - INFO - Playwright is installed and will be used for JavaScript rendering
2025-05-09 12:02:15,666 - [MainThread] - InterHouseScraper - INFO - Retrieving page with Selenium: https://interhouse.nl/huurwoningen/?location_id=Utrecht_Algemeen&number_of_results=20&sort=date-desc&display=list
2025-05-09 12:02:16,471 - [MainThread] - WDM - INFO - Get LATEST chromedriver version for google-chrome
2025-05-09 12:02:16,471 - [MainThread] - WDM - INFO - Get LATEST chromedriver version for google-chrome
2025-05-0