In [4]:
### This notebook lets you find pictures of properties that were listed in the past but are no longer listed on Funda,
### or simply scrape pictures of houses that are currently being offered.

### For old homes there are two options.
### Option 1: try to find the address on https://www.miljoenhuizen.nl/ and open the thumbnail image in a new tab.
### This should give you a Funda link to the picture. Paste this link in 'starting_url' to scrape the pictures that follow it.
### Quite often multiple houses are uploaded at the same time, so the picture numbers of one house do not always increase sequentially.
### Option 2: check whether the house has a thumbnail on https://walterliving.com/. The thumbnail is available on multiple pages; on some you
### need to retrieve the URL through 'inspect element', on others you can open the image in a new tab. Paste this link in 'starting_url' to scrape the pictures.

### When a house is newly offered, make sure to try this the same day: both sites will scrape the new pictures and refresh their pages
### with them, losing the link to the older pictures in the process.

In [1]:
### Funda or funda link through https://www.miljoenhuizen.nl/
import os
import requests
from urllib.parse import urljoin
import re

# Directory where the downloaded pictures are saved. Change this to your preferred
# directory; "xxx" looks like a placeholder, so point it at a folder you can write to.
target_directory = "C:\\Users\\xxx\\FUNDA foto's"
# Paste the link to the picture below. The file name must look like
# <3 digits>_<1440|groot|720x480|klein>.jpg, otherwise get_start_end_numbers raises ValueError.
starting_url = "https://cloud.funda.nl/valentina_media/198/771/242_720x480.jpg"
# Size of the picture-number range to scan, counted from the number in starting_url
number_of_pictures = 80

# Ensure the target directory exists (no error if it is already there)
os.makedirs(target_directory, exist_ok=True)

# Function to generate base URL by trimming to the last '/'
def get_base_url(input_url):
    """Return ``input_url`` cut off after its last '/'.

    Everything following the final slash (the file name) is dropped and the
    result always ends in '/'. A string without any '/' is returned whole
    with a '/' appended.
    """
    directory_part, *_ = input_url.rsplit('/', 1)
    return f"{directory_part}/"

# Function to extract the starting number from the URL
def get_start_end_numbers(input_url, count=None):
    """Return the first and last picture number to try for a Funda picture URL.

    Args:
        input_url: URL whose file name looks like ``<3 digits>_<size>.jpg``,
            where size is one of ``1440``, ``groot``, ``720x480`` or ``klein``.
        count: How many consecutive picture numbers the range should cover.
            Defaults to the module-level ``number_of_pictures``.

    Returns:
        Tuple ``(start_num, end_num)``. ``end_num`` is inclusive, so the range
        ``start_num..end_num`` contains exactly ``count`` numbers.

    Raises:
        ValueError: If ``input_url`` does not end in the expected file name.
    """
    if count is None:
        count = number_of_pictures

    match = re.search(r'/(\d{3})_(1440|groot|720x480|klein)\.jpg$', input_url)
    if match is None:
        raise ValueError("Invalid URL format. Expected format like .../841_1440.jpg or .../841_groot.jpg")

    start_num = int(match.group(1))
    # The download loop iterates up to and including end_num, so the inclusive
    # end of a range of `count` numbers is start + count - 1 (previously one
    # extra number was scanned).
    end_num = start_num + count - 1
    return start_num, end_num

# Main download function
def _fetch_funda_image(image_url, local_path, timeout):
    """Stream one image to ``local_path``; raise on any HTTP, network or file error."""
    # The context manager closes the streamed response (releasing its connection)
    # even when raise_for_status() or the file write fails.
    with requests.get(image_url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(local_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)


def download_images(input_url, timeout=30):
    """Download a range of Funda pictures into ``target_directory``.

    The picture numbers come from ``get_start_end_numbers(input_url)`` and are
    iterated inclusively. For every number the ``_1440.jpg`` variant is tried
    first; if the server answers 400 or 403 the ``_groot.jpg`` variant is tried
    instead. Failures are printed and the loop moves on to the next number.

    Args:
        input_url: URL of the first picture (see ``get_start_end_numbers`` for
            the accepted file name format).
        timeout: Seconds to wait for the server per request, so a stalled
            connection cannot hang the whole run.

    Raises:
        ValueError: Propagated from ``get_start_end_numbers`` when the URL does
            not have the expected format.
    """
    base_url = get_base_url(input_url)
    start_num, end_num = get_start_end_numbers(input_url)

    for i in range(start_num, end_num + 1):
        # Size variants in order of preference
        for size in ("1440", "groot"):
            file_name = f"{i:03d}_{size}.jpg"
            image_url = urljoin(base_url, file_name)
            local_path = os.path.join(target_directory, file_name)

            try:
                _fetch_funda_image(image_url, local_path, timeout)
            except requests.exceptions.HTTPError as err:
                status = err.response.status_code if err.response is not None else None
                if status in (400, 403):
                    # This size is not available: fall through to the next variant
                    continue
                print(f"No image for {i}")
                break
            except Exception as err:
                # Network or file problems are not "missing image" cases, so show
                # the cause; keep going so one failure does not abort the run.
                print(f"No image for {i} ({err})")
                break
            else:
                print(f"Successfully downloaded {image_url}")
                break
        else:
            # Every variant was rejected with 400/403
            print(f"No image for {i}")

# Run the download, starting from the picture configured in starting_url
download_images(starting_url)


PermissionError: [WinError 5] Access is denied: 'C:\\Users\\xxx'

In [None]:
#Walterliving google storage link
import os
import requests
from urllib.parse import urljoin
import re

# Directory where the downloaded pictures are saved. Change this to your preferred
# directory; "xxx" looks like a placeholder, so point it at a folder you can write to.
target_directory = "C:\\Users\\xxx\\FUNDA foto's"
# Paste the link to the picture below. The file name must be <number>.jpg; the
# second-to-last path segment is replaced by "original" before downloading.
starting_url = "https://storage.googleapis.com/walter-companies-property-photos/fce06e4b-fe6d-45d1-bd4f-7bca3414f11c/original/1.jpg"
# Size of the picture-number range to scan, counted from the number in starting_url
number_of_pictures = 80

# Ensure the target directory exists (no error if it is already there)
os.makedirs(target_directory, exist_ok=True)

# Function to modify URL path to ensure 'original' is the second-to-last part
def modify_url_path(input_url):
    """Return ``input_url`` with its second-to-last '/'-separated part set to "original".

    The URL is split from the right into at most three pieces; when that yields
    at least two pieces, the piece before the file name is overwritten. A string
    without any '/' comes back unchanged.
    """
    segments = input_url.rsplit('/', maxsplit=2)
    if len(segments) < 2:
        # Nothing in front of the file name to replace
        return '/'.join(segments)

    segments[-2] = "original"
    return '/'.join(segments)

# Function to generate base URL by trimming to the last '/'
def get_base_url(modified_url):
    """Return ``modified_url`` up to and including its last '/'.

    The trailing file name is dropped; a string without any '/' is returned
    whole with a '/' appended.
    """
    pieces = modified_url.rsplit('/', maxsplit=1)
    return f"{pieces[0]}/"

# Function to extract the starting number from the URL
def get_start_end_numbers(modified_url, count=None):
    """Return the first and last picture number to try for a Walterliving picture URL.

    Args:
        modified_url: URL whose file name is ``<number>.jpg``.
        count: How many consecutive picture numbers the range should cover.
            Defaults to the module-level ``number_of_pictures``.

    Returns:
        Tuple ``(start_num, end_num)``. ``end_num`` is inclusive, so the range
        ``start_num..end_num`` contains exactly ``count`` numbers.

    Raises:
        ValueError: If ``modified_url`` does not end in ``/<number>.jpg``.
    """
    if count is None:
        count = number_of_pictures

    match = re.search(r'/(\d+)\.jpg$', modified_url)
    if match is None:
        raise ValueError("Invalid URL format. Expected format like .../123.jpg")

    start_num = int(match.group(1))
    # The download loop iterates up to and including end_num, so the inclusive
    # end of a range of `count` numbers is start + count - 1 (previously one
    # extra number was scanned).
    end_num = start_num + count - 1
    return start_num, end_num

# Main download function
def download_images(input_url, timeout=30):
    """Download a range of Walterliving pictures into ``target_directory``.

    The URL is first rewritten by ``modify_url_path`` so that the folder before
    the file name is "original". The picture numbers come from
    ``get_start_end_numbers`` and are iterated inclusively; each ``<number>.jpg``
    is fetched from the same folder. Failures are printed and the loop moves on
    to the next number.

    Args:
        input_url: URL of the first picture, ending in ``/<number>.jpg``.
        timeout: Seconds to wait for the server per request, so a stalled
            connection cannot hang the whole run.

    Raises:
        ValueError: Propagated from ``get_start_end_numbers`` when the URL does
            not have the expected format.
    """
    # Modify the URL to have 'original' in the correct place
    modified_url = modify_url_path(input_url)
    base_url = get_base_url(modified_url)
    start_num, end_num = get_start_end_numbers(modified_url)

    for i in range(start_num, end_num + 1):
        file_name = f"{i}.jpg"
        image_url = urljoin(base_url, file_name)
        local_path = os.path.join(target_directory, file_name)

        try:
            # The context manager closes the streamed response (releasing its
            # connection) even when raise_for_status() or the file write fails.
            with requests.get(image_url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(local_path, 'wb') as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
        except requests.exceptions.HTTPError as err:
            print(f"HTTP error occurred for {image_url}: {err}")
        except Exception as err:
            # Keep going so one network or file failure does not abort the run
            print(f"An error occurred for {image_url}: {err}")
        else:
            print(f"Successfully downloaded {image_url}")

# Run the download, starting from the picture configured in starting_url

download_images(starting_url)
