In [1]:
import base64
import requests
import json
from datetime import datetime
import numpy as np

from api_access import API_KEY, SECRET

In [2]:
def get_oauth_token():
    """
    This function retrieves the OAuth token from the Idealista API.
    It uses the API_KEY and SECRET to authenticate and obtain the token.
    """
    # Encode the API_KEY and SECRET
    message = API_KEY + ":" + SECRET

    # Base64 encode the message
    auth = base64.b64encode(message.encode("ascii")).decode("ascii")

    # Set up headers for the request
    headers_dic = {'Authorization': 'Basic ' + auth,
                   'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
}

    # Set up parameters for the request
    params_dic = {'grant_type': 'client_credentials',
                  'scope': 'read'}

    # Make the request to get the token
    request_call = requests.post(
        "https://api.idealista.com/oauth/token",
        headers=headers_dic,
        data=params_dic
    )

    # Parse the response to get the token
    token = json.loads(request_call.text)['access_token']
    
    return token

In [3]:
# Search parameters filtering

BASE_URL = "https://api.idealista.com/3.5/es" # Base URL to search in Spain
# CENTER = '36.7213,-4.4214' # Malaga Center coordinates (latitude, longitude) for the search
# DISTANCE = 9000.0 # Search radius in meters 
LOCATION = "0-EU-ES-29"
MAX_ITEMS = 50 # Maximum number of items to retrieve in one request 50
#since_date = 'M' # W:last week, M: last month, T:last day (for rent except rooms), Y: last 2 days (sale and rooms)
SORT = 'desc'
#operation =  Operation type: sale or rent
#property_type = Property type: homes, garages, offices, land, new_development


In [4]:
# Function to get the url for the search request

def get_search_url(operation: str, property_type: str) -> str:
    url = (
        f"{BASE_URL}/search?"
        f"operation={operation}&"
        f"maxItems={MAX_ITEMS}&"
        f"locationId={LOCATION}&"
        f"propertyType={property_type}&"
        f"sort={SORT}&"
        f"language=es&"
        f"numPage=%s"
    )
    return url


In [5]:
def get_data_from_api(url, pagination):
    """
    This function retrieves data from the Idealista API.
    It uses the provided URL to make a request to the API and get the data.
    """
    # Get the OAuth token
    token = get_oauth_token()

    # Set up headers for the request
    headers_dic = {'Content-Type': 'application/x-www-form-urlencoded',
                   'Authorization': 'Bearer ' + token,
                   }

    # Make the request to get the data
    request_call = requests.post(url, headers=headers_dic)

    # Parse the response
    try:
        data = request_call.json()
    except json.JSONDecodeError:
        print("Error: la respuesta no es JSON válida.")
        print("Texto recibido:", request_call.text)
        return None

    # File name with timestamp
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"../data/extracted_data/idealista-data-{timestamp}-{pagination}.json"

    # Save as  JSON file
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"JSON guardado en {filename}")

    return data

In [None]:
url_homes_sale = get_search_url("sale", "homes")
url = url_homes_sale % 1
results1 = get_data_from_api(url, 1)

url = url_homes_sale % 2
results2 = get_data_from_api(url, 2)

JSON guardado en ../data/extracted_data/idealista-data-2025-08-15_13-28-32-1.json
JSON guardado en ../data/extracted_data/idealista-data-2025-08-15_13-28-33-2.json


AttributeError: 'dict' object has no attribute 'concat'

In [12]:
results1["totalPages"]

751

In [11]:
print(len(results1['elementList']))
print(len(results2['elementList']))
print("Total elements:", len(results1['elementList']) + len(results2['elementList']))


50
50
Total elements: 100


In [29]:
#Homes for sale
url_homes_sale = get_search_url("sale", "homes")
for pagination in range(1, 31):
    url = url_homes_sale % (pagination)
    results = get_data_from_api(url, pagination)

Error: la respuesta no es JSON válida.
Texto recibido: 
Error: la respuesta no es JSON válida.
Texto recibido: 


KeyboardInterrupt: 

In [20]:
#Homes for rent
url_homes_rent = get_search_url("rent", "homes")
for pagination in range(1, 31):
    url = url_homes_rent % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-29.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-29.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-30.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-30.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-31.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-31.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-31.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-32.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-32.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-33.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-33.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-34.json
JSON guardado en ../data/extracted_data/

In [21]:
#Offices for sale
url_offices_sale = get_search_url("sale", "offices")
for pagination in range(1, 6):
    url = url_offices_sale % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-43.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-44.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-44.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-44.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-44.json


In [22]:
#Offices for rent
url_offices_rent = get_search_url("rent", "offices")
for pagination in range(1, 6):
    url = url_offices_rent % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-45.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-46.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-47.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-47.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-47.json


In [23]:
#Premises for sale
url_premises_sale = get_search_url("sale", "premises")
for pagination in range(1, 9):
    url = url_premises_sale % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-48.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-48.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-49.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-49.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-50.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-50.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-50.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-51.json


In [24]:
#Premises for rent
url_premises_rent = get_search_url("rent", "premises")
for pagination in range(1, 9):
    url = url_premises_rent % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-51.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-51.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-52.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-52.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-53.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-53.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-53.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-54.json


In [25]:
#Garages for sale
url_garages_sale = get_search_url("sale", "garages")
for pagination in range(1, 8):
    url = url_garages_sale % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-54.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-55.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-55.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-55.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-56.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-56.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-56.json


In [26]:
#Garages for rent
url_garages_rent = get_search_url("rent", "garages")
for pagination in range(1, 8):
    url = url_garages_rent % (pagination)
    results = get_data_from_api(url)

JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-57.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-57.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-57.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-57.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-58.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-58.json
JSON guardado en ../data/extracted_data/idealista-data-2025-07-01_19-41-58.json
