In [6]:
# Import packages

import json

import pandas as pd
import requests
from sqlalchemy import create_engine, inspect, MetaData, Table, insert, text
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [2]:
# Retrieve keys

from api_keys import idealista_api_key

In [3]:
# Configurations

## Static data
## The two lists are parallel: position i of locationName_list is the name
## sent to the API together with position i of locationId_list.

locationId_list = [
    "0-EU-ES-29-07-001-051-04",
    "0-EU-ES-29-02-001-067-02",
    "0-EU-ES-29-03-003-069",
    "0-EU-ES-29-08-003-054",
    "0-EU-ES-11-01-001-012",
]
locationName_list = [
    "Estepona Pueblo, Estepona",
    "Este, Málaga",
    "Marbella, Málaga",
    "Fuengirola, Málaga",
    "Cádiz, Cádiz",
]

## API information
## Endpoints and request headers used for every call to the RapidAPI host.

url_property_list = "https://idealista2.p.rapidapi.com/properties/list"
url_property_detail_list = "https://idealista2.p.rapidapi.com/properties/detail"

headers = {
    "X-RapidAPI-Key": idealista_api_key,
    "X-RapidAPI-Host": "idealista2.p.rapidapi.com",
}

In [15]:
# Clean Database
#
# Reflects the existing SQLite schema and empties both raw property tables so
# the retrieval cells below start from a clean slate.

## Connect to db

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")
metadata = MetaData()
metadata.reflect(bind=engine)

# Table objects reused by the retrieval cells when inserting rows.
property_rent_table = metadata.tables["rawPropertyRentData"]
property_purchase_table = metadata.tables["rawPropertyPurchaseData"]

## Clear db
## Both deletes run in one transaction. Session.begin() commits when the block
## finishes, rolls back if a statement raises, and always closes the session,
## so a failed run does not leave an open transaction holding the SQLite file.

Session = sessionmaker(bind=engine)

with Session.begin() as session:
    session.execute(property_rent_table.delete())
    session.execute(property_purchase_table.delete())

OperationalError: (sqlite3.OperationalError) database is locked
[SQL: DELETE FROM "rawPropertyRentData"]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [5]:
# Retrieve rent property information
#
# Pages through the property-list endpoint for every configured location and
# inserts each returned listing into rawPropertyRentData. Listings whose
# propertyCode is already stored are skipped.

## Open connection with DB
## The context manager closes the session even if a request or an insert
## raises, so a failed run does not leave the connection open.

Session = sessionmaker(bind=engine)

with Session() as session:

    ## Retrieve rent data
    ### Loop through locations:

    for locationId, locationName in zip(locationId_list, locationName_list):

        # The page count is only known after the first response, so start with
        # one page and extend the loop from the "totalPages" field.
        number_of_pages = 1
        page = 1

        while page <= number_of_pages:

            # Retrieve data from page. Every request uses the same filters so
            # that "totalPages" describes exactly the pages being fetched.

            querystring = {"locationId": locationId,
                           "locationName": locationName,
                           "operation": "rent",
                           "numPage": str(page),
                           "maxItems": "500",
                           "sort": "asc",
                           "locale": "en",
                           "country": "es",
                           "sinceDate": "Last month"}
            page += 1

            # A timeout keeps one stalled request from hanging the whole cell.
            response_property_list = requests.get(url_property_list, headers=headers,
                                                  params=querystring, timeout=30)

            if response_property_list.status_code != 200:
                print(f"Request failed with status code: {response_property_list.status_code}")
                # Skip this page rather than re-processing a previous payload.
                continue

            json_data = response_property_list.json()
            number_of_pages = json_data.get("totalPages", number_of_pages)

            # Save property information

            for listing in json_data.get("elementList", []):

                # floor, hasLift and thumbnail are optional in the payload and
                # fall back to defaults; every other key is required.
                new_property_data = {
                    "propertyCode": listing["propertyCode"],
                    "searchType": "Rent",
                    "propertyType": listing["propertyType"],
                    "address": listing["address"][:100],
                    "municipality": listing["municipality"],
                    "country": listing["country"],
                    "latitude": listing["latitude"],
                    "longitude": listing["longitude"],
                    "bathrooms": listing["bathrooms"],
                    "bedrooms": listing["rooms"],
                    "size": listing["size"],
                    "status": listing["status"],
                    "floor": listing.get("floor", "NA"),
                    "hasLift": listing.get("hasLift", False),
                    "newDevelopment": listing["newDevelopment"],
                    "newProperty": listing["newProperty"],
                    "epc": "N/A",
                    "monthlyRent": listing["price"],
                    "url": listing["url"],
                    "image": listing.get("thumbnail", "../static/pictures/no_property_image.jpg"),
                }

                # Write to DB

                try:
                    session.execute(insert(property_rent_table).values(**new_property_data))
                    session.commit()
                except IntegrityError:
                    # Row already stored (UNIQUE propertyCode): discard the
                    # failed statement and carry on with the next listing.
                    session.rollback()

IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: rawPropertyRentData.propertyCode
[SQL: INSERT INTO "rawPropertyRentData" ("propertyCode", "searchType", "propertyType", address, municipality, country, latitude, longitude, bathrooms, bedrooms, size, status, floor, "hasLift", "newDevelopment", "newProperty", epc, "monthlyRent", url, image) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)]
[parameters: (103521506.0, 'Rent', 'flat', 'subdistrict Guadalmina Alta', 'Marbella', 'es', 36.4759447, -5.0069744, 2.0, 2.0, 220.0, 'good', '1', 1, 0, 0, 'N/A', 1400.0, 'https://www.idealista.com/inmueble/103521506/', 'https://img3.idealista.com/blur/WEB_LISTING-M/0/id.pro.es.image.master/a4/df/8f/1192141858.jpg')]
(Background on this error at: https://sqlalche.me/e/20/gkpj)

In [16]:
# Retrieve sale property information
#
# Pages through the property-list endpoint for every configured location and
# inserts each returned sale listing into rawPropertyPurchaseData. Listings
# whose propertyCode is already stored are skipped.
# NOTE(review): assumes rawPropertyPurchaseData has a column for every key in
# new_property_data (including "purchasePrice") - confirm against the schema.

## Open connection with DB
## The context manager closes the session even if a request or an insert
## raises, so a failed run does not leave the connection open.

Session = sessionmaker(bind=engine)

with Session() as session:

    ## Retrieve sale data
    ### Loop through locations:

    for locationId, locationName in zip(locationId_list, locationName_list):

        # The page count is only known after the first response, so start with
        # one page and extend the loop from the "totalPages" field.
        number_of_pages = 1
        page = 1

        while page <= number_of_pages:

            # Retrieve data from page. The date and price filters are sent with
            # every request so that "totalPages" describes exactly the pages
            # being fetched and the stored rows respect the price range.

            querystring = {"locationId": locationId,
                           "locationName": locationName,
                           "operation": "sale",
                           "numPage": str(page),
                           "maxItems": "500",
                           "sort": "asc",
                           "locale": "en",
                           "country": "es",
                           "sinceDate": "Last month",
                           "maxPrice": "250000",
                           "minPrice": "50000"}
            page += 1

            # A timeout keeps one stalled request from hanging the whole cell.
            response_property_list = requests.get(url_property_list, headers=headers,
                                                  params=querystring, timeout=30)

            if response_property_list.status_code != 200:
                print(f"Request failed with status code: {response_property_list.status_code}")
                # Skip this page rather than re-processing a previous payload.
                continue

            json_data = response_property_list.json()
            number_of_pages = json_data.get("totalPages", number_of_pages)

            # Save property information

            for listing in json_data.get("elementList", []):

                # Truncated listing text, "NA" when absent or empty.
                # NOTE(review): description is not written to the table below;
                # add it to new_property_data if the table has such a column.
                description = (listing.get("description") or "NA")[:5000]

                # floor, hasLift and thumbnail are optional in the payload and
                # fall back to defaults; every other key is required.
                new_property_data = {
                    "propertyCode": listing["propertyCode"],
                    "searchType": "Purchase",
                    "propertyType": listing["propertyType"],
                    "address": listing["address"][:100],
                    "municipality": listing["municipality"],
                    "country": listing["country"],
                    "latitude": listing["latitude"],
                    "longitude": listing["longitude"],
                    "bathrooms": listing["bathrooms"],
                    "bedrooms": listing["rooms"],
                    "size": listing["size"],
                    "status": listing["status"],
                    "floor": listing.get("floor", "NA"),
                    "hasLift": listing.get("hasLift", False),
                    "newDevelopment": listing["newDevelopment"],
                    "newProperty": listing["newProperty"],
                    "epc": "N/A",
                    "purchasePrice": listing["price"],
                    "url": listing["url"],
                    "image": listing.get("thumbnail", "../static/pictures/no_property_image.jpg"),
                }

                # Write to DB (the purchase table, not the rent table)

                try:
                    session.execute(insert(property_purchase_table).values(**new_property_data))
                    session.commit()
                except IntegrityError:
                    # Row already stored (UNIQUE propertyCode): discard the
                    # failed statement and carry on with the next listing.
                    session.rollback()

In [17]:
# Clean data in DB
#
# Rewrites the non-numeric floor codes stored in both raw tables to the
# replacement values listed in update_db_df.
# NOTE(review): "bj", "st", "en", "ss" look like the API's codes for
# ground/basement/mezzanine levels - confirm the intended numeric mapping.

table_names = ["rawPropertyRentData", "rawPropertyPurchaseData"]
update_db_df = pd.DataFrame({"Original Value": ["bj", "st", "en", "ss"],
                             "Updated Value": ["0", "-1", "1.5", "-1"]})


## Open connection with DB
## The context manager closes the session even if an update raises.

Session = sessionmaker(bind=engine)

with Session() as session:

    ## Loop through values

    for table_name in table_names:

        # SQLAlchemy 2.x only executes raw SQL wrapped in text(). The values go
        # in as bind parameters; the table name cannot be bound, so it is
        # interpolated from the fixed table_names list above and quoted.
        update_query = text(
            f'UPDATE "{table_name}" SET Floor = :updated_value '
            "WHERE Floor = :original_value"
        )

        try:
            # Replace values in the 'Floor' column

            for original_value, updated_value in zip(update_db_df["Original Value"],
                                                     update_db_df["Updated Value"]):
                session.execute(update_query, {"updated_value": updated_value,
                                               "original_value": original_value})

            # One commit per table: either every replacement for the table is
            # applied or none is.
            session.commit()

        except Exception as e:
            # Discard the partial transaction so the next table starts clean.
            session.rollback()
            print(f"Error in {table_name}:", e)