In [1]:
# Import packages

import requests
import json
import pandas as pd

from sqlalchemy import create_engine, inspect, MetaData, Table, insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker


In [2]:
# Retrieve API key

from api_keys import idealista_api_key

In [3]:
# Configure locations

## Static data

### Search areas as (locationId, locationName) pairs. Keeping each id next to
### its display name guarantees the two lists derived below stay aligned.
_search_locations = [
    ("0-EU-ES-29-07-001-051-04", "Estepona Pueblo, Estepona"),
    ("0-EU-ES-29-02-001-067-02", "Este, Málaga"),
    ("0-EU-ES-29-03-003-069", "Marbella, Málaga"),
    ("0-EU-ES-29-08-003-054", "Fuengirola, Málaga"),
    ("0-EU-ES-11-01-001-012", "Cádiz, Cádiz"),
]
locationId_list = [location_id for location_id, _location_name in _search_locations]
locationName_list = [location_name for _location_id, location_name in _search_locations]

### Endpoints: listing search and per-property detail
url_property_list = "https://idealista2.p.rapidapi.com/properties/list"
url_property_detail_list = "https://idealista2.p.rapidapi.com/properties/detail"

### Headers sent with every request; the key comes from the local api_keys module
headers = {
    "X-RapidAPI-Key": idealista_api_key,
    "X-RapidAPI-Host": "idealista2.p.rapidapi.com",
}

In [4]:
# Retrieve rent property information

## Column order of the resulting DataFrame.
## NOTE: "Pice (EUR month)" is spelled exactly like this on purpose - the cell
## that writes to the database reads the price column under this name.

rent_columns = ["Search Type", "Property Code", "Property Type", "Address",
                "Municipality", "Country", "Latitude", "Longitude",
                "Bathrooms", "Bedrooms", "Size (m2)", "Status", "Floor",
                "Has Lift", "New Development", "New Property",
                "Pice (EUR month)", "URL", "Image", "EPC"]


## Retrieve rent data

### One dict per listing, keyed by DataFrame column name
rent_records = []

### Loop through locations:

for locationId, locationName in zip(locationId_list, locationName_list):

    # The same filters are sent with every request of a location (only
    # "numPage" changes), so "totalPages" always describes the result set
    # that is actually being downloaded.

    querystring = {"locationId":locationId,
                    "locationName":locationName,
                    "operation":"rent",
                    "numPage":"1",
                    "maxItems":"500",
                    "sort":"asc",
                    "locale":"en",
                    "country":"es",
                    "sinceDate":"Last month"}

    # Page 1 is always requested; the real page count is taken from each
    # successful response, so no separate "count pages" request is needed.

    page = 1
    number_of_pages = 1

    while page <= number_of_pages:

        # Retrieve data from page

        querystring["numPage"] = str(page)
        page += 1

        response_property_list = requests.get(url_property_list, headers=headers, params=querystring)

        if response_property_list.status_code != 200:
            # Skip this page: there is no payload to read, and re-reading the
            # previous page's payload would append its listings a second time.
            print(f"Request failed with status code: {response_property_list.status_code}")
            continue

        json_data = response_property_list.json()
        number_of_pages = json_data["totalPages"]

        # Save property information

        for element in json_data.get("elementList", []):
            rent_records.append({
                "Search Type": "Rent",
                "Property Code": element["propertyCode"],
                "Property Type": element["propertyType"],
                "Address": element["address"],
                "Municipality": element["municipality"],
                "Country": element["country"],
                "Latitude": element["latitude"],
                "Longitude": element["longitude"],
                "Bathrooms": element["bathrooms"],
                "Bedrooms": element["rooms"],
                "Size (m2)": element["size"],
                "Status": element["status"],
                # "floor" and "hasLift" are the only keys treated as optional
                "Floor": element.get("floor", "NA"),
                "Has Lift": element.get("hasLift", False),
                "New Development": element["newDevelopment"],
                "New Property": element["newProperty"],
                "Pice (EUR month)": element["price"],
                "URL": element["url"],
                # Placeholders: the per-property detail endpoint is not queried.
                # TODO: read multimedia.images[0].url and
                # energyCertification.energyCertificationType from
                # url_property_detail_list (params: propertyCode, country, language).
                "Image": "../static/pictures/no_property_image.jpg",
                "EPC": "N/A",
            })


## Create rent df

rent_property_df = pd.DataFrame(rent_records, columns=rent_columns)

In [5]:
# Retrieve sale property information

## Column order of the resulting DataFrame

purchase_columns = ["Search Type", "Property Code", "Property Type",
                    "Description", "Address", "Municipality", "Country",
                    "Latitude", "Longitude", "Bathrooms", "Bedrooms",
                    "Size (m2)", "Status", "Floor", "Has Lift",
                    "New Development", "New Property", "Price (EUR)",
                    "URL", "Image", "EPC"]


## Retrieve sale data

### One dict per listing, keyed by DataFrame column name
purchase_records = []

### Loop through locations:

for locationId, locationName in zip(locationId_list, locationName_list):

    # The date and price filters are sent with every request of a location
    # (only "numPage" changes). Sending them only with the page-count request
    # would download unfiltered pages while trusting a filtered "totalPages".

    querystring = {"locationId":locationId,
                    "locationName":locationName,
                    "operation":"sale",
                    "numPage":"1",
                    "maxItems":"500",
                    "sort":"asc",
                    "locale":"en",
                    "country":"es",
                    "sinceDate":"Last month",
                    "maxPrice":"250000",
                    "minPrice": "50000"}

    # Page 1 is always requested; the real page count is taken from each
    # successful response, so no separate "count pages" request is needed.

    page = 1
    number_of_pages = 1

    while page <= number_of_pages:

        # Retrieve data from page

        querystring["numPage"] = str(page)
        page += 1

        response_property_list = requests.get(url_property_list, headers=headers, params=querystring)

        if response_property_list.status_code != 200:
            # Skip this page: there is no payload to read, and re-reading the
            # previous page's payload would append its listings a second time.
            print(f"Request failed with status code: {response_property_list.status_code}")
            continue

        json_data = response_property_list.json()
        number_of_pages = json_data["totalPages"]

        # Save property information

        for element in json_data.get("elementList", []):
            purchase_records.append({
                "Search Type": "Purchase",
                "Property Code": element["propertyCode"],
                "Property Type": element["propertyType"],
                # "description", "floor" and "hasLift" are the only keys
                # treated as optional
                "Description": element.get("description", "NA"),
                "Address": element["address"],
                "Municipality": element["municipality"],
                "Country": element["country"],
                "Latitude": element["latitude"],
                "Longitude": element["longitude"],
                "Bathrooms": element["bathrooms"],
                "Bedrooms": element["rooms"],
                "Size (m2)": element["size"],
                "Status": element["status"],
                "Floor": element.get("floor", "NA"),
                "Has Lift": element.get("hasLift", False),
                "New Development": element["newDevelopment"],
                "New Property": element["newProperty"],
                "Price (EUR)": element["price"],
                "URL": element["url"],
                # Placeholders: the per-property detail endpoint is not queried.
                # TODO: read multimedia.images[0].url and
                # energyCertification.energyCertificationType from
                # url_property_detail_list (params: propertyCode, country, language).
                "Image": "../static/pictures/no_property_image.jpg",
                "EPC": "N/A",
            })


## Create purchase df

purchase_property_df = pd.DataFrame(purchase_records, columns=purchase_columns)


In [6]:
# Clean data

## Floor codes returned by the API -> numeric-style labels.
## (presumably bj = ground floor, st/ss = basement levels, en = mezzanine - TODO confirm)

floor_labels = {"bj": "0", "st": "-1", "en": "1.5", "ss": "-1"}

## Delete duplicate values (keep the first occurrence of each property code)
## and renumber the rows 0..n-1

rent_property_df = (
    rent_property_df
    .drop_duplicates(subset="Property Code", keep="first")
    .reset_index(drop=True)
)
purchase_property_df = (
    purchase_property_df
    .drop_duplicates(subset="Property Code", keep="first")
    .reset_index(drop=True)
)

## Update wording

# Assign the replaced column back to the frame. Calling
# df["Floor"].replace(..., inplace=True) acts on a column selection, which
# pandas deprecates and which leaves the frame unchanged under Copy-on-Write.

### Rent
rent_property_df["Floor"] = rent_property_df["Floor"].replace(floor_labels)

### Purchase
purchase_property_df["Floor"] = purchase_property_df["Floor"].replace(floor_labels)

In [7]:
# Save information

## Save in DB

### Connect to db

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")
metadata = MetaData()
metadata.reflect(bind = engine)

property_rent_table = metadata.tables["propertyRent"]
property_purchase_table = metadata.tables["propertyPurchase"]

### DataFrame column -> table column

#### "Pice (EUR month)" matches the (misspelled) column name created by the
#### rent retrieval cell.
rent_column_map = {
    "Property Code": "propertyCode",
    "Search Type": "searchType",
    "Property Type": "propertyType",
    "Address": "address",
    "Municipality": "municipality",
    "Country": "country",
    "Latitude": "latitude",
    "Longitude": "longitude",
    "Bathrooms": "bathrooms",
    "Bedrooms": "bedrooms",
    "Size (m2)": "size",
    "Status": "status",
    "Floor": "floor",
    "Has Lift": "hasLift",
    "New Development": "newDevelopment",
    "New Property": "newProperty",
    "EPC": "epc",
    "Pice (EUR month)": "price",
    "URL": "url",
    "Image": "image",
}

purchase_column_map = {
    "Property Code": "propertyCode",
    "Search Type": "searchType",
    "Property Type": "propertyType",
    "Description": "description",
    "Address": "address",
    "Municipality": "municipality",
    "Country": "country",
    "Latitude": "latitude",
    "Longitude": "longitude",
    "Bathrooms": "bathrooms",
    "Bedrooms": "bedrooms",
    "Size (m2)": "size",
    "Status": "status",
    "Floor": "floor",
    "Has Lift": "hasLift",
    "New Development": "newDevelopment",
    "New Property": "newProperty",
    "EPC": "epc",
    "Price (EUR)": "price",
    "URL": "url",
    "Image": "image",
}


def _rows_for_insert(frame, column_map):
    """Return one parameter dict per row of `frame`, keyed by table column name.

    Only the columns listed in `column_map` are exported. Rows are read
    positionally, so the result does not depend on the frame's index labels.
    NumPy scalars (anything exposing `.item()`) are unwrapped to plain Python
    values, because the SQLite driver is not guaranteed to bind NumPy
    integer/boolean scalars as numbers.
    """
    renamed = frame[list(column_map)].rename(columns=column_map)
    return [
        {column: (value.item() if hasattr(value, "item") else value)
         for column, value in row.items()}
        for row in renamed.to_dict(orient="records")
    ]


rent_rows = _rows_for_insert(rent_property_df, rent_column_map)
purchase_rows = _rows_for_insert(purchase_property_df, purchase_column_map)

### Replace table contents

# Deleting the old rows and inserting the new ones happen in ONE transaction:
# if any insert fails, the rollback restores the previous table contents
# instead of leaving the tables empty or half-filled.

Session = sessionmaker(bind=engine)
session = Session()

try:
    #### Clear db
    session.execute(property_rent_table.delete())
    session.execute(property_purchase_table.delete())

    #### Add new data to db (one bulk statement per table)
    for target_table, rows in ((property_rent_table, rent_rows),
                               (property_purchase_table, purchase_rows)):
        if rows:  # an empty parameter list must not be sent as an INSERT
            session.execute(insert(target_table), rows)

    session.commit()
except Exception:
    session.rollback()
    raise
finally:
    session.close()
