In [1]:
import json
import os
import re

from pymongo import MongoClient

In [2]:
def _load_listings(filename):
    """Read one scraped listings file from ../data/listings/ and return the parsed JSON."""
    ## JSON files are UTF-8 encoded; the encoding is explicit so the result
    ## does not depend on the platform's default text encoding.
    with open("../data/listings/" + filename, encoding="utf-8") as f:
        return json.load(f)


## Raw listings of every source, exactly as parsed from the JSON files
idealista_data = _load_listings("idealista_listings.json")
olx_data = _load_listings("olx_listings.json")
imovirtual_data = _load_listings("imovirtual_listings.json")
uniplaces_data = _load_listings("uniplaces_listings.json")

In [66]:
## Accumulator for the normalised listings of every source
final_listings = list()

## Identifier given to the next listing; every source loop reads and increments it.
## NOTE(review): this name shadows the builtin id() for the rest of the notebook.
id = 1

#### IDEALISTA

In [67]:
## Convert every Idealista listing to the common schema and append it to final_listings.

## Placeholder used when a listing has no image / thumbnail of its own
DEFAULT_IMAGE_URL = "https://www.fparceirosazoia.pt/photos/shares/default/default.jpg"

## Feature flags copied over from the listing's "features" field when present
IDEALISTA_FEATURE_KEYS = (
    "hasSwimmingPool",
    "hasGarden",
    "hasAirConditioning",
    "hasBoxRoom",
    "hasTerrace",
)

for listing in idealista_data:
    data = {}
    data["id"] = id
    data["title"] = listing["suggestedTexts"]["title"]
    data["bedrooms"] = listing["rooms"]
    data["bathrooms"] = str(listing["bathrooms"])
    data["price"] = int(listing["price"])
    data["size"] = int(listing["size"])
    data["url"] = listing["url"]
    data["propertyType"] = listing["propertyType"]
    data["location"] = listing["municipality"]

    ## Collect the image links of the multimedia field (if there is one)
    if "multimedia" in listing:
        image_urls = [image["url"] for image in listing["multimedia"].get("images", [])]
    else:
        image_urls = []

    ## Fall back to the default when no image link was found,
    ## so the listing never ends up with an empty gallery
    data["images"] = image_urls or [DEFAULT_IMAGE_URL]

    data["description"] = listing.get("description", "")

    ## If the thumbnail field is present, use it
    if "thumbnail" in listing:
        data["thumbnail"] = listing["thumbnail"]

    ## Otherwise use the first image link. Listings are plain dicts, so the
    ## link has to be reached by key (it lives under the image's "url" key).
    elif image_urls:
        data["thumbnail"] = image_urls[0]

    ## If not, use a default
    else:
        data["thumbnail"] = DEFAULT_IMAGE_URL

    data["source"] = {"type": "external", "value": "Idealista"}

    ## Features ##
    ## Only the flags that are actually present in the listing are copied
    if "features" in listing:
        subfields = listing["features"]
        data["features"] = {key: subfields[key] for key in IDEALISTA_FEATURE_KEYS if key in subfields}
    else:
        data["features"] = {}

    data["coordinates"] = {
        "latitude": listing["latitude"],
        "longitude": listing["longitude"],
    }

    data["realAddress"] = listing["showAddress"]

    final_listings.append(data)
    id = id + 1

#### OLX

In [68]:
## Convert every OLX listing to the common schema and append it to final_listings.
for olx_listing in olx_data:
    entry = {
        "id": id,
        "title": olx_listing["title"],
        "bedrooms": int(olx_listing["rooms"]),
        ## "-1" marks a listing whose bathroom count was not provided
        "bathrooms": "-1" if olx_listing["bathroom"] == "Not Provided" else olx_listing["bathroom"],
        ## The price is the first space-separated token, rounded to an integer
        "price": round(float(olx_listing["price"].split(" ")[0])),
        "url": olx_listing["url"],
        "propertyType": olx_listing["propertyType"],
        "location": olx_listing["location"],
        "images": olx_listing["images"],
    }

    ## Keep the thumbnail only when it contains "http"; otherwise use the
    ## first image of the listing, or the default if it has none
    raw_thumbnail = olx_listing["thumbnail"]
    if "http" in raw_thumbnail:
        entry["thumbnail"] = raw_thumbnail
    elif len(olx_listing["images"]) > 0:
        entry["thumbnail"] = olx_listing["images"][0]
    else:
        entry["thumbnail"] = "https://www.fparceirosazoia.pt/photos/shares/default/default.jpg"

    ## The size uses a decimal comma; a missing size is stored as an empty string.
    ## NOTE(review): the Uniplaces loop stores -1 for an unknown size - confirm which sentinel consumers expect.
    raw_size = olx_listing["size"]
    entry["size"] = "" if raw_size == "Not Provided" else round(float(raw_size.replace(",", ".")))

    entry["description"] = olx_listing["description"]
    entry["source"] = {"type": "external", "value": "OLX"}

    ## Features ##
    ## The energy certificate is only stored when it is not empty
    entry["features"] = {}
    certificate = olx_listing["features"]["energetic_certificate"]
    if certificate != "":
        entry["features"]["energyCertificate"] = certificate

    final_listings.append(entry)
    id += 1


#### Uniplaces

In [69]:
## Convert every Uniplaces listing to the common schema and append it to final_listings.
for uniplaces_listing in uniplaces_data:
    entry = {
        "id": id,
        "title": uniplaces_listing["title"],
        "price": int(uniplaces_listing["price"]),
        ## The size is always stored as -1 for this source
        "size": -1,
        "url": uniplaces_listing["url"],
        "propertyType": uniplaces_listing["type"],
        "location": uniplaces_listing["place"],
        ## Images and thumbnail always use the default picture for this source
        "images": ["https://www.fparceirosazoia.pt/photos/shares/default/default.jpg"],
        "thumbnail": "https://www.fparceirosazoia.pt/photos/shares/default/default.jpg",
        "description": uniplaces_listing.get("description", ""),
    }

    ## Residences and hostels get fixed values (no bathroom, one bedroom);
    ## every other listing uses the counts it provides
    if entry["propertyType"] != "residence" and uniplaces_listing["accommodation_type"] != "hostel":
        entry["bathrooms"] = str(uniplaces_listing["number_of_bathrooms"])
        entry["bedrooms"] = uniplaces_listing["number_of_rooms"]
    else:
        entry["bathrooms"] = "0"
        entry["bedrooms"] = 1

    entry["source"] = {"type": "external", "value": "Uniplaces"}

    ## Features ##
    entry["features"] = {"billsIncluded": uniplaces_listing["all_bills_included"]}

    ## The first element of "location" is stored as latitude, the second as longitude
    position = uniplaces_listing["location"]
    entry["coordinates"] = {"latitude": position[0], "longitude": position[1]}

    final_listings.append(entry)
    id += 1

#### Imovirtual

In [70]:
## Convert every Imovirtual listing to the common schema and append it to final_listings.

## Placeholder used when a listing has no image / thumbnail of its own
DEFAULT_IMAGE_URL = "https://www.fparceirosazoia.pt/photos/shares/default/default.jpg"

for listing in imovirtual_data:
    data = {}
    data["id"] = id
    data["title"] = listing["externalReference"]

    ## The literal "zero" means no bedrooms. It is stored under "bedrooms",
    ## the same key every other listing uses.
    if listing["rooms"] == "zero":
        data["bedrooms"] = 0
    else:
        data["bedrooms"] = int(listing["rooms"])

    ## null means the bathroom count was not provided; it is stored as "0"
    if listing["bathrooms"] is None:
        data["bathrooms"] = "0"

    ## Stored as a string, like the "0" placeholder above and the other sources
    else:
        data["bathrooms"] = str(listing["bathrooms"])

    ## An empty price is stored as -1
    if listing["price"] == "":
        data["price"] = -1
    else:
        data["price"] = int(listing["price"])

    data["size"] = round(float(listing["size"]))
    data["url"] = listing["url"]
    data["propertyType"] = ""
    data["location"] = listing["municipality"]

    ## Image links are only read when the listing reports at least one photo
    if listing["numPhotos"] > 0:
        data["images"] = [image["url"] for image in listing["multimedia"]["images"]]
    else:
        data["images"] = [DEFAULT_IMAGE_URL]

    if listing["thumbnail"] == "":
        data["thumbnail"] = DEFAULT_IMAGE_URL
    else:
        data["thumbnail"] = listing["thumbnail"]

    data["description"] = listing.get("description", "Not Provided")

    data["source"] = {"type": "external", "value": "Imovirtual"}

    data["coordinates"] = {
        "latitude": listing["latitude"],
        "longitude": listing["longitude"],
    }

    ## Features ##
    ## No features are extracted for this source
    data["features"] = {}

    final_listings.append(data)
    id = id + 1

#### Save the listings to the database

In [73]:
## Connect to the client
## The connection string embeds the database credentials, so it is read from
## the MONGODB_URI environment variable instead of being stored in the notebook.
## NOTE: credentials that were previously committed to this notebook should be rotated.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError("Set the MONGODB_URI environment variable to the MongoDB connection string")

client = MongoClient(mongodb_uri)

## Get the database
hotsquare = client.get_database("HotSquare")

In [None]:
## Remove the existing "Listings" collection before it is repopulated
## TODO: this needs to be done in a different way
hotsquare["Listings"].drop()

In [75]:
## Get a handle on a collection with the same name as the one that was dropped
new_collection = hotsquare.get_collection("Listings")

In [76]:
## Populate it
## All the listings are sent in a single batch rather than one request per document.
## insert_many rejects an empty list, so nothing is sent when there are no listings.
if final_listings:
    new_collection.insert_many(final_listings)