# Pipeline 2: Listing

## Prerequisites

In [None]:
import sys
from pathlib import Path
import os

# Locate the project root: the parent of the current working directory.
# NOTE(review): this assumes the notebook is started from a direct
# sub-directory of the project root -- confirm for your setup.
project_root = Path.cwd().parent
print(f"Project root: {project_root}")

# Put the project root (not just the sources folder) at the front of the
# import path. The list is rebuilt in place so the root appears exactly once:
# re-running this cell must not pile up duplicate sys.path entries.
project_root_str = str(project_root)
sys.path[:] = [
    project_root_str,
    *(entry for entry in sys.path if entry != project_root_str),
]
print(f"Added to Python path: {project_root}")

# Set environment variables
# NOTE(review): QE_CONF_FOLDER is a relative path while the working directory
# is not the project root here -- confirm how the consumer resolves it.
os.environ["QE_ENV"] = "dev"
os.environ["QE_CONF_FOLDER"] = "sources/resources"
print(f"Added environment variables: QE_ENV={os.environ['QE_ENV']}, QE_CONF_FOLDER={os.environ['QE_CONF_FOLDER']}")

In [None]:
from sources.datamodel.listing_details import ListingDetails
from sources.datamodel.listing_id import ListingId
from sources.logging import logging_utils
from sources.storage.abstract_storage import Storage
from sources.scrapers.immobiliare.scraper_listing import ImmobiliareListingScraper
from sources.config.config_manager import ConfigManager

## Configuration

In [None]:
# Listing pages to scrape. Each URL differs only in the listing number, so the
# list is generated from a template plus the numbers, in the order given.
_LISTING_URL_TEMPLATE = "https://www.immobiliare.it/annunci/{}/"
_LISTING_NUMBERS = (
    "109605437",
    "122417312",
    "122419172",
    "122408050",
    "122420120",
    "122410316",
    "122407882",
    "122408454",
)
URLs = [_LISTING_URL_TEMPLATE.format(number) for number in _LISTING_NUMBERS]


In [None]:
# Configure logging from the project's logging file and get a logger named
# after this module.
logging_utils.setup_logging(config_path='sources/resources/logging.yaml')
logger = logging_utils.get_logger(__name__)

# Create the Storage for ListingDetails records from the storage configuration.
settings = ConfigManager().get_storage_config()
storage: Storage = Storage.create_storage(config=settings, data_type=ListingDetails)

# One ListingId per configured URL. The source id is the last path segment of
# the URL (trailing slash ignored); the title is a fixed placeholder.
test_ids = [
    ListingId(
        source="immobiliare",
        source_id=listing_url.rstrip('/').rpartition('/')[2],
        title="Test Listing",
        url=listing_url,
    )
    for listing_url in URLs
]

# One scraper per listing id, all sharing the same storage object.
scrapers = [
    ImmobiliareListingScraper(storage, listing_id=listing_id)
    for listing_id in test_ids
]

## Start the pipeline!

In [None]:
# Run the scrapers one after another, in the order of `scrapers`.
# There is no error handling here: an exception raised by any scrape() call
# propagates and the remaining scrapers are not run.
for scraper in scrapers:
    scraper.scrape()