# Pipeline 1: IDs

## Prerequisites

In [None]:
import sys
from pathlib import Path
import os

# Find the project root.
# The root is taken to be the parent of the current working directory, i.e.
# this assumes the notebook is launched from a direct sub-folder of the project.
project_root = Path.cwd().parent
print(f"Project root: {project_root}")

# Add project root to Python path (not just sources).
# Any existing entry for the root is dropped first, so re-running this cell
# keeps the root at the front of sys.path without piling up duplicates.
sys.path[:] = [entry for entry in sys.path if entry != str(project_root)]
sys.path.insert(0, str(project_root))
print(f"Added to Python path: {project_root}")

# Set environment variables
# NOTE(review): QE_CONF_FOLDER is a relative path; whether the consumer resolves
# it against the working directory or the project root is not visible here —
# confirm it still points at the right folder when run from a sub-folder.
os.environ["QE_ENV"] = "dev"
os.environ["QE_CONF_FOLDER"] = "sources/resources"
print(f"Added environment variables: QE_ENV={os.environ['QE_ENV']}, QE_CONF_FOLDER={os.environ['QE_CONF_FOLDER']}")

In [None]:
from sources.datamodel.listing_id import ListingId
from sources.logging import logging_utils
from sources.storage.abstract_storage import Storage
from sources.scrapers.immobiliare.scraper_ids import ImmobiliareIdScraper
from sources.config.config_manager import ConfigManager

## Configuration

In [None]:
# Page the ID scraper is pointed at; it is passed to the scraper as `scrape_url`.
# NOTE(review): looks like the Milan for-sale search sorted by relevance
# (`criterio=rilevanza`) — confirm before changing city or ordering.
URL = "https://www.immobiliare.it/vendita-case/milano/?criterio=rilevanza"

In [None]:
# Configure logging from the YAML file, then get a logger named after this module.
# NOTE(review): the config path is relative, so it presumably depends on the
# current working directory — confirm it resolves when run from this notebook.
logging_utils.setup_logging(config_path='sources/resources/logging.yaml')
logger = logging_utils.get_logger(__name__)

# Load the storage configuration and record it in the log.
# NOTE(review): if the storage settings can hold credentials, logging them
# verbatim may leak secrets — verify what get_storage_config() returns.
settings = ConfigManager().get_storage_config()
logger.info(f"Storage settings: {settings}")

# Build a storage for ListingId records from those settings, and hand it to the
# scraper together with the start URL.
storage: Storage = Storage.create_storage(data_type=ListingId, config=settings)
scraper = ImmobiliareIdScraper(storage, scrape_url=URL)

## Start the pipeline!

In [None]:
# Run the scraper built in the configuration cell.
# NOTE(review): presumably this collects listing IDs starting from URL and
# persists them through `storage` — confirm against ImmobiliareIdScraper.scrape.
scraper.scrape()