# Pipeline 1: IDs

## Prerequisites

In [None]:
import sys
from pathlib import Path
import os

# Locate the project root. This assumes the notebook is launched from a
# directory directly below the repository root, so the root is the parent of
# the current working directory.
project_root = Path.cwd().parent
print(f"Project root: {project_root}")

# Put the project root (not just `sources`) at the front of the import path so
# that the `sources.*` packages resolve. Any copy of the entry that is already
# present is dropped first, so re-running this cell keeps exactly one entry
# instead of stacking a duplicate on every execution.
root_entry = str(project_root)
sys.path[:] = [entry for entry in sys.path if entry != root_entry]
sys.path.insert(0, root_entry)
print(f"Added to Python path: {project_root}")

# Set environment variables
# NOTE(review): QE_CONF_FOLDER is a relative path; presumably the project's
# configuration loader resolves it against the project root - confirm.
os.environ["QE_ENV"] = "dev"
os.environ["QE_CONF_FOLDER"] = "sources/resources"
print(f"Added environment variables: QE_ENV={os.environ['QE_ENV']}, QE_CONF_FOLDER={os.environ['QE_CONF_FOLDER']}")

In [None]:
import logging

from pydantic import SecretStr

from sources.datamodel.listing_id import ListingId
from sources.logging import logging_utils
from sources.storage.abstract_storage import Storage
from sources.storage.file_storage import FileStorage
from sources.scrapers.immobiliare.scraper_ids import ImmobiliareIdScraper
from sources.config.model.storage_settings import MongoStorageSettings

## Configuration

In [None]:
from urllib.parse import quote_plus

# Address passed to the ID scraper as its `scrape_url`.
URL = "https://www.immobiliare.it/vendita-case/milano/?criterio=rilevanza"

# MongoDB credentials and cluster host. They are intentionally left blank:
# fill them in locally before running and do not commit real values.
USR = ""
PSW = ""
HOST = ""

# The user name and password are percent-encoded before being embedded in the
# URI: characters such as '@', ':', '/' or '%' would otherwise be read as URI
# delimiters and produce an invalid or misparsed connection string.
MONGO = (
    f"mongodb+srv://{quote_plus(USR)}:{quote_plus(PSW)}@{HOST}"
    "/?retryWrites=true&w=majority&appName=QuantEstate01"
)

In [None]:
from sources.storage.mongo_storage import MongoDBStorage

# Initialise logging from the project's YAML configuration before any other
# component is created, then grab a logger for this notebook.
# NOTE(review): the config path is relative - confirm how setup_logging
# resolves it when the notebook runs from a subdirectory of the project root.
logging_utils.setup_logging(config_path='sources/resources/logging.yaml')
logger = logging.getLogger(__name__)

# Wrap the connection string in SecretStr so the credentials are masked when
# the settings object is printed or logged.
connection_secret = SecretStr(MONGO)
mongoSettings = MongoStorageSettings(connection_string=connection_secret)

# MongoDB-backed storage built for the ListingId model, and the scraper that
# uses it, pointed at the configured search URL.
storage: Storage = MongoDBStorage(ListingId, mongoSettings)
scraper = ImmobiliareIdScraper(storage, scrape_url=URL)

## Start the pipeline!

In [None]:
# Run the ID scraper built in the previous cell. As the last expression of the
# cell, whatever scrape() returns is shown as the cell output.
# NOTE(review): presumably this collects listing IDs from URL and persists them
# through `storage` - confirm against ImmobiliareIdScraper.
scraper.scrape()