# Immobiliare Connector Test

This notebook demonstrates the usage of the immobiliare connector for scraping real estate data.

In [1]:
import logging
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd

# Find the project root.
# NOTE: this assumes the kernel's working directory is a direct child of the
# repository root (the root is taken to be the parent of the cwd).
project_root = Path.cwd().parent
print(f"Project root: {project_root}")

# Add project root to Python path (not just sources) so that `sources.*`
# imports resolve. The membership check keeps the cell idempotent: re-running
# it no longer stacks duplicate entries at the front of sys.path.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
print(f"Added to Python path: {project_root}")

# Now import from sources
from sources.config import ConfigManager
from sources.connectors.immobiliare import ImmobiliareConnector
from sources.connectors.exceptions import ScrapingError, StorageError

Project root: C:\Users\gabri\workspace\aida_projects\quant-estate
Added to Python path: C:\Users\gabri\workspace\aida_projects\quant-estate


In [2]:
def setup_logging():
    """Configure root logging for the notebook and return this module's logger.

    On the first call (root logger has no handlers) the root logger is set to
    INFO and gets two handlers: a stream handler and a file handler writing to
    a timestamped ``immobiliare_scraper_<YYYYmmdd_HHMMSS>.log`` in the current
    working directory.

    When the root logger is already configured, nothing is changed.
    ``logging.basicConfig`` would ignore the request in that case anyway, so
    the handlers are only constructed when they will actually be installed;
    building the ``FileHandler`` eagerly would open (and never close) a new
    empty log file every time the cell is re-run.

    Returns:
        logging.Logger: the logger named after the current module.
    """
    root_logger = logging.getLogger()

    if not root_logger.handlers:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(),
                logging.FileHandler(f'immobiliare_scraper_{timestamp}.log'),
            ],
        )

    return logging.getLogger(__name__)

logger = setup_logging()

In [3]:
# Pages fetched per search URL; kept small because this is a test run.
MAX_PAGES = 2

try:
    # Initialize configuration
    logger.info("Initializing configuration...")
    config_manager = ConfigManager()

    # Create connector
    logger.info("Creating immobiliare connector...")
    connector = ImmobiliareConnector(config_manager=config_manager)

    # Define target URLs (uncomment further cities to widen the run)
    urls = [
        "https://www.immobiliare.it/vendita-case/milano/",
        # "https://www.immobiliare.it/vendita-case/roma/",
        # "https://www.immobiliare.it/vendita-case/torino/",
    ]

    # Scrape each URL independently: a failure on one URL is logged and the
    # loop moves on to the next one.
    for url in urls:
        try:
            logger.info("Starting scraping from: %s", url)
            connector.scrape_and_store(url, max_pages=MAX_PAGES)
            logger.info("Successfully scraped data from %s", url)
        except ScrapingError as e:
            logger.error("Scraping error for %s: %s", url, e)
        except StorageError as e:
            logger.error("Storage error for %s: %s", url, e)
        except Exception as e:
            # Unknown failure mode: keep the traceback, the message alone is
            # rarely enough to diagnose it.
            logger.exception("Unexpected error for %s: %s", url, e)

    logger.info("Scraping completed!")

except Exception as e:
    # Setup failed (configuration or connector construction); log with the
    # traceback so the cause is visible in the notebook output and log file.
    logger.exception("Fatal error: %s", e)

2025-06-03 19:39:33,466 - __main__ - INFO - Initializing configuration...
2025-06-03 19:39:33,468 - sources.config.config_manager - INFO - Initialized ConfigManager with config directory: C:\Users\gabri\workspace\aida_projects\quant-estate\sources\config
2025-06-03 19:39:33,470 - __main__ - INFO - Creating immobiliare connector...
2025-06-03 19:39:33,471 - sources.connectors.immobiliare.connector - INFO - Initializing ImmobiliareConnector
2025-06-03 19:39:33,477 - sources.config.config_manager - INFO - Successfully loaded configuration: logging


2025-06-03 19:39:33 - INFO - sources.config.config_manager - Successfully loaded configuration: immobiliare.default
2025-06-03 19:39:33 - INFO - sources.connectors.immobiliare.scraper.ImmobiliareScraper - Initialized ImmobiliareScraper with base URL: https://www.immobiliare.it
2025-06-03 19:39:33 - INFO - sources.connectors.immobiliare.storage.FileStorage - Initialized FileStorage at data\immobiliare (JSON saving: enabled)
2025-06-03 19:39:33 - INFO - sources.connectors.immobiliare.connector - Created FileStorage at data\immobiliare
2025-06-03 19:39:33 - INFO - sources.connectors.immobiliare.connector - ImmobiliareConnector initialized successfully with FileStorage storage
2025-06-03 19:39:33 - INFO - __main__ - Starting scraping from: https://www.immobiliare.it/vendita-case/milano/
2025-06-03 19:39:33 - INFO - sources.connectors.immobiliare.connector - Starting scraping from https://www.immobiliare.it/vendita-case/milano/ (max pages: 2)
2025-06-03 19:39:54 - INFO - sources.connectors.

KeyboardInterrupt: 

## Basic Usage

Let's start with a basic example of scraping a single page of real estate listings.

In [2]:
# Initialize config manager
config_manager = ConfigManager()

# Create connector with config
print("Creating connector...")
connector = ImmobiliareConnector(config_manager=config_manager)

# Example URL for Milan listings
test_url = "https://www.immobiliare.it/vendita-case/milano/"

# Start scraping. The page limit is passed explicitly so this basic example
# really fetches a single page instead of relying on the connector's default.
print(f"\nStarting scraping from: {test_url}")
try:
    connector.scrape_and_store(test_url, max_pages=1)
    print("Scraping completed successfully!")
except Exception as e:
    print(f"Error during scraping: {e}")


# Load and display the results
print("\nLoading results from CSV...")
try:
    # Get the storage instance from the connector
    storage = connector.storage
    df = pd.read_csv(storage.csv_file)

    # Display basic information
    print(f"\nTotal records: {len(df)}")
    print("\nColumns:")
    print(df.columns.tolist())

    # Display first few records
    print("\nFirst 5 records:")
    print(df.head())

    # Per-column summaries. Each column is checked first so that one missing
    # column does not abort the remaining summaries.
    for heading, column, summarize in (
        ("Price Statistics", "price", pd.Series.describe),
        ("Surface Statistics", "surface", pd.Series.describe),
        ("Property Types", "property_type", pd.Series.value_counts),
    ):
        print(f"\n{heading}:")
        if column in df.columns:
            print(summarize(df[column]))
        else:
            print(f"Column '{column}' not found in the CSV")

except Exception as e:
    print(f"Error analyzing results: {e}")

Creating connector...
Storage directory: C:\Users\gabri\workspace\aida_projects\quant-estate\.venv\Lib\data\immobiliare_data_2025-06-03
Storage initialized at: C:\Users\gabri\workspace\aida_projects\quant-estate\.venv\Lib\data\immobiliare_data_2025-06-03
Initialized CSV file: C:\Users\gabri\workspace\aida_projects\quant-estate\.venv\Lib\data\immobiliare_data_2025-06-03\immobiliare.csv

Starting scraping from: https://www.immobiliare.it/vendita-case/milano/


KeyboardInterrupt: 