# Visualization of Helsinki Properties

This notebook loads a sample of the Helsinki properties that the `prepare_geospatial_data.py` script processed and stored in the DuckDB database, and plots their geometries.

In [None]:
import duckdb
import geopandas as gpd
import pandas as pd
from shapely import wkt
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import os

# --- Configuration ---
# Path to the DuckDB file, relative to the notebook's working directory.
DB_PATH = os.path.join('..', 'data', 'real_estate.duckdb')
# Table that the loading cell queries.
TABLE_NAME = 'helsinki_properties'
# Maximum number of rows pulled into memory by the loading cell.
SAMPLE_SIZE = 10_000

# The loading cell checks this flag before it tries to connect.
db_exists = os.path.exists(DB_PATH)

# Report the outcome either way; the absolute path is shown so a wrong working
# directory is easy to spot.
display(Markdown(
    f"## ✅ Database Found\nLocated at `{os.path.abspath(DB_PATH)}`."
    if db_exists else
    f"## ❌ Database Not Found\nCould not find the database file at `{os.path.abspath(DB_PATH)}`. Please run the `prepare_geospatial_data.py` script first."
))

## Loading the Helsinki Properties Data

In [None]:
# Load up to SAMPLE_SIZE rows of TABLE_NAME and build the GeoDataFrame `gdf`
# that the plotting cell looks for. Failures are reported as Markdown rather
# than raised, so the rest of the notebook keeps running.

# Reset the outputs of any earlier run of this cell. Without this, a reload
# that fails (missing file, missing table, query error) would leave the old
# `gdf` in the kernel and the plotting cell would silently draw stale data.
table_exists = False
if 'gdf' in locals():
    del gdf

if db_exists:
    try:
        # Open read-only: this cell only issues SHOW/SELECT statements, so it
        # never needs write access to the database file.
        with duckdb.connect(DB_PATH, read_only=True) as con:
            # Check if the table exists
            tables = con.execute("SHOW TABLES;").fetchdf()
            table_exists = TABLE_NAME in tables['name'].values
            if not table_exists:
                display(Markdown(f"## ❌ Table Not Found\nThe table `{TABLE_NAME}` was not found in the database. Please run the `prepare_geospatial_data.py` script."))
            else:
                display(Markdown(f"## ✅ Table Found\nSuccessfully found the `{TABLE_NAME}` table."))

                # Load a sample of the data.
                # NOTE: LIMIT without ORDER BY returns whichever rows come back
                # first; it is a capped subset, not a random sample.
                query = f"SELECT * FROM {TABLE_NAME} LIMIT {SAMPLE_SIZE};"
                df = con.execute(query).fetchdf()
                display(Markdown(f"Loaded a sample of **{len(df)}** properties."))

                # Convert WKT to geometry in one vectorised call instead of a
                # per-row `wkt.loads`; missing WKT values become missing
                # geometries rather than aborting the whole load.
                df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry_wkt'])
                # NOTE(review): no CRS is assigned here; set one if the
                # coordinate system of `geometry_wkt` is known.
                gdf = gpd.GeoDataFrame(df, geometry='geometry')

                display(Markdown("**Sample Data Head:**"))
                display(gdf.head())
    except Exception as e:
        # Broad on purpose: any connection, query or parsing problem is shown
        # to the reader and leaves `gdf` undefined for the plotting cell.
        display(Markdown(f"### Error loading data: {e}"))

## Visualizing the Sampled Helsinki Properties

In [None]:
# Draw the sampled geometries, or say why there is nothing to draw.
# `gdf` only exists if the loading cell completed successfully.
if 'gdf' not in locals():
    display(Markdown("Cannot visualize: data not loaded."))
else:
    try:
        fig, ax = plt.subplots(figsize=(12, 12))
        # Tiny dot markers keep thousands of features readable on one figure.
        gdf.plot(ax=ax, color='blue', marker='.', markersize=1)
        # NOTE(review): the axis labels assume lon/lat coordinates; no CRS is
        # set on `gdf`, so confirm against the source data.
        ax.set(
            title=f'Sample of {len(gdf)} Helsinki Properties',
            xlabel='Longitude',
            ylabel='Latitude',
        )
        ax.grid(True)
        plt.show()
    except Exception as err:
        # Show plotting problems in the notebook instead of raising.
        display(Markdown(f"### Error during visualization: {err}"))