In [1]:
import duckdb
from tqdm.auto import tqdm 

# Path of the DuckDB database file, relative to the notebook's working directory.
DATABASE_PATH = "../dbt/database_name.duckdb"

# Open the connection used by the query cell of this notebook.
# NOTE(review): no read_only flag is passed, so DuckDB's default access mode
# applies; if this notebook never writes, read_only=True may be worth
# considering — confirm against the rest of the notebook first.
connection = duckdb.connect(database=DATABASE_PATH)

In [4]:
import pandas as pd
import plotly.graph_objects as go

In [8]:

# Count, for each ff_idlocal, the transitions between consecutive years in
# which it is listed, aggregated over all ff_idlocal values.
#
#   exploded    : one row per (ff_idlocal, year), obtained by splitting the
#                 comma-separated file_years column of lovac_history.
#   paired      : attaches to every row the next year of the same ff_idlocal.
#                 Years are ordered as text (they come from a string split).
#                 LEAD returns NULL on the last year of each ff_idlocal.
#   transitions : drops those NULL rows and counts rows per
#                 (source_year, target_year) pair.
#
# LEAD replaces a self-join on ROW_NUMBER() (rnk = rnk - 1): it yields the same
# consecutive pairs in a single window pass. The final ORDER BY makes the row
# order of the result deterministic.
query = """
WITH exploded AS (
    SELECT ff_idlocal, UNNEST(STRING_TO_ARRAY(file_years, ',')) AS year
    FROM lovac_history
),
paired AS (
    SELECT
        year AS source_year,
        LEAD(year) OVER (PARTITION BY ff_idlocal ORDER BY year) AS target_year
    FROM exploded
),
transitions AS (
    SELECT
        source_year,
        target_year,
        COUNT(*) AS value
    FROM paired
    WHERE target_year IS NOT NULL
    GROUP BY source_year, target_year
)
SELECT * FROM transitions
ORDER BY source_year, target_year;
"""

# Run the query and load the result into a pandas DataFrame with the columns
# source_year, target_year and value.
df = connection.execute(query).fetch_df()


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [7]:
# Sankey nodes: every distinct year seen as a source or a target, sorted
# ascending, each mapped to its position in that sorted list.
all_years = sorted(set(df['source_year']) | set(df['target_year']))
year_to_index = dict(zip(all_years, range(len(all_years))))

# Sankey links: one per row of df, expressed as node positions plus a weight.
source_indices = df['source_year'].map(year_to_index).to_list()
target_indices = df['target_year'].map(year_to_index).to_list()
values = df['value'].to_list()

# Node appearance and labels (one label per year).
node_style = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=all_years,
    color="blue",
)

# Link endpoints and weights.
link_spec = dict(source=source_indices, target=target_indices, value=values)

# Build the figure from a single Sankey trace, set the title and font size,
# then render it.
sankey_trace = go.Sankey(node=node_style, link=link_spec)
fig = go.Figure(data=[sankey_trace])
fig.update_layout(title_text="Evolution des logements vacants", font_size=10)
fig.show()