In [1]:
# Pysankey make random errors with plots. Implement it with plotly instead

In [2]:
from config import DATA_DIR, LAND_COVER_PALETTE, LEGEND_TO_PALETTE

import pandas as pd
import plotly.express as px
from config import PLOTS_DIR
import pandas as pd
import plotly.express as px
from config import PLOTS_DIR
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time


path = DATA_DIR / "Fix-the-sankeys"

In [3]:
LEGEND_TO_PALETTE

{'Water': '#419BDF',
 'Trees': '#397D49',
 'Grass': '#88B053',
 'Flooded vegetation': '#7A87C6',
 'Crops': '#E49635',
 'Shrub & Scrub': '#DFC35A',
 'Built Area': '#C4281B',
 'Bare ground': '#A59B8F',
 'Snow & Ice': '#B39FE1',
 'Wind Turbine': '#0984E3',
 'Solar Panel': '#2d3436',
 'Artificial Land Use': '#e17055',
 'Natural Areas': '#00b894',
 'Renewable Energy': '#00cec9'}

In [4]:
js = """
const TEXTPAD = 10; // constant used in Plotly.js

function sankeyNodeLabelsAlign() {
    const nodes = gd.getElementsByClassName('sankey-node');

    for (const node of nodes) {
        const d = node.__data__;
        const label = node.getElementsByClassName('node-label').item(0);

        // Ensure to reset any previous modifications
        label.setAttribute('x', 0);

        if (!d.horizontal) continue;

        // This is how Plotly's default text positioning is computed (coordinates
        // are relative to that of the corresponding node).
        const padX = d.nodeLineWidth / 2 + TEXTPAD;
        const posX = padX + d.visibleWidth;
        let x;
        let textAnchor;

        if (d.node.originalLayer === 0) {
            // Align left labels to the left
            x = -padX - TEXTPAD;  // Push further left by TEXTPAD
            textAnchor = 'end';
        } else {
            // Align right labels to the right
            x = posX;
            textAnchor = 'start';
        }

        label.setAttribute('x', x);
        label.setAttribute('text-anchor', textAnchor);
    }
}

const gd = document.getElementById('{plot_id}');

gd.on('plotly_afterplot', sankeyNodeLabelsAlign);
gd.emit('plotly_afterplot');
"""


# iterate over the files that ends with .csv
for file in path.glob("*.csv"):
    print(file)
    df = pd.read_csv(file)

    if file.stem == "Israel":
        # Reduce all changed_area by 15%
        df["changed_area"] = df["changed_area"] * 0.85
    elif "area" in df.columns:
        # all places where area is Israel
        df["changed_area"] = df["changed_area"] * 0.85

    # not possible to scale israel in clusters as we don't have the information

    import plotly.graph_objects as go

    # read df from dict

    src_classes = sorted(list(set(df["lulc_category_from"].to_list())))
    tgt_classes = sorted(list(set(df["lulc_category_to"].to_list())))

    labels = src_classes + tgt_classes

    # create a dictionary with index for each class for source and target
    src_dict = {src_classes[i]: i for i in range(len(src_classes))}
    tgt_dict = {tgt_classes[i]: i + len(src_classes) for i in range(len(tgt_classes))}

    source, target, value, color = [], [], [], []

    for _, row in df.iterrows():
        source.append(src_dict[row["lulc_category_from"]])
        target.append(tgt_dict[row["lulc_category_to"]])
        value.append(row["changed_area"])
        color.append(LEGEND_TO_PALETTE[row["lulc_category_from"]])

    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    pad=10,
                    thickness=10,
                    line=dict(color="black", width=0.5),
                    label=labels,
                    color=[LEGEND_TO_PALETTE[label] for label in labels],
                ),
                link=dict(
                    source=source,
                    target=target,
                    value=value,
                    color=color,
                ),
            )
        ]
    )

    # specify size of the figure

    summed_area = df["changed_area"].sum()

    mappings = {
        "1_3_16": "Solar Expansion",
        "1_3_19": "Desertification",
        "1_8_37": "Urbanization",
        "6_23_24": "Renewable Energy",
        "14_3_12": "Deforestation",
        "tfidf_cluster_0": "Cluster 0",
        "tfidf_cluster_1": "Cluster 1",
        "tfidf_cluster_2": "Cluster 2",
        "tfidf_cluster_3": "Cluster 3",
        "tfidf_cluster_4": "Cluster 4",
    }

    if file.stem in mappings:
        title = f"{mappings[file.stem]} - {summed_area:.0f} km²"
    else:
        title = f"{file.stem} - {summed_area:.0f} km²"

    fig.update_layout(
        title_text=title,
        font_size=20,
        width=800,
        height=800,
    )

    # When I inspect the html I can't see the labels. Use the function similar to plt.tight_layout()
    fig.update_layout(margin=dict(t=100, l=200, r=200, b=50))

    # output the html
    outpath = file.with_suffix(".html")
    print("saving to", outpath)
    fig.write_html(outpath, post_script=[js])

/Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Netherlands.csv
saving to /Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Netherlands.html
/Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Denmark.csv
saving to /Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Denmark.html
/Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Netherlands (Renewable Energy Production).csv
saving to /Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Netherlands (Renewable Energy Production).html
/Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Israel (Renewable Energy Production).csv
saving to /Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Israel (Renewable Energy Production).html
/Users/viktorduepedersen/Documents/github/thesis/data/Fix-the-sankeys/Denmark (Renewable Energy Production).csv
saving to /Users/viktorduepedersen/Documents/github/th

In [5]:
import os
import time

from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

from config import DATA_DIR, PLOTS_DIR


def html_to_png(html_files, output_dir):
    # Setup Chrome options
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")  # Run in headless mode
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")

    # Initialize the WebDriver
    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()), options=chrome_options
    )

    for html_file in html_files:
        # Load the HTML file
        file_path = f"file://{os.path.abspath(html_file)}"
        driver.get(file_path)

        # Give the page some time to render (adjust time if needed)
        time.sleep(2)

        # Take a screenshot and save it
        png_file = os.path.join(
            output_dir, os.path.splitext(os.path.basename(html_file))[0] + ".png"
        )
        driver.save_screenshot(png_file)

        # Optional: Crop the screenshot if needed
        image = Image.open(png_file)

        """
        The image was save with these settings from plotly

         fig.update_layout(
            title_text=title,
            font_size=20,
            width=800,
            height=800,
        )

        fig.update_layout(margin=dict(t=100, l=200, r=200, b=50))        
        """

        cropped_image = image.crop((10, 0, 1650, 1600))
        cropped_image.save(png_file)

        print(f"Saved screenshot to: {png_file}")

    # Quit the WebDriver
    driver.quit()


path = DATA_DIR / "Fix-the-sankeys"
html_files = [file for file in path.glob("*.html")]
output_dir = PLOTS_DIR
html_to_png(html_files, output_dir)

Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/Israel.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/6_23_24.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/Israel (Renewable Energy Production).png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/14_3_12.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/1_3_19.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/Estonia.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/Denmark.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/tfidf_cluster_0.png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/Denmark (Renewable Energy Production).png
Saved screenshot to: /Users/viktorduepedersen/Documents/github/thesis/plots/tfidf_cluster_1.png
Saved screenshot to: /Users/viktorduepedersen/D