<a href="https://colab.research.google.com/github/Esbern/Sankey-diagrams/blob/main/genneral%20sankey.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Note: before using Plotly in JupyterLab, it is necessary to install not only the Python library but also the JupyterLab extension,
for instance: micromamba install -c conda-forge jupyterlab-plotly-extension

In [126]:
import requests
import json
import pandas as pd
import plotly.graph_objects as go

In [127]:
class Table:
    """
    Represents one Airtable table and lazily exposes it as a pandas DataFrame,
    a list of node labels and a list of foreign-key relationships.

    Nothing is requested from Airtable until ``data``, ``labels`` or
    ``relationships`` is first accessed (or ``fetch_data`` is called).

    Attributes:
        table_id (str): Identifier for the table.
        label_column (str): Name of the column whose values are used as labels in diagrams.
        api_key (str): API key for accessing Airtable.
        base_id (str): Base ID of the Airtable database.
        foreign_key_table_id (str): Identifier of the table referenced in the foreign_key_column.
        foreign_key_column (str): Column name that acts as a foreign key to another table.
            An empty value means the table has no outgoing relationships.
        _data (DataFrame): Internal DataFrame containing fetched data (None until fetched).
        _labels (list): List of ``(f"{table_id}_{record_id}", label)`` tuples, lazily loaded.
        _relationships (list): List of ``(record_id, linked_value)`` tuples based on the
            foreign key column, lazily loaded.
    """

    # Seconds to wait for each Airtable response; without a timeout a stalled
    # connection would block the call indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, table_id, label_column, api_key, base_id, foreign_key_table_id, foreign_key_column):
        self.table_id = table_id
        self.label_column = label_column
        self.api_key = api_key
        self.base_id = base_id
        self.foreign_key_table_id = foreign_key_table_id
        self.foreign_key_column = foreign_key_column
        self._data = None
        self._labels = None
        self._relationships = None

    @property
    def data(self):
        """DataFrame with one row per record; fetched on first access."""
        if self._data is None:
            self.fetch_data()
        return self._data

    def fetch_data(self):
        """
        Fetches every page of the table and stores the records in ``_data``.

        Each row holds the record's fields plus an ``id`` column with the
        record's primary key. Cached labels and relationships are discarded so
        they are rebuilt from the freshly fetched data.

        Raises:
            RuntimeError: If Airtable answers with a status code other than 200.
        """
        url = f"https://api.airtable.com/v0/{self.base_id}/{self.table_id}"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        params = {}
        rows = []

        while True:
            response = requests.get(url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                raise RuntimeError(f"Failed to fetch data: {response.text}")
            page = response.json()
            for record in page.get('records', []):
                # Copy the fields instead of mutating the response payload,
                # and include the primary key alongside them.
                rows.append({**record['fields'], 'id': record['id']})

            # The response carries an 'offset' for as long as more pages remain.
            offset = page.get('offset')
            if not offset:
                break
            params['offset'] = offset

        self._data = pd.DataFrame(rows)
        self._labels = None
        self._relationships = None

    @property
    def labels(self):
        """List of ``(f"{table_id}_{record_id}", label)`` tuples; built on first access."""
        if self._labels is None:
            self.create_label_and_relationship_lists()
        return self._labels

    @property
    def relationships(self):
        """List of ``(record_id, linked_value)`` tuples; built on first access."""
        if self._relationships is None:
            self.create_label_and_relationship_lists()
        return self._relationships

    def create_label_and_relationship_lists(self):
        """
        Generates labels and source-target relationships from data.

        Records without a value in the foreign key column contribute no
        relationship, and a table with no rows (or without the foreign key
        column at all) yields an empty relationship list instead of failing.

        Raises:
            KeyError: If the table has rows but no ``label_column`` column.
        """
        frame = self.data

        if frame.empty:
            # A frame built from zero records has no columns at all, not even 'id'.
            self._labels = []
            self._relationships = []
            return

        self._labels = [
            (self.table_id + "_" + record_id, label)
            for record_id, label in zip(frame['id'], frame[self.label_column])
        ]

        if not self.foreign_key_column or self.foreign_key_column not in frame.columns:
            self._relationships = []
            return

        # One row per linked value; records without links explode to NaN and are dropped.
        exploded = frame.explode(self.foreign_key_column)
        exploded = exploded.dropna(subset=[self.foreign_key_column])
        self._relationships = list(zip(exploded['id'], exploded[self.foreign_key_column]))

# Example usage (make sure the field names are correct for your Airtable setup)
# tables = [
#     Table(table_id="tblmO1yIO7iLGjeBx", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblO8e0GuUpzcnCOh", foreign_key_column="Phenomenon"),
#     Table(table_id="tblO8e0GuUpzcnCOh", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblWUnluzfa79Y26z", foreign_key_column="Variable"),
#     Table(table_id="tblWUnluzfa79Y26z", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
# ]



In [128]:
def create_sankey_diagram(tables, title="Sankey Diagram", width=1700, height=1000):
    """
    Builds and shows a Sankey diagram linking the records of several tables.

    Every record of every table becomes a node; every relationship whose
    source and target are both known nodes becomes a link of value 1.

    Args:
        tables: Iterable of objects exposing ``table_id``,
            ``foreign_key_table_id``, ``labels`` (``(node_id, label)`` tuples)
            and ``relationships`` (``(source_id, target_id)`` tuples).
        title (str): Title shown above the diagram.
        width (int): Figure width in pixels.
        height (int): Figure height in pixels.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # First phase: give every node id a position in the node list.
    # An id seen more than once keeps the label of its first occurrence.
    label_to_index = {}
    node_labels = []
    for table in tables:
        for node_id, actual_label in table.labels:
            if node_id not in label_to_index:
                label_to_index[node_id] = len(node_labels)
                node_labels.append(actual_label)

    # Second phase: translate relationships into node positions now that all
    # nodes are indexed.
    source_indices = []
    target_indices = []
    for table in tables:
        for source_id, target_id in table.relationships:
            # Node ids are prefixed with the id of the table they belong to.
            source_index = label_to_index.get(f"{table.table_id}_{source_id}")
            target_index = label_to_index.get(f"{table.foreign_key_table_id}_{target_id}")
            if source_index is None or target_index is None:
                # Skip links that point at a record that is not a node.
                continue
            source_indices.append(source_index)
            target_indices.append(target_index)

    # Every link carries the same weight.
    values = [1] * len(source_indices)

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels,
        ),
        link=dict(
            source=source_indices,
            target=target_indices,
            value=values,
        ),
    )])

    fig.update_layout(
        font_size=10,
        autosize=False,
        width=width,
        height=height,
        margin=dict(l=10, r=10, b=10, t=20, pad=4),
        title_text=title,
        paper_bgcolor="white",
    )
    fig.show()

# Example usage
# Assuming 'tables' is a list of Table instances that have already fetched data and generated labels and relationships
# create_sankey_diagram(tables)


# Sankey for Geoøkologisk kortlægning:
## SANKEY 1: Geoøkologisk sankey diagram

Struktur (tabeller i airtable):

Sphere ->  Phenomenon -> variable

In [132]:
# Load data from Airtable.
# The access token is read from the environment so that it is not stored in
# the notebook; set AIRTABLE_API_KEY before running this cell.
import os

api_key = os.environ.get("AIRTABLE_API_KEY")
if not api_key:
    raise RuntimeError("Set the AIRTABLE_API_KEY environment variable to your Airtable access token.")
base_id = 'appLztwTKWOhFJ40Z'
# Chain of tables: each one links to the next through its foreign key column;
# the last table has no outgoing links.
tables = [
    Table(table_id="tblmO1yIO7iLGjeBx", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblO8e0GuUpzcnCOh", foreign_key_column="Phenomenon"),
    Table(table_id="tblO8e0GuUpzcnCOh", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblWUnluzfa79Y26z", foreign_key_column="Variable"),
    Table(table_id="tblWUnluzfa79Y26z", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

In [133]:
# Sanity check: show the (node id, label) tuples built for the first table.
print(tables[0].labels)


[('tblmO1yIO7iLGjeBx_recBAf1oiVUxNnXzI', '4 - Atmosphere (not part of geotope delineation procedures - only serves as descriptive variables)'), ('tblmO1yIO7iLGjeBx_recDCkgAVQBLEZp7g', '2 - Toposphere'), ('tblmO1yIO7iLGjeBx_recSJBcDKHrg2VJ02', '6 - Other (not part of geotope model)'), ('tblmO1yIO7iLGjeBx_recXPa6LR57R8T1EG', '5 - Biosphere (not part of geotope model)'), ('tblmO1yIO7iLGjeBx_reckbSlmzSC4yXFTu', '3 - Lithosphere'), ('tblmO1yIO7iLGjeBx_recwfNo5m4Qe2CKeh', '1 - Hydrosphere')]


In [134]:
# Construct the diagram.
# Table loads lazily, so any table not yet fetched is requested from Airtable
# when create_sankey_diagram first reads its labels/relationships.
# NOTE(review): the five module-level containers below are not read by
# create_sankey_diagram, which builds its own local containers with the same
# names; they look like leftovers and can probably be removed.
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)



# Sankey for policy papers:

## SANKEY 1: POLITICAL TARGET-SETTING

Struktur (tabeller i airtable):

policy source -> Targets -> Target group

Filter: medtag kun linjer i "policy source" der har link til et eller flere linjer i "targets"

Filter: medtag kun linjer i "targets" der har link til et eller flere linjer i "target group"







In [None]:
# Load data from Airtable.
# The access token is read from the environment so that it is not stored in
# the notebook; set AIRTABLE_API_KEY before running this cell.
import os

api_key = os.environ.get("AIRTABLE_API_KEY")
if not api_key:
    raise RuntimeError("Set the AIRTABLE_API_KEY environment variable to your Airtable access token.")
base_id = 'apprKfEKZ2Ju74g9w'
tables = [
    # Policy source is tblzHR1WHYHA5MlwQ
    Table(table_id="tblzHR1WHYHA5MlwQ", label_column="Policy source", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7", foreign_key_column="Targets (policy targets)"),
    # Target is tbl7OYOXduME11uh7
    Table(table_id="tbl7OYOXduME11uh7", label_column="Target name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblVarbVYd96JUE6f", foreign_key_column="Target Group"),
    # Target group is tblVarbVYd96JUE6f
    Table(table_id="tblVarbVYd96JUE6f", label_column="Target Group", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

# Filter the first table (policy source): keep only rows whose
# "Targets (policy targets)" value is present and not blank.
df_policy = tables[0].data
df_policy_filtered = df_policy[df_policy["Targets (policy targets)"].notnull() & df_policy["Targets (policy targets)"].astype(str).str.strip().ne("")]
# Replace the cached frame before labels/relationships are first built, so
# they are derived from the filtered rows only.
tables[0]._data = df_policy_filtered

# Filter the second table (targets): keep only rows whose "Target Group"
# value is present and not blank.
df_targets = tables[1].data
df_targets_filtered = df_targets[df_targets["Target Group"].notnull() & df_targets["Target Group"].astype(str).str.strip().ne("")]
tables[1]._data = df_targets_filtered


In [None]:
# Construct the diagram.
# NOTE(review): the five module-level containers below are not read by
# create_sankey_diagram, which builds its own local containers with the same
# names; they look like leftovers and can probably be removed.
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)


Minimum example

# SANKEY 2: LAND USE OPTIONS

Struktur (tabeller i airtable):

Target group -> Targets -> Land uses -> land conditions

Filter: medtag kun linjer i "targets" der har link til et eller flere linjer i "target group"



In [163]:
# Load data from Airtable.
# The access token is read from the environment so that it is not stored in
# the notebook; set AIRTABLE_API_KEY before running this cell.
import os

api_key = os.environ.get("AIRTABLE_API_KEY")
if not api_key:
    raise RuntimeError("Set the AIRTABLE_API_KEY environment variable to your Airtable access token.")
base_id = 'apprKfEKZ2Ju74g9w'
# NOTE(review): the fourth entry reuses table id tbl7OYOXduME11uh7, which is
# also the second entry. create_sankey_diagram keys nodes on
# f"{table_id}_{record_id}" and keeps the first label it sees for an id, so the
# "Land conditions" labels of the fourth entry are never used - confirm whether
# that is intended.
tables = [
    Table(table_id="tblVarbVYd96JUE6f", label_column="Target Group", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7", foreign_key_column="Targets"),
    Table(table_id="tbl7OYOXduME11uh7", label_column="Target name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblTRyuT48bBN24QG", foreign_key_column="Land uses"),
    Table(table_id="tblTRyuT48bBN24QG", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7", foreign_key_column="Land conditions (from Targets)"),
    Table(table_id="tbl7OYOXduME11uh7", label_column="Land conditions", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

# Filter the second table (targets) - currently disabled:
#df_targets = tables[1].data
#df_targets_filtered = df_targets[df_targets["Target Group"].notnull() & df_targets["Target Group"].astype(str).str.strip().ne("")]
#tables[1]._data = df_targets_filtered

In [161]:
# Sanity check: show the (node id, label) tuples built for the fourth table.
print(tables[3].labels)

[('tbl7OYOXduME11uh7_rec0HbMnXg0CRgpQs', nan), ('tbl7OYOXduME11uh7_rec0M7xdMS03TcTKa', 'landbrugsareal'), ('tbl7OYOXduME11uh7_rec1dNt2JbAAeZiWt', nan), ('tbl7OYOXduME11uh7_rec1mJ7IBRdSA2sPl', nan), ('tbl7OYOXduME11uh7_rec2IIEoRzoDUSppM', nan), ('tbl7OYOXduME11uh7_rec2LbsMvRl4LYVXL', 'landbrugsområder'), ('tbl7OYOXduME11uh7_rec2fA40bNS7vC60q', nan), ('tbl7OYOXduME11uh7_rec2jtoHqQiOf7Aur', nan), ('tbl7OYOXduME11uh7_rec2lFEQQxikOwcod', nan), ('tbl7OYOXduME11uh7_rec2lcpEzSULVMaA4', nan), ('tbl7OYOXduME11uh7_rec2ryNxd0UC5D8Fz', nan), ('tbl7OYOXduME11uh7_rec2vYuYs7mNq3Ou5', nan), ('tbl7OYOXduME11uh7_rec30vuYDzu2d958J', nan), ('tbl7OYOXduME11uh7_rec32dNf7KLZbR3SB', nan), ('tbl7OYOXduME11uh7_rec35bHM2Mf8X1UP1', nan), ('tbl7OYOXduME11uh7_rec3FW62YnBIwE6Qm', 'græsområder'), ('tbl7OYOXduME11uh7_rec3UOR3qG8VwRtD1', 'landbrugsarealer'), ('tbl7OYOXduME11uh7_rec3ai3qcveLhcynI', nan), ('tbl7OYOXduME11uh7_rec3cBvv2KkfI2aYQ', 'Landbrugsarealer'), ('tbl7OYOXduME11uh7_rec3nW2cqlh9fO6mC', 'Landbrugsjord'),

In [None]:
# Inspect the cached DataFrame of the first table; this reads the private
# attribute directly, so it is None if nothing has been fetched yet.
tables[0]._data

In [164]:
# Construct the diagram.
# Table loads lazily, so any table not yet fetched is requested from Airtable
# when create_sankey_diagram first reads its labels/relationships.
# NOTE(review): the five module-level containers below are not read by
# create_sankey_diagram, which builds its own local containers with the same
# names; they look like leftovers and can probably be removed.
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)

In [152]:
# Minimal stand-alone Sankey example: six nodes with explicit x/y positions
# and eight weighted links between them.
fig = go.Figure(data=[go.Sankey(
    arrangement="snap",
    node=dict(
        label=["A", "B", "C", "D", "E", "F"],
        x=[0.2, 0.1, 0.5, 0.7, 0.3, 0.5],
        y=[0.7, 0.5, 0.2, 0.4, 0.2, 0.3],
        pad=10,  # 10 pixels
    ),
    link=dict(
        source=[0, 0, 1, 2, 5, 4, 3, 5],
        target=[5, 3, 4, 3, 0, 2, 2, 3],
        value=[1, 2, 1, 1, 1, 1, 1, 2],
    ),
)])

fig.show()