<a href="https://colab.research.google.com/github/Esbern/Sankey-diagrams/blob/main/genneral%20sankey.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Note before using Plotly in Jupiter lab it is necessary not only to install the Pyton libary but also the Jupiter lab extension
for instance micromamba install -c conda-forge jupyterlab-plotly-extension

In [46]:
import requests
import json
import pandas as pd
import plotly.graph_objects as go

In [47]:
class Table:
    """
    Represents a database table and handles fetching of data from Airtable,
    storing it in a pandas DataFrame. It manages label and relationship
    lists with lazy loading and includes Airtable's primary key for each record.

    Attributes:
        table_id (str): Identifier for the table.
        label_column (str): Label of the table to be used in diagrams.
        api_key (str): API key for accessing Airtable.
        base_id (str): Base ID of the Airtable database.
        foreign_key_table_id (str): Identifier of the table referenced in the foreign_key_column.
        foreign_key_column (str): Column name that acts as a foreign key to another table.
        _data (DataFrame): Internal DataFrame containing fetched data.
        _labels (list): List of tuples containing record IDs and labels, lazily loaded.
        _relationships (list): List of source-target tuples based on foreign keys, lazily loaded.
    """

    def __init__(self, table_id, label_column, api_key, base_id, foreign_key_table_id, foreign_key_column):
        self.table_id = table_id
        self.label_column = label_column
        self.api_key = api_key
        self.base_id = base_id
        self.foreign_key_table_id = foreign_key_table_id
        self.foreign_key_column = foreign_key_column
        self._data = None
        self._labels = None
        self._relationships = None

    @property
    def data(self):
        if self._data is None:
            self.fetch_data()
        return self._data

    def fetch_data(self):
        """Fetches and populates the internal DataFrame with primary key and record fields."""
        url = f"https://api.airtable.com/v0/{self.base_id}/{self.table_id}"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        params = {}
        data = []

        while True:
            response = requests.get(url, headers=headers, params=params)
            if response.status_code != 200:
                raise Exception(f"Failed to fetch data: {response.text}")
            page_data = response.json()
            for record in page_data['records']:
                record_data = record['fields']
                record_data['id'] = record['id']  # Include the primary key
                data.append(record_data)

            if 'offset' in page_data:
                params['offset'] = page_data['offset']
            else:
                break

        self._data = pd.DataFrame(data)

    @property
    def labels(self):
        if self._labels is None:
            self.create_label_and_relationship_lists()
        return self._labels

    @property
    def relationships(self):
        if self._relationships is None:
            self.create_label_and_relationship_lists()
        return self._relationships

    def create_label_and_relationship_lists(self):
        """Generates labels and source-target relationships from data."""
        if self._data is None:
            self.fetch_data()

        self._labels = [(self.table_id+"_"+ row['id'], row[self.label_column]) for index, row in self._data.iterrows()]
        if self.foreign_key_column != "":
            df_relationship = self._data.explode(self.foreign_key_column)
            self._relationships = [(row['id'], row[self.foreign_key_column]) for index, row in df_relationship.iterrows() if self.foreign_key_column in row]
        else:
            self._relationships = []

# Example usage (make sure the field names are correct for your Airtable setup)
# tables = [
#     Table(table_id="tblmO1yIO7iLGjeBx", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblO8e0GuUpzcnCOh", foreign_key_column="Phenomenon"),
#     Table(table_id="tblO8e0GuUpzcnCOh", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblWUnluzfa79Y26z", foreign_key_column="Variable"),
#     Table(table_id="tblWUnluzfa79Y26z", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
# ]



In [48]:
def create_sankey_diagram(tables):
    # Maps to store indices of each label in all tables
    label_to_index = {}
    current_index = 0

    # Lists for Sankey diagram
    node_labels = []
    source_indices = []
    target_indices = []
    values = []

    # First Phase: Index all labels from all tables
    for table in tables:
        for id_with_table, actual_label in table.labels:
            if id_with_table not in label_to_index:
                label_to_index[id_with_table] = current_index
                node_labels.append(actual_label)  # Append actual label for visualization
                current_index += 1

    # Second Phase: Process relationships now that all labels are indexed
    for table in tables:
        for source_id, target_id in table.relationships:
            # Create full unique IDs for source and target using the correct table IDs
            source_full_id = f"{table.table_id}_{source_id}"
            target_full_id = f"{table.foreign_key_table_id}_{target_id}"

            if source_full_id in label_to_index and target_full_id in label_to_index:
                source_index = label_to_index[source_full_id]
                target_index = label_to_index[target_full_id]
                source_indices.append(source_index)
                target_indices.append(target_index)
                values.append(1)  # Value can be adjusted if needed

    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels,
        ),
        link=dict(
            source=source_indices,
            target=target_indices,
            value=values
        ))])

    fig.update_layout(
        font_size=10,
        autosize=False,
        width= 1000,
        height=1000,
        margin=dict(
            l=10,
            r=10,
            b=10,
            t=20,
            pad=4
        ),
        title_text="Sankey Diagram",
        paper_bgcolor="white"
    )
    fig.show()

# Example usage
# Assuming 'tables' is a list of Table instances that have already fetched data and generated labels and relationships
# create_sankey_diagram(tables)


In [None]:
# Load data from airtable
api_key = 'patwjsizhgQyQkZkT.f9e8b1595df5b527d0d01d3a45af0dfa77eab63707e18398ad62f1f3818a9ce9'
base_id = 'appLztwTKWOhFJ40Z'
tables = [
    Table(table_id="tblmO1yIO7iLGjeBx", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblO8e0GuUpzcnCOh", foreign_key_column="Phenomenon"),
    Table(table_id="tblO8e0GuUpzcnCOh", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblWUnluzfa79Y26z",foreign_key_column="Variable"),
    Table(table_id="tblWUnluzfa79Y26z", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

In [None]:
print(tables[0].labels)


[('tblmO1yIO7iLGjeBx_recBAf1oiVUxNnXzI', '4 - Atmosphere (not part of geotope delineation procedures - only serves as descriptive variables)'), ('tblmO1yIO7iLGjeBx_recDCkgAVQBLEZp7g', '2 - Toposphere'), ('tblmO1yIO7iLGjeBx_recSJBcDKHrg2VJ02', '6 - Other (not part of geotope model)'), ('tblmO1yIO7iLGjeBx_recXPa6LR57R8T1EG', '5 - Biosphere (not part of geotope model)'), ('tblmO1yIO7iLGjeBx_reckbSlmzSC4yXFTu', '3 - Lithosphere'), ('tblmO1yIO7iLGjeBx_recwfNo5m4Qe2CKeh', '1 - Hydrosphere')]


In [None]:
# Construct the diagram
# Assuming tables is a list of Table instances that have already fetched data and generated labels and relationships
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)



# Sanky for polyci papers:
## SANKEY 1: POLITICAL TARGET-SETTING

Struktur (tabeller i airtable):

policy source -> Targets -> Target group

Filter: medtag kun linjer i "policy source" der har link til et eller flere linjer i "targets"

Filter: medtag kun linjer i "targets” der har link til et eller flere linjer i "target group"







# Sankey 1: Political target-setting

In [55]:
# Load data from airtable
api_key = 'patwjsizhgQyQkZkT.f9e8b1595df5b527d0d01d3a45af0dfa77eab63707e18398ad62f1f3818a9ce9'
base_id = 'apprKfEKZ2Ju74g9w'
tables = [
    Table(table_id="tblzHR1WHYHA5MlwQ", label_column="Policy source", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7", foreign_key_column="Targets (policy targets)"),
    Table(table_id="tbl7OYOXduME11uh7", label_column="Target name", api_key=api_key, base_id=base_id, foreign_key_table_id="tblVarbVYd96JUE6f",foreign_key_column="Target Group"),
    Table(table_id="tblVarbVYd96JUE6f", label_column="Target Group", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

In [56]:
print(tables[0].labels)


[('tblzHR1WHYHA5MlwQ_rec097wcwr7wH7Snq', 'The Impact of Disasters on Agriculture and Food Security - Avoiding and reducing losses through investment in resilience'), ('tblzHR1WHYHA5MlwQ_rec0UlvMlDgnViaLV', 'Aftale om grøn omstilling af Dansk Landbrug'), ('tblzHR1WHYHA5MlwQ_rec1JMbQXty7j4wIq', 'Climate Change 2023 - Synthesis Report'), ('tblzHR1WHYHA5MlwQ_rec1ox4RpbP3H2cnG', 'Aftale om et grønt Danmark (grøn trepart)'), ('tblzHR1WHYHA5MlwQ_rec3Gwqr7J0lAy4Vy', 'Global Assessment Report on Biodiversity and Ecosystem Services'), ('tblzHR1WHYHA5MlwQ_rec4HkkhYN2AJfX8y', 'Scenarier for anvendelse af biomasse i fremtiden'), ('tblzHR1WHYHA5MlwQ_rec4PryMAYb5iTINT', 'European Green Deal'), ('tblzHR1WHYHA5MlwQ_rec5iLMtX43i1cOH4', 'Assessment of land degradation and restoration'), ('tblzHR1WHYHA5MlwQ_rec7Enr3Pg9vKqbdc', 'Basisanalyse for vandområdeplaner 2021-2027'), ('tblzHR1WHYHA5MlwQ_rec8R6Fh2nwXjFX2D', 'Genopretning af biodiversitet og økosystemer - Ekspertudtalelse'), ('tblzHR1WHYHA5MlwQ_rec9j

In [53]:
tables[0]._data

Unnamed: 0,Policy source,Source document,Source pdf,Publisher / author,Year,Territorial reference point,id,Finished (step 1),Targets (policy targets)
0,The Impact of Disasters on Agriculture and Foo...,https://www.fao.org/publications/home/fao-flag...,"[{'id': 'attsWeD5BuwuWkuad', 'url': 'https://v...",Food and Agriculture Organization of the Unite...,2023,[Global],rec097wcwr7wH7Snq,,
1,Aftale om grøn omstilling af Dansk Landbrug,https://fm.dk/media/25302/aftale-om-groen-omst...,"[{'id': 'attupvM6YiCyZfrpg', 'url': 'https://v...",Regeringen,2021,[National],rec0UlvMlDgnViaLV,Finished,
2,Climate Change 2023 - Synthesis Report,https://www.ipcc.ch/report/ar6/syr/downloads/r...,"[{'id': 'attDqMladH2i4q4HU', 'url': 'https://v...",Intergovernmental Panel on Climate Change,2023,[Global],rec1JMbQXty7j4wIq,,
3,Aftale om et grønt Danmark (grøn trepart),https://mim.dk/kampagner/groen-trepart,"[{'id': 'atto6IyXXispvRRbs', 'url': 'https://v...",Aftaleparterne i den grønne trepart,2024,[National],rec1ox4RpbP3H2cnG,,"[recXapBmC0gLZzFW3, recFAwdYP1HIcS4XT, recm15m..."
4,Global Assessment Report on Biodiversity and E...,https://www.ipbes.net/,"[{'id': 'attqMWASFv8O0jgNH', 'url': 'https://v...",Intergovernmental Science-Policy Platform on B...,2019,[Global],rec3Gwqr7J0lAy4Vy,,
...,...,...,...,...,...,...,...,...,...
56,The State of Agricultural Commodity Markets - ...,https://www.fao.org/publications/home/fao-flag...,"[{'id': 'att2z8eV2Cw07F5qx', 'url': 'https://v...",Food and Agriculture Organization of the Unite...,2022,[Global],recsj0AqzbsAYXTAh,,
57,i Danmark,https://www.dn.dk/politikker/,"[{'id': 'attnVxhoxzOX5SgjA', 'url': 'https://v...",Danmarks Naturfredningsforening,2009,[National],rectK5S4rYOVokaRj,Finished,
58,Danmarks fremtidige arealanvendelse,https://klimaraadet.dk/da/analyse/danmarks-fre...,"[{'id': 'attYqqkFwLT2J9xAw', 'url': 'https://v...",Klimarådet,2024,[National],recuKzO4t5Hv40LSQ,,"[recjzem54zxhuBoJA, rechLGMjF2UVMKoRz, reclJYE..."
59,Potentiale for at reservere 30% af arealet til...,https://dce2.au.dk/pub/SR507.pdf,,,,[National],recze7CnD69Y3wnHA,,


In [57]:
# Construct the diagram
# Assuming tables is a list of Table instances that have already fetched data and generated labels and relationships
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)

Minimum example

## SANKEY 2: LAND USE OPTIONS

Struktur (tabeller i airtable):

Target group -> Targets -> Land uses -> land conditions

Filter: medtag kun linjer i "targets” der har link til et eller flere linjer i "target group"



In [58]:
# Load data from airtable
api_key = 'patwjsizhgQyQkZkT.f9e8b1595df5b527d0d01d3a45af0dfa77eab63707e18398ad62f1f3818a9ce9'
base_id = 'apprKfEKZ2Ju74g9w'
tables = [
    Table(table_id="tblVarbVYd96JUE6f", label_column="Target Group", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7", foreign_key_column="Targets"),
    Table(table_id="tbl7OYOXduME11uh7", label_column="Targets", api_key=api_key, base_id=base_id, foreign_key_table_id="tblTRyuT48bBN24QG",foreign_key_column="Name"),
    Table(table_id="tblTRyuT48bBN24QG", label_column="Name", api_key=api_key, base_id=base_id, foreign_key_table_id="tbl7OYOXduME11uh7",foreign_key_column="Target name"),
    Table(table_id="tbl7OYOXduME11uh7", label_column="Target name", api_key=api_key, base_id=base_id, foreign_key_table_id="", foreign_key_column=""),
]

In [59]:
print(tables[0].labels)

[('tblVarbVYd96JUE6f_rec2F1hkw7WEobaaO', 'Klimasikring'), ('tblVarbVYd96JUE6f_rec6quEtcMxawpJkL', 'Friluftsinteresser'), ('tblVarbVYd96JUE6f_recByhTQWStmPsIcm', nan), ('tblVarbVYd96JUE6f_recCpKBy5x0lPgtvg', 'Nye former for organisering '), ('tblVarbVYd96JUE6f_recHfjiv83M1OVdvd', 'Economy'), ('tblVarbVYd96JUE6f_recLHHNJWEFiwEGZ6', 'Forestry and Biomass'), ('tblVarbVYd96JUE6f_recM72plqmOMokKav', 'Sårbar natur'), ('tblVarbVYd96JUE6f_recMyT5YOWmU5Bs5j', 'Kulturarv'), ('tblVarbVYd96JUE6f_recO2namYwsWydIP5', 'Særligt hensyn til truede og naturligt indvandrende arter'), ('tblVarbVYd96JUE6f_recQLsdhedkibpIKg', 'Biodiversity'), ('tblVarbVYd96JUE6f_recQfAWZsDtcEXTXD', 'Sundhed'), ('tblVarbVYd96JUE6f_recRZNnC7bJjfYu2u', 'Naturlig vanddynamik'), ('tblVarbVYd96JUE6f_recSxV0QQUwOrBxuS', 'Bosætning og infrastruktur'), ('tblVarbVYd96JUE6f_recUZ1l7Vbap8PbMV', 'Økologisk integritet'), ('tblVarbVYd96JUE6f_recUbIUnjNpnmRJHc', 'Pollution and ecotoxins'), ('tblVarbVYd96JUE6f_recWlPVAwBqzqg8U0', 'Råstofudgra

In [62]:
tables[1]._data

Unnamed: 0,Target name,Policy Source,Quotes (text excerpts with references),Description of target,Target time frame,Functions,Target Group,terest,Created,id,Land uses,Land conditions
0,God økologisk tilstand for vandmiljøet gennem ...,[recuKzO4t5Hv40LSQ],"""Vandmiljøet skal bringes i god økologisk tils...",God økologisk tilstand for vandmiljøet gennem ...,[2027],[rec2kJ26oS2YPcWzd],[recqPd9dgbm42CYl6],[National],2024-10-31T18:58:06.000Z,rec0HbMnXg0CRgpQs,,
1,Beskyttelse af drikkevandet gennem ændring i a...,[recuKzO4t5Hv40LSQ],"""Beskyttelse af drikkevandet kan komme til at ...",Beskyttelse af drikkevandet gennem ændring i a...,,,[recqPd9dgbm42CYl6],[National],2024-11-07T08:43:51.000Z,rec0M7xdMS03TcTKa,"[recDbsg83O0YTgaYX, recL519c7blBF7m9Z, recyVSA...",
2,Færre arbejdspladser i landbruget pga. den grø...,[recuKzO4t5Hv40LSQ],"""Jo større areal der skal omlægges, jo flere a...",Færre arbejdspladser i landbruget og i branche...,,,"[recHfjiv83M1OVdvd, recildlBRIBS2t0qy, recxpWj...",[National],2024-11-07T16:38:27.000Z,rec1dNt2JbAAeZiWt,,
3,"Opnåelse af biodiversitets-, vandmiljø- og kli...",[recuKzO4t5Hv40LSQ],"""Figur 4.3 viser, hvor meget landbrugsareal de...","Opnåelse af biodiversitets-, vandmiljø- og kli...",,,"[recqPd9dgbm42CYl6, recQLsdhedkibpIKg, recxpWj...",[National],2024-11-06T22:01:12.000Z,rec1mJ7IBRdSA2sPl,"[recNka9BU7xKaJ10u, rec8Gzd3XtM6gZ7sg, recDbsg...",
4,Sikring af akvatiske økosystemer mod udvasknin...,[recj9wzYknE1rpSmV],"""Påvirkning som følge af pesticidanvendelse\nI...","Udvaskning af pesticider fra jordbrug, skovbru...",,,"[recqPd9dgbm42CYl6, recUbIUnjNpnmRJHc]",[National],2024-11-10T11:34:51.000Z,rec2IIEoRzoDUSppM,"[reczD1hRlVE8vrPoR, recPPDE3AjKBfT40f, rec1PD6...",
...,...,...,...,...,...,...,...,...,...,...,...,...
206,Opfyldelse af vandrammedirektivet samt drivhus...,[recuKzO4t5Hv40LSQ],"""Beskyttelse af biodiversitet kan ikke vente t...",Opfyldelse af vandrammedirektivet samt drivhus...,,,"[recqPd9dgbm42CYl6, recxpWjra7wdZ6pu1]",[National],2024-11-07T08:41:19.000Z,recy0FDlwudmQJCSd,"[reckoJgk6CwYi98FD, reclm66pMVI6K3w9v, recvb22...",
207,Reduceret behov for landbrugsareal og produkti...,[recuKzO4t5Hv40LSQ],"""Bedre inddragelse af havets ressourcer\nEn he...",Reduceret behov for landbrugsareal gennem indd...,,,[recildlBRIBS2t0qy],[National],2024-11-07T16:04:51.000Z,recy0G5MTChWZgYI3,[recoRhQ61moSohrBJ],
208,Dækning af mellem 25 % til 30 % af landets are...,[recEvCBU1FrBRLoYq],"""Sammen med Natura 2000 og §3-arealer kan natu...",Dækning af 25 % af landets areal med natur gen...,,,[recxu7EKArvVZhINM],[National],2024-11-08T10:28:15.000Z,recyiY32BjXW27pXs,"[rec8Gzd3XtM6gZ7sg, reclm66pMVI6K3w9v]",
209,"Renere drikkevand gennem skovrejsning , især i...",[rec1ox4RpbP3H2cnG],"\n""Der skal være meget mere skov i Danmark. Sk...","Renere drikkevand gennem skovrejsning , især i...",[2045],[reccAKLo8uwCt0j5b],[recqPd9dgbm42CYl6],[National],2024-10-14T16:17:28.000Z,reczAmstGNXvWYnsf,[recNka9BU7xKaJ10u],Områder med kvælstofindsatsbehov


In [60]:
# Construct the diagram
# Assuming tables is a list of Table instances that have already fetched data and generated labels and relationships
label_to_index = {}
node_labels = []
source_indices = []
target_indices = []
values = []
create_sankey_diagram(tables)

KeyError: 'Targets'

In [None]:
fig = go.Figure(go.Sankey(
    arrangement = "snap",
    node = {
        "label": ["A", "B", "C", "D", "E", "F"],
        "x": [0.2, 0.1, 0.5, 0.7, 0.3, 0.5],
        "y": [0.7, 0.5, 0.2, 0.4, 0.2, 0.3],
        'pad':10},  # 10 Pixels
    link = {
        "source": [0, 0, 1, 2, 5, 4, 3, 5],
        "target": [5, 3, 4, 3, 0, 2, 2, 3],
        "value": [1, 2, 1, 1, 1, 1, 1, 2]}))

fig.show()