# Sankey diagrams - Project to primary and secondary SDG mapping
This has been tested with "Pipeline data Social Finance Israel.csv".

In [None]:
import io

import pandas as pd
import plotly.graph_objects as go

from google.colab import files

# Upload a file

In [None]:
# Ask Colab for a file upload; the result is keyed by uploaded file name.
uploaded = files.upload()

# Only the first uploaded file is used by the rest of the notebook.
filename = list(uploaded)[0]

# Create a Pandas dataframe

Convert the CSV file into a pandas DataFrame and take a quick look at the data.

In [None]:
# Load the CSV into the module-level DataFrame `df`, which the plotting cell reads.
# Use the file name from the last upload, or modify for previously uploaded files.
df = pd.read_csv(filename)

# Echo which file was loaded, then leave `df` as the final expression so the
# notebook renders a preview of the table.
print(filename)
df

# Set up helper functions

In [None]:
# Column indexes (into each dataframe row) of the values to plot.
PROJECT_COL_INDEX = 17
PRIMARY_SDG_COL_INDEX = 35
SECONDARY_SDG_COL_INDEX = 36


# Colour used for any node or link that has no entry in SDG_GOAL_MAP
# (in practice, the project nodes).
DEFAULT_COLOUR = '#AAAAAA'


# Display settings for each SDG, keyed by the label text looked up by the
# helper functions. Every entry carries the same three fields:
#   name        - label shown on the diagram node
#   node_colour - hex colour of the node
#   link_colour - the same colour as rgba at 50% opacity, used for links
SDG_GOAL_MAP = {
    'Goal 1: no poverty': {
        'name': '1. No poverty',
        'node_colour': '#e5243b',
        'link_colour': 'rgba(229, 36, 59, 0.5)',
    },
    'Goal 2: zero hunger': {
        'name': '2. Zero hunger',
        'node_colour': '#DDA63A',
        'link_colour': 'rgba(221, 166, 58, 0.5)',
    },
    'Goal 3: good health and well-being': {
        'name': '3. Good health and wellbeing',
        'node_colour': '#4C9F38',
        'link_colour': 'rgba(76, 159, 56, 0.5)',
    },
    'Goal 4: quality education': {
        'name': '4. Quality education',
        'node_colour': '#C5192D',
        'link_colour': 'rgba(197, 25, 45, 0.5)',
    },
    'Goal 5: gender equality': {
        'name': '5. Gender equality',
        'node_colour': '#FF3A21',
        'link_colour': 'rgba(255, 58, 33, 0.5)',
    },
    'Goal 6': {
        'name': '6. Clean water and sanitation',
        'node_colour': '#26BDE2',
        'link_colour': 'rgba(38, 189, 226, 0.5)',
    },
    'Goal 7': {
        'name': '7. Affordable and clean energy',
        'node_colour': '#FCC30B',
        'link_colour': 'rgba(252, 195, 11, 0.5)',
    },
    'Goal 8: decent work and economic growth': {
        'name': '8. Work and economic growth',
        'node_colour': '#A21942',
        'link_colour': 'rgba(162, 25, 66, 0.5)',
    },
    'Goal 9': {
        'name': '9. Industry, innovation and infrastructure',
        'node_colour': '#FD6925',
        'link_colour': 'rgba(253, 105, 37, 0.5)',
    },
    'Goal 10: reduced inequality': {
        'name': '10. Reduced inequalities',
        'node_colour': '#DD1367',
        'link_colour': 'rgba(221, 19, 103, 0.5)',
    },
    'Goal 11': {
        'name': '11. Sustainable cities and communities',
        'node_colour': '#FD9D24',
        'link_colour': 'rgba(253, 157, 36, 0.5)',
    },
    'Goal 12': {
        'name': '12. Responsible consumption and production',
        'node_colour': '#BF8B2E',
        'link_colour': 'rgba(191, 139, 46, 0.5)',
    },
    'Goal 13': {
        'name': '13. Climate action',
        'node_colour': '#3F7E44',
        'link_colour': 'rgba(63, 126, 68, 0.5)',
    },
    'Goal 14': {
        'name': '14. Life below water',
        'node_colour': '#0A97D9',
        'link_colour': 'rgba(10, 151, 217, 0.5)',
    },
    'Goal 15': {
        'name': '15. Life on land',
        'node_colour': '#56C02B',
        'link_colour': 'rgba(86, 192, 43, 0.5)',
    },
    'Goal 16': {
        'name': '16. Peace, justice and strong institutions',
        'node_colour': '#00689D',
        'link_colour': 'rgba(0, 104, 157, 0.5)',
    },
    'Goal 17': {
        'name': '17. Partnerships for the goals',
        # Was keyed 'color', which the node lookup never reads, so this goal
        # was drawn in the default grey.
        'node_colour': '#19486A',
        'link_colour': 'rgba(25, 72, 106, 0.5)',
    },
}



def add_node(nodes, source, target, value):
    """
    Record a source -> target link, accumulating the value of repeats.

    `nodes` is mutated in place. Each entry is a dict with 'source',
    'target' and 'value' keys, stored under the (source, target) pair.
    Nothing is added when either `source` or `target` is falsy (for
    example an empty string).
    """
    if not (source and target):
        return

    # A tuple key keeps distinct pairs distinct. Joining the two names with
    # '_' would merge e.g. ('a_b', 'c') with ('a', 'b_c') into one link.
    key = (source, target)
    entry = nodes.get(key)

    if entry is None:
        nodes[key] = {
            'source': source,
            'target': target,
            'value': value,
        }
    else:
        entry['value'] += value


def make_node_data(labels):
    """
    Build the 'label' and 'color' lists for the sankey diagram nodes.

    Each label is looked up in SDG_GOAL_MAP. A match supplies the display
    name and node colour; anything missing falls back to the raw label and
    DEFAULT_COLOUR respectively. Output order follows `labels`.
    """
    def describe(label):
        # One lookup per label yields both the display name and the colour.
        goal = SDG_GOAL_MAP.get(label, {})
        return (
            goal.get('name') or label,
            goal.get('node_colour') or DEFAULT_COLOUR,
        )

    described = [describe(label) for label in labels]

    return {
        'label': [name for name, _ in described],
        'color': [colour for _, colour in described],
    }


def make_link_data(nodes, labels):
    """
    Build the 'source', 'target', 'value' and 'color' lists for the links.

    Every entry in `nodes` becomes one link. Its endpoints are converted to
    their position in `labels` (first occurrence). The link colour is taken
    from the SDG_GOAL_MAP entry of the source, then of the target, and is
    DEFAULT_COLOUR when neither endpoint provides one.
    """
    link = {
        'source': [],
        'target': [],
        'value': [],
        'color': [],
    }

    for entry in nodes.values():
        start, end, amount = entry['source'], entry['target'], entry['value']

        link['source'].append(labels.index(start))
        link['target'].append(labels.index(end))
        link['value'].append(amount)

        # Lazily check the source first, then the target, and stop at the
        # first endpoint that has a colour configured.
        candidates = (
            SDG_GOAL_MAP.get(endpoint, {}).get('link_colour')
            for endpoint in (start, end)
        )
        chosen = next(filter(None, candidates), None)
        link['color'].append(chosen or DEFAULT_COLOUR)

    return link

# Plot method


In [None]:
def build_plot(source_index, target_index, title):
    """
    Draw a sankey diagram linking one dataframe column to another.

    Reads the module-level dataframe `df`. For every row where both cells
    hold text, a link from the source cell to the target cell is counted
    once; repeated pairs accumulate into a thicker link.

    source_index -- position of the column holding the link source
    target_index -- position of the column holding the link target
    title        -- title shown centred above the figure

    The figure is displayed with `fig.show()`; nothing is returned.
    """
    nodes = {}
    source_labels = []
    target_labels = []

    # Generate plot data
    for i, row in df.iterrows():

        # NOTE(review): read_csv has already consumed the CSV header line, so
        # this drops the row labelled 0 -- presumably a second header row in
        # the tested export. Confirm before using with other files.
        if i == 0:
            continue

        # Select by position explicitly: integer keys on a row with string
        # column labels are deprecated as positional lookups in pandas.
        project = row.iloc[source_index]
        sdg = row.iloc[target_index]

        # Empty cells arrive as NaN (a truthy float), so require real text
        # in both cells before calling .strip().
        if not (isinstance(project, str) and isinstance(sdg, str)):
            continue

        source = project.strip()
        target = sdg.strip()

        # Whitespace-only cells cannot form a link.
        if not (source and target):
            continue

        source_labels.append(source)
        target_labels.append(target)

        add_node(nodes, source, target, 1)

    # One node per distinct label, sources first, in first-seen order. The
    # link indexes always resolve to a label's first occurrence, so
    # duplicates would only add unlinked nodes.
    labels = list(dict.fromkeys(source_labels + target_labels))

    # Make the node data
    node_data = make_node_data(labels)

    node_data.update({
        'pad': 20,
        'thickness': 15,
        'line': {
            'color': 'white',
            'width': 1,
        },
    })

    # Make the link data dictionary
    link_data = make_link_data(nodes, labels)

    fig = go.Figure(
        data=[
            go.Sankey(
                node=node_data,
                link=link_data,
                arrangement='perpendicular',
            ),
        ],
    )

    fig.update_layout(
        height=800,
        title=title,
        title_x=0.5,
    )
    fig.show()

# Build plot

In [None]:
# One diagram per SDG column, primary first, both starting from the project column.
for _sdg_col_index, _plot_title in (
    (PRIMARY_SDG_COL_INDEX, 'Projects to Primary SDGs'),
    (SECONDARY_SDG_COL_INDEX, 'Projects to Secondary SDGs'),
):
    build_plot(PROJECT_COL_INDEX, _sdg_col_index, _plot_title)