# Funds and SDG/SDT Sankeys



## Libs and setup


In [None]:
!pip install --upgrade plotly
!pip install kaleido

In [None]:
import requests
import plotly.graph_objects as go
from collections import defaultdict
import textwrap



def _sdg_entry(number, title, hex_colour):
    """Build one SDG_GOAL_MAP record from a goal number, title and hex colour.

    The link colour is the node colour's RGB channels at 50% opacity.
    """
    red, green, blue = (int(hex_colour[start:start + 2], 16) for start in (1, 3, 5))
    return {
        'name': f'{number}. {title}',
        'node_colour': hex_colour,
        'link_colour': f'rgba({red}, {green}, {blue}, 0.5)',
    }


# Display name and colours for each Sustainable Development Goal, keyed by
# the goal number as a string ('1' .. '17'). Goals are listed in goal order.
SDG_GOAL_MAP = {
    str(number): _sdg_entry(number, title, hex_colour)
    for number, (title, hex_colour) in enumerate(
        (
            ('No poverty', '#e5243b'),
            ('Zero hunger', '#DDA63A'),
            ('Good health and wellbeing', '#4C9F38'),
            ('Quality education', '#C5192D'),
            ('Gender equality', '#FF3A21'),
            ('Clean water and sanitation', '#26BDE2'),
            ('Affordable and clean energy', '#FCC30B'),
            ('Work and economic growth', '#A21942'),
            ('Industry, innovation and infrastructure', '#FD6925'),
            ('Reduced inequalities', '#DD1367'),
            ('Sustainable cities and communities', '#FD9D24'),
            ('Responsible consumption and production', '#BF8B2E'),
            ('Climate action', '#3F7E44'),
            ('Life below water', '#0A97D9'),
            ('Life on land', '#56C02B'),
            ('Peace, justice and strong institutions', '#00689D'),
            ('Partnerships for the goals', '#19486A'),
        ),
        start=1,
    )
}


# Base URL of the INDIGO data store API. api_get_item appends the endpoint
# name, and optionally a public ID, to this URL.
INDIGO_DATABASE_API = 'https://golab-indigo-data-store.herokuapp.com/app/api1'


def api_get_item(endpoint, public_id=None, timeout=30):
    """
    Get individual item details from the API

    E.g. 
    item = api_get_item('project', 'INDIGO-POJ-0158')

    Args:
        endpoint: API endpoint name appended to INDIGO_DATABASE_API,
            e.g. 'fund' or 'project'.
        public_id: Optional public ID of a single item. When omitted the
            bare endpoint is requested.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response, or False if the request
        failed or the body was not valid JSON (the error is printed).
    """
    url = f'{INDIGO_DATABASE_API}/{endpoint}'
    if public_id:
        url = f'{url}/{public_id}'
    try:
        response = requests.get(url, timeout=timeout)
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that cannot be decoded as JSON.
        print(f'\nFailed to retrieve {endpoint} "{public_id}".\nError: {e}')
        return False


def _get_sdg_values_for_fund_data(fund_data):
    primary_value = fund_data.get('purpose_and_classifications',{}).get('primary_sdg_goal',{}).get('value','')
    secondary_values = fund_data.get('purpose_and_classifications',{}).get('secondary_sdg_goals',{}).get('value','')
    sdg_values = set()
    if primary_value and isinstance(primary_value, int):
        sdg_values.add(str(primary_value))
    if primary_value and isinstance(primary_value, str) and primary_value.strip():
        sdg_values.add(primary_value.strip())
    if secondary_values:
        [sdg_values.add(i.strip()) for i in str(secondary_values).split(',') if i.strip()]
    return list(sdg_values)
        
def _get_sdg_values_for_project_data(project_id):
    """Collect the SDG goal numbers recorded against a project.

    Reads 'primary_sdg' and 'secondary_sdgs' from the module-level
    projects_data entry for project_id. The secondary value is treated as a
    comma-separated list. Each entry is reduced to the text before the first
    '-', so "3 - Good health" yields "3".

    Args:
        project_id: Key into projects_data.

    Returns:
        A list of unique, non-empty SDG strings. Numeric values come first in
        ascending numeric order, followed by any other values alphabetically.

    Raises:
        KeyError: If project_id is not present in projects_data.
    """
    project = projects_data[project_id]
    primary_value = project['primary_sdg']
    secondary_values = project['secondary_sdgs']

    raw_values = []
    if primary_value and isinstance(primary_value, (int, str)):
        raw_values.append(str(primary_value))
    if secondary_values:
        raw_values.extend(str(secondary_values).split(','))

    # De-duplicate after stripping the label, so "3" and "3 - Good health"
    # collapse into a single entry.
    sdg_values = set()
    for raw_value in raw_values:
        goal = raw_value.split('-')[0].strip()
        if goal:
            sdg_values.add(goal)

    # Sort so the result does not depend on set iteration order.
    return sorted(
        sdg_values,
        key=lambda value: (not value.isdecimal(), int(value) if value.isdecimal() else 0, value),
    )
        
def _known_sdg_values(sdg_values, owner_id):
    """Return the entries of sdg_values that are SDG_GOAL_MAP keys, in map order.

    Values without an SDG_GOAL_MAP entry cannot be labelled or coloured, so
    they are reported and left out instead of raising KeyError later.
    """
    known = set()
    for sdg_value in sdg_values:
        if sdg_value in SDG_GOAL_MAP:
            known.add(sdg_value)
        else:
            print(f'Ignoring unrecognised SDG value "{sdg_value}" for {owner_id}')
    return [sdg for sdg in SDG_GOAL_MAP if sdg in known]


def sankey_viz_for_fund(fund_id, sankey_height=400, filename='out.png', textwrap_width=50):
    """Draw a fund -> SDG -> project Sankey diagram for one fund.

    The fund is linked to each of its own SDGs, and each SDG is linked to
    every project of the fund that lists it. Every link has the value 1.
    Data is read from the module-level funds_data, fund_has_projects and
    projects_data. Nothing is drawn when none of the fund's projects has a
    recognised SDG value.

    Args:
        fund_id: Key into funds_data and fund_has_projects.
        sankey_height: Figure height passed to the plotly layout.
        filename: Path the figure is written to with fig.write_image.
        textwrap_width: Maximum label width; longer labels are shortened
            with textwrap.shorten.

    Returns:
        None. The figure is shown and written to filename as side effects.
    """
    fund_name = (funds_data[fund_id].get('name') or {}).get('value') or ''

    # Pass 1: work out which projects and SDGs appear in the diagram.
    # Only projects with at least one recognised SDG get a node.
    project_names = {}
    project_sdgs = {}
    for project_id in fund_has_projects[fund_id]:
        sdgs = _known_sdg_values(_get_sdg_values_for_project_data(project_id), project_id)
        if sdgs:
            project_names[project_id] = projects_data[project_id]['name']
            project_sdgs[project_id] = sdgs
    if not project_sdgs:
        # No project SDG data means the output is the same as other graphs we have
        return
    fund_sdgs = _known_sdg_values(_get_sdg_values_for_fund_data(funds_data[fund_id]), fund_id)

    # SDG nodes in SDG_GOAL_MAP order, so the layout is the same on every run.
    used_sdgs = set(fund_sdgs).union(*project_sdgs.values())
    sdg_nodes = [sdg for sdg in SDG_GOAL_MAP if sdg in used_sdgs]

    # Pass 2: build the node list (projects, then the fund, then SDGs) and
    # remember each node's index so links can refer to it.
    node_labels = []
    node_colours = []
    project_nodes_idx = {}
    for project_id, project_name in project_names.items():
        project_nodes_idx[project_id] = len(node_labels)
        node_labels.append(textwrap.shorten(project_id + ": " + project_name, textwrap_width))
        node_colours.append("blue")
    fund_node_idx = len(node_labels)
    node_labels.append(textwrap.shorten(fund_id + ": " + fund_name, textwrap_width))
    node_colours.append("black")
    sdg_nodes_idx = {}
    for sdg in sdg_nodes:
        sdg_nodes_idx[sdg] = len(node_labels)
        node_labels.append(textwrap.shorten(SDG_GOAL_MAP[sdg]['name'], textwrap_width))
        node_colours.append(SDG_GOAL_MAP[sdg]['node_colour'])

    sankey_node_data = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=node_labels,
        color=node_colours,
    )

    # Pass 3: build the links, each coloured by the SDG it touches.
    links_sources = []
    links_targets = []
    links_value = []
    links_color = []
    # Links from Fund to SDG
    for sdg in fund_sdgs:
        links_sources.append(fund_node_idx)
        links_targets.append(sdg_nodes_idx[sdg])
        links_value.append(1)
        links_color.append(SDG_GOAL_MAP[sdg]['link_colour'])
    # Links from SDG to Project
    for project_id, sdgs in project_sdgs.items():
        for sdg in sdgs:
            links_sources.append(sdg_nodes_idx[sdg])
            links_targets.append(project_nodes_idx[project_id])
            links_value.append(1)
            links_color.append(SDG_GOAL_MAP[sdg]['link_colour'])

    sankey_link_data = dict(
        source=links_sources,
        target=links_targets,
        value=links_value,
        color=links_color,
    )

    # make Sankey
    fig = go.Figure(
        data=go.Sankey(
            node=sankey_node_data,
            link=sankey_link_data,
            arrangement='perpendicular',
        ),
    )

    fig.update_layout(
        height=sankey_height,
    )

    fig.show()
    fig.write_image(filename)




## Get Fund data
Call the INDIGO API 'fund' endpoint and retrieve the data used for the plot.

By default this fetches data for every public fund, but you can set `public_ids` to a list of fund IDs to fetch only those. See the comments in the code.

In [None]:
# Call the API and pull down the data for each fund,
# then store it in the funds_data dict (keyed by fund ID) for use later.
#
# You can set public_ids to some funds only
# eg:
# public_ids = ['INDIGO-FUND-0001', ]
# or pass an empty list, in which case it will get data from all public funds
# eg:
# public_ids = []

public_ids = []
endpoint = 'fund'

if not public_ids:
    fund_list = api_get_item(endpoint)
    # api_get_item returns False on failure, so stop with a clear message
    # instead of failing on the .get call below.
    if not fund_list:
        raise RuntimeError('Could not retrieve the list of funds from the INDIGO API')
    for fund_data in fund_list.get('funds') or []:
        if fund_data.get('public'):
            public_ids.append(fund_data.get('id'))

funds_data = {}

for fund_id in public_ids:
    print("Getting Fund " + fund_id)
    fund_response = api_get_item(endpoint, fund_id)
    if not fund_response:
        # The failure has already been printed by api_get_item; leave this
        # fund out so the remaining funds can still be fetched.
        continue
    funds_data[fund_id] = (fund_response.get('fund') or {}).get('data', {})

## Get Project Data

In [None]:
!wget https://golab-indigo-data-store.herokuapp.com/app/all_public_data_file_per_data_type_csv.zip
!unzip -o all_public_data_file_per_data_type_csv.zip

In [None]:
import csv
from collections import defaultdict


# Map each fund ID (the 'Id' column) to the list of project IDs recorded against it
fund_has_projects = defaultdict(list)
with open('projects_outcome_funds.csv', newline='') as funds_file:
    for fund_row in csv.DictReader(funds_file):
        fund_has_projects[fund_row['Id']].append(fund_row['Project ID'])

# Index each project's name and SDG columns by project ID
projects_data = {}
with open('projects.csv', newline='') as projects_file:
    for project_row in csv.DictReader(projects_file):
        projects_data[project_row['ID']] = dict(
            name=project_row['Impact Bond Name - (Value)'],
            primary_sdg=project_row['Purpose and classifications - Primary SDG goal - (Value)'],
            secondary_sdgs=project_row['Purpose and classifications - Secondary SDG goals - (Value)'],
        )




## Basic Sankey for each fund

In [None]:
# Draw one Sankey per downloaded fund and save it as sankey-<fund ID>.png
for fund_id in funds_data:
    print(f"Fund {fund_id}")
    sankey_viz_for_fund(
        fund_id,
        sankey_height=400,
        filename=f'sankey-{fund_id}.png',
        textwrap_width=45,
    )
