# Make a Sankey diagram of project outcomes by SDG

Save the team spreadsheet as a CSV file and upload it as "data.csv". The first row is treated as a header and skipped.

In [None]:
import csv

# Rows are padded to this many columns so that every fixed index read below
# (the highest is 25) exists even on blank or truncated rows.
_REQUIRED_COLUMNS = 26


def _split_cell(cell):
    """Split a comma-separated cell into stripped, non-empty values."""
    return [part.strip() for part in cell.split(',') if part.strip()]


# One dict per spreadsheet row that has both a project id and an outcome id.
data = []
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted cells are parsed correctly.
with open('data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Skip header row; the default keeps an empty file from raising
    # StopIteration.
    next(reader, None)
    for row in reader:
        # Blank lines come back as [] and short rows lack trailing cells;
        # pad them instead of failing with IndexError.
        row = row + [''] * (_REQUIRED_COLUMNS - len(row))
        project_id = row[1].strip()
        outcome_id = row[11].strip()
        if not (project_id and outcome_id):
            continue
        data.append({
            'project_id': project_id,
            'outcome_id': outcome_id,
            'primary_goal': row[22].strip(),
            # Entries are stripped individually so "1, 2" yields '1' and '2'
            # rather than '1' and ' 2' (which would become a separate node).
            'secondary_goals': _split_cell(row[23]),
            'primary_target': row[24].strip(),
            'secondary_targets': _split_cell(row[25]),
        })


In [None]:
#A class to process data & some constants

# Node colour used for goals that are not in SDG_GOAL_MAP.
DEFAULT_COLOUR = '#AAAAAA'

# Display data for each Sustainable Development Goal, keyed by goal number.
# Every entry has the same three keys:
#   'name'        - label text for the goal
#   'node_colour' - hex colour for the goal's nodes
#   'link_colour' - the same colour as an rgba() string at 50% opacity
SDG_GOAL_MAP = {
    1: {
        'name': '1. No poverty',
        'node_colour': '#e5243b',
        'link_colour': 'rgba(229, 36, 59, 0.5)',
    },
    2: {
        'name': '2. Zero hunger',
        'node_colour': '#DDA63A',
        'link_colour': 'rgba(221, 166, 58, 0.5)',
    },
    3: {
        'name': '3. Good health and wellbeing',
        'node_colour': '#4C9F38',
        'link_colour': 'rgba(76, 159, 56, 0.5)',
    },
    4: {
        'name': '4. Quality education',
        'node_colour': '#C5192D',
        'link_colour': 'rgba(197, 25, 45, 0.5)',
    },
    5: {
        'name': '5. Gender equality',
        'node_colour': '#FF3A21',
        'link_colour': 'rgba(255, 58, 33, 0.5)',
    },
    6: {
        'name': '6. Clean water and sanitation',
        'node_colour': '#26BDE2',
        'link_colour': 'rgba(38, 189, 226, 0.5)',
    },
    7: {
        'name': '7. Affordable and clean energy',
        'node_colour': '#FCC30B',
        'link_colour': 'rgba(252, 195, 11, 0.5)',
    },
    8: {
        'name': '8. Work and economic growth',
        'node_colour': '#A21942',
        'link_colour': 'rgba(162, 25, 66, 0.5)',
    },
    9: {
        'name': '9. Industry, innovation and infrastructure',
        'node_colour': '#FD6925',
        'link_colour': 'rgba(253, 105, 37, 0.5)',
    },
    10: {
        'name': '10. Reduced inequalities',
        'node_colour': '#DD1367',
        'link_colour': 'rgba(221, 19, 103, 0.5)',
    },
    11: {
        'name': '11. Sustainable cities and communities',
        'node_colour': '#FD9D24',
        'link_colour': 'rgba(253, 157, 36, 0.5)',
    },
    12: {
        'name': '12. Responsible consumption and production',
        'node_colour': '#BF8B2E',
        'link_colour': 'rgba(191, 139, 46, 0.5)',
    },
    13: {
        'name': '13. Climate action',
        'node_colour': '#3F7E44',
        'link_colour': 'rgba(63, 126, 68, 0.5)',
    },
    14: {
        'name': '14. Life below water',
        'node_colour': '#0A97D9',
        'link_colour': 'rgba(10, 151, 217, 0.5)',
    },
    15: {
        'name': '15. Life on land',
        'node_colour': '#56C02B',
        'link_colour': 'rgba(86, 192, 43, 0.5)',
    },
    16: {
        'name': '16. Peace, justice and strong institutions',
        'node_colour': '#00689D',
        'link_colour': 'rgba(0, 104, 157, 0.5)',
    },
    17: {
        'name': '17. Partnerships for the goals',
        # This key was 'color', which made every 'node_colour' lookup for
        # goal 17 raise KeyError.
        'node_colour': '#19486A',
        'link_colour': 'rgba(25, 72, 106, 0.5)',
    },
}


class SanKeyData:
    """Collect the nodes and weighted links of a goal/target/project Sankey.

    Nodes are kept in three lists (projects, goals, targets); each entry is a
    ``{'label': ..., 'color': ...}`` dict and a label is stored at most once
    per list. Links are nested dicts ``{source_label: {target_label: count}}``
    where ``count`` is the number of times that pair was added.
    """

    # Colour for project nodes and for targets whose goal is not recognised.
    _PLAIN_NODE_COLOUR = 'blue'
    # Colour for links whose source does not map to a recognised goal.
    _PLAIN_LINK_COLOUR = 'black'

    def __init__(self):
        self.nodes_target = []
        self.nodes_goal = []
        self.nodes_project = []
        self.links_target_to_project = {}
        self.links_goal_to_target = {}

    @staticmethod
    def _goal_number(label, allow_decimal):
        """Return the goal number encoded in *label*, or -1 if there is none.

        With ``allow_decimal`` the label is read as a number and truncated
        (``'3.4'`` -> 3), which is how target labels are interpreted. Without
        it the label must be a plain integer, which is how goal labels are
        interpreted.
        """
        try:
            return int(float(label)) if allow_decimal else int(label)
        except (TypeError, ValueError, OverflowError):
            return -1

    @staticmethod
    def _goal_entry(goal):
        """Return the SDG_GOAL_MAP entry for *goal*, or an empty dict."""
        return SDG_GOAL_MAP.get(goal, {})

    @staticmethod
    def _has_label(node_list, label):
        """Tell whether *node_list* already holds a node labelled *label*."""
        return any(node['label'] == label for node in node_list)

    @staticmethod
    def _count_link(table, source, target):
        """Increment the weight of the ``source -> target`` link in *table*."""
        weights = table.setdefault(source, {})
        weights[target] = weights.get(target, 0) + 1

    @staticmethod
    def _index_by_label(node_list, offset):
        """Map each label in *node_list* to its position shifted by *offset*."""
        positions = {}
        for position, node in enumerate(node_list, start=offset):
            # Keep the first position if a label was appended twice by hand.
            positions.setdefault(node['label'], position)
        return positions

    @staticmethod
    def _position(index, label, kind):
        """Look *label* up in *index*; raise ValueError if it is unknown."""
        try:
            return index[label]
        except KeyError:
            raise ValueError(
                f'{label!r} is not a registered {kind} node'
            ) from None

    def add_target(self, label):
        """Register a target node, coloured after the goal it belongs to.

        The goal is the integer part of the label; targets whose label is not
        numeric, or whose goal is not in SDG_GOAL_MAP, are coloured blue.
        """
        if self._has_label(self.nodes_target, label):
            return
        goal = self._goal_number(label, allow_decimal=True)
        self.nodes_target.append({
            'label': label,
            'color': self._goal_entry(goal).get(
                'node_colour', self._PLAIN_NODE_COLOUR),
        })

    def add_goal(self, label):
        """Register a goal node, coloured from SDG_GOAL_MAP.

        Labels that are not integers, or not in SDG_GOAL_MAP, get
        DEFAULT_COLOUR instead of raising.
        """
        if self._has_label(self.nodes_goal, label):
            return
        goal = self._goal_number(label, allow_decimal=False)
        self.nodes_goal.append({
            'label': label,
            'color': self._goal_entry(goal).get('node_colour', DEFAULT_COLOUR),
        })

    def add_project(self, id):
        """Register a project node labelled *id* (always coloured blue)."""
        # The parameter keeps its original name so keyword callers still work.
        if self._has_label(self.nodes_project, id):
            return
        self.nodes_project.append({
            'label': id,
            'color': self._PLAIN_NODE_COLOUR,
        })

    def add_link_target_to_project(self, node1, node2):
        """Count one more link from target *node1* to project *node2*."""
        self._count_link(self.links_target_to_project, node1, node2)

    def add_link_goal_to_target(self, node1, node2):
        """Count one more link from goal *node1* to target *node2*."""
        self._count_link(self.links_goal_to_target, node1, node2)

    def get_plotly_data(self):
        """Return ``(nodes, links)`` dicts built from the collected data.

        ``nodes`` lists projects, then goals, then targets. ``links`` refers
        to nodes by their position in that list: target -> project links come
        first, followed by goal -> target links.

        Each endpoint is resolved within its own node list, so a project and a
        goal that share a label (for example ``'1'``) are not confused.

        Raises:
            ValueError: if a link refers to a node that was never added.
        """
        ordered = self.nodes_project + self.nodes_goal + self.nodes_target
        nodes = {
            'label': [node['label'] for node in ordered],
            'color': [node['color'] for node in ordered],
            'pad': 20,
            'thickness': 15,
            'line': {
                'color': 'black',
                'width': 1,
            },
        }

        goal_offset = len(self.nodes_project)
        target_offset = goal_offset + len(self.nodes_goal)
        project_index = self._index_by_label(self.nodes_project, 0)
        goal_index = self._index_by_label(self.nodes_goal, goal_offset)
        target_index = self._index_by_label(self.nodes_target, target_offset)

        links = {
            'source': [],
            'target': [],
            'value': [],
            'color': [],
        }

        # (link table, source lookup, source kind, target lookup, target kind,
        #  whether the source label may carry a decimal part)
        passes = (
            (self.links_target_to_project, target_index, 'target',
             project_index, 'project', True),
            (self.links_goal_to_target, goal_index, 'goal',
             target_index, 'target', False),
        )
        for table, src_index, src_kind, dst_index, dst_kind, decimal in passes:
            for source, weights in table.items():
                goal = self._goal_number(source, allow_decimal=decimal)
                colour = self._goal_entry(goal).get(
                    'link_colour', self._PLAIN_LINK_COLOUR)
                for target, weight in weights.items():
                    links['source'].append(
                        self._position(src_index, source, src_kind))
                    links['target'].append(
                        self._position(dst_index, target, dst_kind))
                    links['value'].append(weight)
                    links['color'].append(colour)
        return nodes, links



        

In [None]:
# Process data
# Build the Sankey model from the loaded rows: each usable row contributes
# its project, its goals, its targets and the links between them.
sankey_data = SanKeyData()
for record in data:
    project = record['project_id']
    row_goals = [
        g for g in [record['primary_goal']] + record['secondary_goals'] if g
    ]
    row_targets = [
        t for t in [record['primary_target']] + record['secondary_targets'] if t
    ]
    # A row is only usable when it names a project, a goal and a target.
    if not (project and row_goals and row_targets):
        continue

    sankey_data.add_project(project)
    for goal_label in row_goals:
        sankey_data.add_goal(goal_label)

    for target_label in row_targets:
        sankey_data.add_target(target_label)
        sankey_data.add_link_target_to_project(target_label, project)
        # The integer part of a target label (e.g. "3.4" -> 3) is compared
        # with each goal; a target that is not numeric gets no goal link.
        try:
            parent_goal = int(float(target_label))
        except ValueError:
            continue
        for goal_label in row_goals:
            try:
                same_goal = parent_goal == int(goal_label)
            except ValueError:
                continue
            if same_goal:
                sankey_data.add_link_goal_to_target(goal_label, target_label)

sankey_nodes, sankey_links = sankey_data.get_plotly_data()

#print(sankey_nodes)
#print(sankey_links)

In [None]:
# Draw viz!
import plotly.graph_objects

# A single Sankey trace fed by the node and link dicts computed earlier.
sankey_trace = plotly.graph_objects.Sankey(
    arrangement='perpendicular',
    link=sankey_links,
    node=sankey_nodes,
)
plot_data = [sankey_trace]

fig = plotly.graph_objects.Figure(data=plot_data)

# One layout call sets the figure height and the horizontally centred title.
fig.update_layout(
    height=800,
    title="title",
    title_x=0.5,
)
fig.show()