# Make a Sankey Diagram for Project Outcomes and SDGs

Save the team spreadsheet as a CSV file and upload it as "data.csv". The first row is treated as a header and skipped.

In [None]:
import csv

# Highest column index read from a row (columns are 0-based).
_LAST_COLUMN = 25


def _split_cell(cell):
    """Split a comma-separated cell into trimmed, non-empty entries."""
    return [part.strip() for part in cell.split(',') if part.strip()]


# One dict per spreadsheet row that has both a project id and an outcome id.
data = []
# newline='' lets the csv module handle line endings inside quoted cells.
with open('data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Skip header row; the default keeps an empty file from raising
    # StopIteration.
    next(reader, None)
    for row in reader:
        # Blank or truncated rows are padded with empty cells so the fixed
        # column indexes below never raise IndexError.
        if len(row) <= _LAST_COLUMN:
            row = row + [''] * (_LAST_COLUMN + 1 - len(row))
        project_id = row[1].strip()
        outcome_id = row[11].strip()
        # Rows without a project id or an outcome id are ignored.
        if not (project_id and outcome_id):
            continue
        data.append({
            'project_id': project_id,
            'outcome_id': outcome_id,
            'primary_goal': row[22].strip(),
            'secondary_goals': _split_cell(row[23]),
            'primary_target': row[24].strip(),
            'secondary_targets': _split_cell(row[25]),
        })


In [None]:
# A class to process data
class SanKeyData:
    """Collect nodes and weighted links and export them for a Sankey plot.

    Nodes are kept in three lists (``nodes_project``, ``nodes_goal``,
    ``nodes_target``); every entry is a dict with a ``'label'`` and a
    ``'color'``. Links live in ``links`` as a nested dict where
    ``links[source_label][target_label]`` is the number of times that pair
    was added.
    """

    def __init__(self):
        self.nodes_target = []
        self.nodes_goal = []
        self.nodes_project = []
        self.links = {}

    @staticmethod
    def _add_node(nodes, label):
        """Append a blue node for ``label`` to ``nodes`` if it is not there yet."""
        # A generator avoids building a throwaway list on every call.
        if label in (node['label'] for node in nodes):
            return
        nodes.append({
            'label': label,
            'color': 'blue',
        })

    def add_target(self, label):
        """Register a target node; a label that already exists is ignored."""
        self._add_node(self.nodes_target, label)

    def add_goal(self, label):
        """Register a goal node; a label that already exists is ignored."""
        self._add_node(self.nodes_goal, label)

    def add_project(self, id):
        """Register a project node labelled ``id``; duplicates are ignored."""
        self._add_node(self.nodes_project, id)

    def add_link(self, node1, node2):
        """Increase the weight of the link from ``node1`` to ``node2`` by one."""
        targets = self.links.setdefault(node1, {})
        targets[node2] = targets.get(node2, 0) + 1

    def get_plotly_data(self):
        """Return ``(nodes, links)`` dicts built from the collected data.

        ``nodes`` holds parallel ``'label'`` and ``'color'`` lists (projects
        first, then goals, then targets) plus fixed ``'pad'``,
        ``'thickness'`` and ``'line'`` settings. ``links`` holds parallel
        ``'source'``, ``'target'``, ``'value'`` and ``'color'`` lists, where
        source and target are positions in ``nodes['label']``.

        Node labels must be hashable.

        Raises:
            ValueError: if a link refers to a label that was never added as
                a node.
        """
        nodes = {
            'label': [],
            'color': [],
            'pad': 20,
            'thickness': 15,
            'line': {
                'color': 'black',
                'width': 1,
            },
        }
        for group in (self.nodes_project, self.nodes_goal, self.nodes_target):
            for node in group:
                nodes['label'].append(node['label'])
                nodes['color'].append(node['color'])

        # Map each label to its position once instead of scanning the label
        # list for every link. setdefault keeps the first position when a
        # label occurs in more than one group, as list.index would.
        position_of = {}
        for position, label in enumerate(nodes['label']):
            position_of.setdefault(label, position)

        links = {
            'source': [],
            'target': [],
            'value': [],
            'color': [],
        }
        for source, source_data in self.links.items():
            for target, weight in source_data.items():
                for endpoint in (source, target):
                    if endpoint not in position_of:
                        raise ValueError(
                            'link endpoint {!r} was never added as a node'
                            .format(endpoint)
                        )
                links['source'].append(position_of[source])
                links['target'].append(position_of[target])
                links['value'].append(weight)
                links['color'].append('black')
        return nodes, links



        

In [None]:
# Process data
# Feed every complete record into the accumulator: one node per project,
# goal and target, plus links goal -> target and target -> project.
sankey_data = SanKeyData()
for entry in data:
    required = (
        entry['project_id'],
        entry['primary_goal'],
        entry['primary_target'],
    )
    # Records missing any of the three values contribute nothing.
    if not all(required):
        continue
    sankey_data.add_project(entry['project_id'])
    sankey_data.add_goal(entry['primary_goal'])
    sankey_data.add_target(entry['primary_target'])
    sankey_data.add_link(entry['primary_goal'], entry['primary_target'])
    sankey_data.add_link(entry['primary_target'], entry['project_id'])

sankey_nodes, sankey_links = sankey_data.get_plotly_data()

# Uncomment to inspect the generated structures:
#print(sankey_nodes)
#print(sankey_links)

In [None]:
# Draw viz!
import plotly.graph_objects

# A single Sankey trace built from the node and link dicts prepared earlier.
sankey_trace = plotly.graph_objects.Sankey(
    arrangement='perpendicular',
    link=sankey_links,
    node=sankey_nodes,
)
plot_data = [sankey_trace]

fig = plotly.graph_objects.Figure(data=plot_data)

# Figure height and title settings, applied in one layout update.
fig.update_layout(
    height=800,
    title="title",
    title_x=0.5,
)
fig.show()