# Sankey diagram - Donor organisation to recipient organisations

In [None]:
import pandas as pd
import plotly.graph_objects as go

from google.colab import files

## Upload a file
Upload the `fcra_2009_19_full.csv` file in the next step so that it can be used in the visualisation. The upload can take quite a while. Alternatively, upload the file to your Google Drive, mount the drive, and read the file from there.

In [None]:
# Open the Colab upload widget and wait for the user to pick a file.
uploaded = files.upload()

# Remember the name of the first uploaded file for the loading step.
filename = list(uploaded)[0]

## Create a Pandas dataframe
Load the CSV file into a pandas DataFrame and have a quick look at the data.

In [None]:
# Load the FCRA dataset into a DataFrame.
# To read the file uploaded in the previous step instead of the copy on
# Google Drive, use the commented-out line below.
#fcra_df = pd.read_csv(filename)

# NOTE(review): this relative path presumably requires Google Drive to be
# mounted under the current working directory - confirm before running.
fcra_df = pd.read_csv('drive/MyDrive/hackteam11/test/fcra_2009_19_full.csv')

# Report the (rows, columns) shape of the loaded data.
print(fcra_df.shape)

# Show the first two rows as the cell output.
fcra_df.head(2)

In [None]:
# Toggle between a plot for one donor (True) and a combined plot for
# several donors (False).
SINGLE = True

if SINGLE:
    # Single donor plot.
    # Other donors that can be used here: 'Gospel for Asia',
    # 'Fundacion Vicente Ferrer'.
    donor = 'WorldVision'
    df = fcra_df[fcra_df['donor_name'] == donor]
else:
    # Multi donor plot.
    donors = [
        'WorldVision',
        'Gospel for Asia',
        'Fundacion Vicente Ferrer',
        'Compassion International',
    ]
    df = fcra_df[fcra_df['donor_name'].isin(donors)]

# Total of the selected donations, divided by 1e9 for the "B INR" title.
total_donations = df['Amount'].sum() / 1e9

if SINGLE:
    title = f'Donor organisation to recipient organisations - {donor} ({total_donations:.2f}B INR)'
else:
    title = f'Top four religious charitable donor organisations and recipients - {total_donations:.2f}B INR'

print(title)
print(df.shape)

# Show the first two selected rows as the cell output.
df.head(2)

## Set up helper functions

In [None]:
# Fallback colour for any node or link that has no entry in COLOUR_MAP.
DEFAULT_COLOUR = '#AAAAAA'

# Per-organisation colours, keyed by the label shown in the diagram.
# Every entry must provide both keys, because the helpers look them up by
# exactly these names:
#   'node_colour' - colour of the organisation's node
#   'link_colour' - colour of the links attached to it (the same RGB value
#                   as the node colour, at 50% opacity)
# Add more colour mappings here...
COLOUR_MAP = {
    'WorldVision': {
        'node_colour': '#FD6925',
        'link_colour': 'rgba(253, 105, 37, 0.5)',
    },
    'Gospel for Asia': {
        'node_colour': '#4C9F38',
        'link_colour': 'rgba(76, 159, 56, 0.5)',
    },
    'Fundacion Vicente Ferrer': {
        'node_colour': '#0A97D9',
        'link_colour': 'rgba(10, 151, 217, 0.5)',
    },
    'Bill and Melinda Gates Foundation': {
        'node_colour': '#FCC30B',
        'link_colour': 'rgba(252, 195, 11, 0.5)',
    },
    'Compassion International': {
        # Was keyed 'color', which the node lookup never reads, so this
        # donor's node was drawn in the default grey.
        'node_colour': '#19486A',
        'link_colour': 'rgba(25, 72, 106, 0.5)',
    },
}


def add_node(nodes, source, target, value):
    """
    Accumulate a flow of `value` from `source` to `target` in `nodes`.

    `nodes` is modified in place. Flows are stored under the key
    '<source>_<target>'; repeated calls for the same pair add to the
    stored value. Nothing is recorded when `source` or `target` is falsy.
    """
    if not (source and target):
        return

    key = f'{source}_{target}'
    entry = nodes.get(key)

    if entry is None:
        # First flow seen for this source/target pair.
        nodes[key] = {'source': source, 'target': target, 'value': value}
    else:
        entry['value'] += value


def make_node_data(labels):
    """
    Build the node part of the sankey diagram data.

    Returns a dict with two parallel lists: 'label' holds the labels in
    the given order and 'color' holds one colour per label, taken from
    the 'node_colour' entry in COLOUR_MAP or DEFAULT_COLOUR when the
    label has no (or an empty) entry.
    """
    names = list(labels)
    node_colours = [
        COLOUR_MAP.get(name, {}).get('node_colour') or DEFAULT_COLOUR
        for name in names
    ]

    return {'label': names, 'color': node_colours}


def make_link_data(nodes, labels):
    """
    Make the sankey diagram link data

    `nodes` is the dict filled by add_node(): each value has 'source',
    'target' and 'value' entries. `labels` is the list of node labels;
    every link end point is translated to its position in that list.

    Returns a dict of four parallel lists: 'source' and 'target' (label
    positions), 'value' and 'color' (one colour per link).

    Raises ValueError when a link end point is not present in `labels`.
    """
    source = []
    target = []
    value = []
    link_colours = []

    for node in nodes.values():
        node_source = node['source']
        node_target = node['target']

        source.append(labels.index(node_source))
        target.append(labels.index(node_target))
        value.append(node['value'])

        # Colour the link after its source if COLOUR_MAP knows it,
        # otherwise after its target, otherwise use the default.
        # (This previously looked up an undefined GOAL_MAP and failed
        # with a NameError on the first link.)
        link_colour = (
            COLOUR_MAP.get(node_source, {}).get('link_colour')
            or COLOUR_MAP.get(node_target, {}).get('link_colour')
            or DEFAULT_COLOUR
        )
        link_colours.append(link_colour)

    return {
        'source': source,
        'target': target,
        'value': value,
        'color': link_colours,
    }

## Create plot data

In [None]:
# Aggregated donor -> recipient flows, filled by add_node().
nodes = {}
# Donor and recipient labels seen so far (duplicates are allowed here).
input_labels = []
target_labels = []

# Walk the three needed columns in lockstep. This avoids building a
# Series for every row, as iterrows() does.
for donor_name, association_name, amount in zip(
    df['donor_name'], df['Association Name'], df['Amount']
):
    # Skip rows without a usable recipient name (anything that is not a
    # string).
    if not isinstance(association_name, str):
        continue

    # Do not name this `input`: that would shadow the builtin for every
    # later cell in the notebook.
    donor_label = str(donor_name).strip()
    recipient_label = str(association_name).strip()

    # add_node() ignores links with an empty end point, so do not
    # register labels for them either; they would become orphan nodes.
    if not (donor_label and recipient_label):
        continue

    input_labels.append(donor_label)
    target_labels.append(recipient_label)

    add_node(nodes, donor_label, recipient_label, amount)

## Build plot

In [None]:
# De-duplicate and sort each side; donors come first in the combined
# label list, followed by the recipients.
input_labels = sorted(set(input_labels))
target_labels = sorted(set(target_labels))
labels = input_labels + target_labels

# Node data: labels and colours from the helper, plus fixed styling.
node_data = {
    **make_node_data(labels),
    'pad': 20,
    'thickness': 15,
    'line': {'color': 'white', 'width': 1},
}

# Link data: one entry per aggregated donor -> recipient flow.
link_data = make_link_data(nodes, labels)

plot_data = [
    go.Sankey(node=node_data, link=link_data, arrangement='perpendicular'),
]

fig = go.Figure(data=plot_data)

# Fixed figure height and a horizontally centred title.
fig.update_layout(height=900, title=title, title_x=0.5)
fig.show()
