# Sankey diagram - Visualising Foreign Philanthropy Inflows to India
This has been tested with `Donors 2018-19.csv` from the CSIP "UPDATED OCTOBER 2020: Foreign Philanthropy (FCRA) dataset".

This dataset can be downloaded from here: https://csip.ashoka.edu.in/estimating-philanthropic-capital-india-data/

In [None]:
import io

from collections import OrderedDict

import pandas as pd
import plotly.graph_objects as go

from google.colab import files

## Upload a file
Once you have downloaded and unzipped the dataset, upload the `Donors 2018-19.csv` file in the next step so it can be used in the visualisation. The upload can take a while.

In [None]:
# Open the Colab upload dialog and wait for the user to pick a file.
uploaded = files.upload()

# The result is keyed by file name and is empty when nothing was uploaded
# (e.g. the dialog was cancelled), so fail with a clear message instead of a
# bare "list index out of range".
if not uploaded:
    raise IndexError('No file was uploaded - re-run this cell and choose a CSV file.')

# Only the first uploaded file is used by the rest of the notebook.
filename = next(iter(uploaded))

## Create a Pandas dataframe
Convert the CSV file into a Pandas dataframe and have a quick look at the data.

In [None]:
# Load the CSV into a dataframe. `filename` comes from the upload cell above;
# assign it by hand to reuse a file uploaded in an earlier session.
df = pd.read_csv(filepath_or_buffer=filename)

# Show which file was loaded, then let the notebook render the dataframe.
print(filename)
df

In [None]:
# Echo the name of the file currently in use.
filename

In [None]:
# Count the distinct values in every column (column-wise is the default axis).
n = df.nunique()

print("No.of.unique values in each column :\n", n)

## Setup helper methods

In [None]:
# Colour used for any node or link whose label has no entry in GOAL_MAP.
DEFAULT_COLOUR = '#AAAAAA'

# Colours per donation purpose. Every entry must provide both keys:
#   'node_colour' - read by make_node_data() for the node itself
#   'link_colour' - read by make_link_data() for links touching the node
GOAL_MAP = {
    'Economic': {
        'node_colour': '#4C9F38',
        'link_colour': 'rgba(76, 159, 56, 0.5)',
    },
    'Educational': {
        'node_colour': '#C5192D',
        'link_colour': 'rgba(197, 25, 45, 0.5)',
    },
    'Social': {
        'node_colour': '#FCC30B',
        'link_colour': 'rgba(252, 195, 11, 0.5)',
    },
    'Religious': {
        'node_colour': '#0A97D9',
        'link_colour': 'rgba(10, 151, 217, 0.5)',
    },
    'Cultural': {
        # Was keyed 'color', which make_node_data() never reads, so the
        # Cultural node silently fell back to DEFAULT_COLOUR.
        'node_colour': '#19486A',
        'link_colour': 'rgba(25, 72, 106, 0.5)',
    },
}


def add_node(nodes, source, target, value):
    """
    Accumulate `value` on the `source` -> `target` entry of `nodes`.

    `nodes` is updated in place. Entries are keyed by '<source>_<target>';
    a new entry is created the first time a pair is seen, otherwise `value`
    is added to the existing entry. Nothing happens when either `source` or
    `target` is falsy.
    """
    if not (source and target):
        return

    key = f'{source}_{target}'
    entry = nodes.get(key)

    if entry is None:
        nodes[key] = {'source': source, 'target': target, 'value': value}
    else:
        entry['value'] += value


def make_node_data(labels):
    """
    Build the node part of the sankey diagram.

    Returns a dict with the node names under 'label' and, in the same
    order, one colour per node under 'color'. The colour is the label's
    'node_colour' from GOAL_MAP, or DEFAULT_COLOUR when the label has no
    (or an empty) colour there.
    """
    names = list(labels)

    node_colours = [
        GOAL_MAP.get(name, {}).get('node_colour') or DEFAULT_COLOUR
        for name in names
    ]

    return {'label': names, 'color': node_colours}


def make_link_data(nodes, labels):
    """
    Build the link part of the sankey diagram.

    `nodes` maps keys to dicts with 'source', 'target' and 'value';
    `labels` is the ordered list of node names used for the diagram.

    Returns a dict of four parallel lists, one item per entry of `nodes`:
    'source' and 'target' hold the positions of the link's end points in
    `labels`, 'value' the link's value and 'color' the link's colour.

    Raises ValueError when a link refers to a name that is not in `labels`.
    """
    # Look-up table built once instead of scanning `labels` twice per link.
    # setdefault keeps the first position of a repeated label, which is what
    # list.index() returns.
    positions = {}
    for position, label in enumerate(labels):
        positions.setdefault(label, position)

    source = []
    target = []
    value = []
    link_colours = []

    for node in nodes.values():
        node_source = node['source']
        node_target = node['target']
        node_value = node['value']

        try:
            source_position = positions[node_source]
            target_position = positions[node_target]
        except KeyError as err:
            # Same exception type list.index() raised for an unknown label.
            raise ValueError(f'{err.args[0]!r} is not in labels') from None

        source.append(source_position)
        target.append(target_position)
        value.append(node_value)

        # Take the link colour from GOAL_MAP, preferring the source end of
        # the link over the target end, and fall back to the default colour.
        link_colours.append(
            GOAL_MAP.get(node_source, {}).get('link_colour')
            or GOAL_MAP.get(node_target, {}).get('link_colour')
            or DEFAULT_COLOUR
        )

    return {
        'source': source,
        'target': target,
        'value': value,
        'color': link_colours,
    }

## Define a list of countries

In [None]:
# Country names (and known misspellings / abbreviations) searched for in the
# donor address. Names are written without spaces because spaces are removed
# from the address before matching.
#
# NOTE: the order matters. Matching is a plain substring test and the first
# matching entry in this list wins, so a name must come before any shorter
# name it contains (e.g. 'Netherlands' before 'Netherland', 'UnitedKingdom'
# before 'Kingdom').
COUNTRIES = [
    'Argentina',
    'Austria',
    'Australia',
    'Bahrain',
    'Belgium',
    'Bosnia',
    'Brazil',
    'Canada',
    'CaymanIslands',
    'China',
    'CzechRepublic',
    'Denmark',
    'France',
    'Germany',
    'HongKong',
    'India',
    'Indonesia',
    'Ireland',
    'Italy',
    'Japan',
    'Kenya',
    'Malaysia',
    'Mauritius',
    'Netherlands',
    'Netherland',
    'NewZealand',
    'NorthKorea',
    'Norway',
    'Pakistan',
    'Peru',
    'Philippines',
    'Poland',
    'Portugal',
    'Qatar',
    'Romania',
    'Russia',
    'Serbia',
    'Singapore',
    'SouthAfrica',
    'Spain',
    'SriLanka',
    'SouthKorea',
    'Sweden',
    'Switzerland',
    'Thailand',
    'TrinidadandTobago',
    'Turkey',
    'UnitedArabEmirates',
    'UnitedKingdom',
    'UnitedStatesofAmerica',
    'Vietnam',
    'Zambia',
    'Jordan',
    'Hungary',
    'Luxembourg',
    'Liechtenstein',
    'Mexico',
    'Uganda',
    'Latvia',
    'Chile',
    'Finland',
    'Nepal',
    'Zimbabwe',
    'USA',
    'UK',
    'Kuwait',
    'Oman',
    'Taiwan',
    'SaudiArabia',
    'Croatia',
    'Panama',
    'Lebanon',
    'Slovakia',
    'Slovenia',
    'Fiji',
    'Greece',
    'Swaziland',
    'Macau',
    'Nigeria',
    'Venezuela',
    'Bangladesh',
    'Myanmar',
    'Malta',
    'Jamaica',
    'Gambia',
    'Rwanda',
    'Israel',
    'Iran',
    'Cambodia',
    'Iceland',
    'Seychelles',
    'Angola',
    'Mozambique',
    'UnitedStates',
    'Liberia',
    'Malawi',
    'Uruguay',
    'Columbia',
    'Monaco',
    'Bolivia',
    'Mongolia',
    'Estonia',
    'Azerbaijan',
    'CostaRica',
    'Bulgaria',
    'Congo',
    'Mali',
    'Tanzania',
    'Bhutan',
    'Afghanistan',
    'Kyrgyzstan',
    'Maldives',
    'Brunei',
    'Iraq',
    'Bahamas',
    'Togo',
    'Morocco',
    'Albania',
    'Sudan',
    'ReunionIsland',
    'Botswana',
    'Lithuania',
    'Lesotho',
    'ENGLAND',
    'Armenia',
    'Palestine',
    'England',
    'UNITEDKINGDOM',
    'Belarus',
    'PapuaNewGuinea',
    'Dubai',
    'Barbados',
    'Turks&Caicos',
    'Kazakhstan',
    'UAE',
    'U.S.A',
    'ofAmerica',
    'Kingdom',
    'UnitedK',
    'Aomerica',
    'VaticanCity',
    'InHdia',
    'Indifa',
    'IndiAa',
    'Inedia',
    'Inodia',
    'IHndia',
    'IndHia',
    'Indfia',
    'Gerfmany',
    'Germaony',
    'Goermany',
    'GermHany',
    'Cameroon',
    'Swefden',
    'CanaHda',
    'SoouthKorea',
    'Switzerlaend',
    'Swiftzerland',
    'Nicaragua',
    'Franfce',
    'Tajikistan',
    'Noorway',
    'Senegal',
    'Paraguay',
    'Ghana',
    'Madagascar',
    'Ukraine',
    'Cyprus',
]

# Maps the misspellings, fragments and abbreviations listed in COUNTRIES to
# the canonical country name, so that all spellings of a country end up on
# the same node of the diagram.
fix_mapping = {
    'Aomerica': 'UnitedStatesofAmerica',
    'ofAmerica': 'UnitedStatesofAmerica',
    'USA': 'UnitedStatesofAmerica',
    'U.S.A': 'UnitedStatesofAmerica',
    'UnitedStates': 'UnitedStatesofAmerica',
    'UNITEDKINGDOM': 'UnitedKingdom',
    'UK': 'UnitedKingdom',
    'ENGLAND': 'UnitedKingdom',
    'England': 'UnitedKingdom',
    'Kingdom': 'UnitedKingdom',
    'UnitedK': 'UnitedKingdom',
    'InHdia': 'India',
    'Indifa': 'India',
    'IndiAa': 'India',
    'Inedia': 'India',
    'Inodia': 'India',
    'IHndia': 'India',
    'IndHia': 'India',
    'Indfia': 'India',
    'Gerfmany': 'Germany',
    'Germaony': 'Germany',
    'Goermany': 'Germany',
    'GermHany': 'Germany',
    'Swefden': 'Sweden',
    'CanaHda': 'Canada',
    'SoouthKorea': 'SouthKorea',
    'Switzerlaend': 'Switzerland',
    'Swiftzerland': 'Switzerland',
    'Franfce': 'France',
    'Noorway': 'Norway',
    # These two aliases are in COUNTRIES but had no canonical mapping, so
    # they were counted as countries of their own.
    'Netherland': 'Netherlands',
    'UAE': 'UnitedArabEmirates',
}

## Create plot data

In [None]:
# Links of the diagram, keyed '<source>_<target>' (filled in by add_node)
nodes = {}

# Labels seen for each of the three columns of the diagram
input_labels = []   # donor countries
target_labels = []  # purposes
goal_labels = []    # recipient states

# Running totals, used afterwards to pick the top donors / recipients
total_donations = {}  # per donor country
total_receipts = {}   # per recipient state

for _, row in df.iterrows():

    # Column names for Donors 2018-19.csv
    #
    # For Donors 2015-16.csv use instead:
    #   state = row['state']
    #   purpose = row['Purpose']
    #   donor_type = row['Donor type']
    #   amount = row['Amount']
    #   address = row['Address']
    state = row.State
    purpose = row.Purpose
    donor_type = row.Donor_type
    amount = row['Amount (INR)']
    address = row['Donor_Address']

    # Limit to Institutional donors
    if donor_type != 'Institutional':
        continue

    # Skip rows without a textual purpose (e.g. missing values)
    if not isinstance(purpose, str):
        continue

    # Limit to Social purposes for now (comment this out to show all purposes)
    #if purpose.strip() != 'Social':
    #    continue

    # Find the donor country: the first entry of COUNTRIES that occurs in the
    # address once its spaces are removed. The address is normalised once per
    # row rather than once per candidate country.
    matched = None

    if isinstance(address, str):
        compact_address = address.replace(' ', '')
        matched = next((name for name in COUNTRIES if name in compact_address), None)

    if matched:
        # Collapse misspellings / abbreviations onto the canonical name
        country = fix_mapping.get(matched, matched)
    else:
        country = 'Unknown'

    value = amount

    # Named `donor_country` rather than `input` so the builtin input() is not
    # shadowed for the rest of the notebook session.
    donor_country = str(country).strip()
    target = str(purpose).strip()
    goal = str(state).strip()

    input_labels.append(donor_country)
    target_labels.append(target)
    goal_labels.append(goal)

    # Two links per row: country -> purpose and purpose -> state
    add_node(nodes, donor_country, target, value)
    add_node(nodes, target, goal, value)

    # Calculate the total donations per country
    total_donations[donor_country] = total_donations.get(donor_country, 0) + value

    # Calculate the total receipts per state
    total_receipts[goal] = total_receipts.get(goal, 0) + value

## Build plot

In [None]:
# Determine which inputs to show, i.e. top 10 donors (based on total donations for each country)
ordered_total_donations = OrderedDict(sorted(total_donations.items(), key=lambda x: x[1], reverse=True))
ordered_donation_countries = list(ordered_total_donations)

num_countries = 10
include_countries = ordered_donation_countries[:num_countries]
exclude_countries = ordered_donation_countries[num_countries:]

# Determine which outputs to show, i.e. top 10 recipients (based on total receipts for each state)
ordered_total_receipts = OrderedDict(sorted(total_receipts.items(), key=lambda x: x[1], reverse=True))
ordered_receipt_states = list(ordered_total_receipts)

num_states = 10
include_states = ordered_receipt_states[:num_states]
exclude_states = ordered_receipt_states[num_states:]

# Sets for the membership tests done once per link below
shown_countries = set(include_countries)
hidden_countries = set(exclude_countries)
shown_states = set(include_states)
hidden_states = set(exclude_states)

pruned_nodes = {}
other_labels = set()

# Prune nodes and create OtherCountries inputs and OtherStates outputs for nodes we don't want to show.
for k, v in nodes.items():

    if v['source'] in shown_countries or v['target'] in shown_states:
        # Link touches a country or state that is shown: keep it unchanged
        pruned_nodes[k] = v
    elif v['source'] in hidden_countries:
        # Fold the links of all other donor countries into one input node
        other_labels.add('OtherCountries')
        add_node(pruned_nodes, 'OtherCountries', v['target'], v['value'])
    elif v['target'] in hidden_states:
        # Fold the links to all other recipient states into one output node
        other_labels.add('OtherStates')
        add_node(pruned_nodes, v['source'], 'OtherStates', v['value'])

goal_labels = sorted(set(goal_labels))
target_labels = sorted(set(target_labels))
input_labels = sorted(set(input_labels))

# Sort the "Other" labels as well: iterating the set directly gives an order
# that can differ from one session to the next.
labels = input_labels + target_labels + goal_labels + sorted(other_labels)

# print(pruned_nodes)
# print(labels)

# Make the node data
node_data = make_node_data(labels)

node_data.update({
    'pad': 20,
    'thickness': 15,
    'line': {
        'color': 'white',
        'width': 1,
    },
})

# Make the link data dictionary
link_data = make_link_data(pruned_nodes, labels)

plot_data = [
    go.Sankey(
        node=node_data,
        link=link_data,
        arrangement='perpendicular',
    ),
]

fig = go.Figure(
    data=plot_data,
)

fig.update_layout(
    height=1000,
    title=f'Top ten donor countries and recipient states ({filename})',
    title_x=0.5,
)
fig.show()
