# Sankey Diagrams

## Python Setup

In [1]:
from pysankey import sankey
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

## Data Read In

In [2]:
# Load the three grant/publication comparison tables, one per matching method
# (one-to-one, one-to-many, many-to-many), in that order.
one_to_one, one_to_many, many_to_many = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../output/rcdc_grnt_1_pubs_1_comp.csv',
        '../output/rcdc_grnt_1_pubs_comp.csv',
        '../output/rcdc_grnt_pubs_comp.csv',
    )
)

In [3]:
# Distinct funder names to iterate over when drawing diagrams.
# Missing names are dropped here: the per-funder filters compare
# `funder_name == funder`, which never matches NaN, so a NaN entry could only
# ever produce a spurious "No nan grants ..." message.
funders = set(one_to_one['funder_name'].dropna())

In [4]:
# Discard every row that has a missing value in any column, for all three tables.
one_to_one, one_to_many, many_to_many = (
    frame.dropna() for frame in (one_to_one, one_to_many, many_to_many)
)

## Sankey Function

In [5]:
def create_other_category(df, threshold=0.02):
    """Collapse low-volume publication categories into a single "Other" row.

    A row keeps its own ``pub_rcdc`` label only when its ``nb_pubs`` is
    strictly greater than ``threshold`` times the total ``nb_pubs`` of ``df``;
    every other row is relabelled ``"Other"``. The rows are then grouped by
    ``funder_name``, ``grnt_rcdc`` and the new ``pub_rcdc_cat`` label, and
    ``nb_grnts``, ``nb_obs`` and ``nb_pubs`` are summed within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``funder_name``, ``grnt_rcdc``, ``pub_rcdc``,
        ``nb_grnts``, ``nb_obs`` and ``nb_pubs``. It is left unmodified.
    threshold : float, optional
        Share of the total publication count a row must exceed to keep its
        own category. Defaults to 0.02 (2%).

    Returns
    -------
    pandas.DataFrame
        One row per (``funder_name``, ``grnt_rcdc``, ``pub_rcdc_cat``) with
        the summed ``nb_grnts``, ``nb_obs`` and ``nb_pubs`` columns. Empty
        when ``df`` has no rows.
    """
    tot_pubs = df['nb_pubs'].sum()
    keeps_own_label = df['nb_pubs'] > threshold * tot_pubs

    # `assign` works on a copy, so the caller's frame does not silently gain
    # a `pub_rcdc_cat` column.
    labelled = df.assign(
        pub_rcdc_cat=np.where(keeps_own_label, df['pub_rcdc'], "Other")
    )

    grouped = labelled.groupby(['funder_name', 'grnt_rcdc', 'pub_rcdc_cat'])
    return grouped[['nb_grnts', 'nb_obs', 'nb_pubs']].sum().reset_index()

In [6]:
def create_sankey(df, method, specialty, funder):
    """Draw a grants-to-publications Sankey diagram for one funder.

    When ``df`` has no rows, a "No ... grants" message is printed and nothing
    is drawn. Otherwise every row is attached to a single left-hand label
    (``specialty`` plus the total of ``nb_grnts``) and to a right-hand label
    built from its ``pub_rcdc_cat`` and ``nb_pubs``, and the result is handed
    to ``pysankey.sankey``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``pub_rcdc_cat``, ``nb_grnts`` and ``nb_pubs``.
        It is left unmodified.
    method : str
        Label of the matching method; used in the figure name.
    specialty : str
        Grant category shown on the left-hand side and in the figure name.
    funder : str
        Funder name; used in the message and in the figure name.

    Returns
    -------
    None
    """
    if df.empty:
        print("\nNo {} grants with {} as main RCDC code".format(funder, specialty))
        return

    # Build the label columns on a copy so the caller's frame does not gain
    # `grnts` / `pubs` columns as a side effect.
    labelled = df.assign(
        grnts="{} ({} grants)".format(specialty, df['nb_grnts'].sum()),
        pubs=df['pub_rcdc_cat'] + " (" + df['nb_pubs'].astype(str) + " publications)",
    )

    # NOTE(review): `figureName` is presumably the output path that pysankey
    # saves to (without extension) -- confirm against the installed version.
    sankey(left=labelled['grnts'], right=labelled['pubs'],
           rightWeight=labelled['nb_pubs'], leftWeight=labelled['nb_grnts'],
           aspect=5, fontsize=5,
           figureName="../output/sankeys/{} - {} - {}".format(method, specialty, funder))

## Create Graphs

In [7]:
# Grant category of interest: the per-funder loops keep only rows whose
# `grnt_rcdc` equals this value, and it is also used in the diagram labels
# and output figure names.
specialty = "Breast Cancer"

### One to One

In [8]:
# One-to-one table: draw one diagram per funder for the chosen specialty.
for funder in funders:
    # Rows belonging to this funder whose `grnt_rcdc` is the chosen specialty.
    is_match = (one_to_one['funder_name'] == funder) & (one_to_one['grnt_rcdc'] == specialty)
    df = create_other_category(one_to_one[is_match].copy())

    create_sankey(df, "Method 1", specialty, funder)

    # Close figures after each funder instead of letting one open figure per
    # funder accumulate until the loop ends.
    plt.close('all')

# Final sweep, so the cell still ends with no open figures even if the loop
# body never ran.
plt.close('all')


No Wellcome Trust grants with Breast Cancer as main RCDC code

No French National Cancer Institute - ORCID Confirmed grants with Breast Cancer as main RCDC code

No Ministère des Affaires sociales et de la Santé - ORCID Confirmed grants with Breast Cancer as main RCDC code

No Ministère des Affaires sociales et de la Santé grants with Breast Cancer as main RCDC code

No French National Cancer Institute grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research - ORCID Confirmed grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research grants with Breast Cancer as main RCDC code


### One to Many

In [9]:
# One-to-many table: draw one diagram per funder for the chosen specialty.
for funder in funders:
    # Rows belonging to this funder whose `grnt_rcdc` is the chosen specialty.
    is_match = (one_to_many['funder_name'] == funder) & (one_to_many['grnt_rcdc'] == specialty)
    df = create_other_category(one_to_many[is_match].copy())

    create_sankey(df, "Method 2", specialty, funder)

    # Close figures after each funder instead of letting one open figure per
    # funder accumulate until the loop ends.
    plt.close('all')

# Final sweep, so the cell still ends with no open figures even if the loop
# body never ran.
plt.close('all')


No Wellcome Trust grants with Breast Cancer as main RCDC code

No French National Cancer Institute - ORCID Confirmed grants with Breast Cancer as main RCDC code

No Ministère des Affaires sociales et de la Santé - ORCID Confirmed grants with Breast Cancer as main RCDC code

No Ministère des Affaires sociales et de la Santé grants with Breast Cancer as main RCDC code

No French National Cancer Institute grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research - ORCID Confirmed grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research grants with Breast Cancer as main RCDC code


### Many to Many

In [10]:
# Many-to-many table: draw one diagram per funder for the chosen specialty.
for funder in funders:
    # Rows belonging to this funder whose `grnt_rcdc` is the chosen specialty.
    is_match = (many_to_many['funder_name'] == funder) & (many_to_many['grnt_rcdc'] == specialty)
    df = create_other_category(many_to_many[is_match].copy())

    create_sankey(df, "Method 3", specialty, funder)

    # Close figures after each funder instead of letting one open figure per
    # funder accumulate until the loop ends.
    plt.close('all')

# Final sweep, so the cell still ends with no open figures even if the loop
# body never ran.
plt.close('all')


No Ministère des Affaires sociales et de la Santé - ORCID Confirmed grants with Breast Cancer as main RCDC code

No Ministère des Affaires sociales et de la Santé grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research - ORCID Confirmed grants with Breast Cancer as main RCDC code

No French Institute of Health and Medical Research grants with Breast Cancer as main RCDC code


## Sandbox