In [2]:
import pandas as pd
import json
import numpy as np

In [3]:
def prep_companies_sankey(source_file_path,write_to_file_name):
    df = pd.read_csv(source_file_path)
    df_sankey = df

    df_sankey.rename(columns={'company_name': 'Company_name_cl'}, inplace=True)

    df_sankey['type'] = 'entity'
    df_sankey['target_type'] = ''

    company_totals = df_sankey.pivot_table(index=['Company_name_cl'], aggfunc='sum')['per_government_final']
    company_totals = company_totals.to_frame()
    company_totals.rename(columns={'per_government_final': 'total_payments'}, inplace=True)
    company_totals.reset_index(level=0, inplace=True)
    company_totals.sort_values(by=['total_payments'], ascending = False, inplace=True)

    df_sankey = pd.merge(df_sankey, company_totals, on='Company_name_cl')

    df_sankey = df_sankey[df_sankey["per_government_final"] > 0]
    df_sankey = df_sankey.sort_values(by=['total_payments'], ascending=False)
    df_sankey.drop(['Unnamed: 0'], axis=1)

    links_companies = pd.DataFrame(columns=['source','target','value','type'])

    df_sankey.rename(columns={'per_government_final': 'value_reported'}, inplace=True)

    to_append = df_sankey.groupby(['name_of_revenue_stream','paid_to'],as_index=False)['type','value_reported','total_payments'].sum()

    #to_append["target"] = "Myanmar Gems Enterprise"
    to_append.rename(columns = {'name_of_revenue_stream':'source', 'value_reported' : 'value', 'paid_to': 'target'}, inplace = True)

    to_append = to_append.sort_values(by=['value'], ascending = False)
    to_append['target_type'] = 'entity'

    links_companies = pd.concat([links_companies,to_append])

    to_append = df_sankey.groupby(['name_of_revenue_stream','Company_name_cl','type'],as_index=False) \
        ['value_reported','total_payments'] \
        .agg({'value_reported':sum,'total_payments':'first'})
    to_append.rename(columns = {'Company_name_cl':'source','name_of_revenue_stream':'target', 'value_reported' : 'value'}, inplace = True)
    to_append = to_append.sort_values(by=['total_payments'], ascending = False)
    links_companies = pd.concat([links_companies,to_append])

    print(to_append['value'].sum())

    [unique_list_companies,replace_dict_companies] = prep_nodes_links(links_companies)

    write_nodes_links(write_to_file_name,unique_list_companies,replace_dict_companies,links_companies)#


In [4]:
def prep_nodes_links(links):
    unique_source = links['source'].unique()
    unique_targets = links['target'].unique()

    unique_source = pd.merge(pd.DataFrame(unique_source), links, left_on=0, right_on='source', how='left')
    unique_source = unique_source.filter([0,'type'])
    unique_targets = pd.merge(pd.DataFrame(unique_targets), links, left_on=0, right_on='target', how='left')
    unique_targets = unique_targets.filter([0,'target_type'])
    unique_targets.rename(columns = {'target_type':'type'}, inplace = True)

    unique_list = pd.concat([unique_source[0], unique_targets[0]]).unique()

    unique_list = pd.merge(pd.DataFrame(unique_list), \
                           pd.concat([unique_source, unique_targets]), left_on=0, right_on=0, how='left')

    unique_list.drop_duplicates(subset=0, keep='first', inplace=True)

    replace_dict = {k: v for v, k in enumerate(unique_list[0])}
    unique_list
    return [unique_list,replace_dict]



In [5]:
def write_nodes_links(filename,unique_list,replace_dict,links):
    links_replaced = links.replace({"source": replace_dict,"target": replace_dict})
    nodes = pd.DataFrame(unique_list)
    nodes.rename(columns = {0:'name'}, inplace = True)
    nodes_json= pd.DataFrame(nodes).to_json(orient='records')
    links_json= pd.DataFrame(links_replaced).to_json(orient='records')
    
    data = { 'links' : json.loads(links_json), 'nodes' : json.loads(nodes_json) }
    data_json = json.dumps(data)
    data_json = data_json.replace("\\","")
    #print(data_json)
    #with open('sankey_data.json', 'w') as outfile:
    #    json.dump(data_json, outfile)

    text_file = open(filename + ".json", "w")
    text_file.write(data_json)
    text_file.close()

In [6]:
prep_companies_sankey('2014-2015/2014-15_oil_gas.csv','sankey_data_2014-15_oil_and_gas_companies')
prep_companies_sankey('2015-2016/2015-16_oil_gas.csv','sankey_data_2015-16_oil_and_gas_companies')

prep_companies_sankey('2014-2015/2014-15_oil_gas_transport.csv','sankey_data_2014-15_oil_and_gas_transport_companies')
prep_companies_sankey('2015-2016/2015-16_oil_gas_transport.csv','sankey_data_2015-16_oil_and_gas_transport_companies')

prep_companies_sankey('2014-2015/2014-15_other_minerals.csv','sankey_data_2014-15_other_minerals_companies')
prep_companies_sankey('2015-2016/2015-16_other_minerals.csv','sankey_data_2015-16_other_minerals_companies')

prep_companies_sankey('2016-2017/2016-17_gems_jade.csv','sankey_data_2016-17_gems_and_jade_companies')
prep_companies_sankey('2016-2017/2016-17_pearl.csv','sankey_data_2016-17_pearl_companies')
prep_companies_sankey('2016-2017/2016-17_other_minerals.csv','sankey_data_2016-17_other_minerals_companies')
prep_companies_sankey('2016-2017/2016-17_oil_gas.csv','sankey_data_2016-17_oil_and_gas_companies')
prep_companies_sankey('2016-2017/2016-17_oil_gas_transport.csv','sankey_data_2016-17_oil_and_gas_transport_companies')



of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




2348800796772.6797
2148636352421.5056
492467408143.75995
403814072375.8115


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




66156245090.09781
26646487476.95664
281868118541.8627
1912036683.33
46149014827.39881
30528713440.75
281647817309.35004


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


