In [1]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In BU approach, Sources = methane sources, Targets = regions

#Output: json file formatted for Sankey diagram

#Created:       31.05.2016
#Last modified: 02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
#               03.06.2016 Add "Not affected" category; Exclude 0 and -99 values
#               06.06.2016 Output json files (Sankey format) with min and max values, respectively
#               07.06.2016 Min/max values output only when mean != -99 or != 0
#               09.06.2016 three levels
#               10.06.2016 New data files from Marielle
#               11.05.2020 Updated data files 2mai2020 from Marielle
#               13.05.2020 Updated data files 2mai2020 from Marielle

In [2]:
import numpy as np
import pandas as pd
import collections
import os
import xlrd

In [3]:
#Subcategories
#Pour Wetlands, il n'y a pas de sous catégories.
#Pour Other Natural, tu pourrais essayer avec :
#  Fresh water (122)
#  Geological source (52)
#  Wild animals (10)
#  Termites( 9)
#  Permafrost and hydrates (3)
#  Wild fires (1)

In [4]:
mydir = '../../../Documents/PROJECTS/METHANE_BUDGET/2020/data/'

data_date='11mai'

# Data file
# fname=mydir + 'Sankey_BU_2008-2017_31March2020.txt'
fname=mydir + 'Sankey_BU_2008-2017_' + data_date + '2020.txt'

# Output files
mean_name = '../data/Sankey_BU_2008-2017_' + data_date + '2020_mean_3levels.json'
min_name = '../data/Sankey_BU_2008-2017_' + data_date + '2020_min_3levels.json'
max_name = '../data/Sankey_BU_2008-2017_' + data_date + '2020_max_3levels.json'

In [5]:
# Read data file

df_BU = pd.read_csv(fname, header=1, delim_whitespace=True)
df_BU

Unnamed: 0,proc,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,Southern_Africa,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO
Wetlands,mean,6,11,4,8,22,16,2,6,19,5,12,1,1,3,0,5,24,4,149
Wetlands,min,3,4,1,4,11,11,1,-1,7,0,4,0,-1,1,0,1,5,0,102
Wetlands,max,12,23,7,13,32,22,4,12,30,10,22,3,2,4,1,5,40,7,182
OtherNatural,mean,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,222
OtherNatural,min,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,143
OtherNatural,max,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,306
AgriWaste,mean,16,2,7,4,16,8,18,13,9,3,6,3,8,29,3,36,18,8,206
AgriWaste,min,13,2,6,4,13,7,17,9,7,3,5,3,6,23,3,26,15,5,191
AgriWaste,max,17,2,10,4,17,9,20,15,12,5,6,4,9,38,3,44,23,19,223
Fossil,mean,14,2,2,5,1,2,6,7,6,3,18,5,17,26,1,5,7,2,128


In [6]:
df_BU.rename(columns = {'proc':'stats'}, inplace = True)
df_BU.index.name = 'proc'
df_BU

Unnamed: 0_level_0,stats,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,Southern_Africa,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO
proc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Wetlands,mean,6,11,4,8,22,16,2,6,19,5,12,1,1,3,0,5,24,4,149
Wetlands,min,3,4,1,4,11,11,1,-1,7,0,4,0,-1,1,0,1,5,0,102
Wetlands,max,12,23,7,13,32,22,4,12,30,10,22,3,2,4,1,5,40,7,182
OtherNatural,mean,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,222
OtherNatural,min,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,143
OtherNatural,max,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,306
AgriWaste,mean,16,2,7,4,16,8,18,13,9,3,6,3,8,29,3,36,18,8,206
AgriWaste,min,13,2,6,4,13,7,17,9,7,3,5,3,6,23,3,26,15,5,191
AgriWaste,max,17,2,10,4,17,9,20,15,12,5,6,4,9,38,3,44,23,19,223
Fossil,mean,14,2,2,5,1,2,6,7,6,3,18,5,17,26,1,5,7,2,128


In [7]:
df_BU['Middle_East']

proc
Wetlands         1
Wetlands        -1
Wetlands         2
OtherNatural   -99
OtherNatural   -99
OtherNatural   -99
AgriWaste        8
AgriWaste        6
AgriWaste        9
Fossil          17
Fossil          16
Fossil          19
BioBurBiof       0
BioBurBiof       0
BioBurBiof       1
SumSources      26
SumSources      21
SumSources      30
Name: Middle_East, dtype: int64

In [8]:
df_BU['Northern_South_America']

proc
Wetlands         8
Wetlands         4
Wetlands        13
OtherNatural   -99
OtherNatural   -99
OtherNatural   -99
AgriWaste        4
AgriWaste        4
AgriWaste        4
Fossil           5
Fossil           2
Fossil          14
BioBurBiof       0
BioBurBiof       0
BioBurBiof       1
SumSources      18
SumSources      10
SumSources      32
Name: Northern_South_America, dtype: int64

In [9]:
#Add "Not affected" column and set values to 0
df_BU['Not affected'] = 0
df_BU

Unnamed: 0_level_0,stats,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,...,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO,Not affected
proc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wetlands,mean,6,11,4,8,22,16,2,6,19,...,12,1,1,3,0,5,24,4,149,0
Wetlands,min,3,4,1,4,11,11,1,-1,7,...,4,0,-1,1,0,1,5,0,102,0
Wetlands,max,12,23,7,13,32,22,4,12,30,...,22,3,2,4,1,5,40,7,182,0
OtherNatural,mean,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,222,0
OtherNatural,min,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,143,0
OtherNatural,max,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,306,0
AgriWaste,mean,16,2,7,4,16,8,18,13,9,...,6,3,8,29,3,36,18,8,206,0
AgriWaste,min,13,2,6,4,13,7,17,9,7,...,5,3,6,23,3,26,15,5,191,0
AgriWaste,max,17,2,10,4,17,9,20,15,12,...,6,4,9,38,3,44,23,19,223,0
Fossil,mean,14,2,2,5,1,2,6,7,6,...,18,5,17,26,1,5,7,2,128,0


In [10]:
#Copy GLO values for OtherNat into "Not affected" col
df_BU['Not affected']['OtherNatural'] = df_BU['GLO']['OtherNatural']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [11]:
df_BU

Unnamed: 0_level_0,stats,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,...,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO,Not affected
proc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Wetlands,mean,6,11,4,8,22,16,2,6,19,...,12,1,1,3,0,5,24,4,149,0
Wetlands,min,3,4,1,4,11,11,1,-1,7,...,4,0,-1,1,0,1,5,0,102,0
Wetlands,max,12,23,7,13,32,22,4,12,30,...,22,3,2,4,1,5,40,7,182,0
OtherNatural,mean,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,222,222
OtherNatural,min,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,143,143
OtherNatural,max,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,-99,306,306
AgriWaste,mean,16,2,7,4,16,8,18,13,9,...,6,3,8,29,3,36,18,8,206,0
AgriWaste,min,13,2,6,4,13,7,17,9,7,...,5,3,6,23,3,26,15,5,191,0
AgriWaste,max,17,2,10,4,17,9,20,15,12,...,6,4,9,38,3,44,23,19,223,0
Fossil,mean,14,2,2,5,1,2,6,7,6,...,18,5,17,26,1,5,7,2,128,0


In [12]:
fname2=mydir + 'Sankey_BU_subcategories_withFake.csv'
sub_df = pd.read_csv(fname2, header=0, sep="\;", engine='python')
sub_df

Unnamed: 0,proc,stats,Fossil,AgriWaste,BioBurBiof,OtherNatural,Wetlands
0,Coal,mean,42,0,0,0,0
1,Coal,min,29,0,0,0,0
2,Coal,max,60,0,0,0,0
3,GasAndOilIndustry,mean,75,0,0,0,0
4,GasAndOilIndustry,min,66,0,0,0,0
5,GasAndOilIndustry,max,92,0,0,0,0
6,Industry,mean,3,0,0,0,0
7,Industry,min,0,0,0,0,0
8,Industry,max,7,0,0,0,0
9,Transport,mean,4,0,0,0,0


In [13]:
sub_df.index = sub_df['proc']
sub_df = sub_df.drop(['proc'], axis=1)
sub_df.head(10)

Unnamed: 0_level_0,stats,Fossil,AgriWaste,BioBurBiof,OtherNatural,Wetlands
proc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Coal,mean,42,0,0,0,0
Coal,min,29,0,0,0,0
Coal,max,60,0,0,0,0
GasAndOilIndustry,mean,75,0,0,0,0
GasAndOilIndustry,min,66,0,0,0,0
GasAndOilIndustry,max,92,0,0,0,0
Industry,mean,3,0,0,0,0
Industry,min,0,0,0,0,0
Industry,max,7,0,0,0,0
Transport,mean,4,0,0,0,0


In [14]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_BU_piv = df_BU.pivot(columns='stats', index=df_BU.index)
df_BU_piv

Unnamed: 0_level_0,USA,USA,USA,Canada,Canada,Canada,Central_America,Central_America,Central_America,Northern_South_America,...,Southeast_Asia,Oceania,Oceania,Oceania,GLO,GLO,GLO,Not affected,Not affected,Not affected
stats,max,mean,min,max,mean,min,max,mean,min,max,...,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AgriWaste,17,16,13,2,2,2,10,7,6,4,...,15,19,8,5,223,206,191,0,0,0
BioBurBiof,2,1,0,1,1,0,1,1,0,1,...,2,2,1,0,40,30,26,0,0,0
Fossil,16,14,10,3,2,2,2,2,1,14,...,6,2,2,2,154,128,113,0,0,0
OtherNatural,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,306,222,143,306,222,143
SumSources,47,36,28,29,16,8,19,13,9,32,...,28,15,11,6,881,737,594,0,0,0
Wetlands,12,6,3,23,11,4,7,4,1,13,...,5,7,4,0,182,149,102,0,0,0


In [15]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
sub_df_piv = sub_df.pivot(columns='stats', index=sub_df.index)
sub_df_piv

Unnamed: 0_level_0,Fossil,Fossil,Fossil,AgriWaste,AgriWaste,AgriWaste,BioBurBiof,BioBurBiof,BioBurBiof,OtherNatural,OtherNatural,OtherNatural,Wetlands,Wetlands,Wetlands
stats,max,mean,min,max,mean,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
BiofuelBurning,0,0,0,0,0,0,14,12,10,0,0,0,0,0,0
BiogenicOceanic,0,0,0,0,0,0,0,0,0,10,6,4,0,0,0
BiomassBurning,0,0,0,0,0,0,26,17,14,0,0,0,0,0,0
Coal,60,42,29,0,0,0,0,0,0,0,0,0,0,0,0
EntericFermentationAndManure,0,0,0,116,111,106,0,0,0,0,0,0,0,0,0
Freshwater,0,0,0,0,0,0,0,0,0,212,159,117,0,0,0
GasAndOilIndustry,92,75,66,0,0,0,0,0,0,0,0,0,0,0,0
GeologicalSources,0,0,0,0,0,0,0,0,0,65,45,18,0,0,0
Industry,7,3,0,0,0,0,0,0,0,0,0,0,0,0,0
LandfillsAndWaste,0,0,0,69,65,60,0,0,0,0,0,0,0,0,0


In [16]:
df_BU_piv.columns[0][0] #Bor_NAme
df_BU_piv.columns[3][0] #contUSA
df_BU_piv.columns[6][0] #CentName
df_BU_piv.columns[9][0] #Trop_SAme

'Northern_South_America'

In [17]:
df_BU_piv['USA']['mean']

proc
AgriWaste       16
BioBurBiof       1
Fossil          14
OtherNatural   -99
SumSources      36
Wetlands         6
Name: mean, dtype: int64

In [18]:
df_BU_piv['USA'].loc['AgriWaste']

stats
max     17
mean    16
min     13
Name: AgriWaste, dtype: int64

In [19]:
df_BU_piv['USA'].loc['BioBurBiof']['mean']

1

In [20]:
list(df_BU)

['stats',
 'USA',
 'Canada',
 'Central_America',
 'Northern_South_America',
 'Brazil',
 'Southwest_South_America',
 'Europe',
 'Northern_Africa',
 'Equatorial_Africa',
 'Southern_Africa',
 'Russia',
 'Central_Asia',
 'Middle_East',
 'China',
 'Korean_Japan',
 'South_Asia',
 'Southeast_Asia',
 'Oceania',
 'GLO',
 'Not affected']

In [21]:
#Create regions, categories and subcategories array

#regions
#Store region names in list
numRegions = len(list(df_BU))

idx = 0
regions = []
for num in range(0,numRegions):
    if list(df_BU)[num] != "GLO": #exclude GLO column
        if list(df_BU)[num] != "stats": #exclude stats column
            regions.append(list(df_BU)[num])
            idx = idx + 3

#categories    
categories = df_BU_piv.index.tolist()
 
#subcategories
subcategories = sub_df.index.tolist()
subcategories = list(set(subcategories)) #finds unique values
   
print(regions)
print(categories)
print(subcategories)

['USA', 'Canada', 'Central_America', 'Northern_South_America', 'Brazil', 'Southwest_South_America', 'Europe', 'Northern_Africa', 'Equatorial_Africa', 'Southern_Africa', 'Russia', 'Central_Asia', 'Middle_East', 'China', 'Korean_Japan', 'South_Asia', 'Southeast_Asia', 'Oceania', 'Not affected']
['AgriWaste', 'BioBurBiof', 'Fossil', 'OtherNatural', 'SumSources', 'Wetlands']
['fakeSource', 'GasAndOilIndustry', 'BiofuelBurning', 'Permafrost', 'BiomassBurning', 'GeologicalSources', 'WildAnimals', 'Transport', 'Rice', 'Freshwater', 'Termites', 'Coal', 'LandfillsAndWaste', 'BiogenicOceanic', 'Industry', 'EntericFermentationAndManure']


In [22]:
nodes = categories + regions + subcategories
nodes

['AgriWaste',
 'BioBurBiof',
 'Fossil',
 'OtherNatural',
 'SumSources',
 'Wetlands',
 'USA',
 'Canada',
 'Central_America',
 'Northern_South_America',
 'Brazil',
 'Southwest_South_America',
 'Europe',
 'Northern_Africa',
 'Equatorial_Africa',
 'Southern_Africa',
 'Russia',
 'Central_Asia',
 'Middle_East',
 'China',
 'Korean_Japan',
 'South_Asia',
 'Southeast_Asia',
 'Oceania',
 'Not affected',
 'fakeSource',
 'GasAndOilIndustry',
 'BiofuelBurning',
 'Permafrost',
 'BiomassBurning',
 'GeologicalSources',
 'WildAnimals',
 'Transport',
 'Rice',
 'Freshwater',
 'Termites',
 'Coal',
 'LandfillsAndWaste',
 'BiogenicOceanic',
 'Industry',
 'EntericFermentationAndManure']

In [23]:
#Replace -99 values with 0
#df_BU_piv = df_BU_piv.replace(-99, 1)
#df_BU_piv

#Replace -99 and 0 values with -1
#df_BU_piv = df_BU_piv.replace(-99, -1)
#df_BU_piv = df_BU_piv.replace(0, -1)
df_BU_piv



Unnamed: 0_level_0,USA,USA,USA,Canada,Canada,Canada,Central_America,Central_America,Central_America,Northern_South_America,...,Southeast_Asia,Oceania,Oceania,Oceania,GLO,GLO,GLO,Not affected,Not affected,Not affected
stats,max,mean,min,max,mean,min,max,mean,min,max,...,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AgriWaste,17,16,13,2,2,2,10,7,6,4,...,15,19,8,5,223,206,191,0,0,0
BioBurBiof,2,1,0,1,1,0,1,1,0,1,...,2,2,1,0,40,30,26,0,0,0
Fossil,16,14,10,3,2,2,2,2,1,14,...,6,2,2,2,154,128,113,0,0,0
OtherNatural,-99,-99,-99,-99,-99,-99,-99,-99,-99,-99,...,-99,-99,-99,-99,306,222,143,306,222,143
SumSources,47,36,28,29,16,8,19,13,9,32,...,28,15,11,6,881,737,594,0,0,0
Wetlands,12,6,3,23,11,4,7,4,1,13,...,5,7,4,0,182,149,102,0,0,0


In [24]:
sub_df_piv

Unnamed: 0_level_0,Fossil,Fossil,Fossil,AgriWaste,AgriWaste,AgriWaste,BioBurBiof,BioBurBiof,BioBurBiof,OtherNatural,OtherNatural,OtherNatural,Wetlands,Wetlands,Wetlands
stats,max,mean,min,max,mean,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
BiofuelBurning,0,0,0,0,0,0,14,12,10,0,0,0,0,0,0
BiogenicOceanic,0,0,0,0,0,0,0,0,0,10,6,4,0,0,0
BiomassBurning,0,0,0,0,0,0,26,17,14,0,0,0,0,0,0
Coal,60,42,29,0,0,0,0,0,0,0,0,0,0,0,0
EntericFermentationAndManure,0,0,0,116,111,106,0,0,0,0,0,0,0,0,0
Freshwater,0,0,0,0,0,0,0,0,0,212,159,117,0,0,0
GasAndOilIndustry,92,75,66,0,0,0,0,0,0,0,0,0,0,0,0
GeologicalSources,0,0,0,0,0,0,0,0,0,65,45,18,0,0,0
Industry,7,3,0,0,0,0,0,0,0,0,0,0,0,0,0
LandfillsAndWaste,0,0,0,69,65,60,0,0,0,0,0,0,0,0,0


In [25]:
#df_BU_piv['Bor_NAme'].loc['Agriwast']['mean']
sub_df_piv['OtherNatural'].loc['Freshwater'][2] #[0] = max, [1] = min, [2] = mean

sub_df_piv['AgriWaste'].loc['Termites'][2] #[0] = max, [1] = min, [2] = mean

0

# Save to json

In [26]:
#Write json file for 3-level Sankey, MEAN VALUES ONLY
#GLO omitted

file = open(mean_name, 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    print(category)
    if category!='SumSources':
        for region in regions:
            #print region
            value = df_BU_piv[region].loc[category]['mean']     
            if value != -99:
                if value != 0:
                    #print value
                    file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))                
    
#source-target pairs for subcategories -> categories
for subcategory in subcategories:    
    for category in categories:
        if category!='SumSources':
            value2 = sub_df_piv[category].loc[subcategory][1] #[0] = max, [1] = mean, [2] = min
            if value2 != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

AgriWaste
BioBurBiof
Fossil
OtherNatural
SumSources
Wetlands


In [27]:
#Write json file for 3-level Sankey, MIN VALUES ONLY
#GLO omitted


file = open(min_name, 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    print(category)
    if category!='SumSources':
        for region in regions:
            #print region
            value = df_BU_piv[region].loc[category]['min']     
            if value != -99:
                if value != 0:
                    #print value
                    file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))                
    
#source-target pairs for subcategories -> categories
for subcategory in subcategories:    
    for category in categories:
        if category!='SumSources':
            value2 = sub_df_piv[category].loc[subcategory][2] #[0] = max, [1] = mean, [2] = min
            if value2 != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

AgriWaste
BioBurBiof
Fossil
OtherNatural
SumSources
Wetlands


In [28]:
#Write json file for 3-level Sankey, MAX VALUES ONLY
#GLO omitted

file = open(max_name, 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

#source-target pairs for categories -> regions
for category in categories:
    print(category)
    if category!='SumSources':
        for region in regions:
            #print region
            value = df_BU_piv[region].loc[category]['max']     
            if value != -99:
                if value != 0:
                    #print value
                    file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(category, region, float(value)))                
    
#source-target pairs for subcategories -> categories
for subcategory in subcategories:    
    for category in categories:
        if category!='SumSources':
            value2 = sub_df_piv[category].loc[subcategory][0] #[0] = max, [1] = mean, [2] = min
            if value2 != 0:
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(subcategory, category, float(value2)))

# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

AgriWaste
BioBurBiof
Fossil
OtherNatural
SumSources
Wetlands
