In [4]:
#Script to reformat methane data Top-Down file for two-level Sankey
#In TD approach, Sources = regions, Targets = methane sources

#Output: json file formatted for Sankey diagram

#Created:       30.05.2016
#Last modified: 31.05.2016 Read in orig file (no manual reformatting necessary)
#               02.06.2016 New data files from Marielle Saunois; remove GLO category from final file
#               06.06.2016 Output json files (Sankey format) with min and max values, respectively
#               07.06.2016 Min/max values output only when mean != -99 or != 0
#               10.06.2016 New data files from Marielle
#               05.04.2020 Updated data files from Marielle

In [5]:
import numpy as np
import pandas as pd
import collections
import os

In [21]:
mydir = '../../../Documents/PROJECTS/METHANE_BUDGET/2020/data/'
fname=mydir + 'Sankey_TD_2008-2017_31March2020.txt'

df_TD = pd.read_csv(fname, header=1, delim_whitespace=True)
df_TD

Unnamed: 0,proc,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,Southern_Africa,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO
Wetlands,mean,14,14,2,10,34,14,3,7,20,6,12,0,1,5,0,13,22,2,180
Wetlands,min,8,9,1,6,26,9,1,4,12,2,8,0,0,2,0,6,11,1,155
Wetlands,max,24,21,3,14,44,21,5,11,27,9,16,0,1,9,1,18,33,3,200
OtherNatural,mean,2,1,1,1,3,1,2,3,3,2,2,1,3,1,0,1,3,2,38
OtherNatural,min,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,1,21
OtherNatural,max,3,2,1,2,5,2,3,4,7,3,3,1,6,1,0,1,4,3,50
AgriWaste,mean,19,3,6,5,20,9,19,11,10,5,5,3,7,28,3,38,22,5,221
AgriWaste,min,14,2,5,4,11,6,13,10,6,4,4,3,5,23,2,24,20,3,207
AgriWaste,max,26,4,7,6,26,12,26,14,12,6,7,4,10,35,3,51,27,5,240
Fossil,mean,11,2,2,3,2,2,7,5,6,3,13,5,13,17,1,4,7,2,107


In [22]:
df_TD.rename(columns = {'proc':'stats'}, inplace = True)
df_TD.index.name = 'proc'
df_TD

Unnamed: 0_level_0,stats,USA,Canada,Central_America,Northern_South_America,Brazil,Southwest_South_America,Europe,Northern_Africa,Equatorial_Africa,Southern_Africa,Russia,Central_Asia,Middle_East,China,Korean_Japan,South_Asia,Southeast_Asia,Oceania,GLO
proc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Wetlands,mean,14,14,2,10,34,14,3,7,20,6,12,0,1,5,0,13,22,2,180
Wetlands,min,8,9,1,6,26,9,1,4,12,2,8,0,0,2,0,6,11,1,155
Wetlands,max,24,21,3,14,44,21,5,11,27,9,16,0,1,9,1,18,33,3,200
OtherNatural,mean,2,1,1,1,3,1,2,3,3,2,2,1,3,1,0,1,3,2,38
OtherNatural,min,1,0,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,1,21
OtherNatural,max,3,2,1,2,5,2,3,4,7,3,3,1,6,1,0,1,4,3,50
AgriWaste,mean,19,3,6,5,20,9,19,11,10,5,5,3,7,28,3,38,22,5,221
AgriWaste,min,14,2,5,4,11,6,13,10,6,4,4,3,5,23,2,24,20,3,207
AgriWaste,max,26,4,7,6,26,12,26,14,12,6,7,4,10,35,3,51,27,5,240
Fossil,mean,11,2,2,3,2,2,7,5,6,3,13,5,13,17,1,4,7,2,107


In [23]:
#Pivot table so that "stats" Mean, Min, Max become three columns under each region column
#and "proc" becomes index
df_TD_piv = df_TD.pivot(columns='stats', index=df_TD.index)
df_TD_piv

Unnamed: 0_level_0,USA,USA,USA,Canada,Canada,Canada,Central_America,Central_America,Central_America,Northern_South_America,...,South_Asia,Southeast_Asia,Southeast_Asia,Southeast_Asia,Oceania,Oceania,Oceania,GLO,GLO,GLO
stats,max,mean,min,max,mean,min,max,mean,min,max,...,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AgriWaste,26,19,14,4,3,2,7,6,5,6,...,24,27,22,20,5,5,3,240,221,207
BioBurBiof,1,1,0,1,1,1,1,1,0,0,...,2,7,5,4,1,1,0,36,31,27
Fossil,18,11,9,4,2,1,3,2,1,5,...,2,9,7,5,2,2,1,131,107,81
OtherNatural,3,2,1,2,1,0,1,1,1,2,...,1,4,3,1,3,2,1,50,38,21
SumSources,62,47,34,30,20,16,16,12,9,27,...,42,67,60,47,15,11,8,594,576,540
Wetlands,24,14,8,21,14,9,3,2,1,14,...,6,33,22,11,3,2,1,200,180,155


In [24]:
df_TD_piv.columns[0][0]

'USA'

In [25]:
df_TD_piv.columns[0][0] #Bor_NAme
df_TD_piv.columns[3][0] #contUSA
df_TD_piv.columns[6][0] #CentName
df_TD_piv.columns[9][0] #Trop_SAme

'Northern_South_America'

In [35]:
list(df_TD)

['stats',
 'USA',
 'Canada',
 'Central_America',
 'Northern_South_America',
 'Brazil',
 'Southwest_South_America',
 'Europe',
 'Northern_Africa',
 'Equatorial_Africa',
 'Southern_Africa',
 'Russia',
 'Central_Asia',
 'Middle_East',
 'China',
 'Korean_Japan',
 'South_Asia',
 'Southeast_Asia',
 'Oceania',
 'GLO']

In [27]:
df_TD_piv['Canada']['mean']

proc
AgriWaste        3
BioBurBiof       1
Fossil           2
OtherNatural     1
SumSources      20
Wetlands        14
Name: mean, dtype: int64

In [28]:
df_TD_piv['Canada'].loc['AgriWaste']['mean']

3

In [29]:
df_TD_piv['Canada'].loc['BioBurBiof']['mean']

1

In [34]:
print(df_TD_piv.shape)
df_TD_piv

(6, 57)


Unnamed: 0_level_0,USA,USA,USA,Canada,Canada,Canada,Central_America,Central_America,Central_America,Northern_South_America,...,South_Asia,Southeast_Asia,Southeast_Asia,Southeast_Asia,Oceania,Oceania,Oceania,GLO,GLO,GLO
stats,max,mean,min,max,mean,min,max,mean,min,max,...,min,max,mean,min,max,mean,min,max,mean,min
proc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AgriWaste,26,19,14,4,3,2,7,6,5,6,...,24,27,22,20,5,5,3,240,221,207
BioBurBiof,1,1,0,1,1,1,1,1,0,0,...,2,7,5,4,1,1,0,36,31,27
Fossil,18,11,9,4,2,1,3,2,1,5,...,2,9,7,5,2,2,1,131,107,81
OtherNatural,3,2,1,2,1,0,1,1,1,2,...,1,4,3,1,3,2,1,50,38,21
SumSources,62,47,34,30,20,16,16,12,9,27,...,42,67,60,47,15,11,8,594,576,540
Wetlands,24,14,8,21,14,9,3,2,1,14,...,6,33,22,11,3,2,1,200,180,155


In [47]:
#Store region names in list
regions = list(df_TD)
if 'stats' in regions: regions.remove('stats')
if 'GLO' in regions: regions.remove('GLO')
numRegions = len(regions)

idx = 0
sources = []
for num in range(0,numRegions):
    print(num)
    sources.append(df_TD_piv.columns[idx][0])
    idx = idx + 3
   
print(sources)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
['USA', 'Canada', 'Central_America', 'Northern_South_America', 'Brazil', 'Southwest_South_America', 'Europe', 'Northern_Africa', 'Equatorial_Africa', 'Southern_Africa', 'Russia', 'Central_Asia', 'Middle_East', 'China', 'Korean_Japan', 'South_Asia', 'Southeast_Asia', 'Oceania']


In [51]:
#Get target list
targets = df_TD_piv.index.tolist()
targets

['AgriWaste', 'BioBurBiof', 'Fossil', 'OtherNatural', 'SumSources', 'Wetlands']

In [52]:
if 'SumSources' in targets: targets.remove('SumSources')
targets

['AgriWaste', 'BioBurBiof', 'Fossil', 'OtherNatural', 'Wetlands']

In [53]:
nodes = sources + targets
nodes

['USA',
 'Canada',
 'Central_America',
 'Northern_South_America',
 'Brazil',
 'Southwest_South_America',
 'Europe',
 'Northern_Africa',
 'Equatorial_Africa',
 'Southern_Africa',
 'Russia',
 'Central_Asia',
 'Middle_East',
 'China',
 'Korean_Japan',
 'South_Asia',
 'Southeast_Asia',
 'Oceania',
 'AgriWaste',
 'BioBurBiof',
 'Fossil',
 'OtherNatural',
 'Wetlands']

In [67]:
jname='../data/Sankey_TD_2003-2012_31mar2020_mean_noZeros.json'
file = open(jname, 'w')

file.write('{\n')
file.write('"nodes": [\n')
for node in nodes:
    file.write('{"name": "%s"},\n' %(node))
# remove last comma
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes
file.truncate()
file.write('\n],\n')

file.write('"links": [\n')

for source in sources:
    print(source)
    for target in targets:
        print(target)
        #print df_TD_piv[source].loc[target]['mean']
        value = df_TD_piv[source].loc[target]['mean']
        if value != -99:
            if value != 0:
                print(value)
                file.write('{"source": "%s", "target": "%s", "value": "%.2f"},\n' %(source, target, float(value)))

# remove last comma           
file.seek(0, os.SEEK_END)              # seek to end of file; f.seek(0, 2) is legal
file.seek(file.tell() - 2, os.SEEK_SET)   # go backwards 3 bytes  
file.truncate()
file.write('\n]\n')
file.write('}\n')

file.close()

USA
AgriWaste
19
BioBurBiof
1
Fossil
11
OtherNatural
2
Wetlands
14
Canada
AgriWaste
3
BioBurBiof
1
Fossil
2
OtherNatural
1
Wetlands
14
Central_America
AgriWaste
6
BioBurBiof
1
Fossil
2
OtherNatural
1
Wetlands
2
Northern_South_America
AgriWaste
5
BioBurBiof
Fossil
3
OtherNatural
1
Wetlands
10
Brazil
AgriWaste
20
BioBurBiof
2
Fossil
2
OtherNatural
3
Wetlands
34
Southwest_South_America
AgriWaste
9
BioBurBiof
1
Fossil
2
OtherNatural
1
Wetlands
14
Europe
AgriWaste
19
BioBurBiof
1
Fossil
7
OtherNatural
2
Wetlands
3
Northern_Africa
AgriWaste
11
BioBurBiof
2
Fossil
5
OtherNatural
3
Wetlands
7
Equatorial_Africa
AgriWaste
10
BioBurBiof
5
Fossil
6
OtherNatural
3
Wetlands
20
Southern_Africa
AgriWaste
5
BioBurBiof
3
Fossil
3
OtherNatural
2
Wetlands
6
Russia
AgriWaste
5
BioBurBiof
2
Fossil
13
OtherNatural
2
Wetlands
12
Central_Asia
AgriWaste
3
BioBurBiof
Fossil
5
OtherNatural
1
Wetlands
Middle_East
AgriWaste
7
BioBurBiof
Fossil
13
OtherNatural
3
Wetlands
1
China
AgriWaste
28
BioBurBiof
4
Fossil
17
O