# USEPA to Exiobase 

In [1]:
import pathlib

In [2]:
import pandas as pd

In [3]:
import fedelemflowlist

In [4]:
ef_usepa = pd.read_csv('../correspondence_tables/data/raw/USEPA_EF_20200120.csv',low_memory=False)

we'd need to know what makes a unique UUID, seems to be `flowable` and `context`

In [5]:
assert ef_usepa.set_index('Flow UUID')[['Flowable','Context']].duplicated().sum()==0

in principle we'd link to the preferred flows

In [6]:
ef_usepa.sample().T

Unnamed: 0,105706
Flowable,"C.I. Acid Red 18, trisodium salt"
CAS No,2611-82-7
Formula,C20H11N2Na3O10S3
Synonyms,"SX purple; 1,3-Naphthalenedisulfonic acid, 7-h..."
Unit,kg
Class,Chemicals
External Reference,https://ofmpub.epa.gov/sor_internet/registry/s...
Preferred,0
Context,emission/ground/human-dominated/commercial
Flow UUID,e8904a6f-17a9-3288-8390-b83704c0a9c1


Exiobase flows

In [7]:
exiobase_airborn = pd.read_excel('../correspondence_tables/data/raw/Classifications_v_3_3_17.xlsx',sheet_name='Emissions')

in principle, we should not care about the unit

In [8]:
exiobase_airborn.sample()

Unnamed: 0,Emission name,Unit,Compartment
62,C,tonnes,air


In [9]:
# it needs some cleaning
exiobase_airborn['Emission name'] = exiobase_airborn['Emission name'].str.strip()

store results in a dictionary with keys equal to pairs (emission name, compartment)

In [10]:
exiobase3_usepa={}

for i,row in exiobase_airborn.iterrows():
    exiobase3_usepa[(row['Emission name'],row['Compartment'])]=None

from inspection, we can see that not having a secondary context is an "non preferred" flowable. We'll have to stick to non-preferred

we can start by those whose name coincide with the formula

In [11]:
match1_1 = pd.merge(left = exiobase_airborn[exiobase_airborn.Compartment=='air'],
                  right=ef_usepa[ef_usepa.Context=='emission/air'],
                  left_on='Emission name',
                  right_on='Formula',how='inner')
len(match1_1)

18

In [12]:
match1_1=match1_1[['Emission name','Compartment','Flow UUID']]

In [13]:
exiobase3_usepa.update(match1_1.set_index(['Emission name','Compartment'])['Flow UUID'].to_dict())

In [14]:
match1_2 = pd.merge(left = exiobase_airborn[exiobase_airborn.Compartment=='soil'],
                  right=ef_usepa[ef_usepa.Context=='emission/ground'],
                  left_on='Emission name',
                  right_on='Formula',how='inner')
len(match1_2)

10

In [15]:
match1_2=match1_2[['Emission name','Compartment','Flow UUID']]

In [16]:
exiobase3_usepa.update(match1_2.set_index(['Emission name','Compartment'])['Flow UUID'].to_dict())

this stores more than one value, but we'll clean it after for the values that only have one

same flowable name

In [17]:
'''dict of flowable uuid where the flowable name coincides with exiobase name. 
there is no more than one because that would break the set_index call due to 
repeated elements''' 
match2_1 = ef_usepa.loc[ef_usepa.Flowable.isin(list(exiobase_airborn.loc[exiobase_airborn.Compartment=='air','Emission name']))&
             (ef_usepa.Context=='emission/air'),['Flowable','Flow UUID']]\
.set_index('Flowable')['Flow UUID'].to_dict()

In [18]:
for flowable,uuid in match2_1.items():
    exiobase3_usepa[(flowable,'air')]=uuid

In [19]:
'''dict of flowable uuid where the flowable name coincides with exiobase name. 
there is no more than one because that would break the set_index call due to 
repeated elements''' 
match2_2 = ef_usepa.loc[ef_usepa.Flowable.isin(list(exiobase_airborn.loc[exiobase_airborn.Compartment=='soil','Emission name']))&
             (ef_usepa.Context=='emission/ground'),['Flowable','Flow UUID']]\
.set_index('Flowable')['Flow UUID'].to_dict()

In [20]:
match2_2

{}

manual matching

In [21]:
exiobase3_usepa[('CO ', 'air')] = ef_usepa.loc[(ef_usepa.Flowable.str.contains('Carbon monoxide'))&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [22]:
exiobase3_usepa[('Carbon dioxide, fossil', 'air')] = ef_usepa.loc[(ef_usepa.Formula=='CO2')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [23]:
exiobase3_usepa[('Carbon dioxide, biogenic', 'air')] = ef_usepa.loc[(ef_usepa.Formula=='CO2')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [24]:
exiobase3_usepa[('DDT', 'air')]=ef_usepa.loc[ef_usepa.Flowable.str.contains('DDT')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [25]:
exiobase3_usepa[('SF6','air')]=ef_usepa.loc[(ef_usepa.Flowable=='Sulfur hexafluoride')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [26]:
# there are more dioxins too, more specific
exiobase3_usepa[('dioxin','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Dioxins')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [27]:
exiobase3_usepa[('HFCs','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Hydrofluorocarbons')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [28]:
exiobase3_usepa[('PFCs','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Perfluorocarbons')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [29]:
exiobase3_usepa[('NH3','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Ammonia')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

PM10 is PM bigger than 10 or smaller than then microns?

In [30]:
exiobase3_usepa[('PM10','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Particulate matter, ≤ 10μm')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [31]:
exiobase3_usepa[('SOx','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Sulfur oxides')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [32]:
exiobase3_usepa[('NOX','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Nitrogen oxides')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [33]:
# close matching, but not exact. 
exiobase3_usepa[('NMVOC','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Volatile organic compounds')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [34]:
exiobase3_usepa[('Benzo-[b]-fluoranthene','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Benzo(b)fluoranthene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [35]:
exiobase3_usepa[('Benzo-[k]-fluoranthene','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Benzo[k]fluoranthene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [36]:
exiobase3_usepa[('Benzo-[a]-pyrene','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Benzo[a]pyrene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [37]:
exiobase3_usepa[('PCB','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Polychlorinated biphenyls')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [38]:
exiobase3_usepa[('Formaldehyd','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Formaldehyde')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [39]:
exiobase3_usepa[('PM2.5','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Particulate matter, ≤ 2.5μm')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [40]:
exiobase3_usepa[('1,3 Butadiene','air')] = ef_usepa.loc[(ef_usepa.Flowable=='1,3-Butadiene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [41]:
exiobase3_usepa[('Indeno-[1,2,3-cd]-pyrene','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Indeno[1,2,3-cd]pyrene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [42]:
exiobase3_usepa[('HCB','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Hexachlorobenzene')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [43]:
exiobase3_usepa[('BOD','water')] = ef_usepa.loc[(ef_usepa.Flowable=='Biological Oxygen Demand')&(ef_usepa.Context=='emission/water'),'Flow UUID'].values

In [44]:
exiobase3_usepa[('N','water')] = ef_usepa.loc[(ef_usepa.Flowable=='Nitrogen')&(ef_usepa.Context=='emission/water'),'Flow UUID'].values

In [45]:
exiobase3_usepa[('N','soil')] = ef_usepa.loc[(ef_usepa.Flowable=='Nitrogen')&(ef_usepa.Context=='emission/ground'),'Flow UUID'].values

In [46]:
exiobase3_usepa[('P','water')] = ef_usepa.loc[(ef_usepa.Flowable=='Phosphorus')&(ef_usepa.Context=='emission/water'),'Flow UUID'].values

In [47]:
exiobase3_usepa[('CFCs', 'air')] = ef_usepa.loc[ef_usepa.Flowable.str.startswith('CFC-')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [48]:
exiobase3_usepa[('HCFCs', 'air')] = ef_usepa.loc[ef_usepa.Flowable.str.startswith('HCFC-')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [49]:
exiobase3_usepa[('HCH','air')] = ef_usepa.loc[(ef_usepa.Flowable.str.endswith('Hexachlorocyclohexane'))&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [50]:
# partial match
exiobase3_usepa[('PCDD/F (dioxins and furans)','air')] = ef_usepa.loc[(ef_usepa.Flowable=='Dioxins')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [51]:
# partial match
exiobase3_usepa[('HCH','air')] = ef_usepa.loc[(ef_usepa.Flowable.str.endswith('Hexachlorocyclohexane'))&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [52]:
# partial match
exiobase3_usepa[('PAH (total of 4 components, sum of EM_AIR.43, 45, 46, 47)','air')] =\
ef_usepa.loc[(ef_usepa.Flowable=='PAH/POM, unspecified')&(ef_usepa.Context=='emission/air'),'Flow UUID'].values

In [53]:
ef_usepa.loc[ef_usepa.Flowable.str.lower().str.contains('P')&(ef_usepa.Context=='emission/water'),:]

Unnamed: 0,Flowable,CAS No,Formula,Synonyms,Unit,Class,External Reference,Preferred,Context,Flow UUID,AltUnit,AltUnitConversionFactor


In [54]:
ef_usepa.loc[ef_usepa.Flowable.str.lower().str.contains('phosphorus')&(ef_usepa.Context=='emission/water'),:]

Unnamed: 0,Flowable,CAS No,Formula,Synonyms,Unit,Class,External Reference,Preferred,Context,Flow UUID,AltUnit,AltUnitConversionFactor
205967,Phosphorus,7723-14-0,P,,kg,Chemicals,https://ofmpub.epa.gov/sor_internet/registry/s...,0,emission/water,1733622b-b54a-3202-8e58-4670a96ef6b3,,
217631,Phosphorus pentoxide,1314-56-3,O10P4,Phosphorus oxide (P2O5),kg,Chemicals,https://ofmpub.epa.gov/sor_internet/registry/s...,0,emission/water,2d9e33c6-c46b-3a9e-a869-ae787ff2dc85,,
221663,Phosphorus-32,14596-37-3,-,"Phosphorus, isotope of mass 32",kg,Chemicals,https://ofmpub.epa.gov/sor_internet/registry/s...,0,emission/water,c86eae04-e6a9-3c8a-be9e-1878b4a12fdd,,


check because here are emissions to non-air too!

In [55]:
for (substance,archetype),equivalent in exiobase3_usepa.items():
    if equivalent is None:
        print(substance,archetype)


Hexabr.-biph. air
PBDEs air
TSP (total suspended particulate) air
other emissions undef
Other emissions nec air


Polychlorinated dibenzo(p)dioxin and furan (PCDD/F)

Hexabr.-biph appears to be Hexabromobiphenyl but there's no inventories about this

In [56]:
exiobase3_usepa_known = {k:v for k,v in exiobase3_usepa.items() if v is not None}

In [57]:
single_values = {k:v[0] for k,v in exiobase3_usepa_known.items() if len(v)==1}

In [58]:
exiobase3_usepa_known.update(single_values)

In [59]:
for (substance,compartment),uuid in exiobase3_usepa_known.items():
    
    if isinstance(uuid,str):
        toprint_list=[substance,ef_usepa.loc[ef_usepa['Flow UUID']==uuid,'Flowable'].values[0],compartment,ef_usepa.loc[ef_usepa['Flow UUID']==uuid,'Context'].values[0]]
        print(' - '.join(toprint_list))
    

Carbon dioxide, fossil - Carbon dioxide - air - emission/air
N2O - Nitrous oxide - air - emission/air
CH4 - Methane - air - emission/air
HFCs - Hydrofluorocarbons - air - emission/air
PFCs - Perfluorocarbons - air - emission/air
SF6 - Sulfur hexafluoride - air - emission/air
NOX - Nitrogen oxides - air - emission/air
SOx - Sulfur oxides - air - emission/air
NH3 - Ammonia - air - emission/air
NMVOC - Volatile organic compounds - air - emission/air
CO - Carbon monoxide - air - emission/air
Pb - Lead - air - emission/air
Cd - Cadmium - air - emission/air
Hg - Mercury(II) - air - emission/air
As - Arsenic - air - emission/air
Cr - Chromium - air - emission/air
Cu - Copper - air - emission/air
Ni - Nickel - air - emission/air
Se - Selenium - air - emission/air
Zn - Zinc - air - emission/air
Aldrin - Aldrin - air - emission/air
Chlordane - Chlordane - air - emission/air
Chlordecone - Chlordecone - air - emission/air
Dieldrin - Dieldrin - air - emission/air
Endrin - Endrin - air - emission/ai

zut, there is `Hg - Mercury(II) - soil - emission/air` and other metals

now reverse and unpack

In [60]:
usepa_2_exiobase= {}
for flow,uuids in exiobase3_usepa_known.items():
    
    if isinstance(uuids,str):
        usepa_2_exiobase[uuids]=flow
    else:
        for uuid in uuids:
            usepa_2_exiobase[uuid]=flow

In [61]:
correspondence = pd.Series(usepa_2_exiobase).to_frame('exiobase 3 (flow,context)')

In [62]:
correspondence.index.name='USEPA uuid'

In [63]:
correspondence.sample()

Unnamed: 0_level_0,"exiobase 3 (flow,context)"
USEPA uuid,Unnamed: 1_level_1
cfee0524-7ad6-300b-b050-6249135a2492,"(N2O, air)"


In [64]:
correspondence.to_csv('../correspondence_tables/data/intermediate/lcacommons_exiobase_ef.csv')

***

## Resources
all the resources have at least one non-zero value

In [65]:
exiobase_classification_d =pd.read_excel('../correspondence_tables/data/raw/Classifications_v_3_3_17.xlsx',sheet_name=None)

In [66]:
[k for k in exiobase_classification_d.keys()]

['Resources',
 'Land',
 'Emissions',
 'waste',
 'Crop_residues',
 'Country',
 'Activities',
 'Products_HSUTs',
 'Products_HIOT',
 'Final_demand',
 'Correspondence_products']

In [67]:
exiobase_resources = exiobase_classification_d['Resources']

In [68]:
exiobase_resources.sample(5)

Unnamed: 0,Resource name,Unit
2,Building stones,tonne
18,Salt,tonne
28,Anthracite,tonne
8,Inland waters fish catch,tonne
22,Uranium and thorium ores,tonne


In [69]:
exiobase3_usepa_resources={}

for i,row in exiobase_resources.iterrows():
    exiobase3_usepa_resources[row['Resource name']]=None

In [71]:
flowable_list = list(ef_usepa['Flowable'].unique())

In [72]:
exiobase3_usepa_resources['Slate'] = ef_usepa.loc[(ef_usepa['Flowable']=='Slate')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [73]:
# there is only natural gas in air, which is a bit strange
exiobase3_usepa_resources['Natural gas'] = ef_usepa.loc[(ef_usepa['Flowable']=='Natural gas')&(ef_usepa.Context=='resource/air'),'Flow UUID'].values

In [74]:
exiobase3_usepa_resources['Peat'] = ef_usepa.loc[(ef_usepa['Flowable']=='Peat')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [75]:
exiobase3_usepa_resources['Peat'] = ef_usepa.loc[(ef_usepa['Flowable']=='Peat')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

there seems to be Oxygen just as an emission, not a resource.

In [76]:
#exiobase3_usepa_resources['Oxygen'] = 
ef_usepa.loc[(ef_usepa['Flowable']=='Oxygen')&(ef_usepa.Context=='emission/ground'),:]

Unnamed: 0,Flowable,CAS No,Formula,Synonyms,Unit,Class,External Reference,Preferred,Context,Flow UUID,AltUnit,AltUnitConversionFactor
144615,Oxygen,7782-44-7,O2,,kg,Chemicals,https://ofmpub.epa.gov/sor_internet/registry/s...,0,emission/ground,2ebb9865-03df-3f23-bf37-bdfcc29024cd,,


In [77]:
# close match
exiobase3_usepa_resources['Bauxite and aluminium ores'] =\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('bauxite'))&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [78]:
exiobase3_usepa_resources['Copper ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Copper ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [79]:
exiobase3_usepa_resources['Gold ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Gold ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [80]:
exiobase3_usepa_resources['Iron ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Iron ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [81]:
exiobase3_usepa_resources['Lead ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Lead ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [82]:
exiobase3_usepa_resources['Nickel ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Nickel ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [83]:
exiobase3_usepa_resources['Tin ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Tin ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [84]:
exiobase3_usepa_resources['Zinc ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Zinc ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [85]:
exiobase3_usepa_resources['Silver ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Silver ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [86]:
# partial match
exiobase3_usepa_resources['Uranium and thorium ores'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Uranium ore')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [87]:
exiobase3_usepa_resources['Sub-bituminous coal'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Coal, sub-bituminous')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [88]:
exiobase3_usepa_resources['Anthracite'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Coal, anthracite')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [89]:
# partial match
exiobase3_usepa_resources['Lignite/brown coal'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Coal, lignite')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [90]:
# partial match ?
exiobase3_usepa_resources['Other bituminous coal'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Coal, bituminous')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [91]:
exiobase3_usepa_resources['Building stones'] =\
ef_usepa.loc[(ef_usepa['Flowable']=='Stone')&(ef_usepa.Context=='resource/ground'),'Flow UUID'].values

In [92]:
c1 = (ef_usepa['Flowable']=='Limestone')
c2 = (ef_usepa['Flowable']=='Gypsum')
c3 = (ef_usepa['Flowable']=='Chalk')
c4 = (ef_usepa['Flowable']=='Dolomite')
cground= (ef_usepa.Context=='resource/ground')

In [93]:
exiobase3_usepa_resources['Limestone, gypsum, chalk, dolomite'] =\
ef_usepa.loc[(c1|c2|c3|c4)&cground,'Flow UUID'].values

In [94]:
ef_usepa.loc[(ef_usepa['Flowable']=='Limestone')&(ef_usepa.Context=='resource/ground'),'Flow UUID']

228339    13f50f4e-6aee-34f6-b3af-20503373f75d
Name: Flow UUID, dtype: object

In [95]:
ckaolin = (ef_usepa['Flowable']=='Kaoline')
cclay = (ef_usepa['Flowable']=='Clay')

exiobase3_usepa_resources['Clays and kaolin'] = ef_usepa.loc[(ckaolin|cclay)&cground,'Flow UUID'].values

In [96]:
cgravel = (ef_usepa['Flowable']=='Gravel')
csand = (ef_usepa['Flowable']=='Sand')
exiobase3_usepa_resources['Gravel and sand'] = ef_usepa.loc[(cgravel|csand)&cground,'Flow UUID'].values

In [135]:
exiobase3_usepa_resources['Natural gas'] = \
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('natural gas'))&(ef_usepa.Context=='resource/air/subterranean'),'Flow UUID'].values

In [136]:
exiobase3_usepa_resources['Carbon dioxide, in air'] = \
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('Carbon dioxide, in air'))&(ef_usepa.Context=='resource/air'),'Flow UUID'].values

In [114]:
# kind of close match. PGM is platinum group metals
exiobase3_usepa_resources['PGM ores']=\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('platinum ore')),'Flow UUID'].values

In [118]:
exiobase3_usepa_resources['Crude oil']=\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('crude oil')),'Flow UUID'].values

In [138]:
# guessing here
exiobase3_usepa_resources['Salt']=\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('sodium chloride'))&(ef_usepa.Context.str.startswith('resource')),'Flow UUID'].values

coking grade-coal seems to be bitiminous mainly 

In [146]:
# close relationship
exiobase3_usepa_resources['Coking coal']=\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('coal, bituminous'))&(ef_usepa.Context.str.startswith('resource')),'Flow UUID'].values

In [150]:
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('water'))&(ef_usepa.Context.str.startswith('resource')),'Context'].unique()

array(['resource/air', 'resource/air/subterranean',
       'resource/air/troposphere', 'resource/water/subterranean',
       'resource/water/subterranean/fresh water body',
       'resource/water/subterranean/brackish water body',
       'resource/water/subterranean/saline water body',
       'resource/water/subterranean/fresh water body/confined aquifer',
       'resource/water/subterranean/brackish water body/confined aquifer',
       'resource/water/subterranean/saline water body/confined aquifer',
       'resource/water/subterranean/fresh water body/unconfined aquifer',
       'resource/water/subterranean/brackish water body/unconfined aquifer',
       'resource/water/subterranean/saline water body/unconfined aquifer',
       'resource/water', 'resource/water/fresh water body',
       'resource/water/brackish water body',
       'resource/water/saline water body',
       'resource/water/fresh water body/lake',
       'resource/water/fresh water body/river',
       'resource/water/b

In [156]:
# water cons and water withdrawal ? what is the difference ?
exiobase3_usepa_resources['Water Consumption Blue']=\
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('water'))&(ef_usepa.Context.str.startswith('resource/water/subterranean')),'Flow UUID'].values

In [166]:
ef_usepa.loc[(ef_usepa.Class=='Biological'),'Flowable'].unique()

array(['Biomass', 'Hardwood', 'Softwood', 'Wood', 'Bacillus subtilis',
       'Bacillus thuringiensis var. aizawai',
       'Bacillus thuringiensis var. israeliensis',
       'Bacillus thuringiensis var. kurstaki', 'Entomopathogenic virus'],
      dtype=object)

In [158]:
[k for k,v in exiobase3_usepa_resources.items() if (v is None)]

['Aquatic plants',
 'Chemical and fertilizer minerals',
 'Inland waters fish catch',
 'Marine fish catch',
 'Other (e.g. Aquatic mammals)',
 'Other minerals',
 'Other non-ferrous metal ores',
 'Natural gas liquids',
 'Minerals nec. (incl nitrogen and hydrogen)',
 'Oxygen',
 'Water Consumption Green',
 'Water Withdrawal Blue']

the rest looks pretty hard to find

# Land

In [181]:
exiobase3_usepa_land={}

In [182]:
exiobase_classification_d['Land']

Unnamed: 0,Land type,Unit
0,"Land use, arable land",ha
1,"Land use, forest land",ha
2,"Land use, grassland",ha
3,"Land use, other land for woodfuel",ha
4,"Land use, infrastructure land",ha


In [183]:
ef_usepa.loc[(ef_usepa.Class=='Land'),:].sample(2)

Unnamed: 0,Flowable,CAS No,Formula,Synonyms,Unit,Class,External Reference,Preferred,Context,Flow UUID,AltUnit,AltUnitConversionFactor
238158,Land use,,,,m2*a,Land,,1,resource/water/fresh water body/river/urban,0e3e8b33-cb94-334f-8dd0-09cb08109d16,,
238143,Land use,,,,m2*a,Land,,0,resource/ground/human-dominated/rural,4b286b72-43b8-30c4-8b46-fa100dadd3b2,,


In [184]:
ef_usepa.loc[(ef_usepa['Flowable'].str.lower().str.contains('land')),'Context'].unique()

array(['resource/ground', 'resource/ground/subterranean',
       'resource/ground/human-dominated',
       'resource/ground/human-dominated/industrial',
       'resource/ground/human-dominated/commercial',
       'resource/ground/human-dominated/agricultural',
       'resource/ground/human-dominated/residential',
       'resource/ground/terrestrial/wetland',
       'resource/ground/terrestrial/barren land',
       'resource/ground/terrestrial/snow and ice',
       'resource/ground/terrestrial/grassland',
       'resource/ground/terrestrial/shrubland',
       'resource/ground/terrestrial/forest',
       'resource/ground/human-dominated/urban',
       'resource/ground/human-dominated/rural',
       'resource/ground/human-dominated/industrial/urban',
       'resource/ground/human-dominated/industrial/rural',
       'resource/ground/human-dominated/commercial/urban',
       'resource/ground/human-dominated/commercial/rural',
       'resource/ground/human-dominated/agricultural/urban',
    

In [187]:
exiobase3_usepa_land['Land use, arable land']=\
ef_usepa.loc[(ef_usepa.Class=='Land')&(ef_usepa.Context.str.contains('human-dominated/agricultural')),'Flow UUID'].values