In [42]:
import pandas as pd
from pathlib import Path
from datetime import datetime, timezone

In [7]:
def get_list(version):
    return pd.read_csv(
        Path.cwd().parent / 
        "Elementary flow lists" / 
        "outputs" / 
        f"ecoinvent-{version}" / 
        f"ecoinvent-{version}.csv"
    )

In [9]:
flows_36 = get_list("3.6")
flows_37 = get_list("3.7")
flows_38 = get_list("3.8")
flows_39 = get_list("3.9")

In [10]:
flows_36

Unnamed: 0,Flowable,CAS No,Formula,Synonyms,Unit,Class,ExternalReference,Preferred,Context,Flow UUID,AltUnit,Second CAS
0,"1,3-Dioxolan-2-one",0000096-49-1,,,kg,chemical,,,water|unspecified,5b7d620e-2238-5ec9-888a-6999218b6974,,96-49-1
1,"1,4-Butanediol",0000110-63-4,,Butylene glycol,kg,chemical,,,"air|low population density, long-term",d21da01e-f96f-4db5-9746-7b70db8a1f2c,,110-63-4|36684-44-3|28324-25-6
2,"1,4-Butanediol",0000110-63-4,,Butylene glycol,kg,chemical,,,air|lower stratosphere + upper troposphere,90653a29-2f53-4b1b-88bd-9ae2fe64a8d6,,110-63-4|36684-44-3|28324-25-6
3,"1,4-Butanediol",0000110-63-4,,Butylene glycol,kg,chemical,,,air|non-urban air or from high stacks,83bafcf1-2f2e-4a32-89a0-f1f16ca10626,,110-63-4|36684-44-3|28324-25-6
4,"1,4-Butanediol",0000110-63-4,,Butylene glycol,kg,chemical,,,air|unspecified,09db39be-d9a6-4fc3-8d25-1f80b23e9131,,110-63-4|36684-44-3|28324-25-6
...,...,...,...,...,...,...,...,...,...,...,...,...
4313,t-Butylamine,0000075-64-9,,,kg,chemical,,,water|surface water,a66849fd-060a-40b5-bd9d-04caa632b75c,,75-64-9
4314,t-Butylamine,0000075-64-9,,,kg,chemical,,,water|unspecified,365dffb2-9e7a-44fa-a512-8c851fc7094c,,75-64-9
4315,tau-Fluvalinate,0102851-06-9,,"(RS)-α-cyano-3-phenoxybenzyl N-(2-chloro-α,α,α...",kg,chemical,,,soil|agricultural,92681c17-2514-4151-aabf-baee3fd8e5f5,,102851-06-9
4316,"venting of argon, crude, liquid",,,,kg,,,,social|unspecified,73dbaeb8-6b92-490d-859c-88e8148c71c4,,


In [54]:
def merge_dataframes(source_version, target_version):
    source = get_list(source_version)
    target = get_list(target_version)
    source_list_name = f"ecoinvent-{source_version}.csv"
    target_list_name = f"ecoinvent-{target_version}.csv"
    
    merged = pd.merge(source, target, how='inner', on=['Flow UUID'], suffixes=('_source', '_target'))

    # Need to duplicate this column instead of renaming column
    merged["SourceFlowUUID"] = merged['Flow UUID']
    
    # Some flows won't match on UUID as the UUID changed
    included_uuids = merged['Flow UUID'].unique()

    # Get column names correct
    mapping = {
        "Context_source": "SourceFlowContext",
        "Context": "SourceFlowContext",
        "Context_target": "TargetFlowContext",
        "Flow UUID": "TargetFlowUUID",
        "Flow UUID_source": "SourceFlowUUID",
        "Flow UUID_target": "TargetFlowUUID",
        "Flowable": "SourceFlowName",
        "Flowable_source": "SourceFlowName",
        "Flowable_target": "TargetFlowName",
        "Unit_source": "SourceUnit",
        "Unit_target": "TargetUnit",
    }
    merged.rename(columns={k: v for k, v in mapping.items() if k in merged.columns}, inplace=True)
    
    extra = pd.merge(
        source[~source['Flow UUID'].isin(included_uuids)],
        target[~target['Flow UUID'].isin(included_uuids)],
        how='inner',
        on=['Flowable', 'Context'],
        suffixes=('_source', '_target')
    )
    # Need to duplicate this column instead of renaming column
    extra["TargetFlowName"] = extra['Flowable']
    extra["TargetContext"] = extra['Context']
    extra.rename(columns={k: v for k, v in mapping.items() if k in extra.columns}, inplace=True)
    
    left_mask_one = source.merge(
        target, 
        how='left', 
        on=['Flow UUID'], 
        suffixes=('_source', '_target')
    ).merge(
        target, 
        how='left', 
        on=['Flowable', 'Context'], 
        suffixes=('_source', '_target')
    )['Flowable_target'].isnull()
    
    mask_left = 
    missing_left = 
    
    data = pd.concat([merged, extra])

    changed_units = data["SourceUnit"] != data['TargetUnit']
    if changed_units.sum():
        from IPython.display import display
        print("Inconsistent units, fix manually")
        display(data[changed_units])
    
    # Add missing columns
    data['SourceListName'] = source_list_name
    data['TargetListName'] = target_list_name
    data['MatchCondition'] = "="
    data['ConversionFactor'] = 1.0
    data['Mapper'] = "Chris Mutel"
    data['Verifier'] = ""
    data['LastUpdated'] = datetime.now(timezone.utc).astimezone().isoformat()
    data['MemoMapper'] = 'Automated match. Notebook: Map ecoinvent EF lists'
    data['MemoSource'] = ''
    data['MemoTarget'] = ''
    data['MemoVerifier'] = ''

    SPEC_COLUMNS = [
        "SourceListName", "SourceFlowName", "SourceFlowUUID", "SourceFlowContext", "SourceUnit", 
        "MatchCondition", "ConversionFactor", "TargetListName", "TargetFlowName", "TargetFlowUUID", 
        "TargetFlowContext", "TargetUnit", "Mapper", "Verifier", "LastUpdated", "MemoMapper", 
        "MemoVerifier", "MemoSource", "MemoTarget"
    ]
    
    data = data[[col for col in SPEC_COLUMNS if col in data.columns]]
    data.sort_values(by=["SourceFlowName", "SourceFlowContext", "SourceUnit"], inplace=True, ignore_index=True)
    return data

In [60]:
PAIRS = [
    ("3.6", "3.7"),
    ("3.7", "3.8"),
    ("3.8", "3.9"),
]

In [65]:
for x, y in PAIRS:
    output = Path.cwd() / "outputs" / f"ecoinvent-{x}-ecoinvent-{y}"
    output.mkdir(parents=True, exist_ok=True)
    
    df = merge_dataframes(x, y)
    df.to_csv(output / f"ecoinvent-{x}-ecoinvent-{y}.csv", index=False)

Inconsistent units, fix manually


Unnamed: 0,SourceFlowName,CAS No_source,Formula_source,Synonyms_source,SourceUnit,Class_source,ExternalReference_source,Preferred_source,SourceFlowContext,TargetFlowUUID,...,Synonyms_target,TargetUnit,Class_target,ExternalReference_target,Preferred_target,TargetFlowContext,AltUnit_target,Second CAS_target,SourceFlowUUID,TargetContext
1845,"Gas, mine, off-gas, process, coal mining",0008006-14-2,,,m3,fossil/nuclear,,,natural resource|in ground,3ed5f377-344f-423a-b5ec-9a9a1162b944,...,,Sm3,fossil/nuclear,,,natural resource|in ground,,,3ed5f377-344f-423a-b5ec-9a9a1162b944,
1846,"Gas, natural, in ground",0008006-14-2,,,m3,fossil/nuclear,,,natural resource|in ground,7c337428-fb1b-45c7-bbb2-2ee4d29e17ba,...,,Sm3,,,,natural resource|in ground,,,7c337428-fb1b-45c7-bbb2-2ee4d29e17ba,
