Dependencies

In [1]:
#from functions_v2 import*
from methods import MethodFinder

import brightway2 as bw
import bw2data as bd
import bw2analyzer as ba
import bw2calc as bc

#reduce?
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import dopo
import activity_filter
from activity_filter import generate_sets_from_filters

import copy

Setup up bw project and databases

In [2]:
bd.projects.set_current("premise-validation-try1")
bw.bw2setup()

bio3=bw.Database('biosphere3')
ei39=bw.Database('ecoinvent 3.9.1 cutoff')
ei39SSP2=bw.Database('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27')

Biosphere database already present!!! No setup is needed


Setup method dictionary

In [3]:
#Get Methods
finder=MethodFinder()

finder.find_and_create_method(criteria=['IPCC', '2013', 'GWP100'], exclude=['no LT'])
finder.find_and_create_method(criteria=['EN15804','Cumulative', 'non-renewable' ])
# finder.find_and_create_method(criteria=['land occupation','selected'])
# finder.find_and_create_method(criteria=['EN15804','fresh water'])

method_dict=finder.get_all_methods()
method_dict

{'method_1': {'object': Brightway2 Method: IPCC 2013: climate change: global warming potential (GWP100),
  'method name': ('IPCC 2013',
   'climate change',
   'global warming potential (GWP100)'),
  'short name': 'global warming potential (GWP100)',
  'unit': 'kg CO2-Eq'},
 'method_2': {'object': Brightway2 Method: EN15804: inventory indicators ISO21930: Cumulative Energy Demand - non-renewable energy resources,
  'method name': ('EN15804',
   'inventory indicators ISO21930',
   'Cumulative Energy Demand - non-renewable energy resources'),
  'short name': 'Cumulative Energy Demand - non-renewable energy resources',
  'unit': 'megajoule'}}

Define sectors & setup databse dictionaries containing sector activity lists

In [4]:
cement = 'cement_small.yaml'
electricity = 'electricity_small.yaml'
fuels= 'fuels_small.yaml'
steel = 'steel_small.yaml'
transport = 'transport_small.yaml'

In [5]:
files_dict={}
files_dict['Cement']={'yaml': 'yamls\cement_small.yaml',
                      'yaml identifier': 'Cement'}
#files_dict['Electricity']= {'yaml':'yamls\electricity_small.yaml',
                            #'yaml identifier': 'Electricity'} #yaml identifier is the name of the filter in the yaml file, in the first line.
files_dict['Steel']={'yaml':'yamls\steel_small.yaml',
                     'yaml identifier': 'Steel'}
files_dict

{'Cement': {'yaml': 'yamls\\cement_small.yaml', 'yaml identifier': 'Cement'},
 'Steel': {'yaml': 'yamls\\steel_small.yaml', 'yaml identifier': 'Steel'}}

In [6]:
def process_yaml_files(files_dict, database):
    '''
    - Runs through the files_dict reading the defined filters in the yaml files.
    - With another function a list that contains the filtered activities is created from the chosen database.
    - This activity list is saved within the corresponding key (sector) in the dictionary main_dict which is based on the files_dict.

    :param files_dict: dictionary of dictionaries. It should hold the yaml file path and the title in the first row of the yaml file. 
                        Like so: files_dict['Cement']={'yaml': 'yamls\cement_small.yaml', 'yaml identifier': 'Cement'}
    :param database: premise or ecoinvent database of choice.

    It returns an updated dictionary which contains filtered activity lists for each sector.
    '''

    main_dict = copy.deepcopy(files_dict)

    for key, value in main_dict.items():
        yaml_file = value['yaml']
        yaml_identifier = value['yaml identifier']
        
        #debug
        print(f"Processing {key} with database {database.name}")
        
        # Generate the sector activities
        sector_activities = generate_sets_from_filters(yaml_file, database)
        
        #debug
        print(f"Activities for {key}:")
        for activity in sector_activities[yaml_identifier]:
            print(f"  {activity.key}")

        # Convert the set of activities to a list
        activities_list = list(sector_activities[yaml_identifier])
        
        # Add to the sectors_dict
        main_dict[key]['activities'] = activities_list
        
    return main_dict

In [7]:
premise_dict = process_yaml_files(files_dict=files_dict, database=ei39SSP2)

Processing Cement with database ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27
Activities for Cement:
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', 'df49e8f525497f2fbd56bcdc80ff0cde')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', 'a3c2064d83411f7963af550c04c869a1')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', '3c16b45db40210cd97de6574b2f47aaf')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', 'fcb666edf2a01467e555eeff5b4a5bbb')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', '86841f8c7ee2668f244d3b8e34f41932')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', 'f8b84f45f50d3bd7ff4feaabdb493f6a')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', '36a53c174f34e672bc15b7e55563685e')
Processing Steel with database ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27
Activities for Steel:
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', '2baa0deb3adc89dfe8cb89d5e078ba8d')
  ('ei_cutoff_3.9_image_SSP2-RCP19_2050 2024-06-27', 'af6bd1221fc0206541fbaf481397bf0d

In [8]:
eco_dict = process_yaml_files(files_dict=files_dict, database=ei39)

Processing Cement with database ecoinvent 3.9.1 cutoff
Activities for Cement:
  ('ecoinvent 3.9.1 cutoff', 'df49e8f525497f2fbd56bcdc80ff0cde')
  ('ecoinvent 3.9.1 cutoff', 'f8b84f45f50d3bd7ff4feaabdb493f6a')
  ('ecoinvent 3.9.1 cutoff', 'a3c2064d83411f7963af550c04c869a1')
  ('ecoinvent 3.9.1 cutoff', 'fcb666edf2a01467e555eeff5b4a5bbb')
  ('ecoinvent 3.9.1 cutoff', '86841f8c7ee2668f244d3b8e34f41932')
  ('ecoinvent 3.9.1 cutoff', '3c16b45db40210cd97de6574b2f47aaf')
  ('ecoinvent 3.9.1 cutoff', '36a53c174f34e672bc15b7e55563685e')
Processing Steel with database ecoinvent 3.9.1 cutoff
Activities for Steel:
  ('ecoinvent 3.9.1 cutoff', '2baa0deb3adc89dfe8cb89d5e078ba8d')
  ('ecoinvent 3.9.1 cutoff', '18b0dcf01dd401e1549b3796e3786213')
  ('ecoinvent 3.9.1 cutoff', '1dffacc9e0ca08fb55c6b780d7e677dc')
  ('ecoinvent 3.9.1 cutoff', 'af6bd1221fc0206541fbaf481397bf0d')


In [None]:
keys_prem = [activity.key for activity in premise_dict['Cement']['activities']]
keys_eco = [activity.key for activity in eco_dict['Cement']['activities']]

In [None]:
keys_prem

In [None]:
keys_eco

In [None]:
premise_dict['Cement']['activities'][0].key

In [None]:
eco_dict['Cement']['activities'][0].key

Calculate lca scores for each sectors activities, store them each in a dataframe

In [9]:
import pandas as pd

def lca_scores_compare(database_dict, method_dict):
    # Dictionary to store DataFrames for each sector
    sector_dataframes = {}

    # Labels for the DataFrame columns
    labels = [
        "activity",
        "activity key",
        "reference product",
        "location",
        "method name",
        "method unit",
        "total",
    ]

    # Loop through each sector in the database_dict
    for sector, sector_data in database_dict.items():
        # Initialize a dictionary to hold DataFrames for each method in the current sector
        method_dataframes = {}

        # Loop through each method in method_dict
        for meth_key, meth_info in method_dict.items():
            data = []  # Initialize a new list to hold data for the current method
            
            # Extract the 'method name' tuple from the current method info
            method_name = meth_info['method name']
            method_unit = meth_info['unit']

            # Now loop through each activity in the sector
            for act in sector_data['activities']:
                # Ensure the activity is an instance of the expected class
                if not isinstance(act, bd.backends.peewee.proxies.Activity):
                    raise ValueError("`activities` must be an iterable of `Activity` instances")
                
                # Perform LCA calculations
                lca = bw.LCA({act: 1}, method_name)
                lca.lci()
                lca.lcia()
                
                # Collect data for the current activity and method
                data.append([
                    act["name"],
                    act.key,
                    act.get("reference product"),
                    act.get("location", "")[:25],
                    method_name,
                    method_unit,
                    lca.score,
                ])
            
            # Convert the data list to a DataFrame and store it in the sector's dictionary
            method_dataframes[meth_key] = pd.DataFrame(data, columns=labels)

        # Store the method_dataframes dictionary in the sector_dataframes dictionary
        sector_dataframes[sector] = method_dataframes

    # Now `sector_dataframes` is a dictionary where each key is a sector, and the value is another dictionary with method names and their corresponding DataFrames
    return sector_dataframes


Ecoinvent scores

In [10]:
eco_scores=lca_scores_compare(eco_dict,method_dict)

In [None]:
eco_scores['Cement']

In [None]:
eco_scores['Cement']['method_1']

In [None]:
eco_scores['Steel']['method_2']

Premise scores

In [11]:
premise_scores=lca_scores_compare(premise_dict,method_dict) #dictionary containing sectors = keys and dataframes by method = values

In [None]:
premise_scores['Steel']['method_1'] #what is happening here?, something wrong with sector!

Relative changes

In [12]:
import pandas as pd

def relative_changes_df(ecoinvent_scores, premise_scores):

    dictionary = {}

    # Iterate over sectors
    for sector_key in ecoinvent_scores:
        # Initialize the sector key in the output dictionary
        if sector_key not in dictionary:
            dictionary[sector_key] = {}

        # Iterate over methods within the sector
        for method_key in ecoinvent_scores[sector_key]:
            # Check if the method_key exists in both dictionaries to avoid KeyError
            if method_key in premise_scores.get(sector_key, {}):
                # Get the corresponding DataFrames
                df_ei = ecoinvent_scores[sector_key][method_key]
                df_premise = premise_scores[sector_key][method_key]

                #print(df_ei['activity key'])
                #print(df_premise)

                # Split the 'activity key' to extract the second part
                df_ei['activity_code'] = df_ei['activity key'].apply(lambda x: x[1])  # Access the second element of the tuple
                df_premise['activity_code'] = df_premise['activity key'].apply(lambda x: x[1])

                # Merge the two dataframes based on the activity code and method name
                merged_df = pd.merge(df_ei, df_premise, on=['activity_code', 'method name'], suffixes=('_ei', '_premise'))

                # Calculate the relative change
                merged_df['relative_change'] = ((merged_df['total_premise'] - merged_df['total_ei']) / merged_df['total_ei']) * 100

                # Store the result in the dictionary
                dictionary[sector_key][method_key] = merged_df

    return dictionary


In [13]:
relative_dict = relative_changes_df(eco_scores, premise_scores)

In [None]:
relative_dict['Cement']['method_1']

In [29]:
from dopo_excel import add_sector_marker

# Prepare to save each LCA score table to a different worksheet in the same Excel file
excel_file = 'compare_tables_v7.xlsx'
column_positions = {} #stores the indexes of columns for plotting
with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
    for sector in relative_dict.keys():
        relative_changes = relative_dict[sector]
        
        for method, table in relative_changes.items():
            # Create a DataFrame for the current LCA score table
            df = pd.DataFrame(table)

            # Add sector marker
            df = add_sector_marker(df, sector) #!! ADJUST POSITION            

            # Sort the DataFrame by 'relative_change' from largest negative to largest positive
            df = df.sort_values(by='relative_change', ascending=False)

             # Add a 'rank' column based on the 'relative_change', ranking from most negative to least negative
            df['rank'] = df['relative_change'].rank(ascending=False, method='dense').astype(int)
 
            # Get the index values of columns
            columns_of_interest = ["rank", "relative_change", "method", "method unit", ]
            positions = {col: df.columns.get_loc(col) for col in columns_of_interest if col in df.columns}
            column_positions[method] = positions

            # Generate worksheet name
            worksheet_name = f"{sector}_{method}"
            if len(worksheet_name) > 31:
                worksheet_name = worksheet_name[:31]

            # Save the DataFrame to the Excel file in a new worksheet
            df.to_excel(writer, sheet_name=worksheet_name, index=False)
print(column_positions)

{'method_1': {'rank': 16, 'relative_change': 14}, 'method_2': {'rank': 16, 'relative_change': 14}}


Plots

In [18]:
from openpyxl import load_workbook

def categorize_sheets_by_sector(file_path):
    # Load the workbook
    workbook = load_workbook(filename=file_path, read_only=True)
    
    # Initialize a dictionary to hold sectors and their corresponding sheet names
    worksheet_dict = {}
    
    # Iterate over all sheet names in the workbook
    for sheet_name in workbook.sheetnames:
        # Split the sheet name to extract the sector (assumes sector is the first part)
        sector = sheet_name.split('_')[0]
        
        # Add the sheet name to the corresponding sector in the dictionary
        if sector in worksheet_dict:
            worksheet_dict[sector].append(sheet_name)
        else:
            worksheet_dict[sector] = [sheet_name]
    
    return worksheet_dict

In [33]:
import pandas as pd
import openpyxl
from openpyxl.chart import BarChart, Reference

def compare_database_charts(filename, worksheet_dict, index_positions=None):

    # Load the workbook and select the sheet
    wb = openpyxl.load_workbook(filename)

    # Iterate over each sector and its associated worksheets
    for sector, worksheet_names in worksheet_dict.items():
        
        # Create or get the chart sheet for the current sector
        chart_sheet_name = f"{sector}_charts"
        if chart_sheet_name in wb.sheetnames:
            ws_charts = wb[chart_sheet_name]
        else:
            ws_charts = wb.create_sheet(chart_sheet_name)  
        
        # Initial position for the first chart
        current_row = 1  # Start placing charts from row 1
        current_col = 1  # Start placing charts from column 1
        chart_height = 30  # Number of rows a chart occupies
        chart_width = 12   # Number of columns a chart occupies
        charts_per_row = 2  # Number of charts per row
    
        # Iterate over each worksheet name in the current sector
        for i, worksheet_name in enumerate(worksheet_names):
            ws = wb[worksheet_name]

            # # Find the key in index_positions that contains worksheet_name
            # matching_key = None
            # for key in index_positions.keys():
            #     if worksheet_name in key:
            #         matching_key = key
            #         break

            # if not matching_key:
            #     print(f"Warning: No matching key found for worksheet '{worksheet_name}'. Skipping...")
            #     continue

            # Retrieve the column positions from the index_positions dictionary
            # positions = index_positions[matching_key]

            # Find min_row, max_row and max_column
            min_col_data = 15 #positions.get("relative_change", None) + 1
            rank_col = 17#positions.get("rank", None) + 1
            method_col = 5#positions.get("method", None) + 1
            method_unit_col = 6#positions.get("method unit", None) + 1

            # Create a bar chart
            chart = BarChart()
            chart.type="bar"
            chart.style=2
            chart.overlap= 100
            chart.title = "Relative Change in LCA Scores"
            chart.x_axis.title = "Activity"
            chart.y_axis.title = "Relative Change (%)"

            # Set the data for the chart
            data = Reference(ws, min_col=min_col_data, min_row=1, max_row=ws.max_row)
            categories = Reference(ws, min_col=rank_col, min_row=2, max_row=ws.max_row)
            chart.add_data(data, titles_from_data=True)
            chart.set_categories(categories)

            # Modify each series in the chart to disable the inversion of negative values 
            for series in chart.series:
                series.invertIfNegative = False

            # x-axis tickes
            chart.x_axis.tickLblPos = "low"
            chart.x_axis.majorGridlines = None 
            chart.x_axis.tickMarkSkip = 1  # Show all tick marks, this adresses the tick lines 
            chart.x_axis.tickLblSkip = 1  # Show all labels, doesnt work
            chart.x_axis.delete = False  # Ensure axis is not deleted

            # Chart titles
            method_value = ws.cell(row=2, column=method_col).value
            chart.title = f"{sector} {method_value} database lca scores relative changes"

            method_unit_value = ws.cell(row=2, column=method_unit_col).value
            chart.x_axis.title = f"{method_unit_value}"
            
            chart.y_axis.title = 'relative change (%)' #its switched..... should be x_axis

            # Avoid overlap
            chart.title.overlay = False
            chart.x_axis.title.overlay = False
            chart.y_axis.title.overlay = False 
            chart.legend.overlay = False

            # Adjust chart dimensions
            chart.width = 20  # Width of the chart
            chart.height = 14  # Height of the chart

            # Calculate the position for this chart
            position = ws_charts.cell(row=current_row, column=current_col).coordinate
            ws_charts.add_chart(chart, position)

            # Update position for the next chart
            current_col += chart_width +1 
            if (i + 1) % charts_per_row == 0:  # Move to the next row after placing `charts_per_row` charts
                current_row += chart_height +1
                current_col = 1  # Reset to the first column

        # Move the chart sheet to the first position
        wb._sheets.remove(ws_charts)
        wb._sheets.insert(0, ws_charts)

            # Add the chart to a new worksheet
            # new_sheet = wb.create_sheet(title="LCA Chart")
            # new_sheet.add_chart(chart, "A1")

    # Save the workbook
    wb.save(filename)

    print(f"Results and chart saved to {filename}")

In [26]:
categorize_sheets_by_sector('compare_tables_v4.xlsx')

{'Cement': ['Cement_method_1', 'Cement_method_2'],
 'Steel': ['Steel_method_1', 'Steel_method_2'],
 'LCA Chart': ['LCA Chart']}

In [32]:
compare_database_charts('compare_tables_v7.xlsx',categorize_sheets_by_sector('compare_tables_v7.xlsx')) #index_positions=column_positions)

Results and chart saved to compare_tables_v7.xlsx
