In [None]:
# Import required library for file upload in Colab
from google.colab import files

# Ask the user for a file through Colab's upload widget.
print("Click the 'Choose Files' button below to upload your Excel file")
uploaded = files.upload()

if uploaded:
    # Only the first uploaded entry is used, even if several were selected.
    filename = next(iter(uploaded))
    print(f"Successfully uploaded: {filename}")
    print(f"File size: {len(uploaded[filename])} bytes")

    # Extension check is case-insensitive; a mismatch is only a warning.
    if filename.lower().endswith(('.xlsx', '.xls')):
        print("Excel file detected. Ready for processing.")
    else:
        print("Warning: The uploaded file doesn't appear to be an Excel file by extension.")
        print("However, we'll still try to process it as an Excel file.")

    # Tell the user which name to pass to the processing script.
    print(f"\nTo use this file in the main processing script, use the filename: '{filename}'")
else:
    # Nothing came back from the upload widget.
    print("No file was uploaded.")

Click the 'Choose Files' button below to upload your Excel file


Saving Basecase_model5.XLSX to Basecase_model5.XLSX
Successfully uploaded: Basecase_model5.XLSX
File size: 9626 bytes
Excel file detected. Ready for processing.

To use this file in the main processing script, use the filename: 'Basecase_model5.XLSX'


In [None]:
# Import only necessary libraries
import pandas as pd
import os
# Import Google Colab files module if running in Colab
try:
    from google.colab import files
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# Key variables
# Each pair is (column header expected in the Excel sheet, short display label).
# Keeping the pairs together guarantees the two derived lists stay index-aligned.
_process_label_pairs = [
    ('Dilution & Agitation of Thin Stillage', 'Dilution & Agitation'),
    ('Drying', 'Drying'),
    ('Fermentation and Aeration', 'Fermentation'),
    ('Inoculum production for Fermentation', 'Inoculum Preparation'),
    ('Sterilization of Thin Stillage', 'Sterilization'),
    ('Water for dilution of Thin Stillage', 'Water for Dilution'),
    ('RO25%_Organic_Wastewater Treatment for 1 kg', 'Wastewater'),
    ('Water for biomass washing', 'Water for Washing'),
]

# Column headers of the process contributions, in sheet order.
processes = [column for column, _ in _process_label_pairs]

# Display labels, positionally matched to `processes`.
process_names = [label for _, label in _process_label_pairs]

# Impact categories kept for the analysis (matched against the
# 'Impact category' column of the sheet).
selected_categories = [
    'Global warming',
    'Terrestrial acidification',
    'Freshwater eutrophication',
    'Marine eutrophication',
    'Land use',
    'Fossil resource scarcity',
    'Water consumption',
]

# File Processing Functions
def _read_excel_with_fallback(filename, header_row=15):
    """Read the sheet, trying pandas' default engine, then openpyxl, then xlrd.

    Raises RuntimeError (chained to the last underlying error) when every
    engine fails.
    """
    attempt_labels = ("First", "Second")
    last_error = None
    for attempt, engine in enumerate((None, 'openpyxl', 'xlrd')):
        if engine is not None:
            print(f"Trying with {engine} engine...")
        try:
            # engine=None lets pandas choose, same as omitting the argument.
            return pd.read_excel(filename, header=header_row, engine=engine)
        except Exception as err:
            last_error = err
            if attempt < len(attempt_labels):
                print(f"{attempt_labels[attempt]} attempt failed: {err}")

    print("All attempts to read the Excel file failed.")
    print(f"Error details: {last_error}")
    raise RuntimeError("Could not read Excel file with any available engine.") from last_error


def _columns_containing(df, *fragments):
    """Return the string-typed column labels that contain any of `fragments`."""
    return [
        col for col in df.columns
        if isinstance(col, str) and any(fragment in col for fragment in fragments)
    ]


def process_simapro_excel(filename):
    """Load an impact-assessment Excel sheet and derive per-process contributions.

    The sheet is read with row index 15 as the header row (the 15 rows above
    it are skipped). Rows are filtered to `selected_categories`, split into
    'Water consumption' and everything else, and each process column of the
    non-water rows is expressed as a percentage of the row's 'Total'.

    Side effects: prints progress messages and writes CSV files into the
    current working directory (all_impact_categories.csv,
    water_consumption_data.csv, impact_categories_without_water.csv and, when
    percentages could be computed, process_contribution_pct.csv and
    heatmap_data_processes_by_impact.csv). A failure while writing is
    reported but not raised.

    Args:
        filename: Path of the Excel file to read.

    Returns:
        dict with keys 'raw_df', 'selected_impacts', 'water_consumption',
        'other_impacts', 'percent_contribution' and 'heatmap_data'. The last
        two are empty DataFrames when the percentage calculation failed.

    Raises:
        RuntimeError: the file could not be read with any engine.
        ValueError: the sheet is empty or has no 'Impact category' column.
    """
    print(f"Processing file: {filename}")

    df = _read_excel_with_fallback(filename, header_row=15)

    print(f"Initial dataframe shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")

    if df.empty:
        raise ValueError("Dataframe is empty. Please check the file format.")

    if 'Impact category' not in df.columns:
        print(f"Warning: 'Impact category' column not found. Available columns: {df.columns.tolist()}")
        raise ValueError("Required column 'Impact category' not found in the Excel file.")

    # The 4th column is discarded (presumably the product-level aggregate of
    # the export -- TODO confirm). Never discard a column the analysis needs.
    if len(df.columns) > 3:
        column_to_remove = df.columns[3]
        required_columns = {'Impact category', 'Unit', 'Total', *processes}
        if column_to_remove in required_columns:
            print(f"Kept column at position 3 because the analysis needs it: {column_to_remove}")
        else:
            df = df.drop(column_to_remove, axis=1)
            print(f"Removed column: {column_to_remove}")

    # Identify dynamic columns (their exact header text varies between files).
    wastewater_columns = _columns_containing(df, 'Wastewater Treatment for 1 kg')
    wastewater_column = wastewater_columns[0] if wastewater_columns else None
    print(f"Wastewater column identified: {wastewater_column}")

    sodium_columns = _columns_containing(df, 'Sodium hydroxide')
    sodium_column = sodium_columns[0] if sodium_columns else None
    print(f"Sodium hydroxide column identified: {sodium_column}")

    # Known processes plus the dynamic columns, without duplicates.
    dynamic_processes = processes.copy()
    for extra_column in (wastewater_column, sodium_column):
        if extra_column and extra_column not in dynamic_processes:
            dynamic_processes.append(extra_column)

    # Keep only columns that actually exist in this sheet.
    dynamic_processes = [proc for proc in dynamic_processes if proc in df.columns]
    print(f"Dynamic processes to analyze: {dynamic_processes}")

    # Remove the CO2 fermentation column. Both header spellings are matched:
    # the export header reads "... from Fermentation", which the previous
    # "... of Fermentation" filter never matched.
    co2_columns = _columns_containing(
        df, 'CO2 Release of Fermentation', 'CO2 Release from Fermentation'
    )
    if co2_columns:
        df = df.drop(columns=co2_columns)
        for col in co2_columns:
            print(f"Removed CO2 column: {col}")

    # Map sheet column names to short display names.
    column_to_process_name = {
        proc: label
        for proc, label in zip(processes, process_names)
        if proc in dynamic_processes
    }
    if wastewater_column and wastewater_column not in column_to_process_name:
        column_to_process_name[wastewater_column] = 'Wastewater'
    if sodium_column and sodium_column not in column_to_process_name:
        column_to_process_name[sodium_column] = 'pH adjustment'

    print("Column name to display name mapping:")
    for col, name in column_to_process_name.items():
        print(f"{col} -> {name}")

    # Filter by selected categories. Copy so adding a column below does not
    # write into a view of `df`.
    df_selected = df.loc[df['Impact category'].isin(selected_categories)].copy()
    print(f"Selected impact categories found: {df_selected['Impact category'].unique().tolist()}")

    # A missing 'Total' must be derived BEFORE the split below; otherwise the
    # non-water subset has no 'Total' and the percentage step cannot run.
    if 'Total' not in df_selected.columns:
        print("Warning: 'Total' column not found; deriving it from the process columns.")
        df_selected['Total'] = df_selected[dynamic_processes].sum(axis=1)
        print("Created 'Total' column as sum of all process columns.")

    is_water = df_selected['Impact category'] == 'Water consumption'
    water_consumption_df = df_selected[is_water].copy()
    df_without_water = df_selected[~is_water].copy()

    # Contribution of each process as a percentage of the row total.
    # Best effort: on failure the percentage outputs are left empty.
    try:
        df_pct_without_water = df_without_water[dynamic_processes].div(df_without_water['Total'], axis=0) * 100
        df_pct_for_plot = df_pct_without_water.copy()
        df_pct_for_plot.columns = [column_to_process_name.get(col, col) for col in df_pct_for_plot.columns]
        df_pct_for_plot.index = df_without_water['Impact category']
        # Processes as rows, impact categories as columns.
        heatmap_data = df_pct_for_plot.T
    except Exception as e:
        print(f"Error calculating percentages: {e}")
        df_pct_for_plot = pd.DataFrame()
        heatmap_data = pd.DataFrame()

    # Export processed data
    try:
        df_selected.to_csv('all_impact_categories.csv', index=False)
        water_consumption_df.to_csv('water_consumption_data.csv', index=False)
        df_without_water.to_csv('impact_categories_without_water.csv', index=False)

        if not df_pct_for_plot.empty:
            df_pct_for_plot.to_csv('process_contribution_pct.csv')
            heatmap_data.to_csv('heatmap_data_processes_by_impact.csv')

        print("Data processing complete. Files saved:")
        print("- all_impact_categories.csv")
        print("- water_consumption_data.csv")
        print("- impact_categories_without_water.csv")
        if not df_pct_for_plot.empty:
            print("- process_contribution_pct.csv")
            print("- heatmap_data_processes_by_impact.csv")
    except Exception as e:
        print(f"Error saving CSV files: {e}")

    return {
        'raw_df': df,
        'selected_impacts': df_selected,
        'water_consumption': water_consumption_df,
        'other_impacts': df_without_water,
        'percent_contribution': df_pct_for_plot,
        'heatmap_data': heatmap_data
    }

# Main execution block
if __name__ == "__main__":
    # None means "nothing to process". The cell simply falls through instead
    # of calling exit(), which would shut down the notebook kernel.
    filename = None

    if IN_COLAB:
        # Case-insensitive extension match: finds e.g. 'Report.Xlsx' and never
        # lists the same file twice on case-insensitive filesystems.
        excel_files = sorted(
            entry for entry in os.listdir('.')
            if os.path.isfile(entry)
            and not entry.startswith('.')
            and entry.lower().endswith(('.xlsx', '.xls'))
        )
        if excel_files:
            print(f"Found Excel files: {excel_files}")
            if len(excel_files) == 1:
                filename = excel_files[0]
                print(f"Using {filename}")
            else:
                filename = input(f"Multiple Excel files found. Enter the one to use {excel_files}: ").strip()
        else:
            print("No Excel files found. Please upload one.")
            try:
                uploaded = files.upload()
            except KeyboardInterrupt:
                print("\nUpload was interrupted. Please run the cell again or use a previously uploaded file.")
            else:
                if uploaded:
                    # Only the first uploaded file is processed.
                    filename = next(iter(uploaded))
                    print(f"Using uploaded file: {filename}")
                else:
                    print("No file uploaded.")
    else:
        import sys
        # Under a Jupyter kernel sys.argv[1] is the launcher's '-f' option,
        # not a user-supplied path, so option-like arguments are ignored.
        if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
            filename = sys.argv[1]
        else:
            filename = input("Enter the Excel file path: ").strip()

    if filename is not None:
        try:
            if os.path.exists(filename):
                results = process_simapro_excel(filename)
                if IN_COLAB:
                    print("\nWould you like to download the output files? (y/n)")
                    download_choice = input()
                    if download_choice.lower() == 'y':
                        output_files = [
                            'all_impact_categories.csv',
                            'water_consumption_data.csv',
                            'impact_categories_without_water.csv',
                        ]
                        # Offer the percentage files only when this run
                        # produced them, so stale files from an earlier run
                        # are not downloaded.
                        if not results['percent_contribution'].empty:
                            output_files += [
                                'process_contribution_pct.csv',
                                'heatmap_data_processes_by_impact.csv',
                            ]
                        for output_name in output_files:
                            if os.path.exists(output_name):
                                files.download(output_name)
                                print(f"Downloaded: {output_name}")
            else:
                print(f"File not found: {filename}")
        except Exception as e:
            print(f"An error occurred during processing: {e}")
            print("Please check your file format and try again.")


Found Excel files: ['Basecase_model5.XLSX']
Using Basecase_model5.XLSX
Processing file: Basecase_model5.XLSX
Initial dataframe shape: (18, 13)
Columns: ['Impact category', 'Unit', 'Total', 'RO_1kg_model_5_full', 'CO2 Release from Fermentation', 'Dilution & Agitation of Thin Stillage', 'Drying', 'Fermentation and Aeration', 'Inoculum production for Fermentation', 'Sterilization of Thin Stillage', 'Water for dilution of Thin Stillage', 'RO25%_Organic_Wastewater Treatment for 1 kg', 'Water for biomass washing']
Removed column: RO_1kg_model_5_full
Wastewater column identified: RO25%_Organic_Wastewater Treatment for 1 kg
Sodium hydroxide column identified: None
Dynamic processes to analyze: ['Dilution & Agitation of Thin Stillage', 'Drying', 'Fermentation and Aeration', 'Inoculum production for Fermentation', 'Sterilization of Thin Stillage', 'Water for dilution of Thin Stillage', 'RO25%_Organic_Wastewater Treatment for 1 kg', 'Water for biomass washing']
Column name to display name mapping

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: all_impact_categories.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: water_consumption_data.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: impact_categories_without_water.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: process_contribution_pct.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded: heatmap_data_processes_by_impact.csv
