# Scenario Comparison Data Processing (Fixed, Fully Updated Version)

## Block 1: Install and Import Libraries

In [1]:
# Install required libraries
!pip install pandas numpy matplotlib seaborn openpyxl

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
from pathlib import Path
from google.colab import files



## Block 2: Upload Files and Name Scenarios

In [2]:
# Ask for the Excel workbooks, list what arrived, then collect one scenario name per file.
print("Please upload your Excel files...")
uploaded = files.upload()

# The keys of the upload result are the uploaded file names.
file_paths = list(uploaded)
print("\nUploaded files:")
for position, filename in enumerate(file_paths, start=1):
    print(f"{position}. {filename}")

print("\nPlease provide a scenario name for each file (press Enter to use filename without extension):")
scenario_names = {}
for filename in file_paths:
    # Default scenario name: the file name with directory and last extension removed.
    base_name = os.path.basename(filename)
    default_name = os.path.splitext(base_name)[0]
    typed_name = input(f"Scenario name for '{filename}' [default: {default_name}]: ").strip()
    # A blank (or whitespace-only) answer falls back to the default.
    scenario_names[filename] = typed_name or default_name

Please upload your Excel files...


Saving AO_25%_M4.XLSX to AO_25%_M4.XLSX
Saving NI_25%_M4.XLSX to NI_25%_M4.XLSX
Saving RD_25%_M4.XLSX to RD_25%_M4.XLSX
Saving RO_25%_M4.XLSX to RO_25%_M4.XLSX

Uploaded files:
1. AO_25%_M4.XLSX
2. NI_25%_M4.XLSX
3. RD_25%_M4.XLSX
4. RO_25%_M4.XLSX

Please provide a scenario name for each file (press Enter to use filename without extension):
Scenario name for 'AO_25%_M4.XLSX' [default: AO_25%_M4]: Aspergillus oryzae
Scenario name for 'NI_25%_M4.XLSX' [default: NI_25%_M4]: Neurospora intermedia
Scenario name for 'RD_25%_M4.XLSX' [default: RD_25%_M4]: Rhizopus delemar
Scenario name for 'RO_25%_M4.XLSX' [default: RO_25%_M4]: Rhizopus oryzae


## Block 3: Define Functions to Process Files

In [3]:
def extract_info_from_filename(filename):
    """Split a scenario file name into its prefix and percentage parts.

    Only the stem of ``filename`` (directory and final extension removed) is
    examined. It must start with one or more capital letters, an underscore,
    one or more digits followed by ``%``, and another underscore -- for
    example ``AO_25%_M4``.

    Args:
        filename: File name or path of the uploaded workbook.

    Returns:
        A ``(prefix, percentage)`` tuple such as ``("AO", "25%")``, or
        ``(None, None)`` when the stem does not start with that pattern.
    """
    stem = Path(filename).stem
    found = re.match(r"([A-Z]+)_(\d+%)_", stem)
    if found is None:
        return None, None
    return found.group(1), found.group(2)

def process_excel_file(filename):
    """Load one scenario workbook and keep only the impact rows and columns of interest.

    Steps, in order:

    1. Read the workbook with ``header=15`` (the 16th row supplies the column names).
    2. Drop the fourth column, whatever it is called.
    3. Derive the prefix and percentage from ``filename`` and use them to build
       the list of wanted process columns (thin stillage, heating, drying,
       filtration).
    4. Add the first column whose name contains ``'Wastewater Treatment for 1 kg'``
       and the first whose name contains ``'Sodium hydroxide'``, when present.
    5. Drop every column whose name contains ``'CO2 Release of Fermentation'``.
    6. Keep only the rows whose ``'Impact category'`` is one of the seven
       selected categories.

    Progress messages are printed along the way.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A DataFrame with ``'Impact category'``, ``'Total'``, ``'Unit'`` (if the
        sheet has it) and those wanted process columns that exist in the sheet.

    Raises:
        IndexError: If the sheet has fewer than four columns.
        KeyError: If ``'Impact category'`` or ``'Total'`` is missing.
    """

    def text_columns_containing(frame, fragment):
        # Column labels that are strings and contain `fragment`, in sheet order.
        return [label for label in frame.columns if isinstance(label, str) and fragment in label]

    print(f"\nProcessing {filename}...")
    sheet = pd.read_excel(filename, header=15)

    fourth_column = sheet.columns[3]
    print(f"Removing column at position 4: {fourth_column}")
    sheet = sheet.drop(columns=fourth_column)

    prefix, percentage = extract_info_from_filename(filename)
    if not (prefix and percentage):
        print(f"Warning: Could not detect prefix or percentage for {filename}")

    # Column name to look for, per energy kind and per file-name prefix.
    column_lookup = {
        'heating_energy': {
            'AO': 'heating_energy_perLiter_at_30',
            'NI': 'heating_energy_perLiter_at_30',
            'RD': 'heating_energy_perLiter_at_30',
            'RO': 'heating_energy_perLiter_at_37',
        },
        'drying_energy': {
            'AO': 'AO_drying_energy_perKg',
            'NI': 'NI_drying_energy_perKg',
            'RD': 'RD_drying_energy_perKg',
            'RO': 'RO_drying_energy_perKg',
        },
        'filtration_energy': {
            'AO': 'AO_filtration_energy_perKg',
            'NI': 'NI_filtration_energy_perKg',
            'RD': 'RD_filtration_energy_perKg',
            'RO': 'RO_filtration_energy_perKg',
        },
    }

    # An unknown prefix yields None entries; they are filtered out further down
    # because None is not a column of the sheet.
    processes = [f"Thin stillage {percentage} for 1 kg"]
    for energy_kind in ('heating_energy', 'drying_energy', 'filtration_energy'):
        processes.append(column_lookup[energy_kind].get(prefix))

    wastewater_cols = text_columns_containing(sheet, 'Wastewater Treatment for 1 kg')
    if wastewater_cols:
        first_wastewater = wastewater_cols[0]
        print(f"Found wastewater column: {first_wastewater}")
        processes.append(first_wastewater)

    sodium_cols = text_columns_containing(sheet, 'Sodium hydroxide')
    if sodium_cols:
        first_sodium = sodium_cols[0]
        print(f"Found pH adjustment column: {first_sodium}")
        processes.append(first_sodium)

    # The candidate list is computed once, before any column is dropped.
    for co2_col in text_columns_containing(sheet, 'CO2 Release of Fermentation'):
        if co2_col not in sheet.columns:
            continue
        sheet = sheet.drop(columns=co2_col)
        print(f"Removed CO2 fermentation column: {co2_col}")

    selected_categories = [
        'Global warming',
        'Terrestrial acidification',
        'Freshwater eutrophication',
        'Marine eutrophication',
        'Land use',
        'Fossil resource scarcity',
        'Water consumption'
    ]
    is_selected = sheet['Impact category'].isin(selected_categories)
    selected_rows = sheet[is_selected]

    display_columns = ['Impact category', 'Total']
    if 'Unit' in sheet.columns:
        display_columns.append('Unit')
    display_columns.extend(name for name in processes if name in sheet.columns)

    return selected_rows[display_columns]


## Block 4: Process All Files

In [4]:
# Run every uploaded file that has a scenario name through process_excel_file and
# collect the resulting frames keyed by scenario name.
all_results = {}

for file_path in file_paths:
    if file_path not in scenario_names:
        continue

    scenario_name = scenario_names[file_path]
    print(f"\nProcessing scenario '{scenario_name}' from file: {file_path}")

    # Best effort: a file that fails is reported and skipped so the others still run.
    try:
        processed_df = process_excel_file(file_path)
    except Exception as error:
        print(f"Error processing file {file_path}: {error}")
    else:
        all_results[scenario_name] = processed_df


Processing scenario 'Aspergillus oryzae' from file: AO_25%_M4.XLSX

Processing AO_25%_M4.XLSX...
Removing column at position 4: AO_25%_TS_paccino_ph_process_1kg
Found wastewater column: AO25%_Wastewater Treatment for 1 kg
Found pH adjustment column: Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S
Removed CO2 fermentation column: CO2 Release of Fermentation (thin stillage 25%)

Processing scenario 'Neurospora intermedia' from file: NI_25%_M4.XLSX

Processing NI_25%_M4.XLSX...
Removing column at position 4: NI_25%_TS_paccino_ph_process_1kg
Found wastewater column: NI25%_Wastewater Treatment for 1 kg
Found pH adjustment column: Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S
Removed CO2 fermentation column: CO2 Release of Fermentation (thin stillage 25%)

Processing scenario 'Rhizopus delemar' from file: RD_25%_M4.XLSX

Processing RD_25%_M4.XLSX...
Removing column at position 4: RD_25%_TS_paccino_ph_process_1kg
Found w

## Block 5: Save and Download Outputs

In [5]:
# Write one CSV per processed scenario into `output_folder`, then hand each file to
# the Colab download helper.
output_folder = 'lca_results'
os.makedirs(output_folder, exist_ok=True)

saved_files = []

for scenario_name, df_result in all_results.items():
    # Scenario names are typed in by the user. Besides swapping spaces for
    # underscores, neutralise path separators, control characters and characters
    # that are illegal in file names on common filesystems, so a name such as
    # "A/B" cannot point into a non-existent sub-directory.
    clean_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', scenario_name.replace(' ', '_'))
    output_file = f"{output_folder}/{clean_name}_all_impacts.csv"
    df_result.to_csv(output_file, index=False)
    saved_files.append(output_file)
    print(f"Saved: {output_file}")

print(f"\nSaved {len(saved_files)} CSV files in the '{output_folder}' folder.")

if saved_files:
    print("\nDownloading CSV files to your computer...")
    for filepath in saved_files:
        files.download(filepath)

    print("Process completed successfully!")
else:
    # Do not claim success when there is nothing to download.
    print("\nNo scenario results were available, so nothing was saved or downloaded.")

Saved: lca_results/Aspergillus_oryzae_all_impacts.csv
Saved: lca_results/Neurospora_intermedia_all_impacts.csv
Saved: lca_results/Rhizopus_delemar_all_impacts.csv
Saved: lca_results/Rhizopus_oryzae_all_impacts.csv

Saved 4 CSV files in the 'lca_results' folder.

Downloading CSV files to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Process completed successfully!
