In [None]:
# -*- coding: utf-8 -*-
"""
Basecase Scenario Data Processing Notebook (Fully Dynamic Version)
"""

# Block 1: Install and Import Libraries

# Install required libraries
!pip install pandas numpy matplotlib seaborn openpyxl

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
from pathlib import Path
from google.colab import files

# Block 2: Upload Files and Name Scenarios

# Ask the user to upload workbooks; the returned mapping is keyed by file name.
print("Please upload your Excel files...")
uploaded = files.upload()

file_paths = list(uploaded)
print("\nUploaded files:")
for position, uploaded_name in enumerate(file_paths, start=1):
    print(f"{position}. {uploaded_name}")

# Map each uploaded file name to a scenario label chosen by the user.
# An empty (or whitespace-only) answer falls back to the file name without
# its directory part and extension.
scenario_names = {}
print("\nPlease provide a scenario name for each file (press Enter to use filename without extension):")
for filename in file_paths:
    default_name, _extension = os.path.splitext(os.path.basename(filename))
    answer = input(f"Scenario name for '{filename}' [default: {default_name}]: ").strip()
    scenario_names[filename] = answer or default_name

# Block 3: Define Functions to Process Files

# Define Key Variables (GLOBAL)
# Each entry pairs the keyword searched for in the workbook's column headers
# with a shorter display label. Both public lists below are derived from this
# table so they stay index-aligned.
_PROCESS_TABLE = (
    ('Dilution & Agitation of Thin Stillage', 'Dilution & Agitation'),
    ('Drying', 'Drying'),
    ('Fermentation and Aeration', 'Fermentation'),
    ('Inoculum production for Fermentation', 'Inoculum Preparation'),
    ('Sterilization of Thin Stillage', 'Sterilization'),
    ('Water for dilution of Thin Stillage', 'Water for Dilution'),
    ('RO25%_Organic_Wastewater Treatment for 1 kg', 'Wastewater'),
    ('Water for biomass washing', 'Water for Washing'),
)

# Keywords matched (as substrings) against column headers.
processes = [column_keyword for column_keyword, _ in _PROCESS_TABLE]

# Short labels, one per entry of `processes`.
process_names = [short_label for _, short_label in _PROCESS_TABLE]

# Rows of the 'Impact category' column that are kept in the output.
selected_categories = [
    'Global warming', 'Terrestrial acidification',
    'Freshwater eutrophication', 'Marine eutrophication',
    'Land use', 'Fossil resource scarcity',
    'Water consumption',
]

def process_excel_file(filename, header_row=15):
    """Load one result workbook and keep the selected impact rows and columns.

    The workbook is read with pandas, trying the default engine first and
    then ``openpyxl`` and ``xlrd``. The column at position 4 and every column
    whose name contains ``'CO2 Release'`` are dropped. Rows are then filtered
    to the module-level ``selected_categories`` and columns are limited to
    ``'Impact category'``, ``'Total'``, ``'Unit'`` (when present) and the
    process columns matched from the module-level ``processes`` keywords.

    Args:
        filename: Path of the Excel workbook to read.
        header_row: 0-indexed sheet row used as the column header
            (defaults to 15, the value previously hard-coded).

    Returns:
        A new DataFrame containing only the selected rows and columns.

    Raises:
        RuntimeError: If no engine could read the workbook; the last
            underlying error is chained as the cause.
        ValueError: If the 'Impact category' or 'Total' column is missing.
    """
    print(f"\nProcessing {filename}...")

    # Try pandas' default engine first, then each explicit engine in turn.
    read_attempts = (
        (None, "First attempt failed"),
        ('openpyxl', "Second attempt failed"),
        ('xlrd', "All attempts to read Excel file failed"),
    )
    last_error = None
    for engine, failure_label in read_attempts:
        if engine is not None:
            print(f"Trying with {engine} engine...")
        try:
            df = pd.read_excel(filename, header=header_row, engine=engine)
        except Exception as err:  # any reader failure triggers the next engine
            print(f"{failure_label}: {err}")
            last_error = err
        else:
            break
    else:
        # Chain the last failure so the real cause is visible in tracebacks.
        raise RuntimeError(
            "Could not read Excel file with any available engine."
        ) from last_error

    print(f"Initial dataframe shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")

    # Drop 4th column if exists
    if len(df.columns) > 3:
        column_to_remove = df.columns[3]
        df = df.drop(column_to_remove, axis=1)
        print(f"Removed column at position 4: {column_to_remove}")

    text_columns = [col for col in df.columns if isinstance(col, str)]

    # Dynamically match available process columns. An exact header match wins
    # over a substring match, and a column is never selected twice (that
    # would duplicate it in the output).
    dynamic_processes = []
    for keyword in processes:
        candidates = [col for col in text_columns if keyword in col]
        if not candidates:
            print(f"Warning: No column found for '{keyword}'")
            continue
        chosen = keyword if keyword in candidates else candidates[0]
        print(f"Found process column for '{keyword}': {chosen}")
        if chosen not in dynamic_processes:
            dynamic_processes.append(chosen)

    # Remove CO2 columns if exist
    co2_cols = [col for col in text_columns if 'CO2 Release' in col]
    if co2_cols:
        df = df.drop(columns=co2_cols)
        for co2_col in co2_cols:
            print(f"Removed CO2 emission column: {co2_col}")

    # Both columns are required below; fail with a clear message instead of
    # an opaque pandas KeyError.
    if 'Impact category' not in df.columns:
        raise ValueError("Missing 'Impact category' column in Excel file.")
    if 'Total' not in df.columns:
        raise ValueError("Missing 'Total' column in Excel file.")

    # Select impact categories
    df_selected = df[df['Impact category'].isin(selected_categories)]

    # Prepare final display columns
    display_columns = ['Impact category', 'Total']
    if 'Unit' in df.columns:
        display_columns.append('Unit')
    # A matched process column may have been removed above (CO2 columns).
    display_columns += [proc for proc in dynamic_processes if proc in df.columns]

    print(f"Final selected columns: {display_columns}")
    # Return an independent frame so later edits never touch `df`.
    return df_selected[display_columns].copy()

# Block 4: Process All Files

# Processed DataFrame per scenario name; files that fail are reported and
# left out of the mapping.
all_results = {}

for file_path in file_paths:
    # Only files that were given a scenario name are processed.
    if file_path not in scenario_names:
        continue

    scenario_name = scenario_names[file_path]
    print(f"\nProcessing scenario '{scenario_name}' from file: {file_path}")
    try:
        processed_df = process_excel_file(file_path)
    except Exception as e:
        # Report the failure and carry on with the remaining files.
        print(f"Error processing file {file_path}: {str(e)}")
    else:
        all_results[scenario_name] = processed_df

# Block 5: Save and Download Outputs

output_folder = 'lca_results'
os.makedirs(output_folder, exist_ok=True)

# Scenario names are typed by the user, so anything that could break or
# redirect the output path (path separators, characters Windows rejects) is
# replaced along with spaces. Each offending character becomes one underscore,
# so names containing only spaces map to the same file names as before.
unsafe_filename_chars = re.compile(r'[ \\/:*?"<>|]')

saved_files = []

for scenario_name, df_result in all_results.items():
    clean_name = unsafe_filename_chars.sub('_', scenario_name)
    output_file = f"{output_folder}/{clean_name}_all_impacts.csv"
    df_result.to_csv(output_file, index=False)
    saved_files.append(output_file)
    print(f"Saved: {output_file}")

print(f"\nSaved {len(saved_files)} CSV files in the '{output_folder}' folder.")

if saved_files:
    print("\nDownloading CSV files to your computer...")
    for filepath in saved_files:
        files.download(filepath)

    print("Process completed successfully!")
else:
    # Do not claim success when every file failed or nothing was uploaded.
    print("\nNo scenarios were processed successfully; nothing to download.")



Please upload your Excel files...


Saving low_yield_scenario.XLSX to low_yield_scenario.XLSX
Saving Wind_energy_scenario_model5.XLSX to Wind_energy_scenario_model5.XLSX
Saving Basecase_model5.XLSX to Basecase_model5.XLSX

Uploaded files:
1. low_yield_scenario.XLSX
2. Wind_energy_scenario_model5.XLSX
3. Basecase_model5.XLSX

Please provide a scenario name for each file (press Enter to use filename without extension):
Scenario name for 'low_yield_scenario.XLSX' [default: low_yield_scenario]: Low Yield Scenario
Scenario name for 'Wind_energy_scenario_model5.XLSX' [default: Wind_energy_scenario_model5]: Wind Energy Scenario
Scenario name for 'Basecase_model5.XLSX' [default: Basecase_model5]: Basecase

Processing scenario 'Low Yield Scenario' from file: low_yield_scenario.XLSX

Processing low_yield_scenario.XLSX...
Initial dataframe shape: (18, 13)
Columns: ['Impact category', 'Unit', 'Total', 'RO_1kg_model_5_full', 'CO2 Release from Fermentation', 'Dilution & Agitation of Thin Stillage', 'Drying', 'Fermentation and Aeration

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Process completed successfully!
