In [None]:
# Install required libraries
!pip install matplotlib
!pip install seaborn
!pip install plotly
!pip install pandas
!pip install numpy
!pip install openpyxl
!pip install xlsxwriter

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from google.colab import files
import io
import matplotlib
# Agg is a non-interactive, file-only backend: figures can be written with
# savefig(), but plt.show() has no window or inline output to render to.
matplotlib.use('Agg')

Collecting xlsxwriter
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.3


In [None]:
# Upload the CSV file
def upload_csv():
    """
    Prompt for a CSV upload in Colab and load it into a DataFrame.

    Only the first uploaded file is read. A preview of the first rows and the
    list of column names are printed so the user can verify the file.

    Returns:
    pandas.DataFrame or None: The parsed CSV, or None when no file was
    uploaded or the file could not be parsed as CSV.
    """
    print("Please upload your water_consumption_data.csv file...")
    uploaded = files.upload()

    # The upload dialog can be dismissed without choosing a file, in which
    # case the returned mapping is empty and there is nothing to read.
    if not uploaded:
        print("No file was uploaded.")
        return None

    filename = next(iter(uploaded))
    print(f"Processing file: {filename}")

    try:
        df = pd.read_csv(filename)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        # Report unreadable input and return None, which main() treats as
        # "nothing to plot", instead of surfacing a raw traceback.
        print(f"Error reading '{filename}': {e}")
        return None

    print("\nPreview of the uploaded data:")
    display(df.head())

    print("\nAll available columns in the dataset:")
    for col in df.columns:
        print(f"- {col}")

    return df

In [None]:
def create_waterfall_chart(df):
    """
    Plot the per-process water contributions from the first row of ``df``.

    Despite the name, the figure is a signed bar chart (one bar per process,
    red for values >= 0, blue for negative values), not a cumulative
    waterfall. The value of the 'Total' column is shown as a text box.

    Side effects: updates the global matplotlib rcParams (the styling stays
    active after the call), writes 'water_consumption_waterfall.png' and
    'water_consumption_waterfall.pdf' to the working directory, and asks
    Colab to download both files.

    Parameters:
    df (pandas.DataFrame): DataFrame containing water consumption data. It
        must contain the seven process columns listed below plus 'Total' and
        'Unit'. Only the first row is plotted.

    Returns:
    bool: True when both files were saved and the downloads were triggered;
    False when the dataframe is empty, required columns are missing, or
    saving/downloading failed.
    """
    # Define the process columns based on CSV structure
    dynamic_processes = [
        'RD_Thin stillage 25% for 1 kg',
        'heating_energy_perLiter_at_30',
        'Stirring_energy_perLiter',
        'RD_drying_energy_perKg',
        'RD_filtration_energy_perKg',
        'RD25%_Wastewater Treatment for 1 kg',
        'Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S'
    ]

    # Create more readable display names for the processes
    column_to_process_name = {
        'RD_Thin stillage 25% for 1 kg': 'Thin Stillage',
        'heating_energy_perLiter_at_30': 'Heating',
        'Stirring_energy_perLiter': 'Stirring',
        'RD_drying_energy_perKg': 'Drying',
        'RD_filtration_energy_perKg': 'Filtration',
        'RD25%_Wastewater Treatment for 1 kg': 'Wastewater',
        'Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S': 'pH adjustment'
    }

    # Validate the input before touching any matplotlib state, so a bad file
    # neither changes the global style nor creates a figure.
    if df.empty:
        print("The dataframe is empty. Please check your CSV file.")
        return False

    # 'Total' and 'Unit' are read further down, so they are required as well;
    # checking them here reports a clear message instead of a KeyError.
    required_columns = dynamic_processes + ['Total', 'Unit']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Error: The following columns are missing from the dataframe: {missing_columns}")
        print("Available columns: ", df.columns.tolist())
        return False

    # Set up the styling for the plot (applied globally, as before)
    plt.rcParams.update({
        # Figure size and layout
        'figure.figsize': (6.5, 4.0),

        # Font settings
        'font.family': 'serif',
        'font.size': 10,
        'axes.labelsize': 10,
        'axes.titlesize': 11,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,

        # Axes settings
        'axes.spines.top': True,
        'axes.spines.right': True,
        'axes.linewidth': 0.5,

        # Grid settings
        'grid.linestyle': '--',
        'grid.alpha': 0.3,

        # Legend settings
        'legend.frameon': True,
        'legend.framealpha': 0.7,
        'legend.handlelength': 4.0,

        # Output settings
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.pad_inches': 0.05,
    })

    # Colorblind-friendly palette
    plt.rcParams['axes.prop_cycle'] = plt.cycler(color=[
        '#4C72B0', '#55A868', '#C44E52', '#8172B3',
        '#CCB974', '#64B5CD', '#AD8BC9', '#B0724E'
    ])

    # Values, labels and annotations all come from the first row only
    water_values = df[dynamic_processes].values[0]
    display_names = [column_to_process_name.get(col, col) for col in dynamic_processes]
    total_water = df["Total"].values[0]
    unit = df["Unit"].values[0]

    png_path = 'water_consumption_waterfall.png'
    pdf_path = 'water_consumption_waterfall.pdf'

    # Custom coloring for water consumption/saving
    positive_color = '#C44E52'  # Red from palette
    negative_color = '#4C72B0'  # Blue from palette

    # Work on an explicit figure/axes pair so saving never depends on which
    # figure pyplot currently considers "current".
    fig, ax = plt.subplots(figsize=(6.5, 4.0))
    try:
        bars = ax.bar(display_names, water_values)
        for bar, value in zip(bars, water_values):
            bar.set_color(positive_color if value >= 0 else negative_color)

        # Empty line plots only exist to create the two legend entries
        ax.plot([], [], color=positive_color, label='Water Consumption')
        ax.plot([], [], color=negative_color, label='Water Saving')

        # Add total water consumption as text in the upper-right corner
        ax.text(0.95, 0.95, f'Total: {total_water:.6f} {unit}',
                transform=ax.transAxes, ha='right', va='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Customize plot
        ax.set_title('Water Consumption Process Contributions')
        ax.set_ylabel(f'Water Consumption ({unit})')
        ax.set_xlabel('Process')

        # Adjust x-axis tick labels
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

        # Add a horizontal line at y=0
        ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)

        # Add grid for better readability
        ax.grid(axis='y', linestyle='--', alpha=0.3)

        ax.legend(loc='best')
        fig.tight_layout()

        # Save BEFORE showing: on interactive/inline backends plt.show() can
        # release the figure, after which a pyplot-level savefig writes a
        # blank image.
        try:
            fig.savefig(png_path, format='png', dpi=300, bbox_inches='tight')
            fig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
            saved = True
        except Exception as e:
            print(f"Error saving figures: {e}")
            saved = False

        # Display the figure regardless of whether saving worked
        plt.show()

        if not saved:
            return False
        print("\nFigures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'")

        # Download the files
        try:
            files.download(png_path)
            files.download(pdf_path)
        except Exception as e:
            print(f"Error downloading figures: {e}")
            return False

        print("Files are available for download.")
        return True
    finally:
        # Always release the figure, including when plotting itself raises
        plt.close(fig)

def main():
    """Upload a CSV, build the chart from it, and report the outcome."""
    df = upload_csv()

    # Nothing to plot without a dataframe
    if df is None:
        return

    if create_waterfall_chart(df):
        print("Waterfall chart created successfully!")
    else:
        print("Failed to create waterfall chart.")

# Run the workflow when this cell executes. In a notebook kernel the module
# name is "__main__", so the guard passes; it only keeps main() from running
# if this code is imported from another module.
if __name__ == "__main__":
    main()

Please upload your water_consumption_data.csv file...


Saving water_consumption_data.csv to water_consumption_data (2).csv
Processing file: water_consumption_data (2).csv

Preview of the uploaded data:


Unnamed: 0,Impact category,Unit,Total,RD_Thin stillage 25% for 1 kg,heating_energy_perLiter_at_30,Stirring_energy_perLiter,RD_drying_energy_perKg,RD_filtration_energy_perKg,RD25%_Wastewater Treatment for 1 kg,"Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S"
0,Water consumption,m3,0.020154,0.071859,0.006528,0.000308,0.014885,8e-06,-0.116366,0.042933



All available columns in the dataset:
- Impact category
- Unit
- Total
- RD_Thin stillage 25% for 1 kg
- heating_energy_perLiter_at_30
- Stirring_energy_perLiter
- RD_drying_energy_perKg
- RD_filtration_energy_perKg
- RD25%_Wastewater Treatment for 1 kg
- Sodium hydroxide, without water, in 50% solution state {GLO}| market for | APOS, S

Figures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Files are available for download.
Waterfall chart created successfully!
