In [1]:
# Install required libraries (only run this once)
!pip install matplotlib
!pip install seaborn
!pip install plotly
!pip install pandas
!pip install numpy
!pip install openpyxl       # Excel file reading
!pip install xlsxwriter     # Excel file writing with multiple sheets

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from google.colab import files
import io
import matplotlib
# Select the Agg backend, which renders figures off-screen for savefig().
# NOTE(review): Agg is a non-interactive backend, so plt.show() is not
# expected to display the chart inline while it is active — confirm that
# file-only output is what is wanted here.
matplotlib.use('Agg')  # Force Agg backend which is more reliable for saving

# Upload the CSV file
def upload_csv():
    """
    Prompt for a CSV upload in Colab and load it into a DataFrame.

    Only the first uploaded file is read; any additional files in the same
    upload are ignored.

    Returns:
    pandas.DataFrame or None: The parsed CSV, or None when no file was
        uploaded or the file could not be read (callers such as main()
        already test for None).
    """
    print("Please upload your water_consumption_data.csv file...")
    uploaded = files.upload()

    # Without this guard next(iter(...)) raises StopIteration on an empty
    # result (presumably what a cancelled upload dialog produces).
    if not uploaded:
        print("No file was uploaded.")
        return None

    filename = next(iter(uploaded))
    print(f"Processing file: {filename}")

    # Load the CSV file; report unreadable input instead of crashing the cell
    try:
        df = pd.read_csv(filename)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as e:
        print(f"Error reading '{filename}': {e}")
        return None

    # Display the first few rows to verify data
    print("\nPreview of the uploaded data:")
    display(df.head())

    # Print all column names to make sure we're using the correct ones
    print("\nAll available columns in the dataset:")
    for col in df.columns:
        print(f"- {col}")

    return df

def create_waterfall_chart(df):
    """
    Plot the per-process water contributions and export the figure.

    Despite the name, the figure is a signed bar chart: one bar per process,
    drawn from zero, red for values >= 0 and blue for negative values. Only
    the first row of ``df`` is plotted. The figure is written to
    'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'
    and both files are passed to ``files.download``.

    Side effect: matplotlib's global ``rcParams`` are updated with the
    styling used for the chart and stay changed after the call.

    Parameters:
    df (pandas.DataFrame): DataFrame containing water consumption data. It
        must have 'Total' and 'Unit' columns; every column other than
        'Impact category', 'Unit' and 'Total' is treated as a process.

    Returns:
    bool: True when both files were saved and handed over for download,
        False when the input is unusable or saving/downloading failed.
    """
    # Validate the input before touching any global matplotlib state.
    if df.empty:
        print("The dataframe is empty. Please check your CSV file.")
        return False

    # 'Total' and 'Unit' feed the annotation and the axis label below, so
    # their absence is reported here rather than surfacing as a KeyError.
    missing_columns = [col for col in ('Total', 'Unit') if col not in df.columns]
    if missing_columns:
        print(f"Error: The following columns are missing from the dataframe: {missing_columns}")
        print("Available columns: ", df.columns.tolist())
        return False

    # Every remaining column is one process contribution
    process_columns = [col for col in df.columns
                       if col not in ('Impact category', 'Unit', 'Total')]
    if not process_columns:
        print("Error: No process columns found in the dataframe.")
        print("Available columns: ", df.columns.tolist())
        return False

    # Shorter display names for the processes; unknown columns keep their
    # original name.
    column_to_process_name = {
        'Dilution & Agitation of Thin Stillage': 'Dilution & Agitation',
        'Drying': 'Drying',
        'Fermentation and Aeration': 'Fermentation',
        'Inoculum production for Fermentation': 'Inoculum Preparation',
        'Sterilization of Thin Stillage': 'Sterilization',
        'Water for dilution of Thin Stillage': 'Water for Dilution',
        'RO25%_Organic_Wastewater Treatment for 1 kg': 'Wastewater',
        'Water for biomass washing': 'Water for Washing',
    }

    # Set up the styling for the plot
    plt.rcParams.update({
        # Figure size and layout
        'figure.figsize': (6.5, 4.0),

        # Font settings
        'font.family': 'serif',
        'font.size': 10,
        'axes.labelsize': 10,
        'axes.titlesize': 11,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,

        # Axes settings
        'axes.spines.top': True,
        'axes.spines.right': True,
        'axes.linewidth': 0.5,

        # Grid settings
        'grid.linestyle': '--',
        'grid.alpha': 0.3,

        # Legend settings
        'legend.frameon': True,
        'legend.framealpha': 0.7,
        'legend.handlelength': 4.0,

        # Output settings
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.pad_inches': 0.05,
    })

    # Colorblind-friendly palette
    plt.rcParams['axes.prop_cycle'] = plt.cycler(color=[
        '#4C72B0', '#55A868', '#C44E52', '#8172B3',
        '#CCB974', '#64B5CD', '#AD8BC9', '#B0724E'
    ])

    # Values of the first data row
    unit = df["Unit"].values[0]
    total_water = df["Total"].values[0]
    water_values = df[process_columns].values[0]
    display_names = [column_to_process_name.get(col, col) for col in process_columns]

    # Custom coloring for water consumption/saving
    positive_color = '#C44E52'  # Red from palette
    negative_color = '#4C72B0'  # Blue from palette
    bar_colors = [positive_color if value >= 0 else negative_color
                  for value in water_values]

    png_path = 'water_consumption_waterfall.png'
    pdf_path = 'water_consumption_waterfall.pdf'

    # Keep a handle on the figure so saving and closing act on this exact
    # figure rather than on whatever pyplot considers "current".
    fig = plt.figure(figsize=(6.5, 4.0))
    try:
        # Face and edge share one color, matching Patch.set_color()
        plt.bar(display_names, water_values, color=bar_colors, edgecolor=bar_colors)

        # Empty line artists provide the two legend entries
        plt.plot([], [], color=positive_color, label='Water Consumption')
        plt.plot([], [], color=negative_color, label='Water Saving')

        # Add total water consumption as text
        plt.text(0.95, 0.95, f'Total: {total_water:.6f} {unit}',
                 transform=plt.gca().transAxes, ha='right', va='top',
                 bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Customize plot
        plt.title('Water Consumption Process Contributions')
        plt.ylabel(f'Water Consumption ({unit})')
        plt.xlabel('Process')
        plt.xticks(rotation=45, ha='right')
        plt.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        plt.grid(axis='y', linestyle='--', alpha=0.3)
        plt.legend(loc='best')
        plt.tight_layout()

        # Save BEFORE plt.show(): inline/interactive backends may release
        # the figure once it has been shown, which would leave a later
        # savefig() writing an empty canvas.
        try:
            fig.savefig(png_path, format='png', dpi=300, bbox_inches='tight')
            fig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
        except Exception as e:
            print(f"Error saving figures: {e}")
            return False

        # Display the figure in the notebook
        plt.show()
        print("\nFigures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'")

        # Download the files; a download failure is reported separately
        # from a save failure.
        try:
            files.download(png_path)
            files.download(pdf_path)
        except Exception as e:
            print(f"Error downloading figures: {e}")
            return False

        print("Files are available for download.")
        return True
    finally:
        # Release the figure even when plotting itself raised
        plt.close(fig)

def main():
    """Upload a CSV, build the chart from it and print the outcome."""
    data = upload_csv()

    # Nothing to plot when the upload step produced no dataframe
    if data is None:
        return

    outcome = (
        "Waterfall chart created successfully!"
        if create_waterfall_chart(data)
        else "Failed to create waterfall chart."
    )
    print(outcome)

# Run the upload-and-plot workflow when this code executes as the main
# module. In a notebook cell __name__ is "__main__", so running the cell
# starts main() immediately.
if __name__ == "__main__":
    main()

Collecting xlsxwriter
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 169.4/169.4 kB 3.2 MB/s eta 0:00:00
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.3
Please upload your water_consumption_data.csv file...


Saving water_consumption_data.csv to water_consumption_data.csv
Processing file: water_consumption_data.csv

Preview of the uploaded data:


Unnamed: 0,Impact category,Unit,Total,CO2 Release from Fermentation,Dilution & Agitation of Thin Stillage,Drying,Fermentation and Aeration,Inoculum production for Fermentation,Sterilization of Thin Stillage,Water for dilution of Thin Stillage,RO25%_Organic_Wastewater Treatment for 1 kg,Water for biomass washing
0,Water consumption,m3,0.272979,0,6.4e-05,0.077654,0.183299,0.002412,0.013039,0.064122,-0.188174,0.120563



All available columns in the dataset:
- Impact category
- Unit
- Total
- CO2 Release from Fermentation
- Dilution & Agitation of Thin Stillage
- Drying
- Fermentation and Aeration
- Inoculum production for Fermentation
- Sterilization of Thin Stillage
- Water for dilution of Thin Stillage
- RO25%_Organic_Wastewater Treatment for 1 kg
- Water for biomass washing

Figures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Files are available for download.
Waterfall chart created successfully!


In [2]:
# Install required libraries (only run this once)
!pip install matplotlib
!pip install seaborn
!pip install plotly
!pip install pandas
!pip install numpy
!pip install openpyxl       # Excel file reading
!pip install xlsxwriter     # Excel file writing with multiple sheets

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from google.colab import files
import io
import matplotlib
# Select the Agg backend, which renders figures off-screen for savefig().
# NOTE(review): Agg is a non-interactive backend, so plt.show() is not
# expected to display the chart inline while it is active — confirm that
# file-only output is what is wanted here.
matplotlib.use('Agg')  # Force Agg backend which is more reliable for saving

# Upload the CSV file
def upload_csv():
    """
    Prompt for a CSV upload in Colab and load it into a DataFrame.

    Only the first uploaded file is read; any additional files in the same
    upload are ignored. The 'CO2 Release from Fermentation' column is
    removed when present so it is not plotted as a process.

    Returns:
    pandas.DataFrame or None: The parsed CSV, or None when no file was
        uploaded or the file could not be read (callers such as main()
        already test for None).
    """
    print("Please upload your water_consumption_data.csv file...")
    uploaded = files.upload()

    # Without this guard next(iter(...)) raises StopIteration on an empty
    # result (presumably what a cancelled upload dialog produces).
    if not uploaded:
        print("No file was uploaded.")
        return None

    filename = next(iter(uploaded))
    print(f"Processing file: {filename}")

    # Load the CSV file; report unreadable input instead of crashing the cell
    try:
        df = pd.read_csv(filename)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as e:
        print(f"Error reading '{filename}': {e}")
        return None

    # Remove the unwanted column; errors='ignore' makes this a no-op when
    # the column is absent.
    df = df.drop(columns=['CO2 Release from Fermentation'], errors='ignore')

    # Display the first few rows to verify data
    print("\nPreview of the uploaded data:")
    display(df.head())

    # Print all column names to make sure we're using the correct ones
    print("\nAll available columns in the dataset:")
    for col in df.columns:
        print(f"- {col}")

    return df

def create_waterfall_chart(df):
    """
    Plot the per-process water contributions and export the figure.

    Despite the name, the figure is a signed bar chart: one bar per process,
    drawn from zero, red for values >= 0 and blue for negative values. Only
    the first row of ``df`` is plotted. The figure is written to
    'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'
    and both files are passed to ``files.download``.

    Side effect: matplotlib's global ``rcParams`` are updated with the
    styling used for the chart and stay changed after the call.

    Parameters:
    df (pandas.DataFrame): DataFrame containing water consumption data. It
        must have 'Total' and 'Unit' columns; every column other than
        'Impact category', 'Unit' and 'Total' is treated as a process.

    Returns:
    bool: True when both files were saved and handed over for download,
        False when the input is unusable or saving/downloading failed.
    """
    # Validate the input before touching any global matplotlib state.
    if df.empty:
        print("The dataframe is empty. Please check your CSV file.")
        return False

    # 'Total' and 'Unit' feed the annotation and the axis label below, so
    # their absence is reported here rather than surfacing as a KeyError.
    missing_columns = [col for col in ('Total', 'Unit') if col not in df.columns]
    if missing_columns:
        print(f"Error: The following columns are missing from the dataframe: {missing_columns}")
        print("Available columns: ", df.columns.tolist())
        return False

    # Every remaining column is one process contribution
    process_columns = [col for col in df.columns
                       if col not in ('Impact category', 'Unit', 'Total')]
    if not process_columns:
        print("Error: No process columns found in the dataframe.")
        print("Available columns: ", df.columns.tolist())
        return False

    # Shorter display names for the processes; unknown columns keep their
    # original name.
    column_to_process_name = {
        'Dilution & Agitation of Thin Stillage': 'Dilution & Agitation',
        'Drying': 'Drying',
        'Fermentation and Aeration': 'Fermentation',
        'Inoculum production for Fermentation': 'Inoculum Preparation',
        'Sterilization of Thin Stillage': 'Sterilization',
        'Water for dilution of Thin Stillage': 'Water for Dilution',
        'RO25%_Organic_Wastewater Treatment for 1 kg': 'Wastewater',
        'Water for biomass washing': 'Water for Washing',
    }

    # Set up the styling for the plot
    plt.rcParams.update({
        # Figure size and layout
        'figure.figsize': (6.5, 4.0),

        # Font settings
        'font.family': 'serif',
        'font.size': 10,
        'axes.labelsize': 10,
        'axes.titlesize': 11,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,

        # Axes settings
        'axes.spines.top': True,
        'axes.spines.right': True,
        'axes.linewidth': 0.5,

        # Grid settings
        'grid.linestyle': '--',
        'grid.alpha': 0.3,

        # Legend settings
        'legend.frameon': True,
        'legend.framealpha': 0.7,
        'legend.handlelength': 4.0,

        # Output settings
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.pad_inches': 0.05,
    })

    # Colorblind-friendly palette
    plt.rcParams['axes.prop_cycle'] = plt.cycler(color=[
        '#4C72B0', '#55A868', '#C44E52', '#8172B3',
        '#CCB974', '#64B5CD', '#AD8BC9', '#B0724E'
    ])

    # Values of the first data row
    unit = df["Unit"].values[0]
    total_water = df["Total"].values[0]
    water_values = df[process_columns].values[0]
    display_names = [column_to_process_name.get(col, col) for col in process_columns]

    # Custom coloring for water consumption/saving
    positive_color = '#C44E52'  # Red from palette
    negative_color = '#4C72B0'  # Blue from palette
    bar_colors = [positive_color if value >= 0 else negative_color
                  for value in water_values]

    png_path = 'water_consumption_waterfall.png'
    pdf_path = 'water_consumption_waterfall.pdf'

    # Keep a handle on the figure so saving and closing act on this exact
    # figure rather than on whatever pyplot considers "current".
    fig = plt.figure(figsize=(6.5, 4.0))
    try:
        # Face and edge share one color, matching Patch.set_color()
        plt.bar(display_names, water_values, color=bar_colors, edgecolor=bar_colors)

        # Empty line artists provide the two legend entries
        plt.plot([], [], color=positive_color, label='Water Consumption')
        plt.plot([], [], color=negative_color, label='Water Saving')

        # Add total water consumption as text
        plt.text(0.95, 0.95, f'Total: {total_water:.6f} {unit}',
                 transform=plt.gca().transAxes, ha='right', va='top',
                 bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Customize plot
        plt.title('Process Contributions to Water Consumption Impact ')
        plt.ylabel(f'Water Consumption ({unit})')
        plt.xlabel('Process')
        plt.xticks(rotation=45, ha='right')
        plt.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        plt.grid(axis='y', linestyle='--', alpha=0.3)
        plt.legend(loc='best')
        plt.tight_layout()

        # Save BEFORE plt.show(): inline/interactive backends may release
        # the figure once it has been shown, which would leave a later
        # savefig() writing an empty canvas.
        try:
            fig.savefig(png_path, format='png', dpi=300, bbox_inches='tight')
            fig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
        except Exception as e:
            print(f"Error saving figures: {e}")
            return False

        # Display the figure in the notebook
        plt.show()
        print("\nFigures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'")

        # Download the files; a download failure is reported separately
        # from a save failure.
        try:
            files.download(png_path)
            files.download(pdf_path)
        except Exception as e:
            print(f"Error downloading figures: {e}")
            return False

        print("Files are available for download.")
        return True
    finally:
        # Release the figure even when plotting itself raised
        plt.close(fig)

def main():
    """Upload a CSV, build the chart from it and print the outcome."""
    data = upload_csv()

    # Nothing to plot when the upload step produced no dataframe
    if data is None:
        return

    outcome = (
        "Waterfall chart created successfully!"
        if create_waterfall_chart(data)
        else "Failed to create waterfall chart."
    )
    print(outcome)

# Run the upload-and-plot workflow when this code executes as the main
# module. In a notebook cell __name__ is "__main__", so running the cell
# starts main() immediately.
if __name__ == "__main__":
    main()


Please upload your water_consumption_data.csv file...


Saving water_consumption_data.csv to water_consumption_data (1).csv
Processing file: water_consumption_data (1).csv

Preview of the uploaded data:


Unnamed: 0,Impact category,Unit,Total,Dilution & Agitation of Thin Stillage,Drying,Fermentation and Aeration,Inoculum production for Fermentation,Sterilization of Thin Stillage,Water for dilution of Thin Stillage,RO25%_Organic_Wastewater Treatment for 1 kg,Water for biomass washing
0,Water consumption,m3,0.272979,6.4e-05,0.077654,0.183299,0.002412,0.013039,0.064122,-0.188174,0.120563



All available columns in the dataset:
- Impact category
- Unit
- Total
- Dilution & Agitation of Thin Stillage
- Drying
- Fermentation and Aeration
- Inoculum production for Fermentation
- Sterilization of Thin Stillage
- Water for dilution of Thin Stillage
- RO25%_Organic_Wastewater Treatment for 1 kg
- Water for biomass washing

Figures saved successfully as 'water_consumption_waterfall.png' and 'water_consumption_waterfall.pdf'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Files are available for download.
Waterfall chart created successfully!
