# ESP Pump Performance Analysis (Colab)
This notebook walks through loading ESP pump coefficients, computing performance curves across frequencies, and analyzing operating points interactively in Google Colab.

**Learning Objectives:**
- Understand how pump performance curves are generated using polynomial coefficients and affinity laws.
- Learn to mount and access data from Google Drive in Colab.
- Practice interactive visualization with Plotly and ipywidgets.
- Implement operating point calculations (Head↔Flow).

In [None]:
# Mount Google Drive at /content/drive so dataset files stored in your Drive
# account can be read by later cells (the CSV path used below lives under
# /content/drive/MyDrive).
# You will be prompted to authorize access during execution.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install/upgrade the packages used for data analysis and interactive plotting.
# The leading "!" is an IPython shell escape, so this line only runs inside a
# notebook, not as plain Python.
# NOTE(review): upgrading packages that are already loaded in the runtime
# (e.g. pandas) may require a runtime restart to take effect — confirm.
!pip install --upgrade plotly pandas ipywidgets jupyter notebook --quiet

# Verify Plotly installation by printing the installed version
import plotly
print(f"Plotly version: {plotly.__version__}")

# Enable Colab's custom widget manager so the ipywidgets controls created
# later in this notebook can render
from google.colab import output
output.enable_custom_widget_manager()

In [None]:
# Data-handling libraries used throughout the notebook
import pandas as pd
import numpy as np
# Plotly for interactive visualization
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.io as pio

# Select Plotly's 'colab' renderer as the default so fig.show() targets
# the Google Colab output area
pio.renderers.default = 'colab'
# Initialize Plotly's notebook mode
# NOTE(review): confirm this call is still required alongside the 'colab' renderer
pyo.init_notebook_mode(connected=True)

# ipywidgets provides the interactive controls; display() renders them
import ipywidgets as widgets
from IPython.display import display

In [None]:
# Report which GPU/TPU devices TensorFlow can see in this Colab runtime.
# This cell is informational only: it prints the device lists and stores
# nothing that the other visible cells read.
# NOTE(review): TensorFlow is not used by the pump calculations shown in this
# notebook, so this cell can presumably be skipped — confirm.
import tensorflow as tf
print('GPUs:', tf.config.list_physical_devices('GPU'))
print('TPUs:', tf.config.list_logical_devices('TPU'))

## Load Dataset from Drive
Load pump coefficient CSV from your Drive mount. Update the path as needed.

In [None]:
# Location of the pump coefficient table on the mounted Drive.
# Adjust this path to match your own Drive folder structure.
# Expected columns: model, base_frequency, freq_start, freq_end, freq_step,
# flowrate_start, flowrate_end, ror_low, ror_high, bep_flowrate, c0, c1, c2, c3, c4, c5
csv_path = '/content/drive/MyDrive/ESPPump_Coefficients_limited.csv'

import os

if os.path.exists(csv_path):
    # Real data is available: read it as-is.
    df = pd.read_csv(csv_path)
else:
    # No CSV at that path (e.g. local testing without Drive): fall back to a
    # two-pump sample table with the same column layout.
    sample_data = {
        'model': ['PUMP_MODEL_1', 'PUMP_MODEL_2'],
        # Frequency configuration (Hz); curves are generated every 10 Hz
        'base_frequency': [60.0, 60.0],
        'freq_start': [30.0, 30.0],
        'freq_end': [70.0, 70.0],
        'freq_step': [10.0, 10.0],
        # Flow range of the base curve
        'flowrate_start': [0, 0],
        'flowrate_end': [500, 400],
        # Recommended operating range and best-efficiency flowrate
        'ror_low': [100, 80],
        'ror_high': [400, 320],
        'bep_flowrate': [250, 200],
        # Head polynomial coefficients, constant term first
        'c0': [500.0, 400.0],
        'c1': [-2.0, -1.8],
        'c2': [0.01, 0.012],
        'c3': [-0.0001, -0.00012],
        'c4': [0.0000001, 0.00000012],
        'c5': [0.0, 0.0],
    }
    df = pd.DataFrame(sample_data)
    print("⚠️ Using sample pump data. Upload actual ESPPump_Coefficients_limited.csv to Google Drive for real data.")

# Model names may be wrapped in single or double quotes; remove them
df['model'] = df['model'].str.strip("'\"")
df.head()

## Data Preprocessing
Convert the configuration columns to numeric types and inspect the resulting column dtypes.

In [None]:
# Convert every column used in the calculations to a numeric dtype.
# The polynomial coefficients c0-c5 are included: they are passed to
# np.polyval / np.roots by the helper functions, so a coefficient column
# that was read as text would otherwise only fail later, inside the
# curve computation. pd.to_numeric raises here if a value cannot be parsed.
cols = ['base_frequency','freq_start','freq_end','freq_step',
        'flowrate_start','flowrate_end','ror_low','ror_high','bep_flowrate',
        'c0','c1','c2','c3','c4','c5']
df[cols] = df[cols].apply(pd.to_numeric)
# Show the resulting dtypes as the cell output
df.dtypes

## Define Helper Functions
Polynomial-based performance curves with affinity-law scaling. The operating-point solvers are defined in a later cell.

In [None]:
def compute_curves(config, stages=1):
    """Generate head-vs-flow pump curves for every frequency in the configured range.

    The base-frequency curve is the polynomial whose coefficients are stored
    in ``config['coeffs']`` in ascending-power order (constant term first).
    It is sampled from ``config['flow_start']`` up to the first flow at which
    the head reaches zero, so each curve ends exactly on the flow axis. If
    the polynomial has no real zero beyond ``flow_start``, the curve is
    sampled up to ``1.5 * config['flow_end']`` instead. Samples with negative
    head are dropped.

    Each frequency ``f`` is then obtained from the base curve with the
    affinity laws: flow is multiplied by ``f / base_freq`` and head by
    ``(f / base_freq) ** 2 * stages``.

    Args:
        config: Mapping with keys ``coeffs``, ``base_freq``, ``freq_start``,
            ``freq_end``, ``flow_start``, ``flow_end`` and optionally
            ``freq_step`` (defaults to 10).
        stages: Multiplier applied to the head of a single stage.

    Returns:
        A DataFrame with columns ``freq``, ``flow`` and ``head``; it has the
        same columns but no rows when the frequency range is empty.
    """
    freq_step = config.get('freq_step', 10)  # default to 10 Hz if not specified
    freqs = np.arange(config['freq_start'], config['freq_end'] + 1, freq_step)

    # np.polyval / np.roots expect the highest power first.
    poly = np.asarray(config['coeffs'], dtype=float)[::-1]
    flow_start = config['flow_start']

    # The base curve does not depend on frequency, so build it once.
    # Only the FIRST zero-head crossing is meaningful: beyond it a
    # high-order fit can swing positive again, and following it to a later
    # root would add a spurious second lobe to the plotted curve.
    zero_head_flows = [
        r.real for r in np.roots(poly)
        if abs(r.imag) < 1e-10 and r.real > flow_start
    ]
    if zero_head_flows:
        base_flows = np.linspace(flow_start, min(zero_head_flows), 150)
        base_heads = np.polyval(poly, base_flows)
        # The last sample sits on the root; pin it to exactly zero so
        # rounding error cannot push it below the axis and get it dropped.
        base_heads[-1] = 0.0
    else:
        base_flows = np.linspace(flow_start, config['flow_end'] * 1.5, 150)
        base_heads = np.polyval(poly, base_flows)

    # Only keep points with non-negative head.
    keep = base_heads >= 0
    base_flows = base_flows[keep]
    base_heads = base_heads[keep]

    freq_parts, flow_parts, head_parts = [], [], []
    for f in freqs:
        scale = f / config['base_freq']
        freq_parts.append(np.full(base_flows.shape, f))
        flow_parts.append(base_flows * scale)
        head_parts.append(base_heads * scale**2 * stages)

    if not freq_parts:
        return pd.DataFrame(columns=['freq', 'flow', 'head'])

    return pd.DataFrame({
        'freq': np.concatenate(freq_parts),
        'flow': np.concatenate(flow_parts),
        'head': np.concatenate(head_parts),
    })

def compute_ror_curves(config, stages=1):
    """Build the ROR boundary points and BEP points for each frequency.

    For every frequency in ``arange(freq_start, freq_end + 1, freq_step)``
    (``freq_step`` defaults to 10) the base-frequency flowrates
    ``ror_low``, ``ror_high`` and ``bep_flowrate`` are multiplied by
    ``f / base_freq``. The matching heads are the polynomial evaluated at
    the base flowrate, multiplied by ``(f / base_freq) ** 2 * stages``.

    Returns:
        A pair ``(ror_df, bep_df)``. ``ror_df`` has columns ``freq``,
        ``ror_low_flow``, ``ror_low_head``, ``ror_high_flow``,
        ``ror_high_head``; ``bep_df`` has ``freq``, ``bep_flow``, ``bep_head``.
    """
    def scaled_head(base_flowrate, ratio):
        # Head on the base curve, moved to the target frequency and stage count
        return np.polyval(config['coeffs'][::-1], base_flowrate) * ratio**2 * stages

    step = config.get('freq_step', 10)
    frequencies = np.arange(config['freq_start'], config['freq_end']+1, step)

    ror_rows = []
    bep_rows = []
    for freq in frequencies:
        ratio = freq/config['base_freq']

        # Flowrates scale linearly with frequency
        low_flow = config['ror_low'] * ratio
        high_flow = config['ror_high'] * ratio
        best_flow = config['bep_flowrate'] * ratio

        ror_rows.append({
            'freq': freq,
            'ror_low_flow': low_flow,
            'ror_low_head': scaled_head(config['ror_low'], ratio),
            'ror_high_flow': high_flow,
            'ror_high_head': scaled_head(config['ror_high'], ratio),
        })
        bep_rows.append({
            'freq': freq,
            'bep_flow': best_flow,
            'bep_head': scaled_head(config['bep_flowrate'], ratio),
        })

    ror_frame = pd.DataFrame(ror_rows)
    bep_frame = pd.DataFrame(bep_rows)
    return ror_frame, bep_frame

## Interactive Pump Performance Explorer
Select a pump model and specific gravity, then generate performance curves and analyze operating points.

## Understanding Thrust Zones

ESP pumps experience axial thrust forces that affect bearing life and pump reliability:

- **Downthrust**: When operating flowrate < ROR Low flowrate boundary
  - Pump stages are pushed downward
  - Can cause excessive wear on lower thrust bearings

- **Normal Operation**: When ROR Low ≤ operating flowrate ≤ ROR High
  - Balanced thrust conditions
  - Optimal pump operation within recommended envelope

- **Upthrust**: When operating flowrate > ROR High flowrate boundary
  - Pump stages are pushed upward
  - Can cause excessive wear on upper thrust bearings

The ROR (Recommended Operating Range) curves define safe operating boundaries based on thrust considerations. **Important**: Thrust zones are determined by comparing the operating flowrate with the ROR flowrate boundaries at the selected frequency, not by head comparisons.

In [None]:
def find_operating_point_flow_to_head(config, target_flow, freq, stages=1):
    """Return the head the pump curve gives at ``target_flow`` and ``freq``.

    The flow is first mapped back to the base frequency (divided by
    ``freq / base_freq``), the polynomial in ``config['coeffs']`` is
    evaluated there, and the result is multiplied by
    ``(freq / base_freq) ** 2 * stages``.
    """
    speed_ratio = freq / config['base_freq']
    flow_at_base = target_flow / speed_ratio
    # np.polyval wants the highest power first, hence the reversal
    head_at_base = np.polyval(config['coeffs'][::-1], flow_at_base)
    head_factor = speed_ratio**2
    return head_at_base * head_factor * stages

def find_operating_point_head_to_flow(config, target_head, freq, stages=1):
    """Return the flow at which the pump curve delivers ``target_head``.

    The target head is converted to a single-stage, base-frequency head by
    dividing by ``(freq / base_freq) ** 2 * stages``. The polynomial in
    ``config['coeffs']`` (ascending-power order) minus that head is then
    solved, and the smallest non-negative real root is scaled back to
    ``freq`` by multiplying with ``freq / base_freq``.

    Args:
        config: Mapping providing ``coeffs`` and ``base_freq``.
        target_head: Required head at ``freq`` for ``stages`` stages.
        freq: Operating frequency.
        stages: Number of stages.

    Returns:
        The flow at ``freq``, or ``None`` when no non-negative real solution
        exists or when ``freq`` / ``stages`` is not a positive number (the
        affinity scaling is undefined there and would divide by zero).
    """
    # Written as "not (x > 0)" so NaN inputs are rejected as well.
    if not (freq > 0 and stages > 0):
        return None

    scale = freq / config['base_freq']
    target_base_head = target_head / (scale**2 * stages)

    # Work on a float copy so the caller's coefficient sequence is never
    # modified and list, tuple or integer-array inputs all behave the same.
    poly_coeffs = np.array(config['coeffs'][::-1], dtype=float)
    poly_coeffs[-1] -= target_base_head  # roots of H(Q) - target = 0

    roots = np.roots(poly_coeffs)
    # Keep real, non-negative solutions; zero flow (shut-off) is a valid answer.
    candidate_flows = [r.real for r in roots if abs(r.imag) < 1e-10 and r.real >= 0]

    if candidate_flows:
        # Return the smallest root (normal operating point)
        return min(candidate_flows) * scale
    return None

def get_ror_at_frequency(config, freq, stages=1):
    """Return ``(ror_low, ror_high)`` flowrates scaled to ``freq``.

    Both base-frequency boundaries are multiplied by ``freq / base_freq``.
    ``stages`` is accepted for signature symmetry with the other helpers
    but does not influence the result.
    """
    ratio = freq / config['base_freq']
    return tuple(config[boundary] * ratio for boundary in ('ror_low', 'ror_high'))

def determine_thrust_zone(operating_flow, ror_low, ror_high):
    """Classify the operating flow against the ROR flow boundaries.

    Returns "Downthrust" below ``ror_low``, "Upthrust" above ``ror_high``
    and "Normal Operation" otherwise (both boundaries count as normal).
    """
    # Guard clauses: test the two out-of-range cases, fall through to normal.
    if operating_flow < ror_low:
        return "Downthrust"
    if operating_flow > ror_high:
        return "Upthrust"
    return "Normal Operation"


## Enhanced Interactive Features

The interactive explorer below includes several advanced capabilities:

### **🎛️ Custom Frequency Analysis**
- **Decimal Precision**: Enter any frequency, such as 40.5 Hz or 42.3 Hz (not just the standard `freq_step` increments defined in the CSV)
- **Dynamic Curve Generation**: If your selected frequency isn't in the standard set, the tool automatically generates a custom performance curve
- **Visual Highlighting**: Custom frequency curves are shown in bold black lines

### **⭐ Operating Point Visualization**
- **Star Marker**: Your calculated operating point appears as a distinctive red star on the plot
- **Real-time Updates**: The marker moves as you change inputs, showing exactly where your pump operates
- **Coordinates Display**: Shows precise (flow, head) values for the operating point

### **📊 Educational Outputs**
- **Thrust Zone Analysis**: Explains whether your pump experiences upthrust, downthrust, or normal operation
- **Safety Warnings**: Alerts when operating outside recommended envelopes
- **Engineering Context**: Connects calculations to real-world pump reliability concerns

**Instructions**:
1. Select pump model, specific gravity, and number of stages
2. Choose your operating frequency (can be any decimal value)
3. Select calculation mode: Head→Flow or Flow→Head  
4. Enter your known value and click "Run Analysis"
5. Observe the star marker showing your operating point relative to ROR boundaries

In [None]:
# Create the input controls read by interactive_callback below.
# Pump models come from the 'model' column of the loaded DataFrame.
model_w = widgets.Dropdown(options=df['model'].tolist(), description='Pump Model:')
# Specific gravity multiplies every plotted head value in the callback
sg_w = widgets.FloatSlider(value=1.0, min=0.5, max=2.0, step=0.1, description='Specific Gravity:')
# Free-form integer entry for the stage count (no upper or lower bound is enforced here)
stages_w = widgets.IntText(value=1, description='Stages:')  # Changed from IntSlider to IntText
# Operating frequency; any decimal value may be typed
freq_w = widgets.FloatText(value=50.0, description='Frequency (Hz):')
# Selects which quantity 'Input Value' represents: a flow (Flow → Head) or a head (Head → Flow)
calc_mode_w = widgets.Dropdown(options=['Flow → Head', 'Head → Flow'], description='Calculate:')
input_value_w = widgets.FloatText(value=100.0, description='Input Value:')
run_button = widgets.Button(description='Run Analysis', button_style='primary')

# Output widget that captures the printed results and the figure
output_widget = widgets.Output()

def interactive_callback(button):
    """Handle a click on "Run Analysis": redraw the plot and report the operating point.

    Reads the current widget values (pump model, specific gravity, stages,
    frequency, calculation mode, input value), builds the pump configuration
    from the matching row of ``df`` and renders into ``output_widget``:

    * one head-vs-flow curve per standard frequency, plus a bold black curve
      for the selected frequency when it is not one of the standard ones;
    * the ROR low/high boundary curves and the BEP curve;
    * a red star at the computed operating point, together with a printed
      summary, thrust-zone classification and out-of-range warning.

    All plotted heads are multiplied by the selected specific gravity. The
    axes use a fixed range derived from the plotted data, including the
    operating point, so the star marker is never clipped.

    Args:
        button: The clicked button, supplied by ``on_click``; not used.
    """
    with output_widget:
        output_widget.clear_output()

        # Get selected pump configuration
        pump_model = model_w.value
        specific_gravity = sg_w.value
        stages = stages_w.value
        selected_freq = freq_w.value

        # The affinity-law scaling divides by (freq/base_freq)**2 * stages, so
        # zero or negative values would crash or give a degenerate plot.
        if stages < 1 or selected_freq <= 0:
            print("❌ Stages must be at least 1 and frequency must be greater than 0 Hz")
            return

        pump_data = df[df['model'] == pump_model].iloc[0]

        config = {
            'coeffs': [pump_data[f'c{i}'] for i in range(6)],  # c0..c5, constant term first
            'base_freq': pump_data['base_frequency'],
            'freq_start': pump_data['freq_start'],
            'freq_end': pump_data['freq_end'],
            'freq_step': pump_data['freq_step'],
            'flow_start': pump_data['flowrate_start'],
            'flow_end': pump_data['flowrate_end'],
            'ror_low': pump_data['ror_low'],
            'ror_high': pump_data['ror_high'],
            'bep_flowrate': pump_data['bep_flowrate']
        }

        # Generate performance curves
        pump_curves = compute_curves(config, stages)

        # Generate ROR and BEP curves
        ror_curves, bep_curves = compute_ror_curves(config, stages)

        # Create plot
        fig = go.Figure()

        # Get unique frequencies from curves for consistent formatting
        unique_freqs = sorted(pump_curves['freq'].unique())

        # Add performance curves for each frequency
        for freq in unique_freqs:
            freq_data = pump_curves[pump_curves['freq'] == freq]
            fig.add_trace(go.Scatter(
                x=freq_data['flow'],
                y=freq_data['head'] * specific_gravity,
                mode='lines',
                name=f'{freq:.2f} Hz',
                line=dict(width=2),
                hovertemplate=f'<b>{freq:.2f} Hz</b><br>Flow: %{{x:.2f}}<br>Head: %{{y:.2f}} psi<extra></extra>'
            ))

        # Add ROR boundary curves (one point per frequency, so they are curves, not straight lines)
        fig.add_trace(go.Scatter(
            x=ror_curves['ror_low_flow'],
            y=ror_curves['ror_low_head'] * specific_gravity,
            mode='lines+markers',
            name='ROR Low',
            line=dict(dash='dash', color='orange', width=2),
            marker=dict(size=4),
            hovertemplate='<b>ROR Low</b><br>Flow: %{x:.2f}<br>Head: %{y:.2f} psi<extra></extra>'
        ))

        fig.add_trace(go.Scatter(
            x=ror_curves['ror_high_flow'],
            y=ror_curves['ror_high_head'] * specific_gravity,
            mode='lines+markers',
            name='ROR High',
            line=dict(dash='dash', color='red', width=2),
            marker=dict(size=4),
            hovertemplate='<b>ROR High</b><br>Flow: %{x:.2f}<br>Head: %{y:.2f} psi<extra></extra>'
        ))

        # Add BEP (Best Efficiency Point) curve
        fig.add_trace(go.Scatter(
            x=bep_curves['bep_flow'],
            y=bep_curves['bep_head'] * specific_gravity,
            mode='lines+markers',
            name='BEP',
            line=dict(dash='dot', color='green', width=3),
            marker=dict(size=6, symbol='diamond'),
            hovertemplate='<b>Best Efficiency Point</b><br>Flow: %{x:.2f}<br>Head: %{y:.2f} psi<extra></extra>'
        ))

        # Handle custom frequency if not in standard set (decided once, reused below)
        is_custom_freq = selected_freq not in unique_freqs
        custom_curve = None
        if is_custom_freq:
            # Generate a single-frequency curve by collapsing the range to the selection
            custom_config = config.copy()
            custom_config['freq_start'] = selected_freq
            custom_config['freq_end'] = selected_freq
            custom_config['freq_step'] = 1  # Single frequency
            custom_curve = compute_curves(custom_config, stages)

            fig.add_trace(go.Scatter(
                x=custom_curve['flow'],
                y=custom_curve['head'] * specific_gravity,
                mode='lines',
                name=f'{selected_freq:.2f} Hz (Custom)',
                line=dict(width=4, color='black'),
                hovertemplate=f'<b>{selected_freq:.2f} Hz (Custom)</b><br>Flow: %{{x:.2f}}<br>Head: %{{y:.2f}} psi<extra></extra>'
            ))

        # Calculate the operating point
        input_val = input_value_w.value
        calc_mode = calc_mode_w.value

        if calc_mode == 'Flow → Head':
            operating_flow = input_val
            operating_head = find_operating_point_flow_to_head(config, operating_flow, selected_freq, stages)
            operating_head *= specific_gravity
        else:  # Head → Flow
            target_head = input_val / specific_gravity  # Convert back to water basis
            operating_flow = find_operating_point_head_to_flow(config, target_head, selected_freq, stages)
            operating_head = input_val

        # A negative flow or head is not a point on the plotted curves (which
        # only contain non-negative heads), so treat it as "no solution".
        has_operating_point = (
            operating_flow is not None
            and operating_head is not None
            and operating_flow >= 0
            and operating_head >= 0
        )

        # Calculate dynamic axis ranges based on all plotted data
        all_flows = pump_curves['flow'].tolist()
        all_heads = (pump_curves['head'] * specific_gravity).tolist()

        # Add ROR and BEP data to range calculation
        all_flows.extend(ror_curves['ror_low_flow'].tolist())
        all_flows.extend(ror_curves['ror_high_flow'].tolist())
        all_flows.extend(bep_curves['bep_flow'].tolist())

        all_heads.extend((ror_curves['ror_low_head'] * specific_gravity).tolist())
        all_heads.extend((ror_curves['ror_high_head'] * specific_gravity).tolist())
        all_heads.extend((bep_curves['bep_head'] * specific_gravity).tolist())

        # Add custom frequency data if it exists
        if custom_curve is not None:
            all_flows.extend(custom_curve['flow'].tolist())
            all_heads.extend((custom_curve['head'] * specific_gravity).tolist())

        # Autorange is disabled below, so the operating point must be part of
        # the range calculation or the star marker could fall outside the plot.
        if has_operating_point:
            all_flows.append(operating_flow)
            all_heads.append(operating_head)

        # Calculate ranges with some padding
        flow_min, flow_max = 0, max(all_flows) * 1.1  # Start from 0, add 10% padding
        head_min, head_max = 0, max(all_heads) * 1.1  # Start from 0, add 10% padding

        if has_operating_point:
            # Add operating point marker
            fig.add_trace(go.Scatter(
                x=[operating_flow], y=[operating_head],
                mode='markers',
                marker=dict(symbol='star', size=15, color='red'),
                name='Operating Point',
                hovertemplate=f'<b>Operating Point</b><br>Flow: {operating_flow:.2f}<br>Head: {operating_head:.2f} psi<extra></extra>'
            ))

            # Determine thrust zone from flow relative to the ROR boundaries at this frequency
            ror_low_freq, ror_high_freq = get_ror_at_frequency(config, selected_freq, stages)
            thrust_zone = determine_thrust_zone(operating_flow, ror_low_freq, ror_high_freq)

            print(f"Operating Point: Flow = {operating_flow:.2f}, Head = {operating_head:.2f} psi")
            print(f"Thrust Zone: {thrust_zone}")

            if thrust_zone != "Normal Operation":
                print("⚠️  WARNING: Operating outside recommended range!")
        else:
            print("❌ Could not find valid operating point for given inputs")

        # Format and display the plot with axis-as-frame styling
        fig.update_layout(
            title=f'ESP Performance Analysis – {pump_model} ({stages} stages)',
            xaxis_title='Flow Rate (units from CSV)',
            yaxis_title='Head (psi)',
            plot_bgcolor='white',
            # Minimize margins for cleanest appearance
            margin=dict(l=50, r=10, t=50, b=50),
            # Axis lines serve as plot boundaries - no separate frame
            xaxis=dict(
                showgrid=True, gridwidth=1, gridcolor='lightgray',
                showline=True, linewidth=1, linecolor='gray',
                range=[flow_min, flow_max],  # Use dynamic range based on data
                zeroline=False,
                mirror=False,
                ticks='outside',
                minor=dict(ticks='inside', ticklen=3, showgrid=False),
                autorange=False
            ),
            yaxis=dict(
                showgrid=True, gridwidth=1, gridcolor='lightgray',
                showline=True, linewidth=1, linecolor='gray',
                range=[head_min, head_max],  # Use dynamic range based on data
                zeroline=False,
                mirror=False,
                ticks='outside',
                minor=dict(ticks='inside', ticklen=3, showgrid=False),
                autorange=False
            ),
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="left", x=1.02)
        )

        # Display the plot in Google Colab using the configured default renderer
        fig.show()

# Run interactive_callback every time the "Run Analysis" button is clicked
run_button.on_click(interactive_callback)

# Arrange the controls as three rows of paired inputs with the button underneath
controls = widgets.VBox([
    widgets.HBox([model_w, sg_w]),
    widgets.HBox([stages_w, freq_w]),
    widgets.HBox([calc_mode_w, input_value_w]),
    run_button
])

# Display the controls followed by the output area that the callback writes into
display(controls, output_widget)