# Interactive Well Test Analysis and IPR Optimization

This Colab notebook adapts the `WellTestOptimizer` application for an interactive classroom environment.
It includes setup instructions, the core optimizer class (GUI removed), data loading guidance, example runs, and visualization.

## 1) Setup: Install dependencies

Run the cell below to install required packages in Colab. If running locally, install the same packages into your environment.

In [None]:
# Install runtime dependencies (Colab) - uncomment if needed
# !pip install pandas numpy matplotlib scipy openpyxl

## 2) Imports

Import the libraries we will use. Colab already has many preinstalled packages; install extras above if necessary.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from datetime import datetime
import io, os

## 3) Core optimizer class (adapted from `well_test_optimizer.py`)

The GUI components are removed. The class provides: `calculate_ipr`, `calculate_ql_max`, `objective_function`, `optimize_group`, `process_data`, and `generate_ipr_curve`.

In [None]:
class WellTestOptimizer:
    """Well Test Data Optimizer for PI and SBHP (Notebook-friendly).

    Fits a static bottom-hole pressure (SBHP) and a productivity index (PI)
    to consecutive groups of well tests, using a composite linear / Vogel
    inflow performance relationship (IPR).

    Attributes:
        data: Date-sorted copy of the last DataFrame passed to
            ``process_data`` (``None`` before the first call).
        results: Result dicts of the groups that were fitted successfully.
        failures: One dict per skipped group (``group_id``, ``start_date``,
            ``end_date``, ``message``) so rejected groups stay visible.
    """

    # Hard limits of the parameter search window used by ``optimize_group``.
    SBHP_MAX = 8000.0
    PI_MIN = 0.01
    PI_MAX = 50.0

    def __init__(self):
        self.data = None
        self.results = []
        self.failures = []

    def calculate_ipr(self, fbhp, sbhp, pbub, pi):
        """Return the liquid rate predicted by the IPR model for one point.

        Args:
            fbhp: Flowing bottom-hole pressure.
            sbhp: Static bottom-hole pressure.
            pbub: Bubble-point pressure.
            pi: Productivity index.

        Returns:
            The rate as a non-negative float; ``0.0`` when ``fbhp > sbhp``.
            Negative inputs are treated as ``0.0``.
        """
        # enforce non-negative inputs
        fbhp = max(0.0, float(fbhp))
        sbhp = max(0.0, float(sbhp))
        pbub = max(0.0, float(pbub))
        pi = max(0.0, float(pi))

        # Physical: if FBHP > SBHP, no production
        if fbhp > sbhp:
            return 0.0

        # Case 1: Linear PI (single-phase) - FBHP >= Pbub and SBHP > Pbub
        if (fbhp >= pbub) and (sbhp > pbub):
            ql = pi * (sbhp - fbhp)
        # Case 2: Modified Vogel (unsaturated reservoir) - FBHP < Pbub and SBHP > Pbub
        elif (fbhp < pbub) and (sbhp > pbub):
            # Linear contribution down to the bubble point, Vogel term below it.
            ql_at_pbub = pi * (sbhp - pbub)
            if pbub > 0.0:
                fbhp_pbub_ratio = fbhp / pbub
                vogel_component = (pi * pbub / 1.8) * (1 - 0.2 * fbhp_pbub_ratio - 0.8 * fbhp_pbub_ratio**2)
                ql = ql_at_pbub + vogel_component
            else:
                ql = ql_at_pbub
        # Case 3: Standard Vogel (saturated reservoir) - SBHP <= Pbub
        else:
            if sbhp > 0.0:
                fbhp_sbhp_ratio = fbhp / sbhp
                ql = (pi * sbhp / 1.8) * (1 - 0.2 * fbhp_sbhp_ratio - 0.8 * fbhp_sbhp_ratio**2)
            else:
                ql = 0.0

        return max(0.0, ql)

    def calculate_ql_max(self, sbhp, pbub, pi):
        """Return the model rate at zero flowing pressure."""
        return self.calculate_ipr(0.0, sbhp, pbub, pi)

    def objective_function(self, params, fbhp_data, q_actual, pbub):
        """Return the sum of squared rate errors for ``params = (sbhp, pi)``.

        Non-physical parameters (non-positive values, or an SBHP that does
        not exceed every measured FBHP) return a flat penalty of ``1e12``;
        ``sbhp > 15000`` or ``pi > 1000`` adds ``1e8`` to the error.
        """
        sbhp, pi = params
        # penalty for non-physical parameters
        if sbhp <= 0 or pi <= 0:
            return 1e12
        max_fbhp = np.max(fbhp_data)
        if sbhp <= max_fbhp:
            return 1e12

        q_calculated = np.array([self.calculate_ipr(f, sbhp, pbub, pi) for f in fbhp_data])
        # calculate_ipr clamps its result at zero; this only guards overrides.
        if np.any(q_calculated < 0):
            return 1e12

        sse = np.sum((q_actual - q_calculated)**2)
        # soft penalty for extreme values
        if sbhp > 15000 or pi > 1000:
            sse += 1e8
        return sse

    def optimize_group(self, group_data, pbub, sbhp_guess, pi_guess):
        """Fit SBHP and PI to one group of tests with bounded L-BFGS-B.

        Args:
            group_data: DataFrame with ``Pwf``, ``Total Rate`` and
                ``Test Date`` columns.
            pbub: Bubble-point pressure passed to the IPR model.
            sbhp_guess: Initial SBHP guess (moved into the search window).
            pi_guess: Initial PI guess (moved into the search window).

        Returns:
            On success a dict with ``success=True`` plus ``sbhp``, ``pi``,
            ``r_squared``, ``rmse``, ``fit_quality``, ``sse``, ``q_actual``,
            ``q_calculated``, ``fbhp`` and ``dates``. Otherwise
            ``{'success': False, 'message': ...}``.
        """
        try:
            fbhp_data = np.asarray(group_data['Pwf'].values, dtype=float)
            q_actual = np.asarray(group_data['Total Rate'].values, dtype=float)
        except (TypeError, ValueError) as e:
            return {'success': False, 'message': f'Non-numeric data values detected: {e}'}

        # data checks
        if fbhp_data.size == 0:
            return {'success': False, 'message': 'No data points in group'}
        # NaN/inf compare False against 0, so they need their own check.
        if not (np.all(np.isfinite(fbhp_data)) and np.all(np.isfinite(q_actual))):
            return {'success': False, 'message': 'Non-finite data values detected'}
        if np.any(fbhp_data <= 0) or np.any(q_actual <= 0):
            return {'success': False, 'message': 'Non-positive data values detected'}

        fbhp_rate_correlation = 0.0
        if len(fbhp_data) > 1:
            # A constant column gives NaN (0/0); silence the warning and
            # treat it as "no correlation" below.
            with np.errstate(divide='ignore', invalid='ignore'):
                fbhp_rate_correlation = np.corrcoef(fbhp_data, q_actual)[0, 1]
            if np.isnan(fbhp_rate_correlation):
                fbhp_rate_correlation = 0.0
        if fbhp_rate_correlation > 0.3:
            return {'success': False, 'message': f'Data quality issue: positive FBHP-rate correlation ({fbhp_rate_correlation:.3f}).'}

        max_fbhp = np.max(fbhp_data)
        sbhp_lower = max_fbhp + 100
        sbhp_upper = min(self.SBHP_MAX, max_fbhp + 3000.0)
        if sbhp_lower > sbhp_upper:
            return {'success': False, 'message': 'Maximum FBHP is too high for the SBHP search window'}

        rate_range = np.max(q_actual) - np.min(q_actual)
        fbhp_range = np.max(fbhp_data) - np.min(fbhp_data)
        estimated_pi = (rate_range / fbhp_range) if fbhp_range > 10 else 1.0
        pi_lower = max(self.PI_MIN, estimated_pi * 0.1)
        pi_upper = min(self.PI_MAX, estimated_pi * 10)
        if pi_upper <= pi_lower:
            # The slope estimate is outside the usable range (e.g. identical
            # rates give 0), which would produce an empty or inverted window.
            pi_lower, pi_upper = self.PI_MIN, self.PI_MAX

        if sbhp_upper - sbhp_lower > 200:
            sbhp_guess = float(np.clip(sbhp_guess, sbhp_lower + 100, sbhp_upper - 100))
        else:
            # Window too narrow for the 100 psi inset: start from its centre
            # so the initial guess stays inside the bounds.
            sbhp_guess = float(0.5 * (sbhp_lower + sbhp_upper))
        pi_guess = float(np.clip(pi_guess, pi_lower, pi_upper))

        initial_guess = [sbhp_guess, pi_guess]
        bounds = [(sbhp_lower, sbhp_upper), (pi_lower, pi_upper)]

        try:
            result = minimize(self.objective_function, initial_guess, args=(fbhp_data, q_actual, pbub), method='L-BFGS-B', bounds=bounds)
            if not result.success:
                return {'success': False, 'message': result.message}

            sbhp_opt, pi_opt = result.x
            q_calculated = np.array([self.calculate_ipr(f, sbhp_opt, pbub, pi_opt) for f in fbhp_data])
            ss_res = np.sum((q_actual - q_calculated)**2)
            ss_tot = np.sum((q_actual - np.mean(q_actual))**2)
            r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0
            rmse = np.sqrt(ss_res / len(q_actual))
            if r_squared >= 0.8:
                fit_quality = 'excellent'
            elif r_squared >= 0.5:
                fit_quality = 'good'
            elif r_squared >= 0.0:
                fit_quality = 'poor'
            else:
                fit_quality = 'very_poor'
            return {
                'sbhp': float(sbhp_opt), 'pi': float(pi_opt), 'r_squared': float(r_squared), 'rmse': float(rmse),
                'fit_quality': fit_quality, 'sse': float(result.fun), 'success': True,
                'q_actual': q_actual.tolist(), 'q_calculated': q_calculated.tolist(), 'fbhp': fbhp_data.tolist(),
                'dates': group_data['Test Date'].values,
            }
        except Exception as e:
            # Report optimizer/lookup errors as a failed group rather than
            # aborting the whole run.
            return {'success': False, 'message': str(e)}

    def process_data(self, data, pbub, sbhp_guess, pi_guess, group_size=5):
        """Sort ``data`` by test date and fit each consecutive group.

        Rows are split into ``len(data) // group_size`` groups; trailing rows
        that do not fill a complete group are not used.

        Args:
            data: DataFrame with ``Test Date``, ``Pwf`` and ``Total Rate``.
            pbub: Bubble-point pressure passed to the IPR model.
            sbhp_guess: Initial SBHP guess used for every group.
            pi_guess: Initial PI guess used for every group.
            group_size: Number of consecutive tests per group (>= 1).

        Returns:
            ``self.results``: the successful fits, each extended with
            ``group_id``, ``start_date`` and ``end_date``. Skipped groups are
            recorded in ``self.failures``.

        Raises:
            ValueError: If ``group_size`` is smaller than 1.
        """
        if group_size < 1:
            raise ValueError('group_size must be a positive integer')

        data_sorted = data.sort_values('Test Date').reset_index(drop=True)
        self.data = data_sorted
        self.results = []
        self.failures = []
        num_groups = len(data_sorted) // group_size
        for i in range(num_groups):
            start_idx = i * group_size
            end_idx = start_idx + group_size
            group_data = data_sorted.iloc[start_idx:end_idx]
            res = self.optimize_group(group_data, pbub, sbhp_guess, pi_guess)
            start_date = group_data['Test Date'].min()
            end_date = group_data['Test Date'].max()
            if res.get('success'):
                res['group_id'] = i + 1
                res['start_date'] = start_date
                res['end_date'] = end_date
                self.results.append(res)
            else:
                self.failures.append({
                    'group_id': i + 1,
                    'start_date': start_date,
                    'end_date': end_date,
                    'message': res.get('message', ''),
                })
        return self.results

    def generate_ipr_curve(self, sbhp, pbub, pi):
        """Return ``(fbhp, rate)`` arrays of 100 points from 0 to ``sbhp``."""
        fbhp_range = np.linspace(0.0, float(sbhp), 100)
        q_range = np.array([self.calculate_ipr(f, sbhp, pbub, pi) for f in fbhp_range])
        return fbhp_range, q_range

# End of class

In [None]:
# Exercise starter: count how many groups are fitted successfully for several
# group sizes. pbub and the initial guesses are held fixed here; extend the
# loop if you also want to sweep pbub.
if 'df' not in globals():
    print('Upload the CSV (Test-1 welltestdata.csv) and run the data load cell first.')
else:
    import pprint

    sweep_results = []
    for group_size_test in (3, 5, 10):
        # Fresh optimizer per run so results from one size never mix with another.
        optimizer = WellTestOptimizer()
        res = optimizer.process_data(
            df,
            pbub=4000.0,
            sbhp_guess=3500.0,
            pi_guess=1.0,
            group_size=group_size_test,
        )
        sweep_results.append({'group_size': group_size_test, 'num_results': len(res)})
    pprint.pprint(sweep_results)

## Classroom Exercises

These short exercises are designed for a 50–90 minute classroom session. Instructors: assign 1–3 per student/group.

1) Sensitivity Analysis: Vary `group_size` (try 3,5,10) and observe how `SBHP` and `PI` trends change. Describe why results are more/less stable.

2) Bubble Point Effect: Change `pbub` (e.g., 3000, 4000, 5000 psi) and explain how the IPR curve shapes and fit metrics change for a selected group.

3) Data Quality Investigation: Intentionally remove or perturb one data point (e.g., change a `Pwf` to be higher than others) and run the optimizer. Report warnings and R² behavior.

4) Cross-Comparison: Run the optimizer on two different wells/datasets and compare parameter trends and fit qualities.

Each exercise should include: hypothesis, steps, results (figures/table), and interpretation in one paragraph.

## Data Format Requirements

Required CSV columns:
- `Test Date` (any pandas-parseable date)
- `Pwf` (Flowing Bottom Hole Pressure, psi)
- `Total Rate` (Liquid production rate, bbl/day)

Data rules: no missing values in required columns, all values > 0, chronological order preferred (the notebook sorts by date).

## Quick Start Guide for Students

1. Upload `Test-1 welltestdata.csv` using the Colab Files pane.
2. Run the *Imports* cell (section 2) and the *Core optimizer* cell (section 3).
3. Run the data load cell (section 4) to verify the CSV loads.
4. Run the optimization cell (section 5) and then the visualizations (section 6).

Tips: Try changing `group_size`, `pbub`, and initial guesses to see how results change.

## Installation & Requirements

Recommended packages (Colab already has most): pandas, numpy, matplotlib, scipy, openpyxl.
If running locally: `pip install pandas numpy matplotlib scipy openpyxl`.

System: Python 3.7+, 4GB RAM minimum.

## 4) Load example data

Upload `Test-1 welltestdata.csv` to Colab (Files pane → Upload). If running locally, ensure the file is in the working directory. The cell below attempts to load that filename.

In [None]:
# Colab helper: mount Google Drive and copy the example CSV into the session.
# Any error (for example google.colab not being importable) is reported with a
# message instead of being raised.
try:
    from google.colab import drive
    import shutil

    drive.mount('/content/drive')
    drive_src = '/content/drive/My Drive/ColabNotebooks/Test-1 welltestdata.csv'
    if not os.path.exists(drive_src):
        print('File not found at', drive_src, '\nPlace the CSV in Drive under "My Drive/ColabNotebooks/" or upload via Files pane.')
    else:
        # Copy next to the notebook so the load cell finds it by bare filename.
        shutil.copy(drive_src, './')
        print('Copied Test-1 welltestdata.csv from Drive to session directory.')
except Exception as e:
    print('Drive mount not available in this environment or error occurred:', e)

In [None]:
# Load the example CSV from the working directory when it is present
csv_path = 'Test-1 welltestdata.csv'
if not os.path.exists(csv_path):
    print(f'File not found: {csv_path} -- upload it to the Colab session or drive and update the path.')
else:
    # Parse the date column on read, then order the tests chronologically.
    df = (
        pd.read_csv(csv_path, parse_dates=['Test Date'])
        .sort_values('Test Date')
        .reset_index(drop=True)
    )
    print(f'Loaded {len(df)} records from {csv_path}')
    display(df.head())

## 5) Run optimization (example)

Set parameters and run the optimizer. The example uses the uploaded `Test-1 welltestdata.csv` file.

In [None]:
# Parameters (tweak as needed)
pbub = 4000.0        # bubble-point pressure
sbhp_guess = 3500.0  # initial guess for SBHP
pi_guess = 1.0       # initial guess for PI
group_size = 5       # number of consecutive tests fitted together

# The optimizer needs the DataFrame created by the data load cell
if 'df' not in globals():
    print('No data loaded; please upload the CSV and run this cell again.')
else:
    optimizer = WellTestOptimizer()
    results = optimizer.process_data(df, pbub, sbhp_guess, pi_guess, group_size)
    print(f'Optimization complete. Found results for {len(results)} groups.')
    # Tabulate the fits, showing the group date range as plain YYYY-MM-DD text
    if results:
        results_df = pd.DataFrame(results)
        for date_col in ('start_date', 'end_date'):
            results_df[date_col] = pd.to_datetime(results_df[date_col]).dt.strftime('%Y-%m-%d')
        summary_cols = ['group_id', 'start_date', 'end_date', 'sbhp', 'pi', 'r_squared', 'rmse', 'fit_quality']
        display(results_df[summary_cols])

## 6) Visualizations

Create the three plots: parameter trends, actual vs calculated, and IPR curves over data.

In [None]:
# Plot the optimization output: parameter trends per group, a fit cross-plot
# for the first group, and every fitted IPR curve over its measured points.
# Relies on `results`, `optimizer` and `pbub` from the optimization cell.
if 'results' in globals() and len(results) > 0:
    results_df = pd.DataFrame(results)
    # Plot SBHP and PI vs group (PI on a secondary y-axis)
    fig, ax1 = plt.subplots(figsize=(10,5))
    ax1.plot(results_df['group_id'], results_df['sbhp'], marker='o', color='tab:blue', label='SBHP')
    ax1.set_xlabel('Group')
    ax1.set_ylabel('SBHP (psi)', color='tab:blue')
    ax2 = ax1.twinx()
    ax2.plot(results_df['group_id'], results_df['pi'], marker='s', color='tab:red', label='PI')
    ax2.set_ylabel('PI (bbl/day/psi)', color='tab:red')
    # Set the title before tight_layout so the layout accounts for it.
    ax1.set_title('Optimized Parameters by Group')
    fig.tight_layout()
    plt.show()

    # Actual vs Calculated for first group
    selected = results[0]
    q_act = np.array(selected['q_actual'])
    q_calc = np.array(selected['q_calculated'])
    plt.figure(figsize=(6,6))
    plt.scatter(q_act, q_calc, alpha=0.7, edgecolor='k')
    # 1:1 reference line spanning both data sets
    mn = min(q_act.min(), q_calc.min())
    mx = max(q_act.max(), q_calc.max())
    plt.plot([mn,mx],[mn,mx],'r--')
    plt.xlabel('Actual Rate')
    plt.ylabel('Calculated Rate')
    plt.title(f"Actual vs Calculated (Group {selected['group_id']}) R²={selected['r_squared']:.3f}")
    plt.grid(True)
    plt.show()

    # IPR curves overlay: one colour per group, measured points in the same colour
    plt.figure(figsize=(10,6))
    colors = plt.cm.viridis(np.linspace(0,1,len(results)))
    for i,res in enumerate(results):
        fb, ql = optimizer.generate_ipr_curve(res['sbhp'], pbub, res['pi'])
        plt.plot(ql, fb, color=colors[i], label=f"Group {res['group_id']} IPR")
        plt.scatter(res['q_actual'], res['fbhp'], color=colors[i], edgecolor='k', alpha=0.7)
    plt.xlabel('Liquid Flow Rate (bbl/day)')
    plt.ylabel('FBHP (psi)')
    plt.title('IPR Curves with Measured Data')
    plt.legend()
    plt.grid(True)
    plt.show()
else:
    print('No optimization results to plot. Run the optimization cell first.')

## 7) Exporting results (optional)

You can export `results_df` to Excel with `DataFrame.to_excel`, as in the cell below (writing `.xlsx` files requires `openpyxl`). In Colab, save to Drive or download the file.

In [None]:
# Example: write the results table to an Excel workbook in the working directory
if 'results_df' not in globals():
    print('No results to save.')
else:
    out_name = 'well_test_results.xlsx'
    results_df.to_excel(out_name, index=False)
    print(f'Saved results to {out_name}')

---
### Notes and next steps
- You can copy additional text from `USER_GUIDE.md` into markdown cells for classroom instructions.
- Consider adding small exercises and pre-uploaded CSVs for students.