# Example: Using Pandas to Analyze Completion Parameters

This notebook illustrates using the Python API and the pandas package to perform completion analysis.

## 0.5 Import packages

The only import needed for the Python API is `orchid` itself.

In [None]:
import orchid

The remaining imports are standard Python packages that support the analysis.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate

In [None]:
## 1.0 Load the .ifrac project

In [None]:
# Load the sample project from its .ifrac file. The path below is specific to
# the machine this notebook was written on; change it to wherever your copy of
# the file lives before running this cell.
project = orchid.load_project(
    r'c:\src\Orchid.IntegrationTestData\frankNstein_Bakken_UTM13_FEET.ifrac'
)

## 2.0 Define a function to compute the stage treatment data

In [None]:
def compute_stage_treatment_aggregates(treatment_stage):
    """Compute the fluid, proppant and median-pressure aggregates for one stage.

    The slurry rate and proppant concentration curves are restricted to the
    interval from the stage start time to the stage stop time and integrated
    over elapsed time with the trapezoidal rule. The treating pressure curve is
    restricted to the same interval and reduced to its median.

    Args:
        treatment_stage: The stage to summarize. It must provide
            `treatment_curves()`, `start_time` and `stop_time`.

    Returns:
        `None` if the stage has no treatment curves; otherwise the tuple
        `(stage_fluid, stage_proppant, median_stage_pressure)`. `stage_fluid`
        is expressed in the volume unit of the slurry rate curve (as reported
        by `orchid.slurry_rate_volume_unit`).

    Notes:
        The proppant calculation multiplies the slurry rate and proppant
        concentration series element-wise, so it assumes both curves share
        the same time basis (that is, their indices are equal).
    """
    treatment_curves = treatment_stage.treatment_curves()
    if not treatment_curves:
        return None

    stage_start_time = np.datetime64(treatment_stage.start_time)
    stage_stop_time = np.datetime64(treatment_stage.stop_time)

    # `trapz` was renamed to `trapezoid` and later removed from SciPy; prefer
    # the new name and fall back to the old one only on older SciPy versions.
    trapezoid = getattr(integrate, 'trapezoid', None) or integrate.trapz

    def stage_samples(curve_name):
        """Return the samples of the named curve that fall within the stage."""
        return treatment_curves[curve_name].time_series()[stage_start_time:stage_stop_time]

    def integrate_over_stage(series):
        """Integrate a per-second series over the time elapsed since stage start."""
        # `total_seconds()` is the full elapsed time. The `seconds` attribute is
        # only the seconds *component* of each timedelta: it wraps every 24
        # hours and discards fractional seconds.
        elapsed_seconds = (series.index - stage_start_time).total_seconds()
        return trapezoid(series.values, np.asarray(elapsed_seconds))

    # Convert the slurry rate from its native unit (for example, per minute)
    # to the same volume unit per second so it can be integrated over seconds.
    source_slurry_rate_unit = treatment_curves[orchid.SLURRY_RATE].sampled_quantity_unit()
    target_slurry_rate_unit = f'{orchid.slurry_rate_volume_unit(source_slurry_rate_unit)}/s'
    per_second_factor = orchid.get_conversion_factor(source_slurry_rate_unit,
                                                     target_slurry_rate_unit)

    fluid_per_sec = stage_samples(orchid.SLURRY_RATE) * per_second_factor
    stage_fluid = integrate_over_stage(fluid_per_sec)

    # NOTE(review): the 'bbl/s' -> 'gal/s' factor is applied whatever the
    # slurry rate's actual volume unit is, and the concentration is presumably
    # expressed per gallon -- confirm before using with other unit systems.
    bbl_to_gal_factor = orchid.get_conversion_factor('bbl/s', 'gal/s')
    stage_concentration = stage_samples(orchid.PROPPANT_CONCENTRATION)
    proppant_per_sec = fluid_per_sec * bbl_to_gal_factor * stage_concentration
    stage_proppant = integrate_over_stage(proppant_per_sec)

    median_stage_pressure = stage_samples(orchid.TREATING_PRESSURE).median()

    return stage_fluid, stage_proppant, median_stage_pressure

## 3.0 Build Pandas DataFrame

In [None]:
# Gather one row of aggregates per treated stage, then build the frame once.
results = []
for well in project.wells:
    for stage in well.stages:
        treatment_aggregates = compute_stage_treatment_aggregates(stage)
        # Stages without aggregates are left out; they most likely belong to
        # an untreated monitor well.
        if treatment_aggregates:
            results.append((well.name, stage.display_stage_number, *treatment_aggregates))

stage_aggregates = pd.DataFrame(
    results,
    columns=['Well', 'Stage', 'Fluid', 'Proppant', 'Median Pressure'],
)