# Example: Using Pandas to Analyze Completion Parameters

This notebook illustrates using the Python API and the pandas package to perform completion analysis.

## 0.5 Import packages

The only import needed for the Python API is `orchid` itself.

In [1]:
import orchid

The remaining imports are standard python packages to support the analysis.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate

In [3]:
## 1.0 Load the .ifrac project

In [4]:
project = orchid.load_project(
    r'c:\src\Orchid.IntegrationTestData\frankNstein_Bakken_UTM13_FEET.ifrac')

## 2.0 Define a function to compute the stage treatment data

In [5]:
def compute_stage_treatment_aggregates(treatment_stage):
    
    def slurry_rate_per_min_to_per_second_conversion_factor():
        source_slurry_rate_unit = treatment_curves['Slurry Rate'].sampled_quantity_unit()
        target_slurry_rate_unit = f'{orchid.slurry_rate_volume_unit(source_slurry_rate_unit)}/s'
        local_result = orchid.get_conversion_factor(source_slurry_rate_unit, target_slurry_rate_unit)
        return local_result

    def slurry_rate_bbl_per_second_to_gal_per_second_conversion_factor():
        local_result = orchid.get_conversion_factor('bbl/s', 'gal/s')
        return local_result

    treatment_curves = treatment_stage.treatment_curves()
    if not treatment_curves:
        return None
    
    raw_treatment_series = {n: c.time_series() for n, c in (treatment_curves.items())}
    stage_start_time = np.datetime64(stage.start_time)
    stage_stop_time = np.datetime64(stage.stop_time)
    
    fluid_per_sec = (raw_treatment_series[orchid.SLURRY_RATE][stage_start_time:stage_stop_time] *
                     slurry_rate_per_min_to_per_second_conversion_factor())
    stage_fluid = integrate.trapz(fluid_per_sec.values, 
                                  (fluid_per_sec.index - stage_start_time).seconds)
    
    stage_concentration = \
        raw_treatment_series[orchid.PROPPANT_CONCENTRATION][stage_start_time:stage_stop_time]
    proppant_per_sec = (fluid_per_sec * 
                        slurry_rate_bbl_per_second_to_gal_per_second_conversion_factor() *
                        stage_concentration)
    stage_proppant = integrate.trapz(proppant_per_sec.values, 
                                     (proppant_per_sec.index - stage_start_time).seconds)
    
    stage_pressure = \
        raw_treatment_series[orchid.TREATING_PRESSURE][stage_start_time:stage_stop_time]
    median_stage_pressure = stage_pressure.median()
    
    # Assumes that all three curves have the same time basis; that is, 
    # that the index for each series is equal.
    treatment_curves = pd.DataFrame.from_dict(raw_treatment_series)
    treatment_curves['dt'] = ((treatment_curves.index.values - stage_start_time) /
                              np.timedelta64(1, 's'))
    
    return stage_fluid, stage_proppant, median_stage_pressure

## 3.0 Build a pandas data frame

In [6]:
# Remember the project units
units = {'length': project.unit('length'),
         'mass': project.unit('mass'),
         'pressure': project.unit('pressure'),
         'slurry volume': orchid.slurry_rate_volume_unit(project.unit('slurry rate')),
         'proppant mass': orchid.proppant_concentration_mass_unit(
             project.unit('proppant concentration'))}

In [7]:
# Calculate the stage results
stage_results = []
for well in project.wells:
    stages = list(well.stages)

    for stage in stages:
        
        treatment_aggregates = compute_stage_treatment_aggregates(stage)
        # Skip stages with no aggregates. These stages most likely are from 
        # an untreated monitor well.
        if not treatment_aggregates:
            continue
            
        stage_fluid, stage_proppant, median_stage_pressure = treatment_aggregates
        stage_results.append((project.name, well.name, stage.display_stage_number,
                              stage.md_top(units['length']).magnitude,
                              stage.md_bottom(units['length']).magnitude,
                              stage_fluid, stage_proppant, median_stage_pressure))

In [8]:
# Create the data frame
columns = ['Project',
           'Well',
           'Stage',
           f'MD Top ({units["length"]})',
           f'MD Bottom ({units["length"]})',
           f'Total Fluid ({units["slurry volume"]})',
           f'Total Proppant ({units["proppant mass"]})',
           f'Median Treating Pressure ({units["pressure"]})']
stage_summaries = pd.DataFrame(data=stage_results, columns=columns)
stage_summaries.head()

Unnamed: 0,Project,Well,Stage,MD Top (ft),MD Bottom (ft),Total Fluid (bbl),Total Proppant (lb),Median Treating Pressure (psi)
0,frankNstein_Bakken_UTM13_FEET,Demo_1H,1,20883.34,20934.0,3668.300534,139702.110134,6164.037326
1,frankNstein_Bakken_UTM13_FEET,Demo_1H,2,20782.34,20785.34,4086.916893,204660.325538,6421.972222
2,frankNstein_Bakken_UTM13_FEET,Demo_1H,3,20582.34,20585.34,4018.791882,217688.550722,6490.994358
3,frankNstein_Bakken_UTM13_FEET,Demo_1H,4,20358.34,20410.34,4055.97409,233981.926535,6516.329427
4,frankNstein_Bakken_UTM13_FEET,Demo_1H,5,20158.34,20210.34,4076.240254,215513.673232,6461.559028


### 3.1 Compute the stage length directly from the data frame

In [9]:
stage_summaries[f'Stage Length ({units["length"]})'] = \
    stage_summaries.apply(lambda s: s[columns[4]] - s[columns[3]], axis=1)
columns = stage_summaries.columns
stage_summaries.head()

Unnamed: 0,Project,Well,Stage,MD Top (ft),MD Bottom (ft),Total Fluid (bbl),Total Proppant (lb),Median Treating Pressure (psi),Stage Length (ft)
0,frankNstein_Bakken_UTM13_FEET,Demo_1H,1,20883.34,20934.0,3668.300534,139702.110134,6164.037326,50.66
1,frankNstein_Bakken_UTM13_FEET,Demo_1H,2,20782.34,20785.34,4086.916893,204660.325538,6421.972222,3.0
2,frankNstein_Bakken_UTM13_FEET,Demo_1H,3,20582.34,20585.34,4018.791882,217688.550722,6490.994358,3.0
3,frankNstein_Bakken_UTM13_FEET,Demo_1H,4,20358.34,20410.34,4055.97409,233981.926535,6516.329427,52.0
4,frankNstein_Bakken_UTM13_FEET,Demo_1H,5,20158.34,20210.34,4076.240254,215513.673232,6461.559028,52.0


### 3.2 Now compute the proppant loading for each stage

In [10]:
stage_summaries[f'Proppant loading ({units["proppant mass"]}/{units["length"]})'] = \
    stage_summaries.apply(lambda s: s[columns[6]] / s[columns[8]], axis=1)
stage_summaries.head()

Unnamed: 0,Project,Well,Stage,MD Top (ft),MD Bottom (ft),Total Fluid (bbl),Total Proppant (lb),Median Treating Pressure (psi),Stage Length (ft),Proppant loading (lb/ft)
0,frankNstein_Bakken_UTM13_FEET,Demo_1H,1,20883.34,20934.0,3668.300534,139702.110134,6164.037326,50.66,2757.641337
1,frankNstein_Bakken_UTM13_FEET,Demo_1H,2,20782.34,20785.34,4086.916893,204660.325538,6421.972222,3.0,68220.108513
2,frankNstein_Bakken_UTM13_FEET,Demo_1H,3,20582.34,20585.34,4018.791882,217688.550722,6490.994358,3.0,72562.850241
3,frankNstein_Bakken_UTM13_FEET,Demo_1H,4,20358.34,20410.34,4055.97409,233981.926535,6516.329427,52.0,4499.652433
4,frankNstein_Bakken_UTM13_FEET,Demo_1H,5,20158.34,20210.34,4076.240254,215513.673232,6461.559028,52.0,4144.493716


## 4.0 Completion questions

### 4.1 What is the median proppant intensity per well?

In [11]:
stage_summaries[[columns[1], columns[-1]]].groupby(columns[1]).median()

Unnamed: 0_level_0,Stage Length (ft)
Well,Unnamed: 1_level_1
Demo_1H,147.225
Demo_2H,148.05
Demo_4H,245.0
