In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import math

%matplotlib inline

In [2]:
data = pd.read_csv(r'../data/all_wells.csv')
# data.fillna(data.mean(), inplace=True)
data.head()

Unnamed: 0,easting,northing,porosity,permeability,Poisson's ratio,Young's Modulus,water saturation,oil saturation,proppant weight (lbs),pump rate (cubic feet/min),name,cumulative production,well length (ft),frac stages,total proppant (lbs),total pump rate (cubic feet/min)
0,66100.0,22300.0,0.09,0.033,0.332,9440769.483,0.12474,0.87526,260036.414279,275.737593,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
1,66199.0,22300.0,0.12,0.057,0.332,9429043.88,0.124979,0.875021,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
2,66297.0,22300.0,0.11,0.05,0.332,9417413.01,0.125221,0.874779,429740.754787,324.145032,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
3,66396.0,22300.0,0.08,0.024,0.332,9405879.454,0.125469,0.874531,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
4,66495.0,22300.0,0.08,0.031,0.332,9394445.773,0.12572,0.87428,485657.822229,320.868488,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887


formula for original oil in place

OOIP=(7758*30/165.43/FVF)(proppant weight)(porosity)(1-porosity)(oil saturation)(well length)

In [3]:
# formula modified to utilize proppant weight, porosity, oil saturation, and well length
# fvf assumed = 1.6
def ooip(proppant_weight, porosity, saturation, length, fvf = 1.6):
    return (7758*30/165.43/fvf)*(proppant_weight)*(porosity)*(1-porosity)*(saturation)*(length)

alternate approximation for OOIP 

In [4]:
def alt_ooip(cumulative_prod, recovery_rate):
    return 4/3 * 1/recovery_rate * cumulative_prod

functions generate regressors based on porosity, permeability, poisson's ratio, young's modulus, and oil saturation

In [5]:
def model_length():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'well length (ft)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['well length (ft)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))
    
    return model

In [6]:
def model_frac():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'frac stages']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['frac stages']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [7]:
def model_total_proppant():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total proppant (lbs)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total proppant (lbs)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [8]:
def model_total_pump_rate():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total pump rate (cubic feet/min)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total pump rate (cubic feet/min)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model 

establishing regressors

In [9]:
# based on porosity, permeability, poisson's ratio,
# young's modulus, and oil saturation
length_reg = model_length()
frac_reg = model_frac()

In [10]:
total_prop_reg = model_total_proppant()
total_rate_reg = model_total_pump_rate()

returns list of deliverables given easting and northing

In [11]:
"""
returns a list containing deliverables in the followng order:
    A. easting
    B. northing
    C. length of well
    D. direction
    E. number of frac stages
    F. amount of proppant for each stage
    G. pump rate
    H. original oil in place
    I. recoverable reserves
    J. estimated ultimate recovery
"""
def deliverables(easting, northing, direction, youngs, saturation, poissons, porosity, 
                 permeability, cumulative, low_recovery, high_recovery):
    deliver = []
    
    length = length_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    frac = frac_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_prop = total_prop_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_rate = total_rate_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    
    # AB: easting, northing
    deliver.append(easting)
    deliver.append(northing)
    
    # C: length
    deliver.append(length)
    
    # D: direction
    deliver.append(direction)
    
    # E: frac stages
    # rounded up to nearest integer
    deliver.append(math.ceil(frac))
    
    # F: average proppant per stage
    deliver.append(total_prop / frac)
    
    # G: average pump rate per stage
    deliver.append(total_rate / frac)
    
    # H: original oil in place
    # area of well is approximated as length^2
    deliver.append(ooip(total_prop, porosity, saturation, length))
    
    # I: recoverable reserves
    # recoverable reserves == cumulative production
    deliver.append(cumulative)
    
    # J: estimated ultimate recovery
    deliver.append(cumulative)
    
    # alternate calculations for ooip
    deliver.append(alt_ooip(cumulative, low_recovery))
    deliver.append(alt_ooip(cumulative, high_recovery))
    
    return deliver

In [12]:
prospective_wells = pd.read_csv(r'../data/prospective_wells.csv')
prospective_wells.head()

Unnamed: 0,easting,northing,porosity,permeability,poissons,youngs,oil,production
0,72297.816327,12485.714286,0.098456,0.035435,0.317352,9350633.0,0.812961,114138.486667
1,74222.081633,2414.285714,0.095731,0.03283,0.31584,9235678.0,0.814014,113426.44
2,72297.816327,2414.285714,0.097934,0.034673,0.318049,9511531.0,0.825021,112740.413333
3,54979.428571,10471.428571,0.096653,0.035101,0.363571,11197250.0,0.959432,109864.663333
4,53055.163265,12485.714286,0.095587,0.031729,0.365857,11225830.0,0.963618,109809.21


In [13]:
well_deliverables = []

# top wells from prospective_wells.csv
top_well_indices = [1, 3, 5, 6, 10, 11, 13, 15, 17, 18,]
top_well_directions = ['west', 'west', 'west', 'east', 'west', 'east', 'east', 'west', 'west', 'west',]
# approximated recovery rate based on decay rate of 0.1155
upper_recovery = 0.35
lower_recovery = 0.3

for i in range(len(top_well_indices)):
    row = prospective_wells.iloc[top_well_indices[i]]
    well_deliverables.append(deliverables(row['easting'], row['northing'], top_well_directions[i],
                                         row['youngs'], row['oil'], row['poissons'], 
                                         row['porosity'], row['permeability'], row['production'],
                                         upper_recovery, lower_recovery))

In [14]:
# creating DataFrame from list top 10 wells
col_names = ['easting (ft)', 'northing (ft)', 'length of well (ft)', 
             'direction', 'frac stages', 'average proppant per frac stage (lb)', 
             'average pump rate per frac stage (cubic feet/min)', 'original oil in place (bbl)',
            'recoverable reserves (bbl)', 'estimated ultimate recovery (bbl)',
            '(alternate) low original oil in place (bbl)', '(alternate) high original oil in place (bbl)']

all_deliverables = pd.DataFrame(well_deliverables, columns = col_names)
all_deliverables

Unnamed: 0,easting (ft),northing (ft),length of well (ft),direction,frac stages,average proppant per frac stage (lb),average pump rate per frac stage (cubic feet/min),original oil in place (bbl),recoverable reserves (bbl),estimated ultimate recovery (bbl),(alternate) low original oil in place (bbl),(alternate) high original oil in place (bbl)
0,74222.081633,2414.285714,7782.0,west,50,834813.638006,304.569836,20126580000000.0,113426.44,113426.44,432100.72381,504117.511111
1,54979.428571,10471.428571,8647.0,west,50,671381.461623,301.748491,21380820000000.0,109864.663333,109864.663333,418532.050794,488287.392593
2,51130.897959,14500.0,8647.68,west,50,671381.461623,301.748491,21138030000000.0,109430.05,109430.05,416876.380952,486355.777778
3,54979.428571,8457.142857,8647.0,east,50,671381.461623,301.748491,21527370000000.0,109021.45,109021.45,415319.809524,484539.777778
4,53055.163265,8457.142857,8666.92,west,46,695424.639392,313.768662,20628090000000.0,107643.756667,107643.756667,410071.453968,478416.696296
5,49206.632653,14500.0,8649.04,east,50,671381.461623,301.748491,19934810000000.0,107552.49,107552.49,409723.771429,478011.066667
6,51130.897959,12485.714286,8647.0,east,50,664822.627473,299.946836,21119550000000.0,106836.856667,106836.856667,406997.549206,474830.474074
7,74222.081633,4428.571429,7233.06,west,51,898678.430007,298.274631,20329370000000.0,105518.42,105518.42,401974.933333,468970.755556
8,74222.081633,12485.714286,7128.67,west,50,845439.543217,299.94382,18480630000000.0,105107.94,105107.94,400411.2,467146.4
9,53055.163265,14500.0,8648.36,west,50,671381.461623,301.748491,20934030000000.0,105093.983333,105093.983333,400358.031746,467084.37037


In [15]:
total_ooip = all_deliverables['original oil in place (bbl)'].sum()
total_rr = all_deliverables['recoverable reserves (bbl)'].sum()
total_eur = all_deliverables['estimated ultimate recovery (bbl)'].sum()

# recovery factor is the recoverable reserves / original oil in place
recovery_factor = total_rr / total_ooip

totals = pd.DataFrame([[total_ooip, total_rr, total_eur, recovery_factor]], columns = ['total original oil in place (bbl)', 
                                                                      'total recoverable reserves (bbl)', 
                                                                      'total estimated ultimate recovery (bbl)',
                                                                        'recovery factor'])
totals

Unnamed: 0,total original oil in place (bbl),total recoverable reserves (bbl),total estimated ultimate recovery (bbl),recovery factor
0,205599300000000.0,1079496.05,1079496.05,5.250486e-09


alternate ooip / recover factor

In [16]:
# alternate low / high ooip and recovery factor
low_alt_ooip = all_deliverables['(alternate) low original oil in place (bbl)'].sum()
low_alt_ooip_recovery = total_rr / low_alt_ooip

high_alt_ooip = all_deliverables['(alternate) high original oil in place (bbl)'].sum()
high_alt_ooip_recovery = total_rr / high_alt_ooip

alt_totals = pd.DataFrame([[low_alt_ooip, low_alt_ooip_recovery, high_alt_ooip, high_alt_ooip_recovery]], columns = 
                          ['total (alternate) low original oil in place (bbl)',
                          'alternate low ooip recovery factor',
                          'total (alternate) high original oil in place (bbl)',
                          'alternate high ooip recovery factor'])
alt_totals

Unnamed: 0,total (alternate) low original oil in place (bbl),alternate low ooip recovery factor,total (alternate) high original oil in place (bbl),alternate high ooip recovery factor
0,4112366.0,0.2625,4797760.0,0.225
