In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import math

%matplotlib inline

In [2]:
data = pd.read_csv(r'../data/all_wells.csv')
# data.fillna(data.mean(), inplace=True)
data.head()

Unnamed: 0,easting,northing,porosity,permeability,Poisson's ratio,Young's Modulus,water saturation,oil saturation,proppant weight (lbs),pump rate (cubic feet/min),name,cumulative production,well length (ft),frac stages,total proppant (lbs),total pump rate (cubic feet/min)
0,66100.0,22300.0,0.09,0.033,0.332,9440769.483,0.12474,0.87526,260036.414279,275.737593,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
1,66199.0,22300.0,0.12,0.057,0.332,9429043.88,0.124979,0.875021,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
2,66297.0,22300.0,0.11,0.05,0.332,9417413.01,0.125221,0.874779,429740.754787,324.145032,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
3,66396.0,22300.0,0.08,0.024,0.332,9405879.454,0.125469,0.874531,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
4,66495.0,22300.0,0.08,0.031,0.332,9394445.773,0.12572,0.87428,485657.822229,320.868488,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887


formula for original oil in place

In [3]:
# formula modified to use oil saturation instead of water saturation
def ooip(area, porosity, saturation, thickness = 50, fvf = 1.6):
    return 7758 * area * thickness * porosity * (saturation) / fvf

functions generate regressors based on porosity, permeability, poisson's ratio, young's modulus, and oil saturation

In [4]:
def model_length():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'well length (ft)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['well length (ft)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))
    
    return model

In [5]:
def model_frac():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'frac stages']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['frac stages']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [6]:
def model_total_proppant():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total proppant (lbs)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total proppant (lbs)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [7]:
def model_total_pump_rate():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total pump rate (cubic feet/min)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total pump rate (cubic feet/min)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model 

establishing regressors

In [8]:
# based on porosity, permeability, poisson's ratio,
# young's modulus, and oil saturation
length_reg = model_length()
frac_reg = model_frac()

In [9]:
total_prop_reg = model_total_proppant()
total_rate_reg = model_total_pump_rate()

returns list of deliverables given easting and northing

In [10]:
"""
returns a list containing deliverables in the followng order:
    A. easting
    B. northing
    C. length of well
    D. number of frac stages
    E. amount of proppant for each stage
    F. pump rate
    G. original oil in place
    H. recoverable reserves
    I. estimated ultimate recovery
"""
def deliverables(easting, northing, youngs, saturation, poissons, porosity, permeability, cumulative):
    deliver = []
    
    length = length_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    frac = frac_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_prop = total_prop_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_rate = total_rate_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    
    # AB: easting, northing
    deliver.append(easting)
    deliver.append(northing)
    
    # C: length
    deliver.append(length)
    
    # D: frac stages
    # rounded up to nearest integer
    deliver.append(math.ceil(frac))
    
    # E: average proppant per stage
    deliver.append(total_prop / frac)
    
    # F: average pump rate per stage
    deliver.append(total_rate / frac)
    
    # G: original oil in place
    # area of well is approximated as length^2
    deliver.append(ooip(length**2, porosity, saturation))
    
    # H: recoverable reserves
    # recoverable reserves == cumulative production
    deliver.append(cumulative)
    
    # I: estimated ultimate recovery
    deliver.append(cumulative)
    
    return deliver

In [11]:
# placeholder top 10 eastings, northings, youngs, oil saturations, poissons, permeability, porosity, cumulative prod
well_deliverables = []
eastings = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000,]
northings = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000,]
youngs = [10547155.909007126, 10475556.549813963, 10403957.190620802, 10332357.831427641, 10260758.472234478, 10189159.113041317, 10117559.753848156, 10045960.394654993, 9974361.035461832, 9902761.67626867]
saturations = [0.8785413618896565, 0.8709986808683892, 0.863455999847122, 0.8559133188258548, 0.8483706378045874, 0.8408279567833202, 0.833285275762053, 0.8257425947407857, 0.8181999137195185, 0.8106572326982513]
poissons = [0.33435957265794736, 0.33241381066361236, 0.3304680486692774, 0.3285222866749425, 0.3265765246806075, 0.32463076268627256, 0.3226850006919376, 0.3207392386976026, 0.3187934767032677, 0.31684771470893275]
porosities = [0.050720975527954626, 0.04929766078620696, 0.04907370412996584, 0.05709840899911957, 0.06461900409031353, 0.04633780536467275, 0.04628423115940857, 0.04049261098366007, 0.03353198016675706, 0.020655721962871947]
permeabilities = [0.012332159531746725, 0.01097438300118446, 0.010445190242267092, 0.014640840778179284, 0.016559057301691528, 0.01118699199734434, 0.009143498065977824, 0.00659388438410459, 0.004912336853958197, 0.0028284490052745493]
cumulative_prods = [35717.39, 29736.21, 12055.79, 39830.3, 36774.84, 22952.88, 15153.88, 7486.55, 5561.63, 5021.18]

for i in range(len(eastings)):
    well_deliverables.append(deliverables(eastings[i], northings[i], youngs[i], saturations[i], poissons[i], porosities[i], permeabilities[i], cumulative_prods[i]))

In [12]:
# creating DataFrame from list top 10 wells
col_names = ['easting (ft)', 'northing (ft)', 
             'length of well (ft)', 'frac stages', 'average proppant per frac stage (lb)', 
             'average pump rate per frac stage (cubic feet/min)', 'original oil in place (bbl)',
            'recoverable reserves (bbl)', 'estimated ultimate recovery (bbl)']

all_deliverables = pd.DataFrame(well_deliverables, columns = col_names)
all_deliverables

Unnamed: 0,easting (ft),northing (ft),length of well (ft),frac stages,average proppant per frac stage (lb),average pump rate per frac stage (cubic feet/min),original oil in place (bbl),recoverable reserves (bbl),estimated ultimate recovery (bbl)
0,10000,10000,6962.85,35,714114.9,303.513308,523749600000.0,35717.39,35717.39
1,20000,20000,7579.01,37,720686.2,267.05528,597955100000.0,29736.21,29736.21
2,30000,30000,8056.25,8,1047818.0,351.760014,666737200000.0,12055.79,12055.79
3,40000,40000,6261.18,25,2079982.0,327.081826,464478900000.0,39830.3,39830.3
4,50000,50000,6303.11,33,953853.0,293.279903,528026200000.0,36774.84,36774.84
5,60000,60000,7035.68,36,945451.7,285.386416,467578500000.0,22952.88,22952.88
6,70000,70000,6897.36,42,829533.7,291.362695,444828200000.0,15153.88,15153.88
7,80000,80000,6786.96,44,746549.0,285.263811,373397000000.0,7486.55,7486.55
8,90000,90000,7428.7,46,790147.3,301.298283,367065900000.0,5561.63,5561.63
9,100000,100000,8453.79,43,875156.7,283.88327,290121800000.0,5021.18,5021.18


In [13]:
total_ooip = all_deliverables['original oil in place (bbl)'].sum()
total_rr = all_deliverables['recoverable reserves (bbl)'].sum()
total_eur = all_deliverables['estimated ultimate recovery (bbl)'].sum()

# recovery factor is the recoverable reserves / original oil in place
recovery_factor = total_rr / total_ooip

totals = pd.DataFrame([[total_ooip, total_rr, total_eur, recovery_factor]], columns = ['total original oil in place (bbl)', 
                                                                      'total recoverable reserves (bbl)', 
                                                                      'total estimated ultimate recovery (bbl)',
                                                                        'recovery factor'])
totals

Unnamed: 0,total original oil in place (bbl),total recoverable reserves (bbl),total estimated ultimate recovery (bbl),recovery factor
0,4723938000000.0,210290.65,210290.65,4.451596e-08
