In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import verde as vd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
import zipfile
import math

%matplotlib inline



In [2]:
data = pd.read_csv(r'../data/all_wells.csv')
# data.fillna(data.mean(), inplace=True)
data.head()

Unnamed: 0,easting,northing,porosity,permeability,Poisson's ratio,Young's Modulus,water saturation,oil saturation,proppant weight (lbs),pump rate (cubic feet/min),name,cumulative production,well length (ft),frac stages,total proppant (lbs),total pump rate (cubic feet/min)
0,66100.0,22300.0,0.09,0.033,0.332,9440769.483,0.12474,0.87526,260036.414279,275.737593,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
1,66199.0,22300.0,0.12,0.057,0.332,9429043.88,0.124979,0.875021,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
2,66297.0,22300.0,0.11,0.05,0.332,9417413.01,0.125221,0.874779,429740.754787,324.145032,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
3,66396.0,22300.0,0.08,0.024,0.332,9405879.454,0.125469,0.874531,,,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887
4,66495.0,22300.0,0.08,0.031,0.332,9394445.773,0.12572,0.87428,485657.822229,320.868488,Tarragon 4-119H,81324.0,9768.0,50.0,37468940.0,14418.10887


formula for original oil in place

In [3]:
# formula modified to use oil saturation instead of water saturation
def ooip(area, porosity, saturation, thickness = 50, fvf = 1.6):
    return 7758 * area * thickness * porosity * (saturation) / fvf

functions generate regressors based on easting and northing

In [4]:
def model_youngs():
    features = data[['easting', 'northing', 'Young\'s Modulus']].dropna()
    X = features[['easting', 'northing']]
    y = features['Young\'s Modulus']
    
    # Linear Regression is placeholder regressor
    model = LinearRegression()
    model.fit(X, y)

    return model

In [5]:
def model_oil():
    features = data[['easting', 'northing', 'oil saturation']].dropna()
    X = features[['easting', 'northing']]
    y = features['oil saturation']

    model = LinearRegression()
    model.fit(X, y)

    return model

In [6]:
def model_poissons():
    features = data[['easting', 'northing', 'Poisson\'s ratio']].dropna()
    X = features[['easting', 'northing']]
    y = features['Poisson\'s ratio']

    model = LinearRegression()
    model.fit(X, y)

    return model

functions generate regressors based on easting, northing, young's modulus, oil saturation, and poisson's ratio

In [7]:
def model_porosity():
    features = data[['easting', 'northing', 'Young\'s Modulus', 'oil saturation', 'Poisson\'s ratio', 'porosity']].dropna()
    X = features[['easting', 'northing', 'Young\'s Modulus', 'oil saturation', 'Poisson\'s ratio']]
    y = features['porosity']

    model = GradientBoostingRegressor()
    model.fit(X, y)
    
    return model

In [8]:
def model_permeability():
    features = data[['easting', 'northing', 'Young\'s Modulus', 'oil saturation', 'Poisson\'s ratio', 'permeability']].dropna()
    X = features[['easting', 'northing', 'Young\'s Modulus', 'oil saturation', 'Poisson\'s ratio']]
    y = features['permeability']
 
    model = GradientBoostingRegressor()
    model.fit(X, y)

    return model

functions generate regressors based on porosity, permeability, poisson's ratio, young's modulus, and oil saturation

In [9]:
def model_length():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'well length (ft)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['well length (ft)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))
    
    return model

In [10]:
def model_frac():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'frac stages']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['frac stages']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [11]:
def model_cumulative_production():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'cumulative production']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['cumulative production']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [12]:
def model_total_proppant():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total proppant (lbs)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total proppant (lbs)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model

In [13]:
def model_total_pump_rate():
    features = data[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'total pump rate (cubic feet/min)']].dropna()
    X = features[['porosity', 'permeability', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
    y = features[['total pump rate (cubic feet/min)']]
    
    model = RandomForestRegressor()
    model.fit(X, np.ravel(y))

    return model 

establishing regressors

In [14]:
# based on easting and northing
youngs_reg = model_youngs()
oil_sat_reg = model_oil()
poissons_reg = model_poissons()

In [15]:
# based on easting, northing, young's modulus, 
# oil saturation, and poisson's ratio
porosity_reg = model_porosity()
permeability_reg = model_permeability()

In [16]:
# based on porosity, permeability, poisson's ratio,
# young's modulus, and oil saturation
length_reg = model_length()
frac_reg = model_frac()
cumulative_prod_reg = model_cumulative_production()

In [17]:
total_prop_reg = model_total_proppant()
total_rate_reg = model_total_pump_rate()

returns list of deliverables given easting and northing

In [18]:
"""
returns a list containing deliverables in the followng order:
    A. easting
    B. northing
    C. length of well
    D. number of frac stages
    E. amount of proppant for each stage
    F. pump rate
    G. original oil in place
    H. recoverable reserves
    I. estimated ultimate recovery
"""
def deliverables(easting, northing):
    deliver = []
    
    youngs = youngs_reg.predict([[easting, northing]])[0]
    saturation = oil_sat_reg.predict([[easting, northing]])[0]
    poissons = poissons_reg.predict([[easting, northing]])[0]
    
    porosity = porosity_reg.predict([[easting, northing, youngs, saturation, poissons]])[0]
    permeability = permeability_reg.predict([[easting, northing, youngs, saturation, poissons]])[0]
    
    length = length_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    frac = frac_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    cumulative = cumulative_prod_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_prop = total_prop_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    total_rate = total_rate_reg.predict([[porosity, permeability, poissons, youngs, saturation]])[0]
    
    # AB: easting, northing
    deliver.append(easting)
    deliver.append(northing)
    
    # C: length
    deliver.append(length)
    
    # D: frac stages
    # rounded up to nearest integer
    deliver.append(math.ceil(frac))
    
    # E: average proppant per stage
    deliver.append(total_prop / frac)
    
    # F: average pump rate per stage
    deliver.append(total_rate / frac)
    
    # G: original oil in place
    # area of well is approximated as length^2
    deliver.append(ooip(length**2, porosity, saturation))
    
    # H: recoverable reserves
    # recoverable reserves == cumulative production
    deliver.append(cumulative)
    
    # I: estimated ultimate recovery
    deliver.append(cumulative)
    
    return deliver

In [19]:
# placeholder top 10 well names, eastings, northings
well_deliverables = []
eastings = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000,]
northings = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000,]

for i in range(len(eastings)):
    well_deliverables.append(deliverables(eastings[i], northings[i]))

In [20]:
# creating DataFrame from list top 10 wells
col_names = ['easting (ft)', 'northing (ft)', 
             'length of well (ft)', 'frac stages', 'average proppant per frac stage (lb)', 
             'average pump rate per frac stage (cubic feet/min)', 'original oil in place (bbl)',
            'recoverable reserves (bbl)', 'estimated ultimate recovery (bbl)']

all_deliverables = pd.DataFrame(well_deliverables, columns = col_names)
all_deliverables

Unnamed: 0,easting (ft),northing (ft),length of well (ft),frac stages,average proppant per frac stage (lb),average pump rate per frac stage (cubic feet/min),original oil in place (bbl),recoverable reserves (bbl),estimated ultimate recovery (bbl)
0,10000,10000,7126.35,35,720270.2,298.788451,548635500000.0,32479.52,32479.52
1,20000,20000,7602.43,38,727938.4,263.362879,601656300000.0,31924.1,31924.1
2,30000,30000,8453.18,9,1177584.0,385.557065,734055800000.0,15261.06,15261.06
3,40000,40000,6420.54,22,2441265.0,376.069476,488423700000.0,38636.97,38636.97
4,50000,50000,6446.07,32,1110150.0,293.488922,552250000000.0,41425.58,41425.58
5,60000,60000,7053.34,37,933718.4,279.452249,469928800000.0,19903.59,19903.59
6,70000,70000,7013.33,41,852829.4,286.836628,459912400000.0,14547.43,14547.43
7,80000,80000,7107.56,42,760332.9,292.144561,409507000000.0,7392.06,7392.06
8,90000,90000,7436.08,44,835894.8,300.34582,367795600000.0,6165.05,6165.05
9,100000,100000,8523.76,42,895521.8,290.618617,310638800000.0,4926.1,4926.1


In [21]:
total_ooip = all_deliverables['original oil in place (bbl)'].sum()
total_rr = all_deliverables['recoverable reserves (bbl)'].sum()
total_eur = all_deliverables['estimated ultimate recovery (bbl)'].sum()

# recovery factor is the recoverable reserves / original oil in place
recovery_factor = total_rr / total_ooip

totals = pd.DataFrame([[total_ooip, total_rr, total_eur, recovery_factor]], columns = ['total original oil in place (bbl)', 
                                                                      'total recoverable reserves (bbl)', 
                                                                      'total estimated ultimate recovery (bbl)',
                                                                        'recovery factor'])
totals

Unnamed: 0,total original oil in place (bbl),total recoverable reserves (bbl),total estimated ultimate recovery (bbl),recovery factor
0,4942804000000.0,212661.46,212661.46,4.302446e-08
