In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression
import math

Load in total well data

In [24]:
data = pd.read_csv(r'well production.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,well name,average pressure (Pa),oil 1,oil 2,oil 3,oil 4,oil 5,oil 6,oil 7,...,water 3,water 4,water 5,water 6,water 7,water 8,water 9,water 10,water 11,water 12
0,0,Tarragon 4-119H,26180969,10809.0,10108.0,9352.0,8626.0,7856.0,7137.0,6430.0,...,1600.0,1481.0,1367.0,1237.0,1069.0,982.0,857.0,733.0,593.0,472.0
1,1,Fennel 10-129H,36433680,2049.0,1932.0,1784.0,1649.0,1499.0,1364.0,1226.0,...,254.0,210.0,205.0,200.0,157.0,148.0,136.0,97.0,91.0,87.0
2,2,Federal 14-113H,36642888,11699.0,10905.0,10149.0,9365.0,8521.0,7732.0,6919.0,...,720.0,661.0,617.0,541.0,466.0,435.0,394.0,340.0,266.0,213.0
3,3,King 7-184H,30429506,5980.0,5580.0,5176.0,4789.0,4405.0,3978.0,3581.0,...,502.0,471.0,453.0,417.0,346.0,343.0,275.0,231.0,238.0,164.0
4,4,Sundae 1-129H,42591950,3892.0,3646.0,3399.0,3133.0,2834.0,2577.0,2309.0,...,1096.0,974.0,902.0,806.0,790.0,668.0,570.0,468.0,394.0,322.0


Only keep rows that provide overall well data

In [25]:
oil_months = []

for i in range(1, 13):
    oil_months.append('oil ' + str(i))

wells = data[['well name'] + oil_months].dropna()
wells.head()

Unnamed: 0,well name,oil 1,oil 2,oil 3,oil 4,oil 5,oil 6,oil 7,oil 8,oil 9,oil 10,oil 11,oil 12
0,Tarragon 4-119H,10809.0,10108.0,9352.0,8626.0,7856.0,7137.0,6430.0,5678.0,4922.0,4205.0,3477.0,2724.0
1,Fennel 10-129H,2049.0,1932.0,1784.0,1649.0,1499.0,1364.0,1226.0,1093.0,969.0,804.0,689.0,542.0
2,Federal 14-113H,11699.0,10905.0,10149.0,9365.0,8521.0,7732.0,6919.0,6124.0,5338.0,4536.0,3781.0,2954.0
3,King 7-184H,5980.0,5580.0,5176.0,4789.0,4405.0,3978.0,3581.0,3159.0,2735.0,2338.0,1914.0,1536.0
4,Sundae 1-129H,3892.0,3646.0,3399.0,3133.0,2834.0,2577.0,2309.0,2063.0,1800.0,1545.0,1279.0,1007.0


Express each column as its natural log in order to perform exponential regression

In [26]:
well_logs = pd.DataFrame()
for i in range(0, 12):
    original = wells['oil ' + str(i + 1)]
    logs = []
    for j in range(100):
        logs.append(np.log(original[j]))
    well_logs['oil ' + str(i)] = logs

well_logs.head()

Unnamed: 0,oil 0,oil 1,oil 2,oil 3,oil 4,oil 5,oil 6,oil 7,oil 8,oil 9,oil 10,oil 11
0,9.288134,9.221082,9.143346,9.062536,8.969033,8.873048,8.76873,8.644354,8.50147,8.34403,8.153925,7.909857
1,7.625107,7.566311,7.486613,7.407924,7.312553,7.218177,7.111512,6.996681,6.876265,6.689599,6.535241,6.295266
2,9.367259,9.296977,9.22513,9.144735,9.050289,8.953123,8.842027,8.719971,8.582606,8.419801,8.237744,7.990915
3,8.696176,8.626944,8.551788,8.474077,8.390496,8.288534,8.183397,8.058011,7.913887,7.757051,7.556951,7.336937
4,8.266678,8.201386,8.131237,8.049746,7.949444,7.854381,7.74457,7.631917,7.495542,7.342779,7.153834,6.914731


Model initial production and decay rate for each well

In [27]:
x = np.array([i for i in range(0, 12)]).reshape(-1, 1)
decays = []
initial = []
scores = []

for index, row in well_logs.iterrows():
    y = np.array(row)
    
    model = LinearRegression()
    model.fit(x, y)
    decays.append(model.coef_[0])
    scores.append(model.score(x, y))
    
wells['decay'] = decays
wells['regression score'] = scores

In [32]:
wells.head(100)

Unnamed: 0,well name,oil 1,oil 2,oil 3,oil 4,oil 5,oil 6,oil 7,oil 8,oil 9,oil 10,oil 11,oil 12,decay,regression score
0,Tarragon 4-119H,10809.0,10108.0,9352.0,8626.0,7856.0,7137.0,6430.0,5678.0,4922.0,4205.0,3477.0,2724.0,-0.119736,0.964746
1,Fennel 10-129H,2049.0,1932.0,1784.0,1649.0,1499.0,1364.0,1226.0,1093.0,969.0,804.0,689.0,542.0,-0.116082,0.967369
2,Federal 14-113H,11699.0,10905.0,10149.0,9365.0,8521.0,7732.0,6919.0,6124.0,5338.0,4536.0,3781.0,2954.0,-0.119660,0.965494
3,King 7-184H,5980.0,5580.0,5176.0,4789.0,4405.0,3978.0,3581.0,3159.0,2735.0,2338.0,1914.0,1536.0,-0.119050,0.964329
4,Sundae 1-129H,3892.0,3646.0,3399.0,3133.0,2834.0,2577.0,2309.0,2063.0,1800.0,1545.0,1279.0,1007.0,-0.117664,0.966516
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Burger 6-148H,2992.0,2830.0,2592.0,2382.0,2227.0,1969.0,1766.0,1590.0,1393.0,1201.0,993.0,784.0,-0.116590,0.967606
96,King 14-184H,3258.0,3051.0,2857.0,2628.0,2397.0,2190.0,1966.0,1742.0,1524.0,1268.0,1082.0,847.0,-0.117570,0.963703
97,Tribal 14-4H,861.0,836.0,762.0,694.0,651.0,623.0,536.0,466.0,437.0,353.0,311.0,216.0,-0.115255,0.939703
98,Pikachu 13-133H,750.0,711.0,686.0,639.0,554.0,515.0,460.0,423.0,378.0,314.0,265.0,227.0,-0.108555,0.969617


In [35]:
wells['decay'].mean()

    


-0.11553637103076243

In [36]:
a=0
b=0.0
for x in wells['decay']:
    a=a+(x+0.11553637103076243)*(x+0.11553637103076243)
    b=math.sqrt(a)
print(b/10)

0.010119946604552848
