<a href="https://colab.research.google.com/github/JozefSL/pyNotes/blob/main/Kalman/DPR_ProdRpLd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install filterpy and restart runtime
# this command hides the cell output
%%capture
!pip install filterpy
import os
#os.kill(os.getpid(),9)

In [2]:
#@title Import packages from Google and get EIA logo
import numpy as np
import pandas as pd
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise
from filterpy.stats import plot_covariance_ellipse
from filterpy.kalman import predict
from filterpy.kalman import update
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import r2_score 
import ipywidgets as widgets

eiaLgFile = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/49/Eia-logomark.svg/640px-Eia-logomark.svg.png"
eiaLogo = [dict(source = eiaLgFile,
                       x=1.06, y=-0.06,
                       sizex=0.15, sizey=0.15,
                       xanchor="center", yanchor="bottom")]


In [3]:
#@title Select DPR Region 
dprRegions = ['Anadarko Region','Bakken Region','Eagle Ford Region','Niobrara Region','Permian Region', 'Marcellus Region','Utica Region','Appalachia Region','Haynesville Region']
dprR = widgets.Dropdown(options=dprRegions, description='DPR_Region:', disabled=False)
dprR

Dropdown(description='DPR_Region:', options=('Anadarko Region', 'Bakken Region', 'Eagle Ford Region', 'Niobrar…

In [16]:
#@title Select oil or gas commodity 
dprCommodity = ['oil','gas']
dprC = widgets.Dropdown(options=dprCommodity, description='DPR_Com:', disabled=False)
dprC

Dropdown(description='DPR_Com:', options=('oil', 'gas'), value='oil')

In [5]:
dprC.value

'oil'

In [17]:
#@title Import rig and production data from the DPR region
file = r"https://www.eia.gov/petroleum/drilling/xls/dpr-data.xlsx"
data = pd.read_excel(file, sheet_name=dprR.value, skiprows=2, usecols=[0,1,4,7]) #index_col=0, , nrows=numRows)
data.columns = ['Month', 'BH','PRo' ,'PRg']
#data.head(4)
data['BH2MF'] = data['BH'].shift(2)
data['dPRo'] = data['PRo'].diff()
data['dPRg'] = data['PRg'].diff()
data['dBH2MF'] = data['BH2MF'].diff()
data = data.fillna(method="backfill")
data = data.fillna(method="ffill")
data.tail()

Unnamed: 0,Month,BH,PRo,PRg,BH2MF,dPRo,dPRg,dBH2MF
192,2023-02-01,353.0,5626577.4,22185961.5,350.0,-25557.8,182727.1,1.0
193,2023-03-01,349.0,5657159.5,22296564.3,355.0,30582.1,110602.8,5.0
194,2023-04-01,356.0,5679653.1,22392282.6,353.0,22493.6,95718.3,-2.0
195,2023-05-01,356.0,5692375.9,22469627.6,349.0,12722.8,77345.0,-4.0
196,2023-06-01,356.0,5707466.3,22551532.1,356.0,15090.4,81904.5,7.0


In [37]:
#@title Kalman Filter setup
# Step units in months
dt = 1

if dprC.value == "oil":
    data['PR'] = data['PRo']
    data['dPR'] = data['dPRo']
else:
    data['PR'] = data['PRg']
    data['dPR'] = data['dPRg']

orv = data['PR'].iloc[0]
orr = data['dPR'].iloc[0]

# x - Origin state estimate vector
x = np.array([orv, orr]).T

# Q - Process noise matrix
Q = Q_discrete_white_noise(dim=2, dt=1., var=.75)

# P - Covariance matrix
P = np.diag([11, 1])

# R - Measurement noise matrix
R = np.diag([33, 34])

# H - Measurement function
H = np.array([[1., 0.],
              [0., 1.]])


# F - State transition matrix
F = np.array([[1., dt],
              [0, 1]])

# B - Measurement function
B = np.array([[0.,0],
              [0., x[1]]])

# Measurements for update 
zsu = data[['PR','dPR','BH2MF','dBH2MF']].to_numpy()

# System prediction 
Xp = np.empty((0,2), int)

# System update
Xu = np.empty((0,2), int)
for zs  in zsu:
    z = zs[0:2]
    u = zs[2:4]
 
    u[0]=0
    #u[2]=0
    #print(z)
    x, P = predict(x=x, P=P, F=F, Q=Q, u=u, B=B, alpha=0.99)    #x, P = predict(x=x, P=P, F=F, Q=Q, u=u)
    Xp = np.vstack([Xp, x])

    x, P = update(x, P, z, R, H,)
    Xu = np.vstack([Xu, x])
   
print('R^2:', r2_score(Xp[:,0], data.PR),'|| lastEstimate:', x, '|| lastRigChange:',u)  

R^2: 0.8818797210876419 || lastEstimate: [5720455.78535671    5919.70014288] || lastRigChange: [0. 7.]


In [23]:
P

array([[12.37230429,  3.20635139],
       [ 3.20635139,  2.07284919]])

In [30]:
x[1]

24518.22670161208

In [33]:
Xp

array([[ 8.51879000e+05, -5.39810000e+03],
       [ 8.48056666e+05, -5.25306554e+03],
       [ 8.44027036e+05, -5.04168299e+03],
       [ 8.43348274e+05, -3.89286486e+03],
       [ 8.38552746e+05, -4.26232442e+03],
       [ 8.31311201e+05, -5.05925345e+03],
       [ 8.33652011e+05, -3.10803316e+03],
       [ 8.35867609e+05, -1.87979881e+03],
       [ 8.38762475e+05, -7.98377815e+02],
       [ 8.44038274e+05,  6.12329432e+02],
       [ 8.51929307e+05,  2.30113420e+03],
       [ 8.59424436e+05,  3.44146018e+03],
       [ 8.63748311e+05,  3.53999629e+03],
       [ 8.71205398e+05,  4.47805891e+03],
       [ 8.75409052e+05,  4.32668253e+03],
       [ 8.74494810e+05,  2.99711455e+03],
       [ 8.71884480e+05,  1.67896595e+03],
       [ 8.65114111e+05, -3.24562373e+02],
       [ 8.71659307e+05,  1.60923540e+03],
       [ 8.75127063e+05,  1.99434075e+03],
       [ 8.63084656e+05, -1.59863940e+03],
       [ 8.80670989e+05,  3.48419037e+03],
       [ 8.96641898e+05,  6.32626908e+03],
       [ 9.

In [9]:
#@title Plot production data as predicted and reported or estimated in DPR report
fig = go.Figure(px.scatter(title="%s production estimates using Kalman filter" % (dprR.value + " " + dprC.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,0], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.PR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,0], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [10]:
fig = go.Figure(px.scatter(title="%s monthly changes in liquids production estimates using Kalman filter" % (dprR.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,1], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.dPR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,1], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [11]:
dataX = data[['BH2MF','dBH2MF', 'PR']]
dataY = data['dPR']

In [12]:
from sklearn.linear_model import LinearRegression

In [13]:
model = LinearRegression()
model.fit(dataX, dataY)
predictions = model.predict(dataX)

In [14]:
#print(predictions)

In [15]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data['Month'], y=predictions, name='Predictions'))
fig.add_trace(go.Scatter(x=data['Month'], y=data['dPR'], name='Data'))

# Add title and axis labels
fig.update_layout(title='Sample Line Plot', xaxis_title='X-axis', yaxis_title='Y-axis')

# Show the plot
fig.show()