<a href="https://colab.research.google.com/github/JozefSL/pyNotes/blob/main/Kalman/DPR_liqProduction3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
#@title Install filterpy and restart runtime
# this command hides the cell output
%%capture
!pip install filterpy
import os
#os.kill(os.getpid(),9)

In [32]:
#@title Import packages from Google and get EIA logo
import numpy as np
import pandas as pd
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise
from filterpy.stats import plot_covariance_ellipse
from filterpy.kalman import predict
from filterpy.kalman import update
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import r2_score 
import ipywidgets as widgets

eiaLgFile = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/49/Eia-logomark.svg/640px-Eia-logomark.svg.png"
eiaLogo = [dict(source = eiaLgFile,
                       x=1.06, y=-0.06,
                       sizex=0.15, sizey=0.15,
                       xanchor="center", yanchor="bottom")]


In [33]:
#@title Select DPR Region 
dprRegions = ['Anadarko Region','Bakken Region','Eagle Ford Region','Niobrara Region','Permian Region', 'Marcellus Region','Utica Region','Appalachia Region','Haynesville Region']
dprR = widgets.Dropdown(options=dprRegions, value='Permian Region', description='DPR_Region:', disabled=False)
dprR

Dropdown(description='DPR_Region:', index=4, options=('Anadarko Region', 'Bakken Region', 'Eagle Ford Region',…

In [35]:
#@title Import rig and production data from the DPR region
file = r"https://www.eia.gov/petroleum/drilling/xls/dpr-data.xlsx"
data = pd.read_excel(file, sheet_name=dprR.value, skiprows=2, usecols=[0,1,4]) #index_col=0, , nrows=numRows)
data.columns = ['Month', 'BH', 'PR']
#data.head(4)
data['BH2MF'] = data['BH'].shift(2)
data['dPR'] = data['PR'].diff()
data['dBH2MF'] = data['BH2MF'].diff()
data = data.fillna(method="backfill")
data = data.fillna(method="ffill")
data.tail()

Unnamed: 0,Month,BH,PR,BH2MF,dPR,dBH2MF
190,2022-12-01,76.0,1076028.5,76.0,-19337.2,0.0
191,2023-01-01,78.0,1100625.7,76.0,24597.2,0.0
192,2023-02-01,78.0,1111267.4,76.0,10641.7,0.0
193,2023-03-01,78.0,1122680.8,78.0,11413.4,2.0
194,2023-04-01,78.0,1132066.5,78.0,9385.7,0.0


In [39]:
#@title Kalman Filter setup
# Step units in months
dt = 1

orv = data['PR'].iloc[0]
orr = data['dPR'].iloc[0]

# x - Origin state estimate vector
x = np.array([orv, orr]).T

# Q - Process noise matrix
Q = Q_discrete_white_noise(dim=2, dt=1., var=.75)

# P - Covariance matrix
P = np.diag([11, 1])

# R - Measurement noise matrix
R = np.diag([1.13, 24])

# H - Measurement function
H = np.array([[1., 0.],
              [0., 1.]])

# F - State transition matrix
F = np.array([[1, dt],
              [0, 1]])

# B - Measurement function
B = np.array([[0.,1],
              [0., 1*dt]])

# Measurements for update 
zsu = data[['PR','dPR','BH2MF','dBH2MF']].to_numpy()

# System prediction 
Xp = np.empty((0,2), int)

# System update
Xu = np.empty((0,2), int)
for zs  in zsu:
    z = zs[0:2]
    u = zs[2:4]
 
    u[0]=0
    #u[2]=0
    #print(z)
    x, P = predict(x=x, P=P, F=F, Q=Q, u=u, B=B, alpha=0.99)    #x, P = predict(x=x, P=P, F=F, Q=Q, u=u)
    Xp = np.vstack([Xp, x])

    x, P = update(x, P, z, R, H,)
    Xu = np.vstack([Xu, x])
   
print('R^2:', r2_score(Xp[:,0], data.PR),'|| lastEstimate:', z, '|| lastRigChange:',u)  

R^2: 0.9921520362147593 || lastEstimate: [1132066.5    9385.7] || lastRigChange: [0. 0.]


In [38]:
#@title Plot production data as predicted and reported or estimated in DPR report
fig = go.Figure(px.scatter(title="%s liquids production estimates using Kalman filter" % (dprR.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,0], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.PR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,0], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [41]:
fig = go.Figure(px.scatter(title="%s monthly changes in liquids production estimates using Kalman filter" % (dprR.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,1], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.dPR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,1], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [None]:
dataX = data[['BH2MF','dBH2MF', 'PR']]
dataY = data['dPR']

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
model = LinearRegression()
model.fit(dataX, dataY)
predictions = model.predict(dataX)

In [None]:
#print(predictions)

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data['Month'], y=predictions, name='Predictions'))
fig.add_trace(go.Scatter(x=data['Month'], y=data['dPR'], name='Data'))

# Add title and axis labels
fig.update_layout(title='Sample Line Plot', xaxis_title='X-axis', yaxis_title='Y-axis')

# Show the plot
fig.show()