<a href="https://colab.research.google.com/github/JozefSL/pyNotes/blob/main/Kalman/DPR_Oil%26Gas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install filterpy and restart runtime
# this command hides the cell output
%%capture
!pip install filterpy
import os
#os.kill(os.getpid(),9)

In [2]:
#@title Import packages from Google and get EIA logo
import numpy as np
import pandas as pd
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise
from filterpy.stats import plot_covariance_ellipse
from filterpy.kalman import predict
from filterpy.kalman import update
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import r2_score 
import ipywidgets as widgets

eiaLgFile = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/49/Eia-logomark.svg/640px-Eia-logomark.svg.png"
eiaLogo = [dict(source = eiaLgFile,
                       x=1.06, y=-0.06,
                       sizex=0.15, sizey=0.15,
                       xanchor="center", yanchor="bottom")]


In [3]:
#@title Select DPR Region 
dprRegions = ['Anadarko Region','Bakken Region','Eagle Ford Region','Niobrara Region','Permian Region', 'Marcellus Region','Utica Region','Appalachia Region','Haynesville Region']
dprR = widgets.Dropdown(options=dprRegions, description='DPR_Region:', disabled=False)
dprR

Dropdown(description='DPR_Region:', options=('Anadarko Region', 'Bakken Region', 'Eagle Ford Region', 'Niobrar…

In [4]:
#@title Select oil or gas commodity 
dprCommodity = ['oil','gas']
dprC = widgets.Dropdown(options=dprCommodity, description='DPR_Com:', disabled=False)
dprC

Dropdown(description='DPR_Com:', options=('oil', 'gas'), value='oil')

In [6]:
#@title Import rig and production data from the DPR region
file = r"https://www.eia.gov/petroleum/drilling/xls/dpr-data.xlsx"
data = pd.read_excel(file, sheet_name=dprR.value, skiprows=2, usecols=[0,1,4,7]) #index_col=0, , nrows=numRows)
data.columns = ['Month', 'BH','PRo' ,'PRg']
#data.head(4)
data['BH2MF'] = data['BH'].shift(2)
data['dPRo'] = data['PRo'].diff()
data['dPRg'] = data['PRg'].diff()
data['dBH2MF'] = data['BH2MF'].diff()
data = data.fillna(method="backfill")
data = data.fillna(method="ffill")
data.tail()

Unnamed: 0,Month,BH,PRo,PRg,BH2MF,dPRo,dPRg,dBH2MF
191,2023-01-01,355.0,5595141.1,22120006.0,349.0,82432.5,335604.8,3.0
192,2023-02-01,353.0,5626839.9,22231310.6,350.0,31698.8,111304.6,1.0
193,2023-03-01,349.0,5657863.4,22341466.1,355.0,31023.5,110155.5,5.0
194,2023-04-01,349.0,5680761.8,22436740.7,353.0,22898.4,95274.6,-2.0
195,2023-05-01,349.0,5693850.9,22513642.1,349.0,13089.1,76901.4,-4.0


In [54]:
#@title Kalman Filter setup
# Step units in months
dt = 1

if dprC.value == "oil":
    data['PR'] = data['PRo']
    data['dPR'] = data['dPRo']
else:
    data['PR'] = data['PRg']
    data['dPR'] = data['dPRg']

orv = data['PR'].iloc[0]
orr = data['dPR'].iloc[0]

# x - Origin state estimate vector
x = np.array([orv, orr]).T

# Q - Process noise matrix
# Q = Q_discrete_white_noise(dim=2, dt=1., var=.75)
Q = np.array([[0.25, 0.5],
              [0.5, 1.]])
# P - Covariance matrix
P = np.diag([11, 1])

# R - Measurement noise matrix
# R = np.diag([1.13, 24])
R = np.diag([181, 16])

# H - Measurement function
H = np.array([[1., 0.],
              [0., 1.]])

# F - State transition matrix
F = np.array([[1, dt],
              [0, 1]])

# B - Measurement function
B = np.array([[0., 0],
              [0., 65]])

# Measurements for update 
zsu = data[['PR','dPR','BH2MF','dBH2MF']].to_numpy()

# System prediction 
Xp = np.empty((0,2), int)

# System update
Xu = np.empty((0,2), int)
for zs  in zsu:
    z = zs[0:2]
    u = zs[2:4]
 
    #u[0]=0
    #u[1]=0
    #print(z)
    x, P = predict(x=x, P=P, F=F, Q=Q, u=u, B=B, alpha=0.99)    #x, P = predict(x=x, P=P, F=F, Q=Q, u=u)
    Xp = np.vstack([Xp, x])

    x, P = update(x, P, z, R, H,)
    Xu = np.vstack([Xu, x])
   
print('R^2:', r2_score(Xp[:,0], data.PR),'|| lastEstimate:', z, '|| lastRigChange:',u)  

R^2: 0.9952918451802863 || lastEstimate: [5693850.9   13089.1] || lastRigChange: [349.  -4.]


In [28]:
u

array([349.,  -4.])

In [None]:
#@title Plot production data as predicted and reported or estimated in DPR report
fig = go.Figure(px.scatter(title="%s production estimates using Kalman filter" % (dprR.value + " " + dprC.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,0], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.PR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,0], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [None]:
#@title Plot predicted and estimated rate of production change as well as reported production change by DPR report
fig = go.Figure(px.scatter(title="Monthly changes in %s  production estimates using Kalman filter" % (dprR.value + " " + dprC.value)))
fig.add_trace(go.Scatter(x=data.Month, y=Xp[:,1], mode='markers',name='PredictPR'))
fig.add_trace(go.Scatter(x=data.Month, y=data.dPR, mode='markers',name='Reported'))
fig.add_trace(go.Scatter(x=data.Month, y=Xu[:,1], mode='lines+markers',name='UpdatePR'))
fig.layout.images = eiaLogo
fig.show()

In [None]:
dataX = data[['BH2MF','dBH2MF', 'PR']]
dataY = data['dPR']

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
model = LinearRegression()
model.fit(dataX, dataY)
predictions = model.predict(dataX)

In [None]:
#print(predictions)

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data['Month'], y=predictions, name='Predictions'))
fig.add_trace(go.Scatter(x=data['Month'], y=data['dPR'], name='Data'))

# Add title and axis labels
fig.update_layout(title='Sample Line Plot', xaxis_title='X-axis', yaxis_title='Y-axis')

# Show the plot
fig.show()