In [1]:
import numpy as np
import pandas as pd
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Load the semicolon-separated labeled data file into a DataFrame
df = pd.read_csv("./Data/labeled_data.csv", sep=";")

In [2]:
# Collect the distinct well names found in the WELL column
well_names = df["WELL"].unique()

In [3]:
columns = ['DEPTH_MD', 'RDEP', 'RMED', 'RXO','NPHI','RHOB','DRHO','SP']

# Find the well with the most rows in which every selected log is present.
# The score of a well is its number of complete (NaN-free) rows; the first
# well reaching the highest score wins.
best_well, best_well_score = None, 0
for well in well_names:
    well_df = df.loc[df["WELL"] == well, columns].dropna()
    score = len(well_df)
    if score > best_well_score:
        best_well, best_well_score = well, score

In [4]:
# Display the well with the most complete rows across the selected logs
best_well

'30/6-5'

In [5]:
# Pick the well chosen by the completeness search (currently '30/6-5')
# instead of hard-coding its name, so this stays correct if the data changes.
well_original = df[df['WELL'] == best_well]

In [6]:
# Select the logs of interest, reusing the `columns` list that was used to
# rank the wells so both steps always look at the same logs. The copy keeps
# later edits from touching `well_original`.
well = well_original[columns].copy()
well.shape

(21636, 8)

In [7]:
# Number of missing values per selected log
well.isna().sum()


DEPTH_MD       0
RDEP           0
RMED           0
RXO           93
NPHI        4240
RHOB        4240
DRHO        4240
SP             0
dtype: int64

In [8]:
# Keep only the rows where every selected log is present
well = well.dropna()

## Archie's Equation

In [9]:
# Get the necessary logs
RDEP = well["RDEP"]
RMED = well["RMED"]
RXO = well["RXO"]
NPHI = well["NPHI"]
RHOB = well["RHOB"]
DRHO = well["DRHO"]
SP = well["SP"]

# Archie parameters, using the conventional naming:
#   a - tortuosity factor
#   m - cementation exponent (applied to the porosity)
#   n - saturation exponent (order of the root taken of the whole ratio)
a = 1.0
m = 2.0
n = 2.0

# Formation-water resistivity estimated from the SP log.
# NOTE(review): log10 gives no finite value for SP <= 0, so such samples end
# up as NaN/inf here - confirm this relation and the expected SP range.
Rw = np.exp(2.25 - 0.75 * np.log10(SP))

# Calculate the effective porosity
# NOTE(review): correction of NPHI by DRHO as written in the original
# analysis - confirm against the intended porosity model.
phi = (NPHI - DRHO) / (1 - DRHO)

# Resistivity used as Rt: RXO where it is positive, otherwise RMED where it
# is positive, otherwise RDEP.
# NOTE(review): this priority favours the shallowest reading; the deep log is
# normally the closest to true formation resistivity - confirm the order.
Rt = np.where(RMED > 0, RMED, RDEP)
Rt = np.where(RXO > 0, RXO, Rt)

# Archie's equation: Sw = ((a * Rw) / (phi**m * Rt)) ** (1 / n)
# (m belongs on the porosity and n on the root; with m == n == 2 the numbers
# are the same as before, but the formula now stays right if either changes.)
Sw = ((a * Rw) / (phi**m * Rt)) ** (1 / n)

# Express the saturation as a percentage and cap it at 100%
water_content = Sw * 100
water_content = np.where(water_content > 100, 100, water_content)

# Average over the samples that produced a number (NaN samples are skipped);
# boolean indexing already returns a new array, so no explicit copy is needed.
water_content_copy = water_content[~np.isnan(water_content)]
overall_water_content = np.mean(water_content_copy)

print("Overall water content: {:.2f}%".format(overall_water_content))


Overall water content: 99.95%


In [10]:
# Span between the shallowest and deepest remaining sample
depth_md = well["DEPTH_MD"]
well_depth = depth_md.max() - depth_md.min()
print("Well depth range: {:.2f}".format(well_depth))

Well depth range: 2646.47


In [11]:
# Scale the non-water fraction (1 - mean water content) by the logged depth range
(1 - overall_water_content/100) * well_depth

1.4189128693216961

This means that, on average, about 0.05% of the logged interval is not water and may therefore contain oil or gas. Given the well's depth range, we can expect hydrocarbons over roughly 1.4 m of the well (which is not bad).

To find the exact spots:

In [12]:
# Print the depths whose water content is below 90%
print("Depth (m)    Water Content (%)")
below_threshold = water_content < 90  # NaN compares False, so it is skipped
depths = well["DEPTH_MD"].to_numpy()
for depth, wc in zip(depths[below_threshold], water_content[below_threshold]):
    print("{:.2f}    {:.2f}".format(depth, wc))

Depth (m)    Water Content (%)
902.16    39.85
902.31    63.09
902.47    87.01
902.62    85.47
2510.32    82.30
2630.71    84.26
2637.24    74.71
2739.39    85.51
2836.36    88.46
2836.51    76.82
3019.07    58.05
3019.22    41.67
3019.52    88.03
3019.67    88.49
3241.90    76.21
3242.05    64.46
3242.20    63.74
3242.35    65.13
3243.87    60.53
3244.03    64.12
3244.18    68.84
3244.33    63.89
3244.48    62.20
3244.63    66.99
3244.79    68.00
3244.94    64.90
3245.09    67.28
3245.24    73.18
3245.39    83.47


In [13]:
# Missing-value count per column for a second set of logs plus the label
checked_columns = ["DEPTH_MD", "RDEP",  "RHOB", "DRHO", 'RSHA',"SP",'LITHOLOGY']
well_original[checked_columns].isna().sum()

DEPTH_MD        0
RDEP            0
RHOB         4240
DRHO         4240
RSHA           93
SP              0
LITHOLOGY       0
dtype: int64

In [14]:
# we can use our model to predict the lithology of these points
import pickle

from sklearn.ensemble import RandomForestClassifier

from utils import rename_columns

# Load the trained model; the context manager closes the file handle, which
# the bare `pickle.load(open(...))` never did.
# NOTE: unpickling can execute arbitrary code - only load model files that
# come from a trusted source.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Feature logs plus the label for the selected well, complete rows only
points = well_original[["DEPTH_MD", "RDEP",  "RHOB", "DRHO", 'RSHA',"SP","GR",'LITHOLOGY']].copy()
points = points.dropna()

# presumably renames the columns of `points` in place (later cells look up
# 'Measured Depth'); the helper lives in utils and is not shown here
rename_columns(points)


In [15]:
# Replace each raw lithology code by its position in this list (0-11);
# codes that are not listed become NaN.
lithology_codes = [30000, 65030, 65000, 80000, 74000, 70000,
                   70032, 88000, 86000, 99000, 90000, 93000]
code_to_class = {code: class_id for class_id, code in enumerate(lithology_codes)}
points['LITHOLOGY'] = points['LITHOLOGY'].map(code_to_class)

In [16]:
# Class id -> lithology name; the id is the position in this list
lithology_map = dict(enumerate([
    'Sandstone',
    'Sandstone/Shale',
    'Shale',
    'Marl',
    'Dolomite',
    'Limestone',
    'Chalk',
    'Halite',
    'Anhydrite',
    'Tuff',
    'Coal',
    'Basement',
]))

In [17]:
# Predict the lithology at the points with water content below 90%
for depth, wc in zip(well["DEPTH_MD"], water_content):
    if wc < 90:
        point = points[points['Measured Depth'] == depth]
        point = point.dropna()
        point = point.drop(columns=['Measured Depth', "LITHOLOGY"])
        if point.empty:
            # No usable feature row at this depth (it was dropped because of
            # missing values), so there is nothing to predict and
            # `lithology[0]` below would fail - report it and move on.
            print("{:.2f}    {}".format(depth, "no complete feature row"))
            continue
        lithology = model.predict(point)
        # map the predicted class id to the lithology name
        lithology = lithology_map[lithology[0]]
        print("{:.2f}    {}".format(depth, lithology))

902.16    Limestone
902.31    Limestone
902.47    Limestone
902.62    Limestone
2510.32    Sandstone
2630.71    Sandstone
2637.24    Sandstone
2739.39    Limestone
2836.36    Sandstone
2836.51    Sandstone
3019.07    Chalk
3019.22    Chalk
3019.52    Chalk
3019.67    Chalk
3241.90    Chalk
3242.05    Chalk
3242.20    Chalk
3242.35    Chalk
3243.87    Chalk
3244.03    Chalk
3244.18    Chalk
3244.33    Chalk
3244.48    Chalk
3244.63    Chalk
3244.79    Chalk
3244.94    Chalk
3245.09    Chalk
3245.24    Chalk
3245.39    Chalk


The intervals predicted as sandstone are the most likely to contain hydrocarbons.