In [1]:
!pip install pandas
!pip install matplotlib
!pip install numpy
!pip install scikit-learn
!pip install tensorflow
!pip install seaborn
!pip install lasio
!pip install joblib
!pip install ydata-profiling



In [2]:
import os

import joblib
import tensorflow as tf

# The ensemble description and the per-member model files are looked up
# relative to the notebook's current working directory.
ensemble_config_path = 'deep_ensemble.pkl'
if not os.path.isfile(ensemble_config_path):
    raise FileNotFoundError(
        f"Ensemble configuration '{ensemble_config_path}' was not found in "
        f"'{os.getcwd()}'. Copy the trained ensemble artifacts next to this "
        "notebook (or change the working directory) before running this cell."
    )

# SECURITY: joblib.load unpickles arbitrary Python objects, so only load
# artifacts that come from a trusted source.
ensemble_config = joblib.load(ensemble_config_path)

# Only the length of the saved object is used here: member i of the ensemble
# is read from 'ensemble_model_<i+1>.h5'.
model_paths = [f'ensemble_model_{i + 1}.h5' for i in range(len(ensemble_config))]

# Fail before loading anything if a member is missing, and name every missing file.
missing_model_paths = [path for path in model_paths if not os.path.isfile(path)]
if missing_model_paths:
    raise FileNotFoundError(
        f"Missing ensemble model file(s) in '{os.getcwd()}': {missing_model_paths}"
    )

# Build a fresh list of Keras models instead of overwriting the unpickled object in place.
final_models = [tf.keras.models.load_model(path) for path in model_paths]

print("Deep Ensemble Models Loaded.")

FileNotFoundError: [Errno 2] No such file or directory: 'deep_ensemble.pkl'

In [None]:
import lasio
import pandas as pd
from ydata_profiling import ProfileReport

In [None]:
# LAS files to load, given relative to the notebook's working directory.
# Only the first entry is active; uncomment another line to include that well.
file_paths = [
    'Project_well_logs/TMG-1-logs.las',
    # 'Project_well_logs/TMG-07-LOGS.LAS',
    # 'Data/Well logs/TMB-4-LOGS.LAS',
    # 'Data/Well logs/TMG-02-logs.LAS',
]

In [None]:
from pathlib import Path

# Load every LAS file into its own DataFrame and tag its rows with the source well.
dataframes = []

for file_path in file_paths:
    print(f"Loading {file_path}...")

    # Parse the LAS file and expose its curves as a DataFrame. reset_index()
    # turns the frame's index ('DEPT' or the equivalent depth curve) into a
    # regular column without mutating the frame in place.
    las = lasio.read(file_path)
    df = las.df().reset_index()

    # Well name = file name without directory or extension.
    # (Splitting the whole path on '.' kept the directory prefix and returned
    # an empty string for paths such as './well.las'.)
    well_name = Path(file_path).stem
    df['Well'] = well_name

    dataframes.append(df)

# pd.concat raises a generic ValueError on an empty list; say what to fix instead.
if not dataframes:
    raise ValueError("file_paths is empty: list at least one LAS file to load.")

# Stack all wells into one frame with a fresh 0..n-1 index
combined_data = pd.concat(dataframes, ignore_index=True)

# Display the combined DataFrame
print("Combined Data:")
print(combined_data.head())
print(f"Shape of Combined Data: {combined_data.shape}")

# Generate a profile report
profile = ProfileReport(combined_data, title="Well Log Data Profiling Report", explorative=True)

# Display the report in Jupyter Notebook
profile.to_notebook_iframe()

In [None]:
# Step 1: Drop rows with missing values in any column
combined_data_cleaned = combined_data.dropna()

# Step 2: Drop rows where any of the listed logs is exactly zero
columns_to_check = ['IMPEDANCE', 'DT', 'PHI', 'RHOB']  # Add other columns as needed
mask = combined_data_cleaned[columns_to_check].eq(0).any(axis=1)

# .copy() makes the result an independent frame: later cells add columns to
# combined_data_cleaned, which on a filtered selection triggers pandas'
# SettingWithCopyWarning.
combined_data_cleaned = combined_data_cleaned.loc[~mask].copy()

# Check the final shape of the cleaned data
print(f"Final Shape After Cleaning: {combined_data_cleaned.shape}")

In [None]:
# Sanity check: number of NaN entries left in each column after cleaning
missing_values = combined_data_cleaned.isna().sum()
print("Missing Values After Cleaning:")
print(missing_values)

# Sanity check: number of exact zeros left in each column of columns_to_check
zero_values = combined_data_cleaned[columns_to_check].eq(0).sum()
print("\nZero Values After Cleaning:")
print(zero_values)

In [None]:
# Render matplotlib figures inline, in the output area of the cell that draws them.
%matplotlib inline

In [None]:
# Persist the cleaned frame as CSV, without the index column.
# At this point it holds only the cleaned input columns; the engineered
# features are added in the cells that follow.
combined_data_cleaned.to_csv(path_or_buf='cleaned_well_data.csv', index=False)
print("Cleaned data saved to 'cleaned_well_data.csv'")

In [None]:
# 'V_sh': gamma ray min-max scaled over the loaded data, so the smallest GR
# reading maps to 0 and the largest to 1.
gr_min, gr_max = combined_data_cleaned['GR'].agg(['min', 'max'])
combined_data_cleaned['V_sh'] = combined_data_cleaned['GR'].sub(gr_min).div(gr_max - gr_min)

In [None]:
# Density porosity: where the bulk density sits between the matrix density
# (porosity 0) and the fluid density (porosity 1).
rho_ma = 2.65  # Matrix density (g/cm3)
rho_fluid = 1.0  # Fluid density (g/cm3)
density_contrast = rho_ma - rho_fluid
combined_data_cleaned['Density_Porosity'] = combined_data_cleaned['RHOB'].rsub(rho_ma).div(density_contrast)

In [None]:
# Sonic porosity from the time-average relation:
#     phi = (DT_log - DT_matrix) / (DT_fluid - DT_matrix)
# NOTE(review): this constant was previously named "shale travel time", but in
# this formula it plays the role of the pore-fluid travel time (189 μs/ft is
# the value conventionally used for fresh-water mud filtrate) - confirm.
dt_fluid = 189  # Pore-fluid travel time in μs/ft
dt_matrix = 55   # Matrix travel time in μs/ft
dt_shale = dt_fluid  # Backward-compatible alias for the previous (misleading) name
combined_data_cleaned['Sonic_Porosity'] = (combined_data_cleaned['DT'] - dt_matrix) / (dt_fluid - dt_matrix)

In [None]:
# Velocity as IMPEDANCE divided by bulk density
# (assumes IMPEDANCE is acoustic impedance, i.e. density x velocity - TODO confirm).
combined_data_cleaned['Velocity'] = combined_data_cleaned['IMPEDANCE'].div(combined_data_cleaned['RHOB'])

In [None]:
# Min-max scale AMPLITUDE to [0, 1] over the loaded data
amplitude_min = combined_data_cleaned['AMPLITUDE'].min()
amplitude_max = combined_data_cleaned['AMPLITUDE'].max()
amplitude_span = amplitude_max - amplitude_min
combined_data_cleaned['Amplitude_Norm'] = combined_data_cleaned['AMPLITUDE'].sub(amplitude_min).div(amplitude_span)

In [None]:
# Min-max scale FREQUENCY to [0, 1] over the loaded data
frequency_min, frequency_max = combined_data_cleaned['FREQUENCY'].agg(['min', 'max'])
combined_data_cleaned['Frequency_Norm'] = combined_data_cleaned['FREQUENCY'].sub(frequency_min).div(frequency_max - frequency_min)

In [None]:
# Interaction feature: velocity weighted by the scaled frequency
combined_data_cleaned['Velocity_Frequency'] = combined_data_cleaned['Velocity'].mul(combined_data_cleaned['Frequency_Norm'])

In [None]:
# Interaction feature: element-wise product of gamma ray and impedance
combined_data_cleaned['GR_Impedance'] = combined_data_cleaned['GR'].mul(combined_data_cleaned['IMPEDANCE'])

In [None]:
window_size = 5  # Adjust as needed

# Centered moving average of GR, RHOB and DT (columns are added in that order).
# The window is applied per well so that, when several LAS files are loaded,
# samples from one well are never averaged with samples from the next one.
# With a single well the result is the same as a plain rolling mean.
# Rows near the top and bottom of each well get NaN because their window is incomplete.
for log_name in ('GR', 'RHOB', 'DT'):
    combined_data_cleaned[f'{log_name}_MA'] = (
        combined_data_cleaned
        .groupby('Well', sort=False)[log_name]
        .transform(lambda values: values.rolling(window=window_size, center=True).mean())
    )

In [None]:
import matplotlib.pyplot as plt


def plot_log_with_moving_average(data, column, ma_column, ylabel, title):
    """Plot a raw log and its moving average against depth.

    Depth ('DEPT') goes on the x-axis and the log values on the y-axis.
    The y-axis is deliberately NOT inverted: it carries the measurement,
    not depth, so flipping it would draw the curve upside down.

    Parameters
    ----------
    data : DataFrame holding 'DEPT', `column` and `ma_column`.
    column : name of the raw log column.
    ma_column : name of the smoothed column.
    ylabel : y-axis label, including the unit of the log.
    title : figure title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(data['DEPT'], data[column], label=f'Original {column}', color='blue', alpha=0.5)
    ax.plot(data['DEPT'], data[ma_column], label='Moving Average', color='red')
    ax.set_xlabel('Depth (m)')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    plt.show()


# Units follow the constants used for the porosity features: DT in μs/ft, RHOB in g/cm3.
plot_log_with_moving_average(combined_data_cleaned, 'GR', 'GR_MA', 'Gamma Ray (API)', 'Gamma Ray with Moving Average')
plot_log_with_moving_average(combined_data_cleaned, 'DT', 'DT_MA', 'DT (μs/ft)', 'DT with Moving Average')
plot_log_with_moving_average(combined_data_cleaned, 'RHOB', 'RHOB_MA', 'RHOB (g/cm3)', 'RHOB with Moving Average')

In [None]:
import numpy as np

# Derivative of GR, RHOB and DT with respect to depth (columns are added in that order).
# The gradient is taken per well: differentiating across the boundary between
# two concatenated wells would combine the last sample of one well with the
# first sample of the next, where depth jumps back.
# With a single well this equals np.gradient over the whole column.
# np.gradient needs at least two samples per well and raises ValueError otherwise.
for log_name in ('GR', 'RHOB', 'DT'):
    combined_data_cleaned[f'{log_name}_Gradient'] = (
        combined_data_cleaned
        .groupby('Well', sort=False)[log_name]
        .transform(
            lambda values: np.gradient(
                values.to_numpy(),
                # Depth samples of the same rows as `values`
                combined_data_cleaned.loc[values.index, 'DEPT'].to_numpy(),
            )
        )
    )

In [None]:
# Interaction features: scaled amplitude and scaled frequency, each multiplied by impedance
combined_data_cleaned['Amplitude_Impedance'] = combined_data_cleaned['Amplitude_Norm'].mul(combined_data_cleaned['IMPEDANCE'])
combined_data_cleaned['Frequency_Impedance'] = combined_data_cleaned['Frequency_Norm'].mul(combined_data_cleaned['IMPEDANCE'])

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

# Features are every column except the target, the depth and the well label;
# the target is PHI.
non_feature_columns = ['PHI', 'DEPT', 'Well']
X = combined_data_cleaned.drop(columns=non_feature_columns, errors='ignore')
y = combined_data_cleaned['PHI']

# Standardize the features.
# NOTE(review): both scalers are fitted on the data loaded in this notebook;
# presumably the ensemble expects the scaling used at training time - confirm.
scaler_X = StandardScaler()
X_normalized = scaler_X.fit_transform(X)

# Scale the target (PHI) into [0, 1]; the scaler needs a 2-D column, and the
# result is flattened back to 1-D.
scaler_y = MinMaxScaler(feature_range=(0, 1))
y_as_column = y.to_numpy().reshape(-1, 1)
y_normalized = scaler_y.fit_transform(y_as_column).ravel()

# Reassemble the scaled features and the scaled target into one frame
# (this frame gets a fresh 0..n-1 index).
combined_data_normalized = pd.DataFrame(X_normalized, columns=X.columns).assign(PHI=y_normalized)

# Inspect the normalized data
print("Normalized Data Overview:")
print(combined_data_normalized.head())

In [None]:
# Sanity check: number of NaN entries in each column of the scaled frame
missing_values = combined_data_normalized.isna().sum()
print("Missing Values After Normalizing:")
print(missing_values)

# Sanity check: number of exact zeros in each column of columns_to_check
zero_values = combined_data_normalized[columns_to_check].eq(0).sum()
print("\nZero Values After Normalizing:")
print(zero_values)

In [None]:
# Step 1: Remove every row that still contains a NaN
combined_data_normalized = combined_data_normalized.dropna()

# Step 2: Remove rows where any of the listed columns is exactly zero.
# This rebinds columns_to_check, which earlier cells also read.
# NOTE(review): these columns are already standardized here, so an exact zero
# presumably means "equal to the column mean" rather than "missing" - confirm
# that this filter is intended.
columns_to_check = ['GR_MA','GR_Gradient','V_sh']
is_zero = combined_data_normalized[columns_to_check] == 0
mask = is_zero.any(axis=1)
combined_data_normalized = combined_data_normalized.loc[~mask]

# Report the shape that remains
print(f"Final Shape After Cleaning: {combined_data_normalized.shape}")

In [None]:
# Split the scaled frame into the model inputs (X) and the target (y)
target_column = 'PHI'

# 'Well' and 'DEPT' are dropped only if present (errors='ignore')
excluded_columns = [target_column, 'Well', 'DEPT']
X = combined_data_normalized.drop(columns=excluded_columns, errors='ignore')
y = combined_data_normalized.loc[:, target_column]

# Report the shapes of X and y
print("\nInput Features Shape:", X.shape)
print("Target Variable Shape:", y.shape)


In [None]:
from sklearn.metrics import mean_squared_error

# Collect one flattened prediction vector per ensemble member, then stack them
# into an array with one row per member.
member_predictions = []
for ensemble_member in final_models:
    member_predictions.append(ensemble_member.predict(X).flatten())
y_preds = np.array(member_predictions)

# Ensemble prediction = mean over members; spread over members = std
y_pred_mean = np.mean(y_preds, axis=0)
y_pred_std = np.std(y_preds, axis=0)

# Score the ensemble mean against the target y
mse = mean_squared_error(y, y_pred_mean)
print(f"Final Mean Squared Error: {mse}")

In [None]:
# Render matplotlib figures inline (this magic also appears in an earlier cell).
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot everything against one shared positional axis. y is a pandas Series
# whose index has gaps after the row filtering, so plotting it directly would
# place it at different x positions than the position-indexed predictions.
sample_positions = np.arange(len(y_pred_mean))
y_true = np.asarray(y)

# Band of +/- 2 standard deviations across ensemble members
# (labelled 95% CI, which assumes roughly normal member predictions).
lower_bound = y_pred_mean - 2 * y_pred_std
upper_bound = y_pred_mean + 2 * y_pred_std

fig, ax = plt.subplots(figsize=(8, 6))
ax.fill_between(sample_positions, lower_bound, upper_bound, color='green', alpha=1, label='95% CI')
# The legend labels used to be swapped: y is the target, y_pred_mean the ensemble prediction.
ax.plot(sample_positions, y_true, label='True Porosity', color='blue')
ax.plot(sample_positions, y_pred_mean, label='Predicted Porosity', color='red')

ax.set_xlabel('Sample')
ax.set_ylabel('Porosity (min-max scaled)')
ax.legend()
ax.set_title('Porosity Prediction with Uncertainty (Deep Ensemble)')
plt.show()

In [None]:
# Display the target values (the min-max scaled PHI column) that the ensemble is scored against.
y

In [None]:
# Display the ensemble prediction: the per-sample mean over all members' predictions.
y_pred_mean