In [3]:
from re import L
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import RobustScaler
from sklearn.multioutput import MultiOutputRegressor


In [5]:
df_rev = pd.read_excel('G:\My Drive\Drilling\Drilling_Data.xls')

df_rev = df_rev.drop(
    ['MD', 'WOB', 'TRQ', 'SPP', 'Flow In', 'SPM', 'Actpitv', 'HOOKLOAD', 'CSIP',
     'P ovb For Soft formation', 'P ovb For Hard formation', 'P ovb', 'd',
     'D adjust', 'PP Eaton Method'], axis=1
)
df_rev = df_rev.drop(0, axis=0)
df_rev = df_rev.reset_index(drop=True)


numeric_columns = ['ROP', 'ROP.1', 'WOB.1', 'BIT_RPM', 'MWI', 'MWO',
                   'PP Zamora Method']

for column in numeric_columns:
    df_rev[column] = pd.to_numeric(df_rev[column], errors='coerce')

In [6]:
# Define the input (X) and target outputs (Y)
X = df_rev[['PP Zamora Method']]  
Y = df_rev[['ROP', 'ROP.1', 'WOB.1', 'BIT_RPM', 'MWI', 'MWO']]  # Multiple targets

# Log Transformation (on both X and Y to reduce skewness)
X_log = np.log1p(X)
Y_log = np.log1p(Y)

# Robust Scaling (to normalize features and targets)
x_scaler = RobustScaler()
y_scaler = RobustScaler()

X_scaled = x_scaler.fit_transform(X_log)
Y_scaled = y_scaler.fit_transform(Y_log)

# Split into train/test sets
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_scaled, test_size=0.3, random_state=42)

# Model 1: Linear Regression for Multi-Output
lr = MultiOutputRegressor(LinearRegression())
lr.fit(X_train, Y_train)
Y_pred_lr = lr.predict(X_test)

print("\nLinear Regression Performance (with Transformation and Scaling):")
for i, col in enumerate(Y.columns):
    print(f"{col} - MSE: {mean_squared_error(Y_test[:, i], Y_pred_lr[:, i]):.6f}, "
          f"R2 Score: {r2_score(Y_test[:, i], Y_pred_lr[:, i]):.6f}")

# Model 2: Random Forest Regressor for Multi-Output
rf = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
rf.fit(X_train, Y_train)
Y_pred_rf = rf.predict(X_test)

print("\nRandom Forest Performance (with Transformation and Scaling):")
for i, col in enumerate(Y.columns):
    print(f"{col} - MSE: {mean_squared_error(Y_test[:, i], Y_pred_rf[:, i]):.6f}, "
          f"R2 Score: {r2_score(Y_test[:, i], Y_pred_rf[:, i]):.6f}")

# Predictions for New Data
new_pp_zamora = [[0.9]]  
new_pp_zamora_transformed = x_scaler.transform(np.log1p(new_pp_zamora))

new_predictions_lr = lr.predict(new_pp_zamora_transformed)
new_predictions_rf = rf.predict(new_pp_zamora_transformed)

# Inverse transformations to interpret predictions
new_predictions_lr_original = np.expm1(y_scaler.inverse_transform(new_predictions_lr))
new_predictions_rf_original = np.expm1(y_scaler.inverse_transform(new_predictions_rf))

print("\nPredictions for New PP Zamora Method (Linear Regression):")
print(f"ROP: {new_predictions_lr_original[0][0]}, ROP.1: {new_predictions_lr_original[0][1]}, "
      f"WOB.1: {new_predictions_lr_original[0][2]}, BIT_RPM: {new_predictions_lr_original[0][3]}, "
      f"MWI: {new_predictions_lr_original[0][4]}, MWO: {new_predictions_lr_original[0][5]}")

print("\nPredictions for New PP Zamora Method (Random Forest):")
print(f"ROP: {new_predictions_rf_original[0][0]}, ROP.1: {new_predictions_rf_original[0][1]}, "
      f"WOB.1: {new_predictions_rf_original[0][2]}, BIT_RPM: {new_predictions_rf_original[0][3]}, "
      f"MWI: {new_predictions_rf_original[0][4]}, MWO: {new_predictions_rf_original[0][5]}")



Linear Regression Performance (with Transformation and Scaling):
ROP - MSE: 0.430557, R2 Score: 0.004752
ROP.1 - MSE: 0.727949, R2 Score: 0.004136
WOB.1 - MSE: 0.411304, R2 Score: 0.045215
BIT_RPM - MSE: 0.628274, R2 Score: 0.013334
MWI - MSE: 8.063806, R2 Score: 0.059250
MWO - MSE: 15.066460, R2 Score: 0.033940

Random Forest Performance (with Transformation and Scaling):
ROP - MSE: 0.250694, R2 Score: 0.420513
ROP.1 - MSE: 0.454569, R2 Score: 0.378131
WOB.1 - MSE: 0.223299, R2 Score: 0.481642
BIT_RPM - MSE: 0.167683, R2 Score: 0.736664
MWI - MSE: 1.459805, R2 Score: 0.829694
MWO - MSE: 2.727557, R2 Score: 0.825109

Predictions for New PP Zamora Method (Linear Regression):
ROP: -0.9832572192075889, ROP.1: 1.9152253570758528, WOB.1: 455437283.2528059, BIT_RPM: -0.5698821543020106, MWI: -0.794391806323378, MWO: 0.23815997108081133

Predictions for New PP Zamora Method (Random Forest):
ROP: 8.135196509843972, ROP.1: 0.12579116849454058, WOB.1: 3.2614415727301838, BIT_RPM: 198.5880358432



In [7]:
# Example: Input a new value for PP Zamora Method
new_pp_zamora_value = [[8.586818]]

# Log Transform and Scale the Input
new_pp_zamora_transformed = x_scaler.transform(np.log1p(new_pp_zamora_value))

# Get Predictions from Each Model
# Linear Regression Prediction
linear_pred = lr.predict(new_pp_zamora_transformed)
linear_pred_original = np.expm1(y_scaler.inverse_transform(linear_pred))  # Inverse transform to original scale

# Random Forest Prediction
rf_pred = rf.predict(new_pp_zamora_transformed)
rf_pred_original = np.expm1(y_scaler.inverse_transform(rf_pred))  # Inverse transform to original scale

# Display Predictions
print("\nPredictions for New PP Zamora Method Value:")
print(f"Input PP Zamora Method: {new_pp_zamora_value[0][0]}")

print("\nLinear Regression Predictions:")
for i, col in enumerate(Y.columns):
    print(f"{col}: {linear_pred_original[0][i]:.6f}")

print("\nRandom Forest Predictions:")
for i, col in enumerate(Y.columns):
    print(f"{col}: {rf_pred_original[0][i]:.6f}")





Predictions for New PP Zamora Method Value:
Input PP Zamora Method: 8.586818

Linear Regression Predictions:
ROP: 8.134727
ROP.1: 0.170041
WOB.1: 3.642256
BIT_RPM: 134.766602
MWI: 80.745101
MWO: 81.526873

Random Forest Predictions:
ROP: 5.621921
ROP.1: 0.203073
WOB.1: 2.404094
BIT_RPM: 56.168538
MWI: 64.519585
MWO: 65.144585
