In [465]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell is executed, so edits to the local `utils` package are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [466]:
import os
# Step up one level for as long as the working directory path ends in "Notebooks",
# so that project-relative paths resolve from the directory above it.
current_directory = os.getcwd()
while True:
    if not current_directory.endswith("Notebooks"):
        break
    os.chdir(os.pardir)
    current_directory = os.getcwd()
    print("Current working directory: ", current_directory)

In [467]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import pickle 
from pathlib import Path

from torch.optim import Adam
from torch.utils.data import DataLoader
from torchinfo import summary
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC,SVR
from xgboost import XGBClassifier, XGBRegressor
from utils.filepaths.filepaths import FILE_PATH_FLOW_LOOP_DATA, FILE_PATH_BEST_MODELS_FEATURE_2, FILE_PATH_TEST_SET_CASE_102, FILE_PATH_BEST_MODELS_REGRESSION_FEATURE_2, FILE_PATH_TEST_SET_CASE_101, FILE_PATH_NN_FEATURE_2, FILE_PATH_NN_REGRESION_FEATURE_2
from utils.datasets.datasets import MLPDataset
from utils.training.training import k_fold_cross_validation, k_fold_cross_validation_sklearn_models
from utils.models.models import MLP
from utils.utils.utils import change_directory, define_threshold

In [468]:
# Point the generic aliases used below at the "feature 2" artefacts.
# Classification: pickled best parameters and saved MLP state dict.
FILE_PATH_BEST_MODELS, FILE_PATH_NN = (
    FILE_PATH_BEST_MODELS_FEATURE_2,
    FILE_PATH_NN_FEATURE_2,
)
# Regression counterparts.
FILE_PATH_BEST_MODELS_REGRESSION, FILE_PATH_NN_REGRESSION = (
    FILE_PATH_BEST_MODELS_REGRESSION_FEATURE_2,
    FILE_PATH_NN_REGRESION_FEATURE_2,
)

In [469]:
# Read the tab-separated flow-loop file into a list of rows, each row being a list
# of string fields.
with open(FILE_PATH_FLOW_LOOP_DATA, 'r') as file:
    data = file.readlines()

# strip() removes all leading/trailing whitespace (tabs included) before the split.
changed_data = [line.strip().split('\t') for line in data]
        

In [470]:
# The first parsed row supplies the column names; the remaining rows are the data.
data_rows = changed_data[1:]
header_row = changed_data[0]
# drop(index=0) discards the first data row.
# NOTE(review): presumably a units/description row under the header - confirm.
df = pd.DataFrame(data=data_rows, columns=header_row).drop(index=0)

In [471]:
print(df.columns)
# Keep only rows whose cuttings-concentration quality flag is '1'. The comparison is
# against a string because the file was parsed as text.
df = df[df["Cuttings conc quality"]=='1']
# Shuffle with a fixed seed: an unseeded shuffle here would give a different row
# order on every run, making every later (seeded) step and model fit irreproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)


Index(['mean File name', 'mean StartTime', 'mean EndTime', 'mean DPT1',
       'mean DPT2', 'mean DPT3', 'mean Torque', 'mean MudDensity',
       'mean Tankweight', 'mean TempTank', 'mean SandRate', 'mean Flow',
       'mean DPT4', 'mean DPR1', 'mean DPR2', 'mean TestSectionWeight',
       'mean SetPoint', 'mean USL', 'mean StringRPM',
       'mean relBedHeightTankWeight', 'mean relBedHeightTestSectionWeight',
       'mean fricCoeff', 'std DPT1', 'std DPT2', 'std DPT3', 'std Torque',
       'std MudDensity', 'std Tankweight', 'std TempTank', 'std SandRate',
       'std Flow', 'std DPT4', 'std DPR1', 'std DPR2', 'std TestSectionWeight',
       'std SetPoint', 'std USL', 'std StringRPM',
       'std relBedHeightTankWeight', 'std relBedHeightTestSectionWeight',
       'std fricCoeff', 'drift DPT1', 'drift DPT2', 'drift DPT3',
       'drift Torque', 'drift MudDensity', 'drift Tankweight',
       'drift TempTank', 'drift SandRate', 'drift Flow', 'drift DPT4',
       'drift DPR1', 'drift DPR

In [472]:
# No flow behavior index, since it is unclear whether it can be used for field data.
# The friction coefficient ("mean fricCoeff") had many NaN values; decide what to do
# about it later. It might not be needed, as it is probably unavailable for field data.
# Other candidates that were tried and are currently disabled: Eccentricity,
# Inclination, Shields number, Froude number, Reynolds * Bingham, and
# Reynolds / Taylor with Taylor not squared.

# Cast each raw column that is used to float32 once, then combine.
slip_ratio = df["Particle slip ratio in test section"].astype("float32")
bed_height = df["mean relBedHeightTestSectionWeight"].astype("float32")
reynolds = df["Reynolds annulus (no cuttings)"].astype("float32")
taylor = df["Taylor number"].astype("float32")
bingham = df["Bingham number"].astype("float32")
friction_factor = df["Friction factor mod annulus"].astype("float32")

# Classification frame: slip ratio, bed height and three derived features.
dict_of_variables = {
    "Slip ratio": slip_ratio,
    "Particle bed height": bed_height,
    "Reynolds / Taylor": reynolds / taylor**2,
    "Taylor by Bingham": taylor * bingham**2,
    "Reynolds / Friction fac": reynolds / friction_factor**2,
}

# Regression frame: relative bed height (the target) plus three derived features.
# NOTE(review): Bingham and the friction factor enter unsquared here but squared in
# the classification frame above - confirm that this difference is intended.
dict_of_variables_regression = {
    "Relative bed height": bed_height,
    "Reynolds / Taylor": reynolds / taylor**2,
    "Taylor by Bingham": taylor * bingham,
    "Reynolds / Friction fac": reynolds / friction_factor,
}

# Both dictionaries are built from the raw frame before `df` is rebound.
df = pd.DataFrame(dict_of_variables)
df_reg = pd.DataFrame(dict_of_variables_regression)

In [473]:
# Categorical encodings for the fluid name and the inclination. Both are currently
# unused: the lines that applied them to "Fluid name" / "Inclination" are disabled
# because those columns are not part of the feature frames.
fluid_mapping = {
    "EMSVersatec": 1.0,
    "Glydril": 0.0,
}
inclination_mapping = {
    "90": 2.0,
    "60": 1.0,
    "48": 0.0,
}

# Keep only the rows in which every column is strictly positive. NaN compares as
# False, so rows with missing values are removed as well.
df = df.loc[df.gt(0).all(axis=1)]
df_reg = df_reg.loc[df_reg.gt(0).all(axis=1)]


In [474]:
# Turn the slip ratio into the binary classification label: ratios above the
# threshold become 0, all others become 1. The threshold is computed by the project
# helper define_threshold from the slip ratios, the bed heights and a bed-height
# cut-off of 0.0575.
slip_ratios = df["Slip ratio"]
threshold = define_threshold(
    slip_ratios.values,
    rel_bed_height=df["Particle bed height"],
    height_threshold=0.0575,
)
df["Slip ratio"] = slip_ratios.map(lambda ratio: 0 if ratio > threshold else 1)

In [475]:
# The bed height was only needed to derive the threshold; remove it from the
# classification frame, then shuffle both frames with a fixed seed.
df = (
    df.drop(columns=["Particle bed height"])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df_reg = (
    df_reg.sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [476]:
# Sanity check: number of missing values in each remaining column.
print(df.isna().sum())

Slip ratio                 0
Reynolds / Taylor          0
Taylor by Bingham          0
Reynolds / Friction fac    0
dtype: int64


In [477]:
# Classification design matrix: every column except the label, standardised per
# feature with its own mean and standard deviation.
features = df.drop(columns="Slip ratio").to_numpy()
mean = features.mean(axis=0)
std = features.std(axis=0)
X = (features - mean) / std
y = df["Slip ratio"].to_numpy()

# Regression design matrix. It is scaled with the classification statistics computed
# above (the same `mean` / `std` are reused when test cases are scored).
features_reg = df_reg.drop(columns="Relative bed height").to_numpy()
X_reg = (features_reg - mean) / std
y_reg = df_reg["Relative bed height"].to_numpy()

In [478]:
change_directory()
mlp_path = FILE_PATH_NN
mlp_path_reg = FILE_PATH_NN_REGRESSION

# Size the input layer from the number of feature columns. X.shape[1] does not
# require a second row to exist, unlike len(X[1]).
n_features = X.shape[1]
mlp = MLP(in_features=n_features, hidden_size=128, out_features=1)
mlp_reg = MLP(in_features=n_features, hidden_size=128, out_features=1, task="regression")

# SECURITY NOTE: torch.load and pickle.load unpickle the file and can execute
# arbitrary code; only load artefacts that this project produced itself.
mlp.load_state_dict(torch.load(mlp_path))
mlp_reg.load_state_dict(torch.load(mlp_path_reg))

# The networks are used for inference only from here on: switch them to evaluation
# mode so that layers such as dropout / batch norm (if the MLP has any) behave
# deterministically.
mlp.eval()
mlp_reg.eval()

# Best parameters for the classical models; each file holds a mapping keyed by
# "best_params_<model>".
with FILE_PATH_BEST_MODELS.open('rb') as file:
    best_model_params = pickle.load(file)

with FILE_PATH_BEST_MODELS_REGRESSION.open('rb') as file:
    best_model_params_reg = pickle.load(file)

In [479]:
# Instantiate every classical model with the stored best parameters.
clf_params = best_model_params
reg_params = best_model_params_reg

# Classifiers for the binary slip-ratio label.
xgb = XGBClassifier(**clf_params["best_params_xg"])
logreg = LogisticRegression(**clf_params["best_params_logreg"])
rf = RandomForestClassifier(**clf_params["best_params_rf"])
svc = SVC(**clf_params["best_params_svc"])

# Regressors for the relative bed height.
xgb_reg = XGBRegressor(**reg_params["best_params_xg"])
rf_reg = RandomForestRegressor(**reg_params["best_params_rf"])
svr = SVR(**reg_params["best_params_svr"])

In [480]:
# Fit the classifiers on the full standardised classification set, in the same
# order as before.
for classifier in (xgb, logreg, rf, svc):
    classifier.fit(X, y)

# Fit the regressors on the full regression set.
for regressor in (xgb_reg, rf_reg, svr):
    regressor.fit(X_reg, y_reg)

In [481]:
def test_for_case(filepath, case_name, classification = True, scale_factors = (0.8, 1.1, 1.5)):
    """Score one pickled test case with every fitted model and print the predictions.

    Relies on notebook-level state defined in earlier cells: ``mean`` / ``std``
    (standardisation statistics), ``mlp`` / ``mlp_reg`` (neural networks) and the
    fitted estimators ``xgb``, ``logreg``, ``rf``, ``svc``, ``xgb_reg``, ``rf_reg``
    and ``svr``.

    Parameters
    ----------
    filepath : path-like
        Location of a pickled pandas DataFrame, read with ``pd.read_pickle``.
        NOTE(review): presumably its columns are in the same order as the training
        features - confirm against the code that wrote the pickle.
    case_name : str
        Label inserted into the printed messages.
    classification : bool, default True
        ``True`` uses the classifiers (the network output is thresholded at 0.5);
        ``False`` uses the regressors (there is no logistic-regression counterpart).
    scale_factors : sequence of float, default (0.8, 1.1, 1.5)
        Multipliers applied to the leading rows of the raw test matrix, one factor
        per row, before standardisation. The default reproduces the previously
        hard-coded values. Rows beyond ``len(scale_factors)`` are left unscaled, and
        surplus factors are ignored when the case has fewer rows.

    Returns
    -------
    None
        All results are printed.
    """
    df = pd.read_pickle(filepath)
    X_test = df.to_numpy()

    # NOTE(review): X_test[i] addresses sample row i, not feature column i. If a
    # per-feature scaling was intended this should index X_test[:, i] instead -
    # confirm. The row-wise behaviour is kept unchanged here.
    for row_index, factor in zip(range(len(X_test)), scale_factors):
        X_test[row_index] = X_test[row_index] * factor

    # Standardise with the training statistics.
    X_test = ( X_test - mean ) /  std
    X_test_mlp = torch.tensor(X_test,dtype = torch.float32)
    print(X_test)

    # Run the network in evaluation mode and without gradient tracking, so that the
    # prediction is deterministic and no autograd graph is built.
    network = mlp if classification else mlp_reg
    network.eval()
    with torch.no_grad():
        yhat_nn = network(X_test_mlp).detach()

    if classification:
        # Threshold the network output at 0.5 to obtain an int32 class label.
        yhat_nn = torch.where(
            yhat_nn > 0.5,
            torch.tensor(1, dtype=torch.int32),
            torch.tensor(0, dtype=torch.int32),
        )
        yhat_xg = xgb.predict(X_test)
        yhat_logreg = logreg.predict(X_test)
        yhat_rf = rf.predict(X_test)
        yhat_svm = svc.predict(X_test)
    else:
        yhat_xg = xgb_reg.predict(X_test)
        yhat_rf = rf_reg.predict(X_test)
        yhat_svm = svr.predict(X_test)

    print(f"Predictions for {case_name} neural network: {yhat_nn.T} ")
    print(f"Predictions for {case_name} xgboost: {yhat_xg} ")
    if classification:
        print(f"Predictions for {case_name} logreg: {yhat_logreg} ")
    print(f"Predictions for {case_name} random forest: {yhat_rf}")
    print(f"Predictions for {case_name} support vector machine: {yhat_svm} ")
        


    

In [482]:
# Class predictions of all classifiers for test-set case 101.
test_for_case(filepath=FILE_PATH_TEST_SET_CASE_101, case_name="case 101", classification=True)

[[-0.13726564 -0.4879135  -0.43726049]
 [-0.13745873 -0.3979454  -0.34615001]
 [-0.13729239 -0.34483791 -0.26518635]]
Predictions for case 101 neural network: tensor([[1, 1, 1]], dtype=torch.int32) 
Predictions for case 101 xgboost: [1 1 1] 
Predictions for case 101 logreg: [1 1 1] 
Predictions for case 101 random forest: [1 0 0]
Predictions for case 101 support vector machine: [1 1 1] 


In [483]:
# Regression predictions of all regressors for test-set case 101.
test_for_case(filepath=FILE_PATH_TEST_SET_CASE_101, case_name="case 101", classification=False)

[[-0.13726564 -0.4879135  -0.43726049]
 [-0.13745873 -0.3979454  -0.34615001]
 [-0.13729239 -0.34483791 -0.26518635]]
Predictions for case 101 neural network: tensor([[0.0916, 0.0733, 0.0631]]) 
Predictions for case 101 xgboost: [0.09014254 0.09014254 0.09014254] 
Predictions for case 101 random forest: [0.12295987 0.09359297 0.09828425]
Predictions for case 101 support vector machine: [0.12967202 0.12570351 0.1224805 ] 


In [484]:
# Class predictions of all classifiers for test-set case 102.
test_for_case(filepath=FILE_PATH_TEST_SET_CASE_102, case_name="case 102", classification=True)

[[-0.13755068 -0.36839323 -0.29804109]
 [-0.13737081 -0.30355311 -0.12214902]
 [-0.13719078 -0.18891164  0.11196136]
 [-0.1374761  -0.31281721 -0.31595679]
 [-0.13744239 -0.33295719 -0.32095001]]
Predictions for case 102 neural network: tensor([[1, 1, 1, 1, 1]], dtype=torch.int32) 
Predictions for case 102 xgboost: [1 1 1 1 1] 
Predictions for case 102 logreg: [1 1 1 1 1] 
Predictions for case 102 random forest: [0 0 0 0 0]
Predictions for case 102 support vector machine: [1 1 1 1 1] 


In [485]:

# Regression predictions of all regressors for test-set case 102.
test_for_case(filepath=FILE_PATH_TEST_SET_CASE_102, case_name="case 102", classification=False)

[[-0.13755068 -0.36839323 -0.29804109]
 [-0.13737081 -0.30355311 -0.12214902]
 [-0.13719078 -0.18891164  0.11196136]
 [-0.1374761  -0.31281721 -0.31595679]
 [-0.13744239 -0.33295719 -0.32095001]]
Predictions for case 102 neural network: tensor([[0.0674, 0.0510, 0.0349, 0.0619, 0.0645]]) 
Predictions for case 102 xgboost: [0.09014254 0.09014254 0.09014254 0.09014254 0.09014254] 
Predictions for case 102 random forest: [0.09229195 0.09071772 0.09620087 0.09037614 0.09291425]
Predictions for case 102 support vector machine: [0.12380989 0.11737506 0.10849148 0.12376684 0.12415542] 
