In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
import feyn

# Model agnostic 
from typing import Optional, List, Callable, Dict, Any, List
from pathlib import Path
from utils import ChiefBldr, QLatticeWrapper

In [5]:
# Resolve the dataset path relative to the kernel's current working directory
# (Path.cwd() is the working directory, which is the notebook folder only when
# the kernel was started there).
nb_dir = Path.cwd()  # working directory of the running kernel
project_root = nb_dir.parents[0]  # first ancestor of the working directory
data_path = project_root.joinpath("datasets", "processed_well_data.csv")

# Column names forwarded to ChiefBldr (and later reused as QLattice feature tags).
includ_cols = [
    'Dia',
    'Dev(deg)',
    'Area (m2)',
    'z',
    'GasDens',
    'LiquidDens',
    'P/T',
    'friction_factor',
    'critical_film_thickness',
]

# Build the project helper that owns the data.
# NOTE(review): test_size=0.20 presumably holds out 20% of rows — confirm in utils.ChiefBldr.
D = ChiefBldr(
    path=data_path,
    includ_cols=includ_cols,
    test_size=0.20,
)

In [6]:
# Factory for the QLattice model wrapper
def qlattice(
        hparams: Dict[str,Any]
):
    """Create a QLatticeWrapper from a dictionary of hyperparameters.

    Args:
        hparams: Keyword arguments expanded into the ``QLatticeWrapper``
            constructor. It must not contain ``feature_tags``, because that
            keyword is already supplied here and a duplicate raises TypeError.

    Returns:
        The newly constructed ``QLatticeWrapper``. Its ``feature_tags``
        argument is taken from the module-level ``includ_cols`` list, so that
        name must be defined before this function is called.
    """
    return QLatticeWrapper(feature_tags=includ_cols, **hparams)

# Hyperparameter grid handed to the search. Each entry lists a single value,
# so this grid contains exactly one combination.
hparam_grid = dict(
    max_complexity=[5],
    n_epochs=[5],
)

# Train through ChiefBldr.evolv_model using the qlattice factory and the grid.
# NOTE(review): k_folds=5 presumably selects 5-fold cross-validation — confirm in utils.
trained_model = D.evolv_model(
    build_model=qlattice,
    hparam_grid=hparam_grid,
    k_folds=5,
)

# Show the expression reported by the trained model.
print(trained_model.express())

3.04894*tanh(0.670265*Area (m2) + 0.743208) - 1.56114


Training set score: 0.5602409638554217
Test set score: 0.5714285714285714
