In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
import feyn

# Model agnostic 
from typing import Optional, List, Callable, Dict, Any, List
from pathlib import Path
from utils import Helm, QLatticeWrapper

In [11]:
# Locate the dataset relative to the kernel's current working directory.
# NOTE: Path.cwd() equals the notebook's folder only when the kernel was
# started from that folder.
nb_dir = Path.cwd()
project_root = nb_dir.parents[0]  # one level above the working directory
data_path = project_root / "datasets" / "processed_well_data.csv"

# Columns passed to Helm via `includ_cols`; the same list is later used as the
# `feature_tags` of the QLatticeWrapper.
includ_cols = [
    'Dia',
    'Dev(deg)',
    'Area (m2)',
    'z',
    'GasDens',
    'LiquidDens',
    'P/T',
    'friction_factor',
    'critical_film_thickness',
]
D = Helm(path=data_path, includ_cols=includ_cols, test_size=0.20)

In [12]:
# QLattice model factory (passed as `build_model` to the hyperparameter search)
def qlattice(
        hparams: Dict[str, Any]
):
    """Build a QLatticeWrapper configured with one set of hyperparameters.

    Args:
        hparams: Keyword arguments forwarded unchanged to ``QLatticeWrapper``.

    Returns:
        The new ``QLatticeWrapper``, created with ``feature_tags=includ_cols``
        (the module-level column list) plus everything in ``hparams``.
    """
    return QLatticeWrapper(feature_tags=includ_cols, **hparams)

# Candidate hyperparameter values explored by the grid search
hparam_grid = {
    "max_complexity": [10, 15, 25],
    "n_epochs": [10, 15, 20],
}

# Train the model and optimize hyperparameters via grid search (k_folds=5)
trained_model = D.evolv_model(
    build_model=qlattice,
    hparam_grid=hparam_grid,
    k_folds=5,
)

# Print the expression returned by the trained model
print(trained_model.express())

1.51643*tanh(0.861602*Area (m2) + 0.206034*P/T + 0.254327*criticalfilmthickness + 0.337221) - 0.276102


Best CV Classification Accuracy = 0.6804 ± 0.0744
Best Hyperparameters: {'max_complexity': 15, 'n_epochs': 10}
Test Classification Accuracy = 0.6429
Test Regression: RMSE=82626.4154, MAE=70653.8855, R2=0.5396


In [4]:
# pandas (`pd`) is already imported in the notebook's first cell, so it is not
# re-imported here.

# Transform D.X with D.scaler_X, predict, then pass the predictions through
# D.scaler_y.inverse_transform (which expects a 2D column, hence the reshape)
# and flatten the result back to 1D.
y_pred_scaled = trained_model.predict(D.scaler_X.transform(D.X))
y_pred = D.scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# Pair D.gsflow with the predictions; both must be 1D and of equal length
df = pd.DataFrame({
    'GasFlowrate': D.gsflow,
    'y_pred': y_pred
})

# Written relative to the kernel's current working directory
df.to_csv('output_qlattice.csv', index=False)