In [None]:
# Model agnostic 
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd
from typing import Optional, List, Callable, Dict, Any, List
from pathlib import Path
from utils import Helm  # custom model for data handling/model training
from tabpfn import TabPFNClassifier, TabPFNRegressor
from tabpfn.constants import ModelVersion

In [None]:
# Locate the processed dataset relative to the current working directory.
# NOTE(review): Path.cwd() is the process working directory; it equals the
# notebook's folder only if the kernel was started there — confirm.
nb_dir = Path.cwd()
project_root = nb_dir.parents[0]  # one level above the working directory
data_path = project_root.joinpath("datasets", "processed_well_data.csv")

# Column names forwarded to Helm through its `includ_cols` argument.
includ_cols = [
    'Dia',
    'Dev(deg)',
    'Area (m2)',
    'z',
    'GasDens',
    'LiquidDens',
    'P/T',
    'friction_factor',
    'critical_film_thickness',
]

# Helm instance used by the later cells (training and prediction).
# test_size=0.20 is presumably the held-out fraction — verify in utils.Helm.
D = Helm(
    path=data_path,
    includ_cols=includ_cols,
    test_size=0.20,
)

In [None]:
# The user will need to provide a Hugging Face authentication code
# for this model to be usable.
def transformer(hparams: Dict[str, Any]):
    """Build a TabPFNRegressor from a hyperparameter mapping.

    Args:
        hparams: Keyword arguments forwarded unchanged to the
            ``TabPFNRegressor`` constructor.

    Returns:
        The newly constructed ``TabPFNRegressor`` instance.
    """
    return TabPFNRegressor(**hparams)

# Candidate values for each hyperparameter explored during the search.
hparam_grid = dict(
    n_estimators=[8, 12, 14],
    softmax_temperature=[0.2, 0.5, 0.9],
)

# Train the model and tune its hyperparameters via grid search; `transformer`
# is passed as the model factory and k_folds=5 is handed to Helm.evolv_model.
trained_model = D.evolv_model(
    build_model=transformer,
    hparam_grid=hparam_grid,
    k_folds=5,
)

Training model and optimizing hyperparameters via k-fold CV...
  y = column_or_1d(y, warn=True)


In [19]:
# Predict with the tuned model and export the results next to the gas flow rate.
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here.

# Scale the features with the scaler held by Helm before predicting
# (presumably the same scaler used during training — verify in utils.Helm).
X_scaled = D.scaler_X.transform(D.X)
y_pred_scaled = trained_model.predict(X_scaled)

# inverse_transform is given a 2D column vector, hence the reshape; the result
# is flattened back to 1D so it can be used as a DataFrame column.
y_pred = D.scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

# D.gsflow and y_pred must have the same length to form the two columns.
df = pd.DataFrame({
    'GasFlowrate': D.gsflow,
    'y_pred': y_pred
})

# NOTE(review): the file name reads 'tadpfn' — possibly a typo for 'tabpfn';
# left unchanged because downstream consumers may rely on this exact name.
df.to_csv('output_tadpfn.csv', index=False)