In [None]:
%load_ext autoreload
%autoreload 2
import sys, warnings, time, numpy, yaml
sys.path.append("../src/") # go to parent dir
from main import get_X, get_y, get_score, get_train_test
# from models import get_model_benchmark1, get_model_benchmark2, show_importance
from models.model import Model
from models.factory import ModelFactory
from models.benchmark import Benchmark1, Benchmark2
warnings.filterwarnings('ignore')

In [None]:
# Fetch the train/test inputs and the training targets through the project
# helpers imported from `main`.
train_data = get_X('train')
train_scores = get_y()
test_data = get_X('test')

# Fixed train_size / random_state so the split is the same on every run.
# NOTE(review): the tuple order (test before valid) follows this call site;
# confirm it matches what get_train_test actually returns.
(
    X_train, y_train,
    X_test, y_test,
    X_valid, y_valid,
    target,
) = get_train_test(
    train_size=0.8,
    random_state=42,
)

In [None]:
# Build, train and evaluate the first benchmark model.
# autoreload is already enabled by the imports cell at the top of the
# notebook; re-running `%load_ext autoreload` here only printed an
# "already loaded" notice, so the magics are not repeated.
benchmark1 = Benchmark1(X_train, y_train, X_valid, y_valid, train_scores)
benchmark1.train()
# Last expression of the cell: the notebook displays whatever evaluate returns.
benchmark1.evaluate(X_test)
# Saving is intentionally disabled for this benchmark; uncomment to run it.
# benchmark1.save(test_data)

In [None]:
# Build, train and evaluate the second benchmark model with its default
# parameters (no explicit params argument is passed).
# autoreload is already enabled by the imports cell at the top of the
# notebook; re-running `%load_ext autoreload` here only printed an
# "already loaded" notice, so the magics are not repeated.
benchmark2 = Benchmark2(X_train, y_train, X_valid, y_valid, train_scores)
benchmark2.train()
# Last expression of the cell: the notebook displays whatever evaluate returns.
benchmark2.evaluate(X_test)

In [None]:
# Kept in its own cell so saving can be triggered separately from training.
# Operates on whichever object `benchmark2` currently refers to.
# NOTE(review): presumably writes predictions for `test_data` — confirm in
# models.benchmark.
benchmark2.save(test_data)

In [None]:
# Two Benchmark2 configurations that are identical except for the booster.
# The key of each entry is later assigned to the model's `name` attribute.
params_by_name = {
    run_name: {
        'booster': booster_kind,
        'tree_method': 'hist',
        'max_depth': 8,
        'learning_rate': 0.025,
        'objective': 'multi:softprob',
        'num_class': 2,
        'eval_metric': 'mlogloss',
    }
    for run_name, booster_kind in (
        ('benchmark2', 'gbtree'),
        ('benchmark3', 'gblinear'),
    )
}

# Train, evaluate and save one model per configuration.
# `name`, `params` and `benchmark2` stay bound to the last iteration after the
# loop finishes, as with any cell-level loop variable.
for name in params_by_name:
    params = params_by_name[name]
    benchmark2 = Benchmark2(X_train, y_train, X_valid, y_valid, train_scores, params)
    benchmark2.train()
    benchmark2.evaluate(X_test)
    # The name is set only after training/evaluation, right before saving.
    benchmark2.name = name
    benchmark2.save(test_data)

In [None]:
def get_params(booster, tree_method, eval_metric='mlogloss'):
    """Build a parameter dict for a Benchmark2 run.

    Only the booster, tree method and evaluation metric vary; depth, learning
    rate, objective and class count are fixed.

    Args:
        booster: Value stored under the 'booster' key.
        tree_method: Value stored under the 'tree_method' key.
        eval_metric: Value stored under the 'eval_metric' key
            (defaults to 'mlogloss').

    Returns:
        A new dict on every call, so callers may mutate it freely.
    """
    return {
        'booster': booster,
        'tree_method': tree_method,
        'max_depth': 8,
        'learning_rate': 0.025,
        'objective': 'multi:softprob',
        'num_class': 2,
        'eval_metric': eval_metric,
    }
# Values to sweep over. Each active list is currently narrowed to a single
# value; the commented-out line above it holds the wider grid.
# boosters = ['gbtree', 'gblinear', 'dart']
boosters = ['gblinear']
# tree_methods = ['auto', 'exact', 'approx', 'hist']
tree_methods = ['hist']
# eval_metrics = ['mphe', 'merror', 'mlogloss', 'auc']
eval_metrics = ['mlogloss']

# Train and evaluate one Benchmark2 per combination and report score + timing.
for booster in boosters:
    for tree_method in tree_methods:
        for eval_metric in eval_metrics:
            # perf_counter is monotonic and high-resolution, unlike time.time,
            # which can jump if the system clock is adjusted mid-run.
            start = time.perf_counter()
            name = f'{booster}_{tree_method}_{eval_metric}'
            params = get_params(booster, tree_method, eval_metric)
            benchmark2 = Benchmark2(X_train, y_train, X_valid, y_valid, train_scores, params)
            benchmark2.train()
            score = benchmark2.evaluate(X_test)
            end = time.perf_counter()
            # Elapsed time covers construction, training and evaluation.
            print(f'{name}={score} in {numpy.round((end-start), 2)}s')

In [None]:
from importlib import import_module

# Resolve a model class from its dotted path and run it end to end.
class_str: str = 'models.benchmark.Benchmark2'

# Only the lookup is guarded: an AttributeError raised later by train() or
# evaluate() is a real failure and must not be reported as an import problem.
try:
    module_path, class_name = class_str.rsplit('.', 1)
    module = import_module(module_path)
    model_class = getattr(module, class_name)
except (ImportError, AttributeError) as e:
    # Chain the original exception so the underlying cause stays visible.
    raise ImportError(class_str) from e

# NOTE: `params` is whatever an earlier cell last bound to that name.
model = model_class(X_train, y_train, X_valid, y_valid, train_scores, params)
# Train and evaluate the freshly built instance. The previous version called
# these on the stale `benchmark2` left over from an earlier cell, so the
# dynamically loaded model was never exercised.
model.train()
print(model.evaluate(X_test))

In [None]:
# Build a model through ModelFactory from the YAML configuration file.
# NOTE(review): `name` is presumably a key in the loaded configuration —
# confirm against ModelFactory.get_model.
name = 'benchmark3'
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale default encoding.
with open('../confs/models.yaml', 'r', encoding='utf-8') as file:
    # safe_load builds plain Python objects only (no arbitrary object creation).
    configurations = yaml.safe_load(file)
factory = ModelFactory(configurations, X_train, y_train, X_valid, y_valid, train_scores)
model = factory.get_model(name)
model.train()
print(model.evaluate(X_test))