In [None]:
%load_ext autoreload
%autoreload 2
import sys, warnings, time, numpy, yaml, pandas
sys.path.append("../src/") # go to parent dir
from data_access import get_X, get_y, get_train_test
from models.factory import ModelFactory
warnings.filterwarnings('ignore')

In [None]:
# Load the inputs through the project's data_access helpers.
# train_scores is later handed to ModelFactory / StackingModel, and test_data to model.save(...).
train_data = get_X('train')
train_scores = get_y()
test_data = get_X('test')
# Train/hold-out split with a fixed train_size and random_state so reruns pass the same arguments.
# NOTE(review): y_test and target are not referenced by any of the visible cells — confirm they are needed.
X_train, y_train, X_test, y_test, target = get_train_test(train_size=0.8, random_state=42)

In [None]:
def eval_model(model, save_model=False):
    """Fit ``model``, score it on the hold-out features and report the timing.

    Relies on the notebook-level ``X_test`` and ``test_data`` variables.

    Args:
        model: Object exposing ``name``, ``fit()``, ``evaluate(X)`` and, when
            ``save_model`` is true, ``save(data)``.
        save_model: When true, call ``model.save(test_data)`` after scoring.

    Returns:
        dict with keys ``'name'``, ``'score'`` and ``'time'``; ``'time'`` is
        the number of seconds spent in ``fit()`` plus ``evaluate()``, rounded
        to two decimals.
    """
    # perf_counter() is monotonic, so the measured duration cannot become
    # negative or skewed when the system clock is adjusted (time.time() can).
    start = time.perf_counter()
    model.fit()
    score = model.evaluate(X_test)
    # Round once and reuse the value for both the log line and the result.
    elapsed = numpy.round(time.perf_counter() - start, 2)

    print(f'{model.name}={score} in {elapsed}s')
    if save_model:
        model.save(test_data)
    return {'name': model.name, 'score': score, 'time': elapsed}

In [None]:
# autoreload is already enabled by the first cell; loading the extension again
# here only printed an "already loaded" notice.
def eval_model_for_name(name):
    """Build the model registered under ``name`` and evaluate it.

    The configuration file is read again on every call and a new
    ``ModelFactory`` is created from it together with the notebook-level
    ``X_train``, ``y_train`` and ``train_scores``.

    Args:
        name: Identifier passed to ``ModelFactory.get_model``.

    Returns:
        The result dict produced by ``eval_model`` for that model.
    """
    with open('../confs/models.yaml', 'r') as file:
        configurations = yaml.safe_load(file)
    factory = ModelFactory(configurations, X_train, y_train, train_scores)
    return eval_model(factory.get_model(name))

In [None]:
# Evaluate each candidate on its own; eval_model prints one result line per model.
# (The previous `names = ['dummy', 'random_forest']` was overwritten before use.)
names = ['xgb_gblinear', 'catboost', 'random_forest', 'gradient_boosting', 'ada_boost', 'extra_trees', 'hist_gradient_boosting']
for name in names:
    eval_model_for_name(name)

In [None]:
from models.stacking_model import StackingModel

# Base estimators for the stacking model, each resolved through ModelFactory.get_model.
names = ['catboost', 'random_forest', 'gradient_boosting', 'ada_boost', 'extra_trees', 'hist_gradient_boosting']
with open('../confs/models.yaml', 'r') as file:
    configurations = yaml.safe_load(file)
factory = ModelFactory(configurations, X_train, y_train, train_scores)
params = {'estimators': {name: factory.get_model(name) for name in names}}

model = StackingModel(X_train, y_train, train_scores, params)
# Fit + score without saving; the result dict is kept in `line`.
line = eval_model(model)
# NOTE(review): the name is changed only after eval_model has printed its line,
# presumably so that save() uses 'stacking_classifier' — confirm.
model.name = 'stacking_classifier'
model.save(test_data)

In [None]:
# Score `model` on X_test once more and print the result under its current name.
# NOTE(review): relies on `model` having been bound by an earlier cell (notebook state).
score = model.evaluate(X_test)
print(f'{model.name}={score}')

In [None]:
# Fit, score and (when save_model is true) persist every model the factory provides.
save_model = True
with open('../confs/models.yaml', 'r') as file:
    configurations = yaml.safe_load(file)
factory = ModelFactory(configurations, X_train, y_train, train_scores)
# eval_model itself calls fit() and evaluate(), so calling them here as well
# trained every model twice and threw the first score away.
# A comprehension also keeps the loop variable from rebinding the notebook-level `model`.
lines = [eval_model(model, save_model) for model in factory.get_models()]
# One row per model, best score first.
df = pandas.DataFrame(lines).sort_values(by=['score'], ascending=False)

In [None]:
# Rank best-first once, then print and save that same frame so the displayed
# table and the CSV always agree.
df = df.sort_values(by=['score'], ascending=False)
print(df)
df.to_csv('../data/result.csv')