XGBoost


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# XGBoost hyperparameters
N_ESTIMATORS = 600
LEARNING_RATE = 0.1
MAX_DEPTH = 10
MIN_CHILD_WEIGHT = 1
SUBSAMPLE = COLSAMPLE_BYTREE = 0.9
N_JOBS = -1

# Run identifier, shaped like xgb_600_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = '_'.join(('xgb', str(N_ESTIMATORS), f'seed{SEED}', stamp))


In [None]:
# Build the train_model.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--n-estimators', N_ESTIMATORS),
    ('--learning-rate', LEARNING_RATE),
    ('--max-depth', MAX_DEPTH),
    ('--min-child-weight', MIN_CHILD_WEIGHT),
    ('--subsample', SUBSAMPLE),
    ('--colsample-bytree', COLSAMPLE_BYTREE),
    ('--n-jobs', N_JOBS),
)
cmd = [sys.executable, 'train_model.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:
import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


XGBoost log1p


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# XGBoost (log1p) hyperparameters
N_ESTIMATORS = 600
LEARNING_RATE = 0.1
MAX_DEPTH = 10
MIN_CHILD_WEIGHT = 1
SUBSAMPLE = COLSAMPLE_BYTREE = 0.9
N_JOBS = -1

# Run identifier, shaped like xgb_log_600_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = 'xgb_log_{}_seed{}_{}'.format(N_ESTIMATORS, SEED, stamp)


In [None]:
# Build the train_model_log.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--n-estimators', N_ESTIMATORS),
    ('--learning-rate', LEARNING_RATE),
    ('--max-depth', MAX_DEPTH),
    ('--min-child-weight', MIN_CHILD_WEIGHT),
    ('--subsample', SUBSAMPLE),
    ('--colsample-bytree', COLSAMPLE_BYTREE),
    ('--n-jobs', N_JOBS),
)
cmd = [sys.executable, 'train_model_log.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:
import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


XGBoost Grid


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run prefix.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# XGBoost grid search (top runs by MAPE)
GRID_SIZE = 200
TOP_K = 3

# Passed to the grid script as --run-prefix, not as a full run name.
RUN_NAME = 'xgbgrid_' + stamp


In [None]:
# Build the train_model_xgb_grid.py command line; RUN_NAME is handed over
# as a prefix (--run-prefix) rather than as a complete run name.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-prefix', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--grid-size', GRID_SIZE),
    ('--top-k', TOP_K),
)
cmd = [sys.executable, 'train_model_xgb_grid.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the metadata produced by the grid run.
models_dir = Path('models')
meta_path = models_dir / RUN_NAME / 'metadata.json'
if meta_path.exists():
    candidates = [meta_path]
else:
    # RUN_NAME was passed as --run-prefix, so the grid script presumably
    # writes one directory per kept model named "<prefix>..." -- TODO confirm
    # against train_model_xgb_grid.py. Fall back to every matching directory.
    candidates = sorted(models_dir.glob(f'{RUN_NAME}*/metadata.json'))

if candidates:
    for found in candidates:
        if found != meta_path:
            # Identify which run each displayed metadata block belongs to.
            print(found)
        meta = json.loads(found.read_text(encoding='utf-8'))
        # `display` is not imported here; presumably the notebook builtin.
        display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


RandomForest


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# RandomForest hyperparameters
N_ESTIMATORS = 400
MAX_DEPTH = None  # None: the launcher cell has special handling for this
MIN_SAMPLES_LEAF = 1
# NOTE(review): scikit-learn >= 1.3 no longer accepts max_features='auto';
# confirm that train_model_rf.py translates this value before it reaches
# the estimator.
MAX_FEATURES = 'auto'
N_JOBS = -1

# Run identifier, shaped like rf_400_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = '_'.join(('rf', str(N_ESTIMATORS), f'seed{SEED}', stamp))


In [None]:
# Build the train_model_rf.py command line from the configuration cell.
cmd = [
    sys.executable, 'train_model_rf.py',
    '--data-path', str(DATA_PATH),
    '--run-name', RUN_NAME,
    '--holdout-frac', str(HOLDOUT_FRAC),
    '--test-size', str(TEST_SIZE),
    '--seed', str(SEED),
    '--n-estimators', str(N_ESTIMATORS),
    # Omit the flag together with its value when MAX_DEPTH is None, so a
    # value-less `--max-depth` is never passed. The script's own default
    # then applies (presumably "no depth limit" -- confirm in
    # train_model_rf.py).
    *([] if MAX_DEPTH is None else ['--max-depth', str(MAX_DEPTH)]),
    '--min-samples-leaf', str(MIN_SAMPLES_LEAF),
    '--max-features', str(MAX_FEATURES),
    '--n-jobs', str(N_JOBS),
]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(shlex.quote(c) for c in cmd))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:
import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


CatBoost


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# CatBoost hyperparameters
ITERATIONS = 800
LEARNING_RATE_CB = 0.08
DEPTH = 8
SUBSAMPLE_CB = 0.9
L2_LEAF_REG = 3.0

# Run identifier, shaped like cb_800_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = 'cb_{}_seed{}_{}'.format(ITERATIONS, SEED, stamp)


In [None]:
# Build the train_model_catboost.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--iterations', ITERATIONS),
    ('--learning-rate', LEARNING_RATE_CB),
    ('--depth', DEPTH),
    ('--subsample', SUBSAMPLE_CB),
    ('--l2-leaf-reg', L2_LEAF_REG),
)
cmd = [sys.executable, 'train_model_catboost.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


LightGBM


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# LightGBM hyperparameters
N_ESTIMATORS = 800
LEARNING_RATE_LGBM = 0.07
MAX_DEPTH = -1
NUM_LEAVES = 64
SUBSAMPLE = COLSAMPLE_BYTREE = 0.9
MIN_CHILD_SAMPLES = 20

# Run identifier, shaped like lgbm_800_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = '_'.join(('lgbm', str(N_ESTIMATORS), f'seed{SEED}', stamp))


In [None]:
# Build the train_model_lgbm.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--n-estimators', N_ESTIMATORS),
    ('--learning-rate', LEARNING_RATE_LGBM),
    ('--max-depth', MAX_DEPTH),
    ('--num-leaves', NUM_LEAVES),
    ('--subsample', SUBSAMPLE),
    ('--colsample-bytree', COLSAMPLE_BYTREE),
    ('--min-child-samples', MIN_CHILD_SAMPLES),
)
cmd = [sys.executable, 'train_model_lgbm.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


ElasticNet


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# ElasticNet hyperparameters
ALPHA = 0.1
L1_RATIO = 0.5
MAX_ITER_EN = 5000

# Run identifier, shaped like en_a0.1_l0.5_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = 'en_a{}_l{}_seed{}_{}'.format(ALPHA, L1_RATIO, SEED, stamp)


In [None]:
# Build the train_model_elastic.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--alpha', ALPHA),
    ('--l1-ratio', L1_RATIO),
    ('--max-iter', MAX_ITER_EN),
)
cmd = [sys.executable, 'train_model_elastic.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


SVM


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# SVM hyperparameters (Linear/RBF/Poly/Sigmoid)
C = 1.0
EPSILON = 0.1
MAX_ITER_SVM = 5000
KERNEL = 'linear'
GAMMA = 'scale'
DEGREE = 3
COEF0 = 0.0

# Run identifier, shaped like svm_linear_c1.0_eps0.1_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = 'svm_{}_c{}_eps{}_seed{}_{}'.format(KERNEL, C, EPSILON, SEED, stamp)


In [None]:
# Build the train_model_svm.py command line: every setting from the
# configuration cell becomes a "--flag value" pair.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--c', C),
    ('--epsilon', EPSILON),
    ('--max-iter', MAX_ITER_SVM),
    ('--kernel', KERNEL),
    ('--gamma', GAMMA),
    ('--degree', DEGREE),
    ('--coef0', COEF0),
)
cmd = [sys.executable, 'train_model_svm.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)


Keras


In [None]:
from pathlib import Path
from datetime import datetime
import subprocess, sys, shlex

# Shared experiment settings; the launcher cell forwards them as CLI flags.
DATA_PATH = Path('data') / 'theorical_data.csv'
HOLDOUT_FRAC, TEST_SIZE = 0.20, 0.2
SEED = 42
# Second-resolution timestamp appended to the run name.
stamp = f'{datetime.now():%Y%m%d_%H%M%S}'

# Keras MLP hyperparameters
EPOCHS = 50
BATCH_SIZE = 256
HIDDEN_UNITS = [256, 128, 64]  # one entry per hidden layer in the run name
DROPOUT = 0.1
LR_KERAS = 1e-3

# Run identifier, shaped like keras_3layers_seed42_YYYYMMDD_HHMMSS.
RUN_NAME = 'keras_{}layers_seed{}_{}'.format(len(HIDDEN_UNITS), SEED, stamp)


In [None]:
# Build the train_model_keras.py command line: scalar settings become
# "--flag value" pairs; the layer sizes follow --hidden-units as separate
# arguments, one per entry of HIDDEN_UNITS.
cli_options = (
    ('--data-path', DATA_PATH),
    ('--run-name', RUN_NAME),
    ('--holdout-frac', HOLDOUT_FRAC),
    ('--test-size', TEST_SIZE),
    ('--seed', SEED),
    ('--epochs', EPOCHS),
    ('--batch-size', BATCH_SIZE),
    ('--dropout', DROPOUT),
    ('--learning-rate', LR_KERAS),
)
cmd = [sys.executable, 'train_model_keras.py']
cmd += [token for flag, value in cli_options for token in (flag, str(value))]
cmd += ['--hidden-units'] + [str(units) for units in HIDDEN_UNITS]

# Echo a shell-quoted copy of the command so it can be re-run by hand.
print('Comando:', ' '.join(map(shlex.quote, cmd)))

# Run synchronously; the captured output is printed once the script exits.
res = subprocess.run(cmd, capture_output=True, text=True, check=False)
print(res.stdout)
if res.returncode != 0:
    # Show the child's stderr and stop with the same exit code.
    print(res.stderr)
    raise SystemExit(res.returncode)


In [None]:

import json
# Display the run's metadata file, looked up at
# models/<RUN_NAME>/metadata.json (presumably written by the training script).
meta_path = Path('models') / RUN_NAME / 'metadata.json'
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding='utf-8'))
    # `display` is not imported here; presumably the notebook-provided builtin.
    display(meta)
else:
    # Message restored: the accented "ó" had been mis-encoded as "?".
    print('No se encontró metadata en', meta_path)
