In [1]:
import dagshub
import mlflow
dagshub.init(url="https://dagshub.com/PacoTinoco/Proyecto_Final_CDD", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="amazon_stock")

https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow


2024/09/28 13:30:40 INFO mlflow.tracking.fluent: Experiment with name 'amazon_stock' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/33bcf767e68f4559aeed57be97a38cdb', creation_time=1727551840815, experiment_id='1', last_update_time=1727551840815, lifecycle_stage='active', name='amazon_stock', tags={}>

In [3]:
import yfinance as yf
from datetime import date, timedelta

# Test dates
amazon_stock = yf.download('AMZN', start='2020-01-01', end='2024-01-01')

[*********************100%***********************]  1 of 1 completed


In [4]:
# Definir X e y
X = amazon_stock.drop(columns=["Close"])
y = amazon_stock["Close"]
from sklearn.model_selection import train_test_split

# Dividir en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Tamaño del conjunto de entrenamiento:", X_train.shape, y_train.shape)
print("Tamaño del conjunto de prueba:", X_val.shape, y_val.shape)


Tamaño del conjunto de entrenamiento: (804, 5) (804,)
Tamaño del conjunto de prueba: (202, 5) (202,)


In [5]:
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
import pathlib
from sklearn.metrics import  root_mean_squared_error
import numpy as np

train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [6]:
def objective(params):
    with mlflow.start_run(nested=True):
         
        # Tag model
        mlflow.set_tag("model_family", "xgboost")
        
        # Log parameters
        mlflow.log_params(params)
        
        # Train model
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=100,
            evals=[(valid, 'validation')],
            early_stopping_rounds=10
        )
        
        # Log xgboost model with artifact_path
        mlflow.xgboost.log_model(booster, artifact_path="model")
         
        # Predict in the val dataset
        y_pred = booster.predict(valid)
        
        # Calculate metric
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        
        # Log performance metric
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [10]:
from sklearn.metrics import mean_squared_error
mlflow.xgboost.autolog()

with mlflow.start_run(run_name="Xgboost Hyper-parameter Optimization", nested=True):
    search_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
        'learning_rate': hp.loguniform('learning_rate', -3, 0),
        'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
        'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
        'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
        'objective': 'reg:squarederror',
        'seed': 42
    }
    
    best_params = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials()
    )
    best_params["max_depth"] = int(best_params["max_depth"])
    best_params["seed"] = 42
    best_params["objective"] = "reg:squarederror"
    
    mlflow.log_params(best_params)

    # Log tags
    mlflow.set_tags(
        tags={
            "project": "Amazon Stock validation",
            "optimizer_engine": "hyper-opt",
            "model_family": "xgboost",
            "feature_set_version": 1,
        }
    )

    # Log a fit model instance
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=100,
        evals=[(valid, 'validation')],
        early_stopping_rounds=10
    )
        
    y_pred = booster.predict(valid)
    
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    mlflow.log_metric("rmse", rmse)
    
    pathlib.Path("models").mkdir(exist_ok=True)

        
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

[0]	validation-rmse:4.20321                           
[1]	validation-rmse:0.83767                           
[2]	validation-rmse:0.54887                           
[3]	validation-rmse:0.53392                           
[4]	validation-rmse:0.53303                           
[5]	validation-rmse:0.53328                           
[6]	validation-rmse:0.53301                           
[7]	validation-rmse:0.53292                           
[8]	validation-rmse:0.53286                           
[9]	validation-rmse:0.53284                           
[10]	validation-rmse:0.53284                          
[11]	validation-rmse:0.53283                          
[12]	validation-rmse:0.53283                          
[13]	validation-rmse:0.53283                          
[14]	validation-rmse:0.53283                          
[15]	validation-rmse:0.53283                          
[16]	validation-rmse:0.53283                          
[17]	validation-rmse:0.53283                          
[18]	valid






2024/09/28 13:38:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run gregarious-midge-710 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/6d8c7309b15d4199978b364e7adca504.

2024/09/28 13:38:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:22.37928                                                    
[1]	validation-rmse:17.88734                                                    
[2]	validation-rmse:14.30360                                                    
[3]	validation-rmse:11.41416                                                    
[4]	validation-rmse:9.12917                                                     
[5]	validation-rmse:7.30344                                                     
[6]	validation-rmse:5.85480                                                     
[7]	validation-rmse:4.69313                                                     
[8]	validation-rmse:3.77025                                                     
[9]	validation-rmse:3.02882                                                     
[10]	validation-rmse:2.43988                                                    
[11]	validation-rmse:1.97774                                                    
[12]	validation-rmse:1.60846






2024/09/28 13:39:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run awesome-squid-220 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/e90178711b954fd4b557acb97e5d77db.

2024/09/28 13:39:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:25.75606                                                     
[1]	validation-rmse:23.68811                                                     
[2]	validation-rmse:21.78909                                                     
[3]	validation-rmse:20.02704                                                     
[4]	validation-rmse:18.41532                                                     
[5]	validation-rmse:16.93112                                                     
[6]	validation-rmse:15.56239                                                     
[7]	validation-rmse:14.31443                                                     
[8]	validation-rmse:13.15714                                                     
[9]	validation-rmse:12.10342                                                     
[10]	validation-rmse:11.13494                                                    
[11]	validation-rmse:10.24011                                                    
[12]	validation-






2024/09/28 13:39:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run inquisitive-mule-100 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/0c22676abbab48ebb2dd46169c289ed4.

2024/09/28 13:39:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:10.88265                                                     
[1]	validation-rmse:4.36702                                                     
[2]	validation-rmse:1.85709                                                     
[3]	validation-rmse:0.93342                                                     
[4]	validation-rmse:0.61941                                                     
[5]	validation-rmse:0.54069                                                     
[6]	validation-rmse:0.51327                                                     
[7]	validation-rmse:0.50144                                                     
[8]	validation-rmse:0.50702                                                     
[9]	validation-rmse:0.50027                                                     
[10]	validation-rmse:0.50449                                                    
[11]	validation-rmse:0.50006                                                    
[12]	validation-rmse:0.4961






2024/09/28 13:39:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run zealous-flea-818 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/9685fdd65de44fd6b94994ae0add98ab.

2024/09/28 13:39:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:26.18030                                                    
[1]	validation-rmse:24.47787                                                    
[2]	validation-rmse:22.88756                                                    
[3]	validation-rmse:21.40128                                                    
[4]	validation-rmse:20.01053                                                    
[5]	validation-rmse:18.70915                                                    
[6]	validation-rmse:17.49372                                                    
[7]	validation-rmse:16.35604                                                    
[8]	validation-rmse:15.29299                                                    
[9]	validation-rmse:14.29506                                                    
[10]	validation-rmse:13.36218                                                   
[11]	validation-rmse:12.49530                                                   
[12]	validation-rmse:11.6805






2024/09/28 13:40:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run crawling-gnu-646 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/93fe9eb86d9e4d3d8a182b50efed8df1.

2024/09/28 13:40:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:26.56988                                                    
[1]	validation-rmse:25.20948                                                    
[2]	validation-rmse:23.92038                                                    
[3]	validation-rmse:22.69691                                                    
[4]	validation-rmse:21.53639                                                    
[5]	validation-rmse:20.43660                                                    
[6]	validation-rmse:19.39024                                                    
[7]	validation-rmse:18.40152                                                    
[8]	validation-rmse:17.46341                                                    
[9]	validation-rmse:16.56774                                                    
[10]	validation-rmse:15.72062                                                   
[11]	validation-rmse:14.91499                                                   
[12]	validation-rmse:14.1550






2024/09/28 13:40:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run stylish-toad-235 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/31a9d40f140c4462a5df471efc9b2997.

2024/09/28 13:40:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:7.69063                                                     
[1]	validation-rmse:2.34488                                                     
[2]	validation-rmse:0.92049                                                     
[3]	validation-rmse:0.55835                                                     
[4]	validation-rmse:0.52406                                                     
[5]	validation-rmse:0.51158                                                     
[6]	validation-rmse:0.50770                                                     
[7]	validation-rmse:0.50359                                                     
[8]	validation-rmse:0.50275                                                     
[9]	validation-rmse:0.50339                                                     
[10]	validation-rmse:0.50350                                                    
[11]	validation-rmse:0.50376                                                    
[12]	validation-rmse:0.50374






2024/09/28 13:40:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run smiling-trout-158 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/7a0548c7818c4c3aa998e40244faa7a3.

2024/09/28 13:40:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:23.92305                                                    
[1]	validation-rmse:20.42677                                                    
[2]	validation-rmse:17.45177                                                    
[3]	validation-rmse:14.89441                                                    
[4]	validation-rmse:12.71109                                                    
[5]	validation-rmse:10.85261                                                    
[6]	validation-rmse:9.27475                                                     
[7]	validation-rmse:7.92365                                                     
[8]	validation-rmse:6.77473                                                     
[9]	validation-rmse:5.79975                                                     
[10]	validation-rmse:4.96300                                                    
[11]	validation-rmse:4.24898                                                    
[12]	validation-rmse:3.63971






2024/09/28 13:40:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run crawling-auk-894 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/d59478d0677741c4887281b438c618f9.

2024/09/28 13:40:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:6.49803                                                     
[1]	validation-rmse:1.69141                                                    
[2]	validation-rmse:0.82676                                                    
[3]	validation-rmse:0.69782                                                    
[4]	validation-rmse:0.69471                                                    
[5]	validation-rmse:0.69197                                                    
[6]	validation-rmse:0.68097                                                    
[7]	validation-rmse:0.68855                                                    
[8]	validation-rmse:0.68387                                                    
[9]	validation-rmse:0.68321                                                    
[10]	validation-rmse:0.68041                                                   
[11]	validation-rmse:0.67059                                                   
[12]	validation-rmse:0.66867           






2024/09/28 13:41:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run unequaled-slug-367 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/9b5873eed17c4eccadacbda2bdf6a2b5.

2024/09/28 13:41:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



[0]	validation-rmse:25.72439                                                   
[1]	validation-rmse:23.61485                                                   
[2]	validation-rmse:21.67509                                                   
[3]	validation-rmse:19.91134                                                   
[4]	validation-rmse:18.27634                                                   
[5]	validation-rmse:16.77893                                                   
[6]	validation-rmse:15.40790                                                   
[7]	validation-rmse:14.15744                                                   
[8]	validation-rmse:13.00836                                                   
[9]	validation-rmse:11.94720                                                   
[10]	validation-rmse:10.98143                                                  
[11]	validation-rmse:10.09097                                                  
[12]	validation-rmse:9.26534            






2024/09/28 13:41:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run gifted-lamb-643 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/d20c823a1d414f6f87924dc07995b04a.

2024/09/28 13:41:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.



100%|██████████| 10/10 [02:43<00:00, 16.33s/trial, best loss: 0.326065303593491]
[0]	validation-rmse:23.92305
[1]	validation-rmse:20.42677
[2]	validation-rmse:17.45177
[3]	validation-rmse:14.89441
[4]	validation-rmse:12.71109
[5]	validation-rmse:10.85261
[6]	validation-rmse:9.27475
[7]	validation-rmse:7.92365
[8]	validation-rmse:6.77473
[9]	validation-rmse:5.79975
[10]	validation-rmse:4.96300
[11]	validation-rmse:4.24898
[12]	validation-rmse:3.63971
[13]	validation-rmse:3.12044
[14]	validation-rmse:2.68167
[15]	validation-rmse:2.30256
[16]	validation-rmse:1.98484
[17]	validation-rmse:1.71233
[18]	validation-rmse:1.48218
[19]	validation-rmse:1.28465
[20]	validation-rmse:1.11699
[21]	validation-rmse:0.97587
[22]	validation-rmse:0.85717
[23]	validation-rmse:0.75714
[24]	validation-rmse:0.67356
[25]	validation-rmse:0.60526
[26]	validation-rmse:0.55066
[27]	validation-rmse:0.50651
[28]	validation-rmse:0.47052
[29]	validation-rmse:0.44276
[30]	validation-rmse:0.41974
[31]	validation-rmse:0.4

2024/09/28 13:41:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run Xgboost Hyper-parameter Optimization at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1/runs/e300c53b5aeb490fbec70bf090efab20.
2024/09/28 13:41:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/1.


FileNotFoundError: [Errno 2] No such file or directory: 'models/preprocessor.b'

In [11]:
best_params

{'learning_rate': np.float64(0.14683580367536145),
 'max_depth': 15,
 'min_child_weight': np.float64(2.343141759325446),
 'reg_alpha': np.float64(0.015671497007957273),
 'reg_lambda': np.float64(0.02298136816323423),
 'seed': 42,
 'objective': 'reg:squarederror'}