In [6]:
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from sklearn.metrics import  root_mean_squared_error
from sklearn.feature_extraction import  DictVectorizer
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import numpy as np
import mlflow
import dagshub

In [1]:
dagshub.init(url="https://dagshub.com/PacoTinoco/Proyecto_Final_CDD", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="amazon_stock2")

https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow


2024/11/25 17:07:40 INFO mlflow.tracking.fluent: Experiment with name 'amazon_stock2' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/3c86a387cd234fdc8649c36f629d406f', creation_time=1732576060273, experiment_id='8', last_update_time=1732576060273, lifecycle_stage='active', name='amazon_stock2', tags={}>

In [2]:
import yfinance as yf
from datetime import date, timedelta

# Test dates
amazon_stock = yf.download('AMZN', start='2015-01-01', end='2024-01-01')

[*********************100%***********************]  1 of 1 completed


In [11]:
# Definir X e y
X = amazon_stock.drop(columns=["Close", "Adj Close"])
y = amazon_stock["Close"]
from sklearn.model_selection import train_test_split

# Dividir en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Tamaño del conjunto de entrenamiento:", X_train.shape, y_train.shape)
print("Tamaño del conjunto de prueba:", X_val.shape, y_val.shape)

Tamaño del conjunto de entrenamiento: (1811, 4) (1811,)
Tamaño del conjunto de prueba: (453, 4) (453,)


In [10]:
X

Unnamed: 0_level_0,Open,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02,15.629000,15.737500,15.348000,55664000
2015-01-05,15.350500,15.419000,15.042500,55484000
2015-01-06,15.112000,15.150000,14.619000,70380000
2015-01-07,14.875000,15.064000,14.766500,52806000
2015-01-08,15.016000,15.157000,14.805500,61768000
...,...,...,...,...
2023-12-22,153.770004,154.350006,152.710007,29480100
2023-12-26,153.559998,153.979996,153.029999,25067200
2023-12-27,153.559998,154.779999,153.119995,31434700
2023-12-28,153.720001,154.080002,152.949997,27057000


In [7]:
mlflow.sklearn.autolog()

def objective_rf(params):
    with mlflow.start_run(nested=True):
        # Set model tag
        mlflow.set_tag("model_family", "random_forest")
        
        # Log parameters
        mlflow.log_params(params)
        
        # Train RandomForest model
        rf_model = RandomForestRegressor(
            n_estimators=int(params['n_estimators']),
            max_depth=int(params['max_depth']),
            min_samples_split=int(params['min_samples_split']),
            min_samples_leaf=int(params['min_samples_leaf']),
            random_state=42
        )
        rf_model.fit(X_train, y_train)
        
        # Predict on validation dataset
        y_pred = rf_model.predict(X_val)
        
        # Calculate RMSE
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        
        # Log RMSE metric
        mlflow.log_metric("rmse", rmse)
        
        return {'loss': rmse, 'status': STATUS_OK}

# Define search space for RandomForest
search_space_rf = {
    'n_estimators': hp.quniform('n_estimators', 50, 100, 1),
    'max_depth': hp.quniform('max_depth', 5, 15, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 5, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 2, 1),
}


# Run hyperparameter optimization
with mlflow.start_run(run_name="Parent Random Forest", nested=True):
    best_params_rf = fmin(
        fn=objective_rf,
        space=search_space_rf,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials()
    )
    
    # Log best parameters
    mlflow.log_params(best_params_rf)



  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]




2024/11/25 17:11:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run receptive-quail-716 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/a1bb41df77d14335989422e684a59d23.

2024/11/25 17:11:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 10%|█         | 1/10 [00:18<02:50, 18.98s/trial, best loss: 0.9409940690289171]




2024/11/25 17:11:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run persistent-cow-247 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/3467d30db3324b62ba33cdfcc62f19b9.

2024/11/25 17:11:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 20%|██        | 2/10 [00:30<01:57, 14.68s/trial, best loss: 0.9409013859128348]




2024/11/25 17:11:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run wistful-conch-35 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/86feed0c5a184ef9a715a5cd94cf044e.

2024/11/25 17:11:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 30%|███       | 3/10 [00:47<01:50, 15.81s/trial, best loss: 0.9409013859128348]




2024/11/25 17:12:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run dashing-bee-682 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/d3da48de445e49328771f9435c93a35e.

2024/11/25 17:12:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 40%|████      | 4/10 [01:04<01:37, 16.25s/trial, best loss: 0.9365316989846586]




2024/11/25 17:12:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run judicious-stag-539 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/4a267d3da3de4aef806e5e650fb64b11.

2024/11/25 17:12:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 50%|█████     | 5/10 [01:21<01:22, 16.52s/trial, best loss: 0.9365316989846586]




2024/11/25 17:12:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run placid-stag-281 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/f1567fe9b9e948e3925f8e8064b9361d.

2024/11/25 17:12:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 60%|██████    | 6/10 [01:38<01:06, 16.66s/trial, best loss: 0.9365316989846586]




2024/11/25 17:12:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run whimsical-wolf-200 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/3bec777d2382447a99f8b1f86f7e0eea.

2024/11/25 17:12:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 70%|███████   | 7/10 [02:01<00:55, 18.56s/trial, best loss: 0.9365316989846586]




2024/11/25 17:13:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run dashing-pig-157 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/2c2d4a4676f04a47b67081019445c033.

2024/11/25 17:13:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 80%|████████  | 8/10 [02:12<00:32, 16.41s/trial, best loss: 0.9365316989846586]




2024/11/25 17:13:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run merciful-ray-913 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/482801ba154947c19befc48fea3c46e9.

2024/11/25 17:13:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



 90%|█████████ | 9/10 [02:29<00:16, 16.50s/trial, best loss: 0.9340958598447686]




2024/11/25 17:13:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run unique-robin-848 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/4911e46449e0417daf28a3a0ed469f3e.

2024/11/25 17:13:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.



100%|██████████| 10/10 [02:46<00:00, 16.66s/trial, best loss: 0.9340958598447686]


2024/11/25 17:13:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Parent Random Forest at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8/runs/ae7edf1d55364e3b8be39f09587ece13.
2024/11/25 17:13:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/8.
