In [1]:
import yfinance as yf
from datetime import date, timedelta
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from sklearn.metrics import  root_mean_squared_error
from sklearn.feature_extraction import  DictVectorizer
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import numpy as np
import mlflow

# Test dates
amazon_stock = yf.download('AMZN', start='2015-01-01', end='2024-01-01')

[*********************100%***********************]  1 of 1 completed


In [4]:
# Definir X e y
X = amazon_stock.drop(columns=["Close", "Adj Close"])
y = amazon_stock["Close"]


# Dividir en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Tamaño del conjunto de entrenamiento:", X_train.shape, y_train.shape)
print("Tamaño del conjunto de prueba:", X_val.shape, y_val.shape)

Tamaño del conjunto de entrenamiento: (1811, 4) (1811,)
Tamaño del conjunto de prueba: (453, 4) (453,)


In [9]:
X

Unnamed: 0_level_0,Open,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02,15.629000,15.737500,15.348000,55664000
2015-01-05,15.350500,15.419000,15.042500,55484000
2015-01-06,15.112000,15.150000,14.619000,70380000
2015-01-07,14.875000,15.064000,14.766500,52806000
2015-01-08,15.016000,15.157000,14.805500,61768000
...,...,...,...,...
2023-12-22,153.770004,154.350006,152.710007,29480100
2023-12-26,153.559998,153.979996,153.029999,25067200
2023-12-27,153.559998,154.779999,153.119995,31434700
2023-12-28,153.720001,154.080002,152.949997,27057000


In [5]:
import dagshub
import mlflow
dagshub.init(url="https://dagshub.com/PacoTinoco/Proyecto_Final_CDD", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="amazon-stock-model-randomforest")

https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow


2024/11/15 19:01:10 INFO mlflow.tracking.fluent: Experiment with name 'amazon-stock-model-randomforest' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/15cd90d7ce864197825b6004970573b6', creation_time=1731718870358, experiment_id='6', last_update_time=1731718870358, lifecycle_stage='active', name='amazon-stock-model-randomforest', tags={}>

In [6]:
mlflow.sklearn.autolog()

def objective_rf(params):
    with mlflow.start_run(nested=True):
        # Set model tag
        mlflow.set_tag("model_family", "random_forest")

        # Log parameters
        mlflow.log_params(params)

        # Train RandomForest model
        rf_model = RandomForestRegressor(
            n_estimators=int(params['n_estimators']),
            max_depth=int(params['max_depth']),
            min_samples_split=int(params['min_samples_split']),
            min_samples_leaf=int(params['min_samples_leaf']),
            random_state=42
        )
        rf_model.fit(X_train, y_train)

        # Predict on validation dataset
        y_pred = rf_model.predict(X_val)

        # Calculate RMSE
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))

        # Log RMSE metric
        mlflow.log_metric("rmse", rmse)

        return {'loss': rmse, 'status': STATUS_OK}


# Define search space for RandomForest
search_space_rf = {
    'n_estimators': hp.quniform('n_estimators', 50, 100, 1),
    'max_depth': hp.quniform('max_depth', 5, 15, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 5, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 2, 1),
}

# Run hyperparameter optimization
with mlflow.start_run(run_name="Parent Random Forest", nested=True):
    best_params_rf = fmin(
        fn=objective_rf,
        space=search_space_rf,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials()
    )

    # Log best parameters
    mlflow.log_params(best_params_rf)



  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]




2024/11/15 19:03:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run sophisticated-ox-659 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/27c48a8cb50d4ef5afa3d7799677505e.

2024/11/15 19:03:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 10%|█         | 1/10 [00:53<08:01, 53.45s/trial, best loss: 0.9375700202551965]




2024/11/15 19:03:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run masked-mare-347 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/b77c7762d40645f0a9cd9b67320b764d.

2024/11/15 19:03:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 20%|██        | 2/10 [01:15<04:42, 35.25s/trial, best loss: 0.9357617105529868]




2024/11/15 19:03:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run mysterious-crab-582 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/7d97e85237ba4becb02225b09716ccdc.

2024/11/15 19:03:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 30%|███       | 3/10 [01:40<03:32, 30.31s/trial, best loss: 0.9357617105529868]




2024/11/15 19:04:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-cat-867 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/b994356029494f72b29091cf207c3945.

2024/11/15 19:04:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 40%|████      | 4/10 [02:14<03:11, 31.94s/trial, best loss: 0.9349319129546464]




2024/11/15 19:04:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run glamorous-worm-409 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/ee11457d021d4193bd0339c6f98fcd05.

2024/11/15 19:04:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 50%|█████     | 5/10 [02:39<02:26, 29.30s/trial, best loss: 0.9349319129546464]




2024/11/15 19:05:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run stately-crab-963 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/635b210e133e4c8b93f150c3bdaf7d13.

2024/11/15 19:05:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 60%|██████    | 6/10 [02:51<01:33, 23.32s/trial, best loss: 0.9349319129546464]




2024/11/15 19:05:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run overjoyed-hog-391 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/e17de28176fc4159a72dd4a56b51b151.

2024/11/15 19:05:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 70%|███████   | 7/10 [03:08<01:03, 21.25s/trial, best loss: 0.9349319129546464]




2024/11/15 19:05:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run upbeat-duck-856 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/bd0d74fb64ee41ce8301ac8274de0e6c.

2024/11/15 19:05:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 80%|████████  | 8/10 [03:25<00:40, 20.01s/trial, best loss: 0.9349319129546464]




2024/11/15 19:05:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run luxuriant-worm-113 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/4a6b915c91b44de6b14569cbf6a575a2.

2024/11/15 19:05:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



 90%|█████████ | 9/10 [03:42<00:19, 19.10s/trial, best loss: 0.9349319129546464]




2024/11/15 19:06:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run luxuriant-rat-935 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/c1c38f5c72884a8e896b6f0f9d59691d.

2024/11/15 19:06:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.



100%|██████████| 10/10 [03:59<00:00, 23.92s/trial, best loss: 0.9349319129546464]


2024/11/15 19:06:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run Parent Random Forest at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6/runs/40adb0f534074fc2beba66cfcff84083.
2024/11/15 19:06:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/6.


In [10]:
run_id = input("Ingrese el run_id")
run_uri = f"runs:/{run_id}/model"

result = mlflow.register_model(
    model_uri=run_uri,
    name="amazon-stock-model-randomforest"
)

Successfully registered model 'amazon-stock-model-randomforest'.
2024/11/15 19:37:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: amazon-stock-model-randomforest, version 1
Created version '1' of model 'amazon-stock-model-randomforest'.


In [11]:
from datetime import datetime
from mlflow import MlflowClient

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
client.update_registered_model(
    name="amazon-stock-model-randomforest",
    description="Model registry for the amazon stock randomforest prediction",
)

new_alias = "champion"
date = datetime.today()
model_version = "1"

# create "champion" alias for version 1 of model "nyc-taxi-model"
client.set_registered_model_alias(
    name="amazon-stock-model-randomforest",
    alias=new_alias,
    version=model_version
)

client.update_model_version(
    name="amazon-stock-model-randomforest",
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_alias} on {date}",
)

<ModelVersion: aliases=['champion'], creation_timestamp=1731721050287, current_stage='None', description='The model version 1 was transitioned to champion on 2024-11-15 19:38:30.704945', last_updated_timestamp=1731721110641, name='amazon-stock-model-randomforest', run_id='b994356029494f72b29091cf207c3945', run_link='', source='mlflow-artifacts:/15cd90d7ce864197825b6004970573b6/b994356029494f72b29091cf207c3945/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [12]:
best_params_rf

{'max_depth': 14.0,
 'min_samples_leaf': 1.0,
 'min_samples_split': 3.0,
 'n_estimators': 94.0}

In [13]:
import mlflow.pyfunc

model_name = "amazon-stock-model-randomforest"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"

champion_version = mlflow.pyfunc.load_model(
    model_uri=model_uri
)

champion_version.predict(X_val)

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([168.99291617, 102.8225732 ,  35.2315668 ,  38.43199172,
        18.81398255, 151.98644914,  80.29483741, 169.29012876,
        21.45449975, 102.47998987, 114.18053518,  26.98258436,
        41.91802015,  58.96802357, 157.03280609, 170.80020567,
       172.56694894, 106.77702636,  35.93351239, 107.14165401,
       161.4168907 , 151.38629254, 164.5985691 , 166.5001041 ,
        81.44238154, 176.39847769,  86.2199086 ,  35.85562574,
        90.7958783 ,  86.92307909,  21.39735076, 115.29210883,
       168.24846553,  34.29536475, 159.21121108, 153.44790001,
        70.38899676, 119.60514246,  42.63869646,  75.48991032,
       128.91945171, 165.55298124,  38.27371556,  59.59608865,
       162.9466886 , 159.97474783, 173.15589374,  48.25253461,
        98.06577891,  39.34642414,  21.49810735, 178.40361973,
       182.56106923,  39.20470775, 169.41359193, 157.09510449,
        84.82006672,  47.02689281, 133.27326233, 148.57046288,
        86.89870848,  23.220959  ,  94.87993146, 101.39