In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from sklearn.metrics import  root_mean_squared_error
from sklearn.feature_extraction import  DictVectorizer
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import numpy as np
import mlflow

In [2]:
google_stock = yf.download('GOOGL', start='2015-01-01', end='2024-01-01')

[*********************100%***********************]  1 of 1 completed


In [12]:
# Definir X e y
X = google_stock.drop(columns=["Close"])
y = google_stock["Close"]


# Dividir en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Tamaño del conjunto de entrenamiento:", X_train.shape, y_train.shape)
print("Tamaño del conjunto de prueba:", X_val.shape, y_val.shape)


Tamaño del conjunto de entrenamiento: (1811, 5) (1811,)
Tamaño del conjunto de prueba: (453, 5) (453,)


In [25]:
import dagshub
import mlflow
dagshub.init(url="https://dagshub.com/PacoTinoco/Proyecto_Final_CDD", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="google-stock-model-randomforest")

https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow


2024/11/07 15:56:46 INFO mlflow.tracking.fluent: Experiment with name 'google-stock-model-randomforest' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/62536d2f603e4621b047d552ffc80167', creation_time=1731016605756, experiment_id='4', last_update_time=1731016605756, lifecycle_stage='active', name='google-stock-model-randomforest', tags={}>

In [26]:
mlflow.sklearn.autolog()

def objective_rf(params):
    with mlflow.start_run(nested=True):
        # Set model tag
        mlflow.set_tag("model_family", "random_forest")
        
        # Log parameters
        mlflow.log_params(params)
        
        # Train RandomForest model
        rf_model = RandomForestRegressor(
            n_estimators=int(params['n_estimators']),
            max_depth=int(params['max_depth']),
            min_samples_split=int(params['min_samples_split']),
            min_samples_leaf=int(params['min_samples_leaf']),
            random_state=42
        )
        rf_model.fit(X_train, y_train)
        
        # Predict on validation dataset
        y_pred = rf_model.predict(X_val)
        
        # Calculate RMSE
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        
        # Log RMSE metric
        mlflow.log_metric("rmse", rmse)
        
        return {'loss': rmse, 'status': STATUS_OK}

# Define search space for RandomForest
search_space_rf = {
    'n_estimators': hp.quniform('n_estimators', 50, 100, 1),
    'max_depth': hp.quniform('max_depth', 5, 15, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 5, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 2, 1),
}


# Run hyperparameter optimization
with mlflow.start_run(run_name="Parent Random Forest", nested=True):
    best_params_rf = fmin(
        fn=objective_rf,
        space=search_space_rf,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials()
    )
    
    # Log best parameters
    mlflow.log_params(best_params_rf)




  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]




2024/11/07 15:57:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run bustling-frog-744 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/2bcbe321f99041a7a64caa6b2e6625c8.

2024/11/07 15:57:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 10%|█         | 1/10 [00:28<04:17, 28.64s/trial, best loss: 0.12185739266325228]




2024/11/07 15:58:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run able-carp-810 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/3f8554d9ba134baca694492292d2ab43.

2024/11/07 15:58:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 20%|██        | 2/10 [00:57<03:52, 29.03s/trial, best loss: 0.12185739266325228]




2024/11/07 15:58:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run angry-conch-350 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/7471e707a29e449383a2e9cd4ee8a0de.

2024/11/07 15:58:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 30%|███       | 3/10 [01:10<02:30, 21.46s/trial, best loss: 0.12185739266325228]




2024/11/07 15:58:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run blushing-sow-818 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/69021990adff43aaa0a8657a4c652c01.

2024/11/07 15:58:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 40%|████      | 4/10 [01:27<01:58, 19.70s/trial, best loss: 0.12185739266325228]




2024/11/07 15:58:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run selective-quail-528 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/dbcd71aa20904afdabfdc3ea82e09e94.

2024/11/07 15:58:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 50%|█████     | 5/10 [01:44<01:33, 18.77s/trial, best loss: 0.12185739266325228]




2024/11/07 15:59:12 INFO mlflow.tracking._tracking_service.client: 🏃 View run bemused-midge-798 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/0e5a71be4743453cb99f7301eeb226ea.

2024/11/07 15:59:12 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 60%|██████    | 6/10 [02:02<01:14, 18.63s/trial, best loss: 0.12185739266325228]




2024/11/07 15:59:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run likeable-smelt-417 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/73c32174c7054a21992864d7116dab31.

2024/11/07 15:59:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 70%|███████   | 7/10 [02:18<00:52, 17.65s/trial, best loss: 0.12185739266325228]




2024/11/07 15:59:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run invincible-shad-582 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/48d0c28be48243aa9e37488867194a27.

2024/11/07 15:59:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 80%|████████  | 8/10 [02:36<00:35, 17.82s/trial, best loss: 0.12185739266325228]




2024/11/07 16:00:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run powerful-auk-183 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/58deb5dc75fd45c6b7e890fbc68d30b4.

2024/11/07 16:00:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



 90%|█████████ | 9/10 [02:53<00:17, 17.47s/trial, best loss: 0.12185739266325228]




2024/11/07 16:00:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run delicate-grouse-14 at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/217b931773254cc1b62484ef7cf73e15.

2024/11/07 16:00:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.



100%|██████████| 10/10 [03:09<00:00, 18.97s/trial, best loss: 0.12185739266325228]


2024/11/07 16:00:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Parent Random Forest at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4/runs/e871fc362bc84a19a791641d2e24f4e9.
2024/11/07 16:00:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/PacoTinoco/Proyecto_Final_CDD.mlflow/#/experiments/4.


In [27]:
run_id = input("Ingrese el run_id")
run_uri = f"runs:/{run_id}/model"

result = mlflow.register_model(
    model_uri=run_uri,
    name="google-stock-model-randomforest"
)

Successfully registered model 'google-stock-model-randomforest'.
2024/11/07 16:02:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: google-stock-model-randomforest, version 1
Created version '1' of model 'google-stock-model-randomforest'.


In [28]:
from datetime import datetime
from mlflow import MlflowClient

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
client.update_registered_model(
    name="google-stock-model-randomforest",
    description="Model registry for the google stock randomforest prediction",
)

new_alias = "champion"
date = datetime.today()
model_version = "1"

# create "champion" alias for version 1 of model "nyc-taxi-model"
client.set_registered_model_alias(
    name="google-stock-model-randomforest",
    alias=new_alias,
    version=model_version
)

client.update_model_version(
    name="google-stock-model-randomforest",
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_alias} on {date}",
)

<ModelVersion: aliases=['champion'], creation_timestamp=1731016920090, current_stage='None', description='The model version 1 was transitioned to champion on 2024-11-07 16:02:09.404804', last_updated_timestamp=1731016928388, name='google-stock-model-randomforest', run_id='2bcbe321f99041a7a64caa6b2e6625c8', run_link='', source='mlflow-artifacts:/62536d2f603e4621b047d552ffc80167/2bcbe321f99041a7a64caa6b2e6625c8/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [29]:
best_params_rf

{'max_depth': 10.0,
 'min_samples_leaf': 2.0,
 'min_samples_split': 3.0,
 'n_estimators': 90.0}

In [30]:
import mlflow.pyfunc

model_name = "google-stock-model-randomforest"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"

champion_version = mlflow.pyfunc.load_model(
    model_uri=model_uri
)

champion_version.predict(X_val)

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([143.39611824,  72.33691959,  36.65969283,  39.56621352,
        28.44043678, 101.64589281,  55.1978257 , 135.71078295,
        27.35770143, 100.90802738,  98.74155235,  33.0730147 ,
        41.97921658,  53.66298278, 104.21847534, 144.66890164,
        78.43887062, 109.12898887,  37.44733363, 109.87744648,
       114.58751971,  74.57362504, 138.3260697 ,  88.07526337,
        55.81324986, 142.01213406,  59.45868574,  35.34376257,
        60.27217642,  65.96854082,  27.69732073,  99.54098795,
       112.06186148,  38.8278025 , 120.12662645, 135.79049695,
        59.36572084,  67.67225257,  42.50766845,  52.21065427,
       129.28772085, 140.73385527,  40.04446543,  54.57088909,
       118.16404847,  75.54631549, 148.00197613,  46.15179818,
       102.50932031,  41.1640359 ,  27.80818016, 145.49685679,
       134.05984034,  40.53051936, 144.87106793,  90.32964065,
        59.6538808 ,  47.9418361 ,  73.2794782 , 127.56433644,
        52.3915711 ,  29.76232568,  91.13215942,  71.67