In [52]:
# Set repo
import dagshub
import mlflow
dagshub.init(url="https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car", mlflow=True)

MLFLOW_TRACKING_URI = mlflow.get_tracking_uri()

print(MLFLOW_TRACKING_URI)

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name="Rent a Car")

https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow


<Experiment: artifact_location='mlflow-artifacts:/54cbc10138a74c07a6f0eed4054b5221', creation_time=1727331177951, experiment_id='0', last_update_time=1727331177951, lifecycle_stage='active', name='Rent a Car', tags={}>

In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('/home/pepechuy/Downloads/School/Lab5/PCD_Rent_a_Car/data/processed.csv')
y = df['ratedaily']
X = df.drop('ratedaily', axis=1)

#Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Train-val
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42) 

In [54]:
import os
data_dir = '../data'

X_train.to_csv(os.path.join(data_dir, 'X_train.csv'), index=False)
X_val.to_csv(os.path.join(data_dir, 'X_val.csv'), index=False)
y_train.to_csv(os.path.join(data_dir, 'y_train.csv'), index=False)
y_val.to_csv(os.path.join(data_dir, 'y_val.csv'), index=False)

In [55]:
# Convert Pandas DataFrame to NumPy arrays
X_train_np = X_train.to_numpy()
X_val_np = X_val.to_numpy()
y_train_np = y_train.to_numpy()
y_val_np = y_val.to_numpy()

# Use mlflow.data.from_numpy with NumPy arrays
training_dataset = mlflow.data.from_numpy(X_train_np, targets=y_train_np, name="Rent-a-Car-Train")
validation_dataset = mlflow.data.from_numpy(X_val_np, targets=y_val_np, name="Rent-a-Car-Test")

In [56]:
from dagshub import get_repo_bucket_client

# Get a boto3.client object
s3 = get_repo_bucket_client("Pepe-Chuy/PCD_Rent_a_Car")

# Upload file
s3.upload_file(
    Bucket="PCD_Rent_a_Car",  # name of the repo
    Filename="../data/X_train.csv",  # local path of file to upload
    Key="X_train_data.csv",  # remote path where to upload the file
)

s3.upload_file(
    Bucket="PCD_Rent_a_Car",  # name of the repo
    Filename="../data/y_train.csv",  # local path of file to upload
    Key="y_train_data.csv",  # remote path where to upload the file
)

s3.upload_file(
    Bucket="PCD_Rent_a_Car",  # name of the repo
    Filename="../data/X_val.csv",  # local path of file to upload
    Key="X_eval_data.csv",  # remote path where to upload the file
)

# Upload file
s3.upload_file(
    Bucket="PCD_Rent_a_Car",  # name of the repo
    Filename="../data/y_val.csv",  # local path of file to upload
    Key="y_eval_data.csv",  # remote path where to upload the file
)

In [57]:
from sklearn.ensemble import RandomForestRegressor as rfr, GradientBoostingRegressor as gbr
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [58]:
# Random Forest
def objective(params):
    with mlflow.start_run(nested=True):

        #model family
        mlflow.set_tag("model_family", "RandomForest")
        # Log parameters
        mlflow.log_params(params)
        
        # Train model
        rf_model = rfr(
            n_estimators=int(params['n_estimators']),
            max_depth=int(params['max_depth']),
            min_samples_split=int(params['min_samples_split']),
            min_samples_leaf=int(params['min_samples_leaf']),
            random_state=309
        )
        rf_model.fit(X_train, y_train)
        
        # Cross-validation
        cv_scores = cross_val_score(rf_model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
        rmse = np.sqrt(-cv_scores.mean())  # Convert negative MSE to RMSE

        # Predict 
        y_pred = rf_model.predict(X_val)
        
        # R-squared
        r_squared = rf_model.score(X_val, y_val)  # Calculate R-squared directly from the model
        
        # Log metrics 
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("R_squared", r_squared)
 
    return {'loss': rmse, 'status': STATUS_OK}



In [59]:
mlflow.sklearn.autolog()

#Params for RandomForest
search_space = {
    'n_estimators': hp.quniform('n_estimators', 10, 20, 1),
    'max_depth': hp.quniform('max_depth', 5, 10, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 10, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 10, 1),
}


In [61]:
# hyperparameter optimization
with mlflow.start_run(run_name="Father Random Forest Regressor", nested=True):
    best_params = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials()
    )
    
    # Log best model
    mlflow.log_params(best_params)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]





2024/11/07 16:16:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run invincible-smelt-623 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/3db72dd8f0794fbc9c21f7475dd894c8.

2024/11/07 16:16:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 10%|█         | 1/10 [00:08<01:19,  8.82s/trial, best loss: 80.37839880917036]





2024/11/07 16:17:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run useful-panda-842 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/097f02bd8ce347db8db2275e4fc39646.

2024/11/07 16:17:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 20%|██        | 2/10 [00:26<01:53, 14.17s/trial, best loss: 79.14222443407908]





2024/11/07 16:17:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run loud-bee-283 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/6d0dcf2c66594d55a958ee18b518344b.

2024/11/07 16:17:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 30%|███       | 3/10 [00:44<01:51, 15.92s/trial, best loss: 76.72499704973973]





2024/11/07 16:17:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run enthused-eel-84 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/286896af01d94301b22e0a44d50bc8f0.

2024/11/07 16:17:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 40%|████      | 4/10 [01:03<01:41, 17.00s/trial, best loss: 76.72499704973973]





2024/11/07 16:18:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run honorable-turtle-9 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/5b0ea5ddf6c94f188f914b5c1fa900cb.

2024/11/07 16:18:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 50%|█████     | 5/10 [01:21<01:26, 17.23s/trial, best loss: 76.72499704973973]





2024/11/07 16:18:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run whimsical-toad-822 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/5ae1c9a97a0a42ec98a5cc423ca77373.

2024/11/07 16:18:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 60%|██████    | 6/10 [01:38<01:09, 17.46s/trial, best loss: 76.72499704973973]





2024/11/07 16:18:45 INFO mlflow.tracking._tracking_service.client: 🏃 View run bold-sow-149 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/aac7721eeabe483e9360a73a1ecd31c0.

2024/11/07 16:18:45 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 70%|███████   | 7/10 [01:56<00:52, 17.58s/trial, best loss: 75.55796085359684]





2024/11/07 16:19:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run useful-snail-10 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/3ee25ce432114d31be508c4df79c5137.

2024/11/07 16:19:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 80%|████████  | 8/10 [02:15<00:35, 17.91s/trial, best loss: 75.55796085359684]





2024/11/07 16:19:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run big-carp-238 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/48a079af993c428180ba8442ba3eaf21.

2024/11/07 16:19:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



 90%|█████████ | 9/10 [02:33<00:17, 17.95s/trial, best loss: 75.55796085359684]





2024/11/07 16:19:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run enchanting-hound-236 at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/18bd23e746d9443fb9d5cbe5d18eb84a.

2024/11/07 16:19:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.



100%|██████████| 10/10 [02:54<00:00, 17.44s/trial, best loss: 75.55796085359684]


2024/11/07 16:19:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Father Random Forest Regressor at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0/runs/63a7d7e1278a4cba8fb45242835f003e.
2024/11/07 16:19:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Pepe-Chuy/PCD_Rent_a_Car.mlflow/#/experiments/0.


In [62]:
best_params

{'max_depth': 10.0,
 'min_samples_leaf': 7.0,
 'min_samples_split': 5.0,
 'n_estimators': 16.0}

In [63]:
run_id = "fa7792b4494c4baab57fe253f8c321ca"
run_uri = f"runs:/{run_id}/model"
 
result = mlflow.register_model(
    model_uri=run_uri,
    name="not-dot-dagshub-model"
)

Successfully registered model 'not-dot-dagshub-model'.
2024/11/07 16:20:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: not-dot-dagshub-model, version 1
Created version '1' of model 'not-dot-dagshub-model'.
