## First trial run

In [7]:
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import joblib

# MLflow connection settings shared by every run in this notebook.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "RandomForestRegressor_Experiment"

# Point the fluent API at the tracking server, then select the experiment.
# The set_experiment call stays last so its return value is shown as the
# cell output.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='file:///C:/Users/USER/Downloads/mlops-project-2024/mlflow-azure/mlflow_artifacts/1', creation_time=1721722769894, experiment_id='1', last_update_time=1721722769894, lifecycle_stage='active', name='RandomForestRegressor_Experiment', tags={}>

In [22]:
# MlflowClient is already imported in the first cell; repeating the import
# here is harmless.
from mlflow.tracking import MlflowClient

# Client object created with no arguments, i.e. no tracking or registry URI is
# passed explicitly.
# NOTE(review): presumably it resolves to the URI configured through
# mlflow.set_tracking_uri in the first cell -- confirm against the MLflow docs.
client = MlflowClient()

In [27]:
# Baseline run: fit a RandomForestRegressor on the gold-price CSV, log its
# hyperparameters and test-set MSE to MLflow, register the fitted model and
# keep a local pickle of it.

# Set directly because a notebook has no command-line arguments to parse.
# NOTE(review): `alpha` is not read anywhere in this cell and is not passed to
# RandomForestRegressor; it is kept only in case another cell expects the
# name -- confirm and delete if unused.
alpha = 0.5

# Single source of truth for the settings, so what is logged is exactly what
# was used to build the split and the model.
split_params = {"test_size": 0.2, "random_state": 42}
rf_params = {"n_estimators": 100, "random_state": 42}

with mlflow.start_run() as run:
    # Load data
    data = pd.read_csv("https://stmlwol3bxs001.blob.core.windows.net/azureml-blobstore-e2c46a71-de7e-4b08-adde-4cbb6825a7be/UI/2024-07-20_105220_UTC/gold2024_07_19.csv")
    features = data.drop(columns=["Date", "Adj Close"])
    target = data["Adj Close"]
    X_train, X_test, y_train, y_test = train_test_split(features, target, **split_params)

    # Record the configuration of this run; without it the run only carries a
    # metric and cannot be compared with runs that log their parameters.
    mlflow.log_params({**rf_params, "test_size": split_params["test_size"]})

    # Train model
    model = RandomForestRegressor(**rf_params)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out rows
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse", mse)

    # Log model manually to track it in the model registry
    mlflow.sklearn.log_model(model, "random_forest_model", registered_model_name="RandomForestRegressorModel")

    # Save a local copy of the model in the current working directory
    joblib.dump(model, "random_forest_model.pkl")

# No explicit mlflow.end_run() is needed: leaving the `with` block above has
# already ended the run.
print("Run ID:", run.info.run_id)

Registered model 'RandomForestRegressorModel' already exists. Creating a new version of this model...
2024/07/23 20:57:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestRegressorModel, version 6
Created version '6' of model 'RandomForestRegressorModel'.


Run ID: 126a28056db841c4ad56a3b256d01ca0


## Fix the warning raised by the code above

In [3]:
# Training run with MLflow autologging enabled: features are cast to float,
# missing values are imputed, a RandomForestRegressor is fitted, and the
# test-set MSE and the registered model are logged to MLflow.

# Enable automatic logging
mlflow.autolog()

# Set directly because a notebook has no command-line arguments to parse.
# NOTE(review): `alpha` is not read anywhere in this cell and is not passed to
# RandomForestRegressor; it is kept only in case another cell expects the
# name -- confirm and delete if unused.
alpha = 0.5

with mlflow.start_run() as run:
    # Load data
    data = pd.read_csv("https://stmlwol3bxs001.blob.core.windows.net/azureml-blobstore-e2c46a71-de7e-4b08-adde-4cbb6825a7be/UI/2024-07-20_105220_UTC/gold2024_07_19.csv")

    # Convert every feature column (including integer ones) to float
    features = data.drop(columns=["Date", "Adj Close"]).astype(float)
    target = data["Adj Close"]

    # Split BEFORE imputing so no statistic of the test rows leaks into training
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Fill missing values with column means computed on the training rows only,
    # and reuse those same means for the test rows
    train_means = X_train.mean()
    X_train = X_train.fillna(train_means)
    X_test = X_test.fillna(train_means)

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out rows
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse", mse)

    # Log model manually to track it in the model registry
    mlflow.sklearn.log_model(model, "random_forest_model", registered_model_name="RandomForestRegressorModel")

    # Save a local copy of the model in the current working directory
    joblib.dump(model, "random_forest_model.pkl")

# No explicit mlflow.end_run() is needed: leaving the `with` block above has
# already ended the run.
print("Run ID:", run.info.run_id)

2024/07/23 15:39:10 INFO mlflow.tracking.fluent: Experiment with name 'gold-price-experiment' does not exist. Creating a new experiment.
2024/07/23 15:39:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
Registered model 'RandomForestRegressorModel' already exists. Creating a new version of this model...
2024/07/23 15:39:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestRegressorModel, version 2


Run ID: 061575c256434209914c7bcef92d5df6


Created version '2' of model 'RandomForestRegressorModel'.


## Add `mlflow.sklearn.autolog()`

In [6]:
# Training run with scikit-learn autologging enabled: features are cast to
# float, missing values are imputed, a RandomForestRegressor is fitted, and
# the test-set MSE and the registered model are logged to MLflow.

# Enable automatic logging
mlflow.sklearn.autolog()

# Set directly because a notebook has no command-line arguments to parse.
# NOTE(review): `alpha` is not read anywhere in this cell and is not passed to
# RandomForestRegressor; it is kept only in case another cell expects the
# name -- confirm and delete if unused.
alpha = 0.5

with mlflow.start_run() as run:
    # Load data
    data = pd.read_csv("https://stmlwol3bxs001.blob.core.windows.net/azureml-blobstore-e2c46a71-de7e-4b08-adde-4cbb6825a7be/UI/2024-07-20_105220_UTC/gold2024_07_19.csv")

    # Convert the 'Date' column to datetime. The column is dropped from the
    # features just below, so this only changes the `data` frame itself and
    # has no effect on the model trained in this cell.
    data['Date'] = pd.to_datetime(data['Date'])

    # Convert every feature column (including integer ones) to float
    features = data.drop(columns=["Date", "Adj Close"]).astype(float)
    target = data["Adj Close"]

    # Split BEFORE imputing so no statistic of the test rows leaks into training
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Fill missing values with column means computed on the training rows only,
    # and reuse those same means for the test rows
    train_means = X_train.mean()
    X_train = X_train.fillna(train_means)
    X_test = X_test.fillna(train_means)

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out rows
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse", mse)

    # Log model manually to track it in the model registry
    mlflow.sklearn.log_model(model, "random_forest_model", registered_model_name="RandomForestRegressorModel")

    # Save a local copy of the model in the current working directory
    joblib.dump(model, "random_forest_model.pkl")

# No explicit mlflow.end_run() is needed: leaving the `with` block above has
# already ended the run.
print("Run ID:", run.info.run_id)

Registered model 'RandomForestRegressorModel' already exists. Creating a new version of this model...
2024/07/23 15:58:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestRegressorModel, version 5


Run ID: 92741b75db7644048d9bcaec86ccf9e1


Created version '5' of model 'RandomForestRegressorModel'.
