In [1]:
import pandas as pd
# URI of the wind-turbine CSV inside the workspace's artifact datastore.
WIND_TURBINE_CSV_URI = "azureml://subscriptions/5f7e7551-1c5d-42a7-9cdd-7582134d0f5f/resourcegroups/iiot-book2-resources/workspaces/iiot-book2-ml-workspace1/datastores/workspaceartifactstore/paths/UI/2024-08-14_093544_UTC/wind_turbine.csv"

# Read the CSV into `df`; the training cell further down consumes this frame.
df = pd.read_csv(WIND_TURBINE_CSV_URI)

# Bare last expression so the notebook renders a preview of the frame.
df

Unnamed: 0,wind_speed_ms,power_generated_kw
0,6.07,16.972552
1,7.43,54.418928
2,8.19,78.077916
3,8.19,83.096364
4,8.19,80.569083
...,...,...
52555,7.27,52.851684
52556,7.28,49.878267
52557,7.22,44.911130
52558,7.30,50.840444


In [3]:

import argparse
import os
import shutil
from datetime import datetime

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split


registered_model_name="wind_turbine"
n_estimators=100


# Start Logging
with mlflow.start_run():

    # enable autologging
    mlflow.sklearn.autolog()


    mlflow.log_metric("num_samples", df.shape[0])
    mlflow.log_metric("num_features", df.shape[1] - 1)

    #Split train and test datasets
    train_df, test_df = train_test_split(
        df,
        test_size=0.3,
    )
    ####################
    #</prepare the data>
    ####################

    ##################
    #<train the model>
    ##################
    # Extracting the label column
    y_train = train_df.pop("power_generated_kw")

    # convert the dataframe values to array
    X_train = train_df.values

    # Extracting the label column
    y_test = test_df.pop("power_generated_kw")

    # convert the dataframe values to array
    X_test = test_df.values

    print(f"Training with data of shape {X_train.shape}")

    reg = GradientBoostingRegressor(
        n_estimators=n_estimators
    )
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    print(r2_score(y_test, y_pred))
    mlflow.log_metric("r2_score_test", r2_score(y_test, y_pred))
    ###################
    #</train the model>
    ###################

    ##########################
    #<save and register model>
    ##########################
    # Registering the model to the workspace
    print("Registering the model via MLFlow")
    mlflow.sklearn.log_model(
        sk_model=reg,
        registered_model_name=registered_model_name,
        artifact_path=registered_model_name,
    )

    # Saving the model to a file
    mlflow.sklearn.save_model(
        sk_model=reg,
        path=os.path.join(registered_model_name, "trained_model")
    )
    ###########################
    #</save and register model>
    ###########################

    # Stop Logging
    #mlflow.end_run()


Registered model 'wind_turbine' already exists. Creating a new version of this model...
2024/08/15 13:38:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wind_turbine, version 6
Created version '6' of model 'wind_turbine'.


Training with data of shape (36792, 1)
0.9407326876025165
Registering the model via MLFlow


## Stop compute instance
If you're not going to use the compute instance now, stop it:

* In the studio, in the left navigation area, select Compute.
* In the top tabs, select Compute instances.
* Select the compute instance in the list.
* On the top toolbar, select Stop.