# Step 1: Setup the Lab Environment
Before running any components, we need to initialize the lab environment with the configuration file generated in the previous step. This is done using `lab_setup`.


In [None]:
# Importing the `lab_setup` function to initialize the lab with the project settings
from plf.lab import lab_setup

# Provide the path to the settings file (the same path returned by `create_project` in the previous notebook).
# NOTE: '.....' below is a placeholder -- replace it with your own settings path before running this cell.
# This call sets up the lab environment for the subsequent steps.
lab_setup(settings_path='.....')


# Step 2: Load and Run DataLoader Component
The first step in any machine learning pipeline is to load the dataset. Here, we use the `DataLoaderComponent` to load the diabetes dataset and configure it to shuffle the data, select a sample size, and choose specific features.


In [None]:
# Importing `load_component` (used to load our components) along with `Component`
from plf.utils import load_component, Component

In [None]:
# Configuration passed to the DataLoader component.
loader_args = {
    "shuffle": True,               # shuffle the data
    "sample_size": 16,             # number of samples to take
    "feature_indices": [2, 5, 7],  # columns/features to select
}

# Load the component from its location string together with the config above
ds = load_component(loc="myComps.DataLoaderComponent", args=loader_args)

# Run the DataLoader; its result is unpacked into the pair (x, y)
x, y = ds.run()

# Quick sanity check: as the last expression of the cell, both shapes are displayed
x.shape, y.shape


((16, 3), (16,))

# Step 3: Preprocess the Data
Now that we have the dataset, we need to preprocess the features. We use the `PreprocessorComponent` to apply a Min-Max scaler and decide whether to apply a logarithmic transformation to the data.


In [None]:
# Loading the Preprocessor component with configuration settings:
# - scaler_type: Choose the scaler ('minmax' in this case)
# - log_transform: Whether to apply a log transformation to the features
pp = load_component(loc="myComps.PreprocessorComponent", args={
    "scaler_type": 'minmax',  # Choose MinMax scaling
    "log_transform": False  # Do not apply log transformation
})

# Running the preprocessor on the features. As the last expression of the cell,
# the return value is only displayed -- it is not assigned to any variable.
# NOTE(review): unless `pp.run` modifies `x` in place, later cells that use `x`
# still see the unscaled features; consider capturing the result -- TODO confirm.
pp.run(x)


(array([[0.84615385, 0.        , 0.62427746, 0.3674359 , 0.53932584,
         0.31057269, 0.07407407, 1.        , 0.98110009, 0.58139535],
        [1.        , 1.        , 0.38728324, 0.71794872, 0.70786517,
         0.57709251, 0.57407407, 0.30864198, 0.        , 0.30232558],
        [0.82051282, 1.        , 0.43352601, 0.30769231, 0.29213483,
         0.18942731, 0.18518519, 0.61728395, 0.6898154 , 0.55813953],
        [0.53846154, 0.        , 0.46242775, 0.92307692, 0.95505618,
         0.6784141 , 0.61111111, 0.30864198, 0.47331408, 0.34883721],
        [0.43589744, 1.        , 0.48554913, 0.53846154, 1.        ,
         0.67400881, 0.55555556, 0.30864198, 0.66765038, 0.86046512],
        [0.        , 1.        , 0.        , 0.        , 0.        ,
         0.        , 0.22222222, 0.30864198, 0.39470049, 0.09302326],
        [0.30769231, 0.        , 0.15606936, 0.33333333, 0.73033708,
         0.3215859 , 1.        , 0.        , 0.12953661, 0.34883721],
        [0.35897436, 0.    

# Step 4: Train the Model
With the data preprocessed, it's time to train a model. Here, we use the `RegressorComponent` to load and train a Ridge regression model with a specified regularization parameter (`alpha = 1.0`).


In [None]:
# Loading the Regressor component with configuration settings:
# - model_type: Specify the model type (Ridge regression in this case)
# - model_params: Pass the hyperparameters for the model (e.g., alpha)
rg = load_component(loc="myComps.RegressorComponent", args={
    "model_type": "ridge",  # Use Ridge regression
    "model_params": {"alpha": 1.0}  # Regularization strength (alpha)
})

# Running the model training process on `x` and `y`; the result is kept in `mdl`.
# NOTE(review): `x` is the value returned by `ds.run()` in Step 2. The result of
# `pp.run(x)` in Step 3 was never assigned, so this presumably trains on unscaled
# features -- confirm whether the preprocessed data should be passed here instead.
mdl = rg.run(x, y)


# Step 5: Evaluate the Model
After training the model, it's important to evaluate its performance. We use the `EvaluatorComponent` to assess the model using multiple evaluation metrics, such as R² and RMSE.


In [None]:
# Metrics the Evaluator component should compute: R² and RMSE.
eval_args = {"metrics": ["r2", "rmse"]}

# Load the Evaluator component with that configuration
ev = load_component(loc="myComps.EvaluatorComponent", args=eval_args)

# Evaluate the trained model. `x` and `y` are the same variables that were passed
# to `rg.run` above, so the scores are computed on the training data itself.
# The call is the last expression of the cell, so its result is displayed.
ev.run(mdl, x, y)


{'r2': 0.09413598508691956, 'rmse': np.float64(58.77988641830677)}

np.float64(-0.018061886948495892)

In [8]:
# Unpack `datasets` into x and y, then display their shapes.
# NOTE(review): `datasets` is not defined in any cell visible here, and this
# rebinds the `x`/`y` used by the earlier steps -- confirm this cell is still needed.
x,y = datasets
x.shape,  y.shape

((442, 10), (442,))

In [9]:
# Display the first element of y
y[0]

np.float64(151.0)

In [10]:
# Display the first row of x
x[0]

array([ 0.03807591,  0.05068012,  0.06169621,  0.02187239, -0.0442235 ,
       -0.03482076, -0.04340085, -0.00259226,  0.01990749, -0.01764613])