In [1]:
import os
# Move from the notebook's folder up to the project root so that the relative
# `src....` imports and the relative YAML / artifact paths used by later cells
# resolve.
# The move is guarded so the cell is idempotent: an unconditional
# `os.chdir("../")` climbs one more level on every re-run. The project root is
# recognised by its `src` directory (assumes the notebook's own folder has no
# `src` sub-directory - TODO confirm).
if not os.path.isdir("src"):
    os.chdir("../")
os.getcwd()

'c:\\Users\\Marina\\Desktop\\ML Operations\\0 - KrishNaik Course\\18 - Getting Started With Your First End To End Data Science Project With\\my_project'

## Creating the Dataclass

**Todos os elementos do config.yaml devem estar nesta classe**

In [2]:
from dataclasses import dataclass
from pathlib import Path

# from yaml
# model_trainer:
  # root_dir: artifacts/model_trainer
  # X_train_data_path: artifacts/data_transformation/X_train.csv
  # X_test_data_path: artifacts/data_transformation/X_test.csv
  # y_train_data_path: artifacts/data_transformation/y_train.csv
  # y_test_data_path: artifacts/data_transformation/y_test.csv
  # model_name: model.joblib

@dataclass
class ModelTrainerConfig:
    """Flat bundle of every setting the model-training step needs.

    The fields are grouped by the YAML file they originate from. The
    annotations are informational only: a dataclass does not validate or
    convert the values it is given.
    """

    # --- from config.yaml (model_trainer section) ---
    root_dir: Path            # output folder of this step
    X_train_data_path: Path   # CSV with the training features
    X_test_data_path: Path    # CSV with the test features
    y_train_data_path: Path   # CSV with the training targets
    y_test_data_path: Path    # CSV with the test targets
    model_name: str           # file name of the serialized model

    # --- from params.yaml ---
    alpha: float
    l1_ratio: float

    # --- from schema.yaml ---
    target_column: str



## Update the Config Manager

In [9]:
from src.datascience.constants import CONFIG_FILEPATH, PARAMS_FILEPATH, SCHEMA_FILEPATH
from src.datascience.utils.commons import read_yaml, create_directories

class ConfigurationManager:
    """Loads the project's YAML files and builds typed config objects from them."""

    def __init__(
        self,
        config_filepath=CONFIG_FILEPATH,
        params_filepath=PARAMS_FILEPATH,
        schema_filepath=SCHEMA_FILEPATH,
    ):
        """Read the three YAML files and create the artifacts root folder.

        Args:
            config_filepath: Location of the config YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the schema YAML.
        """
        # Keep the parsed content of each file; the results are used with
        # attribute access (e.g. `self.config.artifacts_root`).
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Top-level output folder shared by the pipeline steps.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training step.

        Reads the `model_trainer` section of the config, the `ElasticNet`
        section of the params and the `TARGET_COLUMNS` section of the schema,
        creates the step's output folder, and returns everything as one
        ModelTrainerConfig.
        """
        trainer_section = self.config.model_trainer
        elastic_net_params = self.params.ElasticNet
        target_schema = self.schema.TARGET_COLUMNS

        # Output folder of this step (root_dir of the model_trainer section).
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            # config.yaml
            root_dir=trainer_section.root_dir,
            X_train_data_path=trainer_section.X_train_data_path,
            X_test_data_path=trainer_section.X_test_data_path,
            y_train_data_path=trainer_section.y_train_data_path,
            y_test_data_path=trainer_section.y_test_data_path,
            model_name=trainer_section.model_name,
            # params.yaml
            alpha=elastic_net_params.alpha,
            l1_ratio=elastic_net_params.l1_ratio,
            # schema.yaml
            target_column=target_schema.name,
        )

## Update the Components

In [10]:
import urllib.request as request
import pandas as pd
from src.datascience.utils import logger
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.base import RegressorMixin
from sklearn.linear_model import ElasticNet
import joblib

class ModelTrainer:
    """Loads the training split, fits an ElasticNet and saves it to disk.

    The instance only holds the configuration; loading, training and saving
    are separate methods so the caller decides the sequence.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Store the settings for this step.

        Args:
            config: A ModelTrainerConfig. The methods below read its
                X_train_data_path, y_train_data_path, alpha, l1_ratio,
                root_dir and model_name attributes.
        """
        self.config = config

    def load_training_data(self) -> tuple[np.ndarray, np.ndarray]:
        """Read the training features and targets from CSV as NumPy arrays.

        Only the training split is loaded; the test paths in the config are
        not touched here.

        Returns:
            A `(X_train, y_train)` tuple. Both arrays are converted from whole
            DataFrames, so both are two-dimensional (rows x columns).
        """
        features = pd.read_csv(self.config.X_train_data_path)
        targets = pd.read_csv(self.config.y_train_data_path)

        return features.to_numpy(), targets.to_numpy()

    def train_model(self, X_train, y_train) -> RegressorMixin:
        """Fit an ElasticNet on the given data and return the fitted estimator.

        Args:
            X_train: Training features, passed to `fit` unchanged.
            y_train: Training targets, passed to `fit` unchanged.

        Returns:
            The fitted ElasticNet instance.

        Raises:
            Exception: Anything raised while building or fitting the model is
                logged and then re-raised unchanged.
        """
        try:
            # alpha / l1_ratio come from the config; the seed is fixed.
            elastic_net = ElasticNet(
                random_state=42,
                alpha=self.config.alpha,
                l1_ratio=self.config.l1_ratio,
            )
            elastic_net.fit(X_train, y_train)
            logger.info("Sucesfully trained the model")
            return elastic_net

        except Exception as e:
            logger.exception(f"Exception during model_training \n Exception:{e}")
            # Bare `raise` re-raises the active exception as-is.
            raise

    def save_model(self, model: RegressorMixin):
        """Serialize `model` with joblib to `<root_dir>/<model_name>`.

        Args:
            model: The estimator to persist.

        Raises:
            Exception: Anything raised while creating the folder or dumping
                the model is logged and then re-raised unchanged.
        """
        try:
            # Make sure the target folder exists before writing into it.
            os.makedirs(self.config.root_dir, exist_ok=True)

            file_path = os.path.join(self.config.root_dir, self.config.model_name)

            joblib.dump(model, file_path)
            logger.info(f"Sucesfully Saved the model to: {self.config.root_dir}")

        except Exception as e:
            logger.exception(f"Exception during model_saving \n Exception:{e}")
            # Bare `raise` re-raises the active exception as-is.
            raise

## Testing

In [11]:
from src.datascience.utils import logger

try:
    # Build the settings this step needs from the YAML files.
    model_trainer_config = ConfigurationManager().get_model_trainer_config()

    # Run the step end to end: load the training data, fit, then persist.
    model_trainer_object = ModelTrainer(model_trainer_config)
    X_train, y_train = model_trainer_object.load_training_data()
    trained_model = model_trainer_object.train_model(X_train, y_train)
    model_trainer_object.save_model(trained_model)

except Exception as e:
    logger.error(f"Error during model_training. Error {e}")
    # Bare `raise` re-raises the active exception as-is, so the failure is
    # still visible in the notebook.
    raise

[2024-11-04 16:27:07,835: INFO: commons: yaml file: config\config.yaml loaded successfully ]
[2024-11-04 16:27:07,839: INFO: commons: yaml file: params.yaml loaded successfully ]
[2024-11-04 16:27:07,845: INFO: commons: yaml file: schema.yaml loaded successfully ]
[2024-11-04 16:27:07,848: INFO: commons: created directory at: artifacts ]
[2024-11-04 16:27:07,851: INFO: commons: created directory at: artifacts/model_trainer ]
[2024-11-04 16:27:07,882: INFO: 1581951645: Sucesfully trained the model ]
[2024-11-04 16:27:07,889: INFO: 1581951645: Sucesfully Saved the model to: artifacts/model_trainer ]


## Agora iremos converter para código modular

Começamos do passo 4 em diante (do workflow em `notes.ipynb`)

O código inteiro da seção `Creating the Dataclass` vai para `src/entity/config_entity.py`

O código inteiro da seção `Update the Config Manager` vai para `src/config/configuration.py`

O código inteiro da seção `Update the Components` vai para `src/components/nome_da_step.py`

O código inteiro da seção `Testing` vai para `src/pipeline/nome_da_step_pipeline.py`, com implementação OOP, ver exemplo 

O código anterior vai para `main.py`, podendo chamar diretamente a classe; basta ver o exemplo