In [1]:
import os

# Step up to the parent directory once per kernel session.
# NOTE(review): presumably this makes the project root the working directory so
# the Concrete_CS package and the relative config paths resolve — confirm.
# The guard keeps the cell idempotent: without it, every re-run of this cell
# would climb one directory higher and break the relative paths used later.
if "_NOTEBOOK_CWD_MOVED" not in globals():
    os.chdir("../")
    _NOTEBOOK_CWD_MOVED = True
os.getcwd()

'c:\\Users\\hp\\Documents\\ML_Intern'

In [2]:
from Concrete_CS.constant import *
from Concrete_CS.utils import read_yaml, create_directories

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Output directory of the stage (created by ConfigurationManager).
    root_dir: Path

    # CSV inputs read with pandas by the trainer.
    training_features_path: Path
    training_target_path: Path
    testing_features_path: Path
    testing_target_path: Path

    # Destination opened in binary-write mode for the pickle dump.
    saved_file: Path

    # Candidate values for the random-forest hyper-parameter search;
    # each list becomes one entry of the search's parameter distributions.
    n_estimators: list
    max_features: list
    max_depth: list
    min_samples_split: list
    min_samples_leaf: list

    # Settings forwarded unchanged to RandomizedSearchCV.
    scoring: str
    n_iter: int
    cv: int
    verbose: int
    random_state: int
   

In [5]:
class ConfigurationManager:
    """Read the project's YAML files and turn them into typed stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the YAML file holding paths/settings.
            params_filepath: Location of the YAML file holding hyper-parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect. Path entries come
        from the ``model_trainer`` section of the config file; search settings
        come from the ``randomized_search`` section of the params file.

        Returns:
            ModelTrainerConfig: The populated, frozen configuration object.
        """
        trainer_section = self.config.model_trainer
        create_directories([trainer_section.root_dir])

        # Path-valued entries, wrapped in pathlib.Path.
        path_fields = (
            "root_dir",
            "training_features_path",
            "training_target_path",
            "testing_features_path",
            "testing_target_path",
            "saved_file",
        )
        settings = {name: Path(getattr(trainer_section, name)) for name in path_fields}

        # Hyper-parameter candidates, each materialised as a plain list.
        search_section = self.params.randomized_search
        grid_section = search_section.params
        grid_fields = (
            "n_estimators",
            "max_features",
            "max_depth",
            "min_samples_split",
            "min_samples_leaf",
        )
        for name in grid_fields:
            settings[name] = list(getattr(grid_section, name))

        # Scalar search options, coerced to their declared types.
        settings["scoring"] = str(search_section.scoring)
        for name in ("n_iter", "cv", "verbose", "random_state"):
            settings[name] = int(getattr(search_section, name))

        return ModelTrainerConfig(**settings)


In [16]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV


class ModelTrainer:
    """Fit regressors on the training split and score them on the test split.

    The score reported by this class is the root-mean-squared error (RMSE)
    between the model's predictions and the held-out target values.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the configuration and load the held-out test split.

        The first column of the features CSV is dropped and the second column
        of the target CSV is used as the target.
        NOTE(review): presumably the first column is a saved index — confirm.

        Args:
            config: Settings object providing the CSV paths, the output path
                and the randomized-search options.
        """
        self.config = config
        self.testing_features = pd.read_csv(self.config.testing_features_path).iloc[:, 1:]
        self.real_values = pd.read_csv(self.config.testing_target_path).iloc[:, 1]

    def load_training_data(self):
        """Read the training CSVs and return one frame with the target last.

        Returns:
            pandas.DataFrame: Training features (first CSV column dropped) with
            the target appended as ``concrete_compressive_strength_MPa``.
        """
        features_set = pd.read_csv(self.config.training_features_path).iloc[:, 1:]
        target_set = pd.read_csv(self.config.training_target_path).iloc[:, 1]
        features_set["concrete_compressive_strength_MPa"] = target_set
        return features_set

    def _split_training_data(self):
        """Load the training frame once and split it into (features, target)."""
        training_data = self.load_training_data()
        return training_data.iloc[:, :-1], training_data.iloc[:, -1]

    def _rmse(self, model):
        """Return the RMSE of ``model`` on the held-out test split."""
        predicted_values = model.predict(self.testing_features)
        # scikit-learn's signature is (y_true, y_pred).
        return np.sqrt(mean_squared_error(self.real_values, predicted_values))

    def by_linear_regression(self):
        """Fit an ordinary least-squares baseline.

        The test RMSE is kept on ``self.linear_regression_rmse`` rather than
        being computed and thrown away.

        Returns:
            LinearRegression: The fitted estimator.
        """
        features, target = self._split_training_data()
        linear_reg = LinearRegression()
        linear_reg.fit(features, target)
        self.linear_regression_rmse = self._rmse(linear_reg)
        return linear_reg

    def _fit_random_forest(self):
        """Run the randomized hyper-parameter search and return it fitted."""
        random_grid = {
            "n_estimators": self.config.n_estimators,
            "max_features": self.config.max_features,
            "max_depth": self.config.max_depth,
            "min_samples_split": self.config.min_samples_split,
            "min_samples_leaf": self.config.min_samples_leaf,
        }
        # Seed the forest as well as the search so repeated runs match.
        rf = RandomForestRegressor(random_state=self.config.random_state)
        search = RandomizedSearchCV(
            estimator=rf,
            param_distributions=random_grid,
            scoring=self.config.scoring,
            n_iter=self.config.n_iter,
            cv=self.config.cv,
            verbose=self.config.verbose,
            random_state=self.config.random_state,
        )
        features, target = self._split_training_data()
        search.fit(features, target)
        return search

    def by_random_forest(self):
        """Tune a random forest and report its error on the test split.

        Returns:
            float: RMSE of the tuned search's predictions on the test split.
        """
        return self._rmse(self._fit_random_forest())

    def save_file(self):
        """Train the tuned random forest and pickle it to ``config.saved_file``.

        The object written is the search's ``best_estimator_`` (the refitted
        forest). Previously the RMSE float returned by ``by_random_forest``
        was pickled instead of a model, and the file handle was never closed.
        Calling this method runs the full search again.
        """
        search = self._fit_random_forest()
        with open(self.config.saved_file, "wb") as file:
            pickle.dump(search.best_estimator_, file)
   


In [17]:
# Build the trainer from the YAML-backed configuration and run the
# random-forest hyper-parameter search.
# NOTE(review): `config` holds the ConfigurationManager itself; the actual
# settings object is `model_trainer_config`.
config= ConfigurationManager()
model_trainer_config= config.get_model_trainer_config()
model_trainer= ModelTrainer(config= model_trainer_config)
# Last expression of the cell: the test-set RMSE returned by
# by_random_forest() is shown as the cell output. Nothing is written to disk
# here because save_file() is not called.
model_trainer.by_random_forest()

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=35, max_features=sqrt, min_samples_leaf=30, min_samples_split=15, n_estimators=500; total time=   1.5s
[CV] END max_depth=35, max_features=sqrt, min_samples_leaf=30, min_samples_split=15, n_estimators=500; total time=   1.4s
[CV] END max_depth=35, max_features=sqrt, min_samples_leaf=30, min_samples_split=15, n_estimators=500; total time=   1.3s
[CV] END max_depth=35, max_features=sqrt, min_samples_leaf=30, min_samples_split=15, n_estimators=500; total time=   1.5s
[CV] END max_depth=35, max_features=sqrt, min_samples_leaf=30, min_samples_split=15, n_estimators=500; total time=   1.4s
[CV] END max_depth=60, max_features=sqrt, min_samples_leaf=50, min_samples_split=10, n_estimators=400; total time=   0.9s
[CV] END max_depth=60, max_features=sqrt, min_samples_leaf=50, min_samples_split=10, n_estimators=400; total time=   0.9s
[CV] END max_depth=60, max_features=sqrt, min_samples_leaf=50, min_samples_split=10, 

  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=30, min_samples_split=10, n_estimators=700; total time=   2.6s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=30, min_samples_split=10, n_estimators=700; total time=   2.5s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=30, min_samples_split=10, n_estimators=700; total time=   2.8s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=30, min_samples_split=10, n_estimators=700; total time=   2.5s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=30, min_samples_split=10, n_estimators=700; total time=   2.7s


  warn(


[CV] END max_depth=20, max_features=auto, min_samples_leaf=50, min_samples_split=50, n_estimators=400; total time=   1.3s


  warn(


[CV] END max_depth=20, max_features=auto, min_samples_leaf=50, min_samples_split=50, n_estimators=400; total time=   1.4s


  warn(


[CV] END max_depth=20, max_features=auto, min_samples_leaf=50, min_samples_split=50, n_estimators=400; total time=   1.6s


  warn(


[CV] END max_depth=20, max_features=auto, min_samples_leaf=50, min_samples_split=50, n_estimators=400; total time=   1.5s


  warn(


[CV] END max_depth=20, max_features=auto, min_samples_leaf=50, min_samples_split=50, n_estimators=400; total time=   1.2s
[CV] END max_depth=120, max_features=sqrt, min_samples_leaf=30, min_samples_split=20, n_estimators=500; total time=   1.5s
[CV] END max_depth=120, max_features=sqrt, min_samples_leaf=30, min_samples_split=20, n_estimators=500; total time=   1.6s
[CV] END max_depth=120, max_features=sqrt, min_samples_leaf=30, min_samples_split=20, n_estimators=500; total time=   1.6s
[CV] END max_depth=120, max_features=sqrt, min_samples_leaf=30, min_samples_split=20, n_estimators=500; total time=   1.4s
[CV] END max_depth=120, max_features=sqrt, min_samples_leaf=30, min_samples_split=20, n_estimators=500; total time=   1.9s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=4, min_samples_split=50, n_estimators=200; total time=   1.6s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=4, min_samples_split=50, n_estimators=200; total time=   1.5s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=4, min_samples_split=50, n_estimators=200; total time=   1.4s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=4, min_samples_split=50, n_estimators=200; total time=   1.5s


  warn(


[CV] END max_depth=60, max_features=auto, min_samples_leaf=4, min_samples_split=50, n_estimators=200; total time=   1.5s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=9, min_samples_split=15, n_estimators=200; total time=   1.3s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=9, min_samples_split=15, n_estimators=200; total time=   1.1s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=9, min_samples_split=15, n_estimators=200; total time=   1.3s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=9, min_samples_split=15, n_estimators=200; total time=   1.1s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=9, min_samples_split=15, n_estimators=200; total time=   1.1s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=50, min_samples_split=10, n_estimators=100; total time=   0.4s
[CV] END max_depth=110, max_features=sqrt, min_samples_leaf=50, min_samples_split=10, n_estimators=100; total time=   0.5s
[CV] END max_depth=110,

  warn(


7.838609750042632