In [1]:
import os

In [2]:
%pwd

'd:\\Projects\\Concrete_Prediction\\Concrete_Compressive_Strength_Prediction\\notebooks'

In [3]:
# Step up to the project root only while the kernel is still inside the
# notebooks/ folder, so re-running this cell does not keep climbing the
# directory tree.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('../')

In [4]:
%pwd

'd:\\Projects\\Concrete_Prediction\\Concrete_Compressive_Strength_Prediction'

In [5]:
from sklearn.metrics import r2_score

In [6]:
def evaluate_model(X_train, y_train, X_test, y_test, models):
    """Fit every candidate model and collect its train/test R2 scores.

    Args:
        X_train: Training features, passed to ``fit`` and ``predict``.
        y_train: Training targets.
        X_test: Held-out features, passed to ``predict``.
        y_test: Held-out targets.
        models: Mapping of model name -> estimator exposing ``fit`` and
            ``predict``. Each estimator is fitted in place.

    Returns:
        dict: ``{model_name: {'train_score': ..., 'test_score': ...}}`` with
        one entry per item of ``models``.

    Raises:
        customexception: Wraps any exception raised while fitting or scoring.
    """
    try:
        scores_by_model = {}

        for name, estimator in models.items():
            # A single fit on the full training set is all that is needed
            estimator.fit(X_train, y_train)

            # Held-out predictions are computed first, then training ones
            test_predictions = estimator.predict(X_test)
            train_predictions = estimator.predict(X_train)

            # R2 on both splits
            train_r2 = r2_score(y_train, train_predictions)
            test_r2 = r2_score(y_test, test_predictions)

            scores_by_model[name] = dict(train_score=train_r2, test_score=test_r2)

        return scores_by_model

    except Exception as err:
        logging.info('Exception occurred during model training')

        raise customexception(err, sys)

In [11]:
import os
import sys
import pickle
import numpy as np
import pandas as pd
from src.concrete_strength_prediction.exception import customexception
from src.concrete_strength_prediction.logger import logging
from dataclasses import dataclass
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor


In [9]:
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step.

    Attributes:
        trained_model_file_path: Where the pickled best model is written.
            Defaults to ``artifacts/model.pkl`` (a relative path).
    """
    # The type annotation is what turns this into a real dataclass field;
    # without it the attribute is a plain class variable that is absent from
    # __init__/__repr__ and cannot be overridden per instance.
    trained_model_file_path: str = os.path.join('artifacts', 'model.pkl')

In [13]:
class ModelTrainer:
    """Trains a fixed set of regressors and pickles the best-scoring one."""

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()
    
    def initate_model_training(self,train_array,test_array):
        """Fit all candidate regressors and save the one with the best test R2.

        Args:
            train_array: 2-D array; every column but the last is a feature,
                the last column is the target.
            test_array: 2-D array with the same column layout as
                ``train_array``.

        Returns:
            None. The best model is written to
            ``self.model_trainer_config.trained_model_file_path``.

        Raises:
            customexception: Wraps any exception raised during training,
                evaluation or saving.
        """
        try:
            logging.info('Splitting Dependent and Independent variables from train and test data')
            X_train, y_train, X_test, y_test = (
                train_array[:,:-1],
                train_array[:,-1],
                test_array[:,:-1],
                test_array[:,-1]
            )

            # Log only the first 5 rows of each split to keep the log readable
            logging.info(f'X_train (first 5 rows):\n{pd.DataFrame(X_train).head()}')
            logging.info(f'y_train (first 5 rows):\n{pd.DataFrame(y_train).head()}')
            logging.info(f'X_test (first 5 rows):\n{pd.DataFrame(X_test).head()}')
            logging.info(f'y_test (first 5 rows):\n{pd.DataFrame(y_test).head()}')

            models={
                'LinearRegression':LinearRegression(),
                'DecisionTreeRegressor':DecisionTreeRegressor(),
                'Ridge': Ridge(alpha=1.0),
                'Lasso': Lasso(alpha=1.0),
                'RandomForestRegressor':RandomForestRegressor(),
                'KNN': KNeighborsRegressor(),
                'GradientBoosting': GradientBoostingRegressor(),
                'SVR': SVR(kernel='linear'),
                'AdaBoosting': AdaBoostRegressor()}
            
            logging.info('Evaluating models...')
            model_report:dict=evaluate_model(X_train,y_train,X_test,y_test,models)
            print(model_report)
            print('\n====================================================================================\n')
            logging.info(f'Model Report : {model_report}')

            # Pick the model name with the highest test-set R2 directly from
            # the report keys (ties resolve to the first model listed), then
            # read the scalar score so the message shows a number rather than
            # the whole {'train_score', 'test_score'} dict.
            best_model_name = max(
                model_report, key=lambda name: model_report[name]['test_score']
            )
            best_model_score = model_report[best_model_name]['test_score']

            best_model = models[best_model_name]

            # The metric is R2 (see evaluate_model), not a classification accuracy
            best_model_message = (
                f'Best Model Found , Model Name : {best_model_name} , R2 Score : {best_model_score}'
            )
            print(best_model_message)
            print('\n====================================================================================\n')
            logging.info(best_model_message)

            save_object(
                 file_path=self.model_trainer_config.trained_model_file_path,
                 obj=best_model
            )
          

        except Exception as e:
            logging.info('Exception occured at Model Training')
            raise customexception(e,sys)

In [14]:
def save_object(file_path, obj):
    """Pickle ``obj`` to ``file_path``, creating parent directories if needed.

    Args:
        file_path: Destination path of the pickle file. May be a bare
            filename, in which case the file is written to the current
            working directory.
        obj: Any picklable Python object.

    Raises:
        customexception: Wraps any error raised while creating directories
            or writing the file.
    """
    try:
        dir_path = os.path.dirname(file_path)

        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create directories when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)

    except Exception as e:
        raise customexception(e, sys)

In [21]:
from src.concrete_strength_prediction.pipelines.training_pipeline import train_data_path,test_data_path,train_arr,test_arr


[2024-02-02 22:25:20,234: INFO: training_pipeline: Training Pipeline has started]
[2024-02-02 22:25:20,239: INFO: data_ingestion: Data ingestion has started]
[2024-02-02 22:25:20,668: INFO: data_ingestion: shape of the dataset : (1030, 9)]
[2024-02-02 22:25:20,751: INFO: data_ingestion: Duplicates rows had been removed]
[2024-02-02 22:25:20,754: INFO: data_ingestion: shape of the dataset : (1005, 9)]
[2024-02-02 22:25:20,874: INFO: data_ingestion: files saved to artifacts]
[2024-02-02 22:25:20,877: INFO: data_transformation: Reading train and test data from CSV files]
[2024-02-02 22:25:20,953: INFO: data_transformation: Read train and test data complete]
[2024-02-02 22:25:20,986: INFO: data_transformation: Train Dataframe Head:
   cement  blast_furnace_slag  fly_ash  water  superplasticizer  coarse_aggregate  fine_aggregate   age  concrete_compressive_strength
0   531.3                 0.0      0.0  141.8              28.2             852.1            893.7    3                        

In [23]:
# Train all candidate regressors on the prepared arrays and save the best one
model_trainer_obj = ModelTrainer()
model_trainer_obj.initate_model_training(train_arr, test_arr)

[2024-02-02 22:26:25,197: INFO: 536573678: Splitting Dependent and Independent variables from train and test data]
[2024-02-02 22:26:25,218: INFO: 536573678: X_train (first 5 rows):
          0         1         2         3         4         5         6  \
0  2.000063 -0.836051 -0.934166  2.506114 -1.157347 -0.554398 -2.418435   
1  0.047662 -0.836051  0.576817 -1.109865  1.078455 -0.012290  1.327617   
2  1.615305 -0.836051 -0.934166  1.969619 -1.157347 -1.765407 -0.746280   
3 -1.040513  2.037186 -0.934166  1.191701 -1.157347  0.009704 -1.106784   
4  0.231162 -0.836051  0.548894 -0.525085  0.528978 -0.255527  1.109452   

          7  
0 -0.899289  
1 -1.043372  
2 -0.142854  
3 -0.899289  
4  2.450637  ]
[2024-02-02 22:26:25,226: INFO: 536573678: y_train (first 5 rows):
       0
0  38.60
1  23.14
2  23.85
3  15.75
4  52.04]
[2024-02-02 22:26:25,245: INFO: 536573678: X_test (first 5 rows):
          0         1         2         3         4         5         6  \
0 -1.179618  0.8256