In [1]:
import os

In [2]:
pwd


'c:\\Users\\Robin Aluma\\Desktop\\Car_Price_Pred\\Resources'

In [3]:
os.chdir('../')
from dataclasses import dataclass
from pathlib import Path
from src.Car_Price_Pred.constants import *
import yaml
from src.Car_Price_Pred.utils.common import read_yaml,create_directories,save_models,save_parameters

In [4]:
# Model training configuration
@dataclass
class ModelTrainingConfig():
    root_url: Path
    train_data: Path
    params: dict
    fit_intercept: bool
    n_jobs: int
    max_depth: int
    min_samples_split: int
    min_samples_leaf: int
    random_state: int
    n_estimators: int
    alpha: float
    max_iter: int
    tol: float
    n_neighbors: int
    weights: str
    algorithm: str
    p: int
    gamma: str
    kernel: str
    C: float
    epsilon: float

In [5]:
# Creating model training configuration manager

class ModelTrainerConfigurationManager:
    def __init__(self,config_file_path = CONFIG_FILE_PATH,
                 param_file_path = PARAMS_FILE_PATH):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(param_file_path)
        
        create_directories([self.config.Artifacts_root])
        
    def get_model_trainer_config(self)->ModelTrainingConfig:
        config = self.config.model_training
        params = self.params.parameters
        
        create_directories([config.root_url])
        
        get_model_trainer_config = ModelTrainingConfig(
            root_url = config.root_url,
            train_data = config.train_data,
            params = params,
            fit_intercept = params.fit_intercept,
            n_jobs = params.n_jobs,
            max_depth = params.max_depth,
            min_samples_split = params.min_samples_split,
            min_samples_leaf = params.min_samples_leaf,
            random_state = params.random_state,
            n_estimators = params.n_estimators,
            alpha = params.alpha,
            max_iter = params.max_iter,
            tol = params.tol,
            n_neighbors = params.n_neighbors,
            weights = params.weights,
            algorithm = params.algorithm,
            p = params.p,
            gamma = params.gamma,
            kernel = params.kernel,
            C = params.C,
            epsilon = params.epsilon
            
        )
        
        return get_model_trainer_config

In [6]:
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import  train_test_split
import pandas as pd
import sys
from Exceptions import CustomException
import joblib 

In [7]:
class ModelTrainer():
    def __init__(self,config: ModelTrainingConfig):
        self.config = config
        
    def get_train_and_test_data(self):
        # Gathering the training and testing data
        train_data_path = self.config.train_data
        
        train = pd.read_csv(train_data_path,header=None)
        train.fillna(train.mean(),inplace=True)
        
        X = train.iloc[:,:-1]
        Y = train.iloc[:,-1]
        
        x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.3,random_state=42)
        
        # Create dictionary of models
        models = {
            'linear_regression': LinearRegression(fit_intercept=self.config.fit_intercept,n_jobs=self.config.n_jobs),
            'decision_tree': DecisionTreeRegressor(max_depth=self.config.max_depth,min_samples_split=self.config.min_samples_split,min_samples_leaf=self.config.min_samples_leaf,random_state=self.config.random_state),
            'random_forest': RandomForestRegressor(n_estimators=self.config.n_estimators,max_depth=self.config.max_depth),
            'ridge': Ridge(alpha=self.config.alpha,fit_intercept=self.config.fit_intercept),
            'lasso': Lasso(alpha=self.config.alpha,fit_intercept=self.config.fit_intercept),
            'neighbors': KNeighborsRegressor(n_neighbors=self.config.n_neighbors,weights=self.config.weights,algorithm=self.config.algorithm,p=self.config.p),
            'svm': SVR(kernel=self.config.kernel,C=self.config.C,gamma=self.config.gamma,epsilon=self.config.epsilon)
        }
        
        best_model = None
        best_score = 0
        for name, model in models.items():
            try:
                model.fit(x_train, y_train)
                predictions = model.predict(x_test)
                params = model.get_params()
                score = r2_score(y_test, predictions)
                if score > best_score:
                    best_model = model
                    best_score = score
                    
            except Exception as e:
                raise CustomException(e,sys)
            
            print(f"Model:{best_model} Score:{best_score}")

In [8]:
try:
    config = ModelTrainerConfigurationManager()
    get_model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=get_model_trainer_config)
    model_trainer.get_train_and_test_data()
except Exception as e:
    raise CustomException(e,sys)

[ 2025-01-22 12:07:07,521 : common : INFO : Creates directory for path;artifacts in file paths :['artifacts'] ]
[ 2025-01-22 12:07:07,523 : common : INFO : Creates directory for path;artifacts/model_training in file paths :['artifacts/model_training'] ]
Model:LinearRegression(n_jobs=1) Score:0.7778757024516244
Model:LinearRegression(n_jobs=1) Score:0.7778757024516244
Model:RandomForestRegressor(max_depth=10) Score:0.8824415558134022
Model:RandomForestRegressor(max_depth=10) Score:0.8824415558134022
Model:RandomForestRegressor(max_depth=10) Score:0.8824415558134022
Model:RandomForestRegressor(max_depth=10) Score:0.8824415558134022
Model:RandomForestRegressor(max_depth=10) Score:0.8824415558134022
