In [1]:
import os
# Display the kernel's current working directory as this cell's result.
%pwd

'd:\\ML-Projects\\04-Mobile-Price-Predictor\\research'

In [2]:
os.chdir("../")
%pwd

'd:\\ML-Projects\\04-Mobile-Price-Predictor'

In [3]:
import warnings

# Blanket-suppress every warning category for the rest of the kernel session.
warnings.simplefilter("ignore")


In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle for the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory belonging to this stage.
    root_dir: Path
    # Location of the training CSV.
    train_data_path: Path
    # Directory that the trainer joins "svc_model.pkl" onto when saving.
    trained_model_path: Path
    # Name of the label column inside the training CSV.
    target_column: str
    # Hyperparameters handed to sklearn.svm.SVC as C=, gamma= and kernel=.
    C: int
    gamma: str
    kernel: str

In [5]:
from Mobile_Price_Predictor.constants import *
from Mobile_Price_Predictor.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path handed to ``read_yaml`` for the main config;
                defaults to ``CONFIG_FILE_PATH``.
            params_filepath: Path handed to ``read_yaml`` for the parameters;
                defaults to ``PARAMS_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is created up front, before any stage config is requested.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Create the trainer's root directory and assemble its configuration.

        Returns:
            A ``ModelTrainerConfig`` whose paths and target column come from the
            ``model_trainer`` section of the config file, and whose ``C``,
            ``gamma`` and ``kernel`` come from the ``TrainingArguments`` section
            of the params file.
        """
        trainer_section = self.config.model_trainer
        svc_args = self.params.TrainingArguments

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            target_column=trainer_section.target_column,
            trained_model_path=trainer_section.trained_model_path,
            C=svc_args.C,
            gamma=svc_args.gamma,
            kernel=svc_args.kernel,
        )

In [7]:
import os
from Mobile_Price_Predictor.utils.common import read_yaml, create_directories
from Mobile_Price_Predictor.logging import logger
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import os
import pandas as pd

In [8]:
class ModelTrainer:
    """Trains an SVC classifier on the configured CSV and persists the result."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the stage configuration used by :meth:`train`.

        Args:
            config: Paths, target column name and SVC hyperparameters.
        """
        self.config = config

    def train(self, *, test_size=0.2, random_state=42):
        """Fit, evaluate and save a standardised SVC model.

        Steps: read the training CSV, split off a validation set, standardise
        the features, fit the SVC with the configured hyperparameters, print
        the validation accuracy, then write the artefacts to disk.

        Both the fitted model (``svc_model.pkl``) and the fitted scaler
        (``scaler.pkl``) are written into ``config.trained_model_path``. The
        model is trained on standardised features, so the scaler is required
        to transform raw inputs the same way at prediction time.

        Args:
            test_size: Fraction (or count) of rows held out for validation;
                forwarded to ``train_test_split``. Defaults to 0.2.
            random_state: Seed for the split; forwarded to
                ``train_test_split``. Defaults to 42.
        """
        # Load data (assumes CSV format with feature columns and one target column).
        data = pd.read_csv(self.config.train_data_path)
        X = data.drop(columns=[self.config.target_column])
        y = data[self.config.target_column]

        # Split data into train and validation sets.
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit the scaler on the training split only, then reuse its statistics
        # for the validation split.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)

        # Define and train the model with the configured hyperparameters.
        model_svc = SVC(
            C=self.config.C,
            gamma=self.config.gamma,
            kernel=self.config.kernel,
        )
        model_svc.fit(X_train, y_train)

        # Evaluate on the held-out split.
        y_pred = model_svc.predict(X_val)
        accuracy = accuracy_score(y_val, y_pred)
        print(f"Validation Accuracy: {accuracy:.4f}")

        # Save the model and the scaler side by side; make sure the target
        # directory exists so the dump cannot fail on a missing folder.
        output_dir = self.config.trained_model_path
        os.makedirs(output_dir, exist_ok=True)
        joblib.dump(model_svc, os.path.join(output_dir, "svc_model.pkl"))
        joblib.dump(scaler, os.path.join(output_dir, "scaler.pkl"))
        logger.info("Model Trained and Saved Successfully")

In [9]:
# Run the training stage end to end: load configuration, build the trainer, train.
# The former try/except only re-raised the caught exception, so it is dropped;
# errors propagate to the notebook exactly as before.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Use a dedicated name for the trainer instead of rebinding model_trainer_config,
# so each variable keeps referring to one kind of object.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-05-22 19:15:56,545: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-22 19:15:56,547: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-22 19:15:56,549: INFO: common: created directory at: artifacts]
[2024-05-22 19:15:56,550: INFO: common: created directory at: artifacts/model_trainer]
Validation Accuracy: 0.9725
[2024-05-22 19:15:56,678: INFO: 2018096513: Model Trained and Saved Successfully]
