In [1]:
import os
os.chdir('../')
from src.sentiment_analysis.config.configuration import ConfigurationManager
from src.sentiment_analysis.components.data_cleaning import DataCleaning
from src.sentiment_analysis import logger

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    params_c: int

@dataclass(frozen=True)
class PrepareBaseModelConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    params_c: int

In [3]:
from src.sentiment_analysis.constants import *
from src.sentiment_analysis.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        
        create_directories([self.config.artifacts_root])

        

    def get_training_config(self) -> TrainingConfig:
        training = self.config.data_cleaning
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = self.config.data_non_transformer
        create_directories([
            Path(training.root_dir)
        ])
     
        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_c=params.C,
            
        )

        return training_config
    
    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        config = self.config.prepare_base_model
        training_data = self.config.data_cleaning.data_non_transformer
        print(config)
        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            trained_model_path=config.base_model_path,
            updated_base_model_path=config.updated_base_model_path,
            training_data=training_data,
            params_c=self.params.C,
        )

        return prepare_base_model_config

In [8]:
config = ConfigurationManager()
config.get_prepare_base_model_config()

[2024-02-25 12:55:45,727: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-02-25 12:55:45,728: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-25 12:55:45,728: INFO: common: created directory at: artifacts]
{'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.pkl', 'updated_base_model_path': 'artifacts/prepare_base_model/base_model_updated.pkl'}
[2024-02-25 12:55:45,728: INFO: common: created directory at: artifacts/prepare_base_model]


PrepareBaseModelConfig(root_dir=PosixPath('artifacts/prepare_base_model'), trained_model_path='artifacts/prepare_base_model/base_model.pkl', updated_base_model_path='artifacts/prepare_base_model/base_model_updated.pkl', training_data='artifacts/data_cleaning/basic_ml', params_c=0.05)

In [12]:
import os
import urllib.request as request
# from zipfile import ZipFile
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import CountVectorizer

import joblib
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from pathlib import Path
from src.sentiment_analysis.entity.config_entity import PrepareBaseModelConfig
                                                




class PrepareBaseModel:
    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    
    def get_base_model(self):
        self.model  = Pipeline([
            ('vectorizer', CountVectorizer(binary=True)),
            ('classifier', LinearSVC(random_state=10, C=self.config.params_c))
        ])
        # self.model = LinearSVC(random_state=10,C=self.config.params)

        self.save_model(path=self.config.trained_model_path, model=self.model)

    @staticmethod
    def save_model(path: Path, model: BaseEstimator):
        joblib.dump(model, path)
        # model.save(path)
        # model = joblib.load(f'{path}/base_model.pkl')





In [13]:
try:
    config = ConfigurationManager()
    prepare_base_model_config = config.get_prepare_base_model_config()
    prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
    prepare_base_model.get_base_model()
except Exception as e:
    raise e

[2024-02-25 13:38:10,541: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-02-25 13:38:10,542: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-25 13:38:10,542: INFO: common: created directory at: artifacts]
{'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.pkl', 'updated_base_model_path': 'artifacts/prepare_base_model/base_model_updated.pkl'}
[2024-02-25 13:38:10,543: INFO: common: created directory at: artifacts/prepare_base_model]
