In [1]:
import os

In [2]:
# Show the kernel's working directory before it is changed in the next cell.
%pwd

'/home/amul/Desktop/projects/nlp_usecases/mlops_sentiment_analysis/research'

In [3]:
# Move the kernel up one level so the `src` package imported by later cells
# and the relative config paths resolve from the working directory.
# The guard keeps this cell safe to re-run: an unconditional relative chdir
# would climb one more directory on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
# Confirm the working directory after the move (expected: the project root).
%pwd

'/home/amul/Desktop/projects/nlp_usecases/mlops_sentiment_analysis'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    Instances are assembled by ``ConfigurationManager.get_training_config``
    and consumed by ``Training``.
    """

    # Output directory for this stage; created when the config is built.
    root_dir: Path
    # Serialized base model that ``Training.get_base_model`` loads.
    base_model_path: Path
    # Destination file for the fitted model written by ``Training.train``.
    trained_model_path: Path
    # Path taken from the ``prepare_base_model`` config section.
    updated_base_model_path: Path
    # Directory expected to contain ``train.csv``.
    training_data: Path
    # Value of ``C`` from the params file. The recorded run uses 0.05, so this
    # is a float, not an int.
    # NOTE(review): presumably the regularization strength of the model; confirm.
    params_c: float

In [6]:
from src.sentiment_analysis.constants import *
from src.sentiment_analysis.utils.common import read_yaml, create_directories
# import tensorflow as tf

In [7]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and ensure the artifacts root directory exists.

        Args:
            config_filepath: Path of the config YAML to read.
            params_filepath: Path of the params YAML to read.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the ``TrainingConfig`` for the training stage.

        Reads the ``training``, ``prepare_base_model`` and ``data_cleaning``
        sections of the loaded config plus ``C`` from the params, and creates
        the stage's root directory before returning.

        Returns:
            A ``TrainingConfig`` whose path fields are ``Path`` objects.
        """
        settings = self.config
        training_section = settings.training
        base_model_section = settings.prepare_base_model
        hyperparams = self.params
        data_dir = settings.data_cleaning.data_non_transformer

        # The stage directory must exist before anything is written into it.
        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=stage_root,
            base_model_path=Path(training_section.base_model_path),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(data_dir),
            params_c=hyperparams.C,
        )

In [8]:
import os
import urllib.request as request
from zipfile import ZipFile
import joblib
import time
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from src.sentiment_analysis.entity.config_entity import TrainingConfig
from pathlib import Path
from sklearn.model_selection import train_test_split


In [11]:
class Training:
    """Fits the saved base model on the training CSV and stores the result.

    Usage in this notebook: construct with a ``TrainingConfig``, optionally
    call ``get_base_model()``, then call ``train()``.
    """

    def __init__(self, config: TrainingConfig):
        """Keep the stage configuration and print it for inspection.

        Args:
            config: Paths used by this stage (``base_model_path``,
                ``training_data``, ``trained_model_path``).
        """
        self.config = config
        # Defined up front so `train()` can tell whether a model was loaded.
        self.model = None
        print(config)

    def get_base_model(self):
        """Load the base model from ``config.base_model_path`` into ``self.model``."""
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; only point this at artifacts produced by this project.
        self.model = joblib.load(self.config.base_model_path)

    @staticmethod
    def save_model(path: Path, model: BaseEstimator):
        """Serialize ``model`` to ``path`` with joblib.

        Args:
            path: Destination file.
            model: Estimator to persist.
        """
        joblib.dump(model, path)

    def train(self):
        """Fit the model on ``train.csv`` and save it to ``config.trained_model_path``.

        The CSV is read from the ``config.training_data`` directory and must
        provide ``text`` and ``label`` columns. If no model has been loaded
        yet, the base model is loaded first, so calling ``train()`` without a
        prior ``get_base_model()`` no longer fails with ``AttributeError``.
        """
        if self.model is None:
            self.get_base_model()

        # Join with pathlib instead of concatenating a hard-coded "/" separator.
        train_csv = Path(self.config.training_data) / "train.csv"
        df_train = pd.read_csv(train_csv)

        self.model.fit(df_train["text"], df_train["label"])
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model,
        )


In [12]:

# Run the training stage end to end: load settings, build the stage config,
# load the base model, then fit and save it.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
# NOTE(review): a call to `train_valid_generator()` is disabled here; the
# `Training` class defined in this notebook has no such method.
training.train()


[2024-02-25 14:54:47,914: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-02-25 14:54:47,915: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-25 14:54:47,915: INFO: common: created directory at: artifacts]
[2024-02-25 14:54:47,916: INFO: common: created directory at: artifacts/training]
TrainingConfig(root_dir=PosixPath('artifacts/training'), base_model_path=PosixPath('artifacts/prepare_base_model/base_model.pkl'), trained_model_path=PosixPath('artifacts/training/model.pkl'), updated_base_model_path=PosixPath('artifacts/prepare_base_model/base_model_updated.pkl'), training_data=PosixPath('artifacts/data_cleaning/basic_ml'), params_c=0.05)


In [None]:
# Run the training stage end to end: load settings, build the stage config,
# load the base model, then fit and save it.
# Errors are deliberately left to propagate so the notebook shows the full
# traceback; a handler that only re-raises the same exception adds nothing.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train()