In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Sentiment Analysis of Amazon Reviews\\research'

In [3]:
# Step up from research/ to the project root so that relative paths used later
# (e.g. config/config.yaml) resolve. The guard makes the cell idempotent:
# re-running it no longer climbs one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Sentiment Analysis of Amazon Reviews'

In [5]:
from dataclasses import dataclass
from pathlib import Path
from typing import Any


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Built by ``ConfigurationManager.get_model_trainer_config`` from the
    ``model_trainer`` section of the config file plus the params file.
    ``frozen=True`` means fields cannot be reassigned after construction.
    """

    # --- locations (taken from the `model_trainer` config section) ---
    root_dir: Path          # working folder for this stage
    data_path: Path         # folder holding train_ft.txt and test_ft.txt
    model_save_path: Path   # destination passed to model.save()

    # --- training hyper-parameters (taken from the params file) ---
    epochs: int             # passed to model.fit
    classes: int            # number of units in the final softmax layer
    learning_rate: float    # Adam learning rate
    input_dtype: int        # NOTE(review): not read by the visible training code; confirm the int type
    params: Any             # the full params object, kept for ad-hoc access
    random_state: int       # seed applied to TensorFlow and NumPy in ModelTrainer
    max_tokens: int         # TextVectorization vocabulary cap; also used as Embedding input_dim
    output_sequence_length: int  # fixed token length produced by TextVectorization
    input_dim: int          # NOTE(review): not read by the visible training code
    output_dim: int         # Embedding vector size
    batch_size: int         # passed to model.fit
    label_col: str          # NOTE(review): not read by the visible training code


In [6]:
from typing import Union
from pathlib import Path
from sentimentanalyzer.utils.common import read_yaml, create_directories, set_seed# adjust import as needed
from sentimentanalyzer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
from dataclasses import dataclass

In [7]:

# Relative locations of the two YAML files read by ConfigurationManager.
# These assignments rebind the names imported from sentimentanalyzer.constants
# in an earlier cell, so the values below are the ones actually used as defaults.
CONFIG_FILE_PATH, PARAMS_FILE_PATH = Path("config/config.yaml"), Path("params.yaml")

class ConfigurationManager:
    """Load the config and params YAML files and build per-stage config objects.

    Attributes:
        config: Parsed contents of the config file (as returned by ``read_yaml``).
        params: Parsed contents of the params file (as returned by ``read_yaml``).
    """

    def __init__(
        self,
        config_filepath: Union[str, Path] = CONFIG_FILE_PATH,
        params_filepath: Union[str, Path] = PARAMS_FILE_PATH
    ):
        """Read both YAML files and make sure the artifacts root folder exists.

        Args:
            config_filepath: Path to the main config YAML.
            params_filepath: Path to the params YAML.

        Raises:
            KeyError: If the config file has no ``artifacts_root`` entry.
        """
        self.config = read_yaml(config_filepath)
        print(">>> CONFIG CONTENTS:", self.config)
        print(">>> CONFIG KEYS:", list(self.config.keys()))

        self.params = read_yaml(params_filepath)
        print(">>> PARAMS CONTENTS:", self.params)
        print(">>> PARAMS KEYS:", list(self.params.keys()))

        # Fail early with an explicit message instead of an attribute error
        # further down when the key is absent.
        if 'artifacts_root' not in self.config:
            raise KeyError("Missing 'artifacts_root' in config.yaml")

        # Single call: the artifacts root is requested exactly once.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the training stage.

        Paths come from the ``model_trainer`` section of the config file;
        every hyper-parameter comes from the params file. The stage's
        ``root_dir`` is passed to ``create_directories`` as a side effect.

        Returns:
            A populated, immutable ``ModelTrainerConfig``.
        """
        section = self.config.model_trainer
        params = self.params

        # Make sure the model-trainer folder exists before anything writes to it.
        create_directories([section.root_dir])

        return ModelTrainerConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
            model_save_path=section.model_save_path,
            epochs=params.epochs,
            classes=params.classes,
            learning_rate=params.learning_rate,
            input_dtype=params.input_dtype,
            params=params,
            random_state=params.random_state,
            max_tokens=params.max_tokens,
            output_sequence_length=params.output_sequence_length,
            input_dim=params.input_dim,
            output_dim=params.output_dim,
            batch_size=params.batch_size,
            label_col=params.label_col
        )


In [8]:
import ast
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
from tensorflow.keras.layers import TextVectorization, Embedding, GlobalAveragePooling1D, Dense,Dropout
from tensorflow.keras.models import Sequential
from sentimentanalyzer.logging import logger
from src.sentimentanalyzer.utils.common import load_saved_labels_and_texts,preprocess_ft_txt,load_fasttext_file
from sklearn.model_selection import train_test_split


In [9]:



class ModelTrainer:
    """Load the prepared text data, then build, train and save a Conv1D classifier.

    Attributes:
        config: The ``ModelTrainerConfig`` driving this run.
        params: Alias of ``config`` (``train`` reads some values through it).
        train_texts, train_labels: Training portion after the validation split.
        val_texts, val_labels: 20% hold-out taken from the training file.
        test_texts, test_labels: Contents of the test file (loaded, not used by ``train``).
    """

    def __init__(self, config: 'ModelTrainerConfig'):
        """Seed the RNGs, load the train/test files and carve out a validation set.

        Args:
            config: Training settings. ``config.data_path`` must contain
                ``train_ft.txt`` and ``test_ft.txt``, both readable by
                ``load_fasttext_file``.
        """
        self.config = config
        self.params = config
        tf.random.set_seed(self.config.random_state)
        np.random.seed(self.config.random_state)

        data_dir = Path(config.data_path)
        self.train_texts, self.train_labels = load_fasttext_file(data_dir / "train_ft.txt")
        self.test_texts, self.test_labels = load_fasttext_file(data_dir / "test_ft.txt")

        # Hold out 20% of the training data for validation. The split uses the
        # configured seed (not a hard-coded one) so that changing
        # `random_state` in the params file affects the whole run consistently.
        (self.train_texts, self.val_texts,
         self.train_labels, self.val_labels) = train_test_split(
            self.train_texts,
            self.train_labels,
            test_size=0.2,
            random_state=self.config.random_state,
        )

    def train(self):
        """Build, compile, fit and save the model.

        The network takes raw strings, vectorizes them with a
        ``TextVectorization`` layer adapted on the training texts, embeds the
        tokens, applies three Conv1D stages with pooling, and ends in a
        softmax layer with ``classes`` units.

        Returns:
            The trained Keras model (also saved to ``config.model_save_path``).
        """
        # 1) Input: one scalar string per example.
        inputs = tf.keras.Input(
            shape=(),
            dtype=tf.string
        )

        # 2) Text -> integer token ids; vocabulary is learned from the training texts only.
        vectorizer = layers.TextVectorization(
            max_tokens=self.config.max_tokens,
            output_sequence_length=self.config.output_sequence_length,
            standardize="lower_and_strip_punctuation",
            split="whitespace"
        )
        vectorizer.adapt(self.train_texts)
        x = vectorizer(inputs)

        # 3) Token ids -> dense vectors. input_dim matches the vectorizer's vocabulary cap.
        x = layers.Embedding(
            input_dim=self.config.max_tokens,
            output_dim=self.config.output_dim,
            mask_zero=True
        )(x)

        # 4) Convolutional feature extractor.
        x = layers.Conv1D(64, kernel_size=5, padding="same", activation="relu")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPool1D(3)(x)
        x = layers.Conv1D(64, 5, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPool1D(5)(x)
        x = layers.Conv1D(64, 5, activation='relu')(x)
        x = layers.GlobalMaxPool1D()(x)
        x = layers.Flatten()(x)
        x = layers.Dense(20, activation='relu')(x)

        # 5) Classification head: one unit per class.
        outputs = layers.Dense(
            units=self.params.classes,
            activation="softmax",
            name="classifier"
        )(x)
        model = models.Model(inputs=inputs, outputs=outputs, name="EmbeddingConv1DModel")

        # 6) Compile. Sparse categorical cross-entropy expects integer class labels.
        model.compile(
            optimizer=optimizers.Adam(learning_rate=self.params.learning_rate),
            loss=losses.SparseCategoricalCrossentropy(),
            metrics=["accuracy"]
        )
        model.summary()

        # 7) Train, validating on the hold-out split after each epoch.
        model.fit(
            x=np.array(self.train_texts),
            y=np.array(self.train_labels),
            batch_size=self.config.batch_size,
            epochs=self.config.epochs,
            validation_data=(
                np.array(self.val_texts),
                np.array(self.val_labels)
            )
        )

        # 8) Save. NOTE(review): the `save_format` argument belongs to the
        # TF2-era Keras API; confirm it is accepted by the installed version.
        model.save(self.config.model_save_path, save_format='tf')
        logger.info(f"Model saved to {self.config.model_save_path}")

        return model


In [10]:
# Run the training stage end to end: load configuration, build the trainer, train.
# There is deliberately no try/except here: a handler that only re-raises adds
# nothing, and letting the exception propagate keeps the traceback clean.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
# Keep the returned model so later cells can inspect it (and so the cell
# does not echo the model repr as its output).
trained_model = model_trainer.train()

[2025-06-17 21:00:35,204: INFO: common: yaml file: config\config.yaml loaded successfully]
>>> CONFIG CONTENTS: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://github.com/Spencer0013/NLP-Text-Summarizer-Project/raw/refs/heads/main/Dataa.zip', 'local_data_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}, 'data_preprocessing': {'root_dir': 'artifacts/data_preprocessing', 'ingestion_dir': 'artifacts/data_ingestion', 'output_dir': 'artifacts/data_preprocessing'}, 'data_transformation': {'root_dir': 'artifacts/data_transformation', 'data_path': 'artifacts/data_preprocessing'}, 'model_trainer': {'root_dir': 'artifacts/model_trainer', 'data_path': 'artifacts/data_transformation', 'model_save_path': 'artifacts/model_trainer/sentiment_model'}, 'model_trainer_use': {'root_dir': 'artifacts/model_trainer_USE', 'data_path': 'artifacts/data_preprocessing', 'use_model_path': 'https://tfhub.dev/google