In [3]:
import os

# Step up from the notebook's folder to the project root so the `src.*` imports
# in the next cell resolve. The guard makes this cell idempotent: once the
# working directory already contains `src`, re-running the cell no longer
# climbs one more level each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [13]:
import pickle
import numpy as np
import pandas as pd
from dataclasses import dataclass
from pathlib import Path
from langchain_huggingface import HuggingFaceEmbeddings
from src.movieRecommendation.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.movieRecommendation.utils.common import read_yaml, create_directories

In [14]:
@dataclass
class ModelTrainerConfig:
    """Settings consumed by the model-trainer stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    from the ``model_trainer`` section of the project configuration.
    """

    # Output directory of the stage; ModelTrainer writes "movie_embeddings.pkl" here.
    root_dir: Path
    # Directory that ModelTrainer joins with "prepared.csv" to locate its input.
    data_path: Path
    # Model identifier forwarded to HuggingFaceEmbeddings(model_name=...).
    model_name: str
    # Carried over from the configuration; not read by ModelTrainer in this notebook.
    model_path: Path

In [15]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(
        self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH
    ):
        """Load both YAML files and ensure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        # Every stage writes below this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the model-trainer settings from the ``model_trainer`` section.

        The stage's ``root_dir`` is created as a side effect before the
        settings object is assembled.

        Returns:
            A ``ModelTrainerConfig`` whose path-like entries are wrapped in ``Path``.
        """
        section = self.config.model_trainer
        create_directories([section.root_dir])
        return ModelTrainerConfig(
            root_dir=Path(section.root_dir),
            data_path=Path(section.data_path),
            model_name=section.model_name,
            model_path=Path(section.model_path),
        )

In [20]:
class ModelTrainer:
    """Embeds the prepared movie descriptions and pickles the resulting array.

    No model is fitted here: ``train`` runs a pretrained Hugging Face
    embedding model over the ``cleaned_description`` column and stores the
    vectors for later use.
    """

    # The annotation is quoted so this cell can be defined regardless of
    # whether the cell declaring ModelTrainerConfig has been executed yet.
    def __init__(self, config: "ModelTrainerConfig"):
        """Keep a reference to the stage settings.

        Args:
            config: Object exposing ``root_dir``, ``data_path`` and ``model_name``.
        """
        self.config = config

    def load_hf_embedding(self):
        """Create the embedding model named by ``self.config.model_name``.

        Returns:
            A ``HuggingFaceEmbeddings`` instance.
        """
        return HuggingFaceEmbeddings(model_name=self.config.model_name)

    def train(self):
        """Embed every row of ``prepared.csv`` and save the vectors.

        Reads ``<data_path>/prepared.csv``, embeds its ``cleaned_description``
        column, and pickles the result as a NumPy array to
        ``<root_dir>/movie_embeddings.pkl``. One embedding is produced per CSV
        row, in CSV order.

        Raises:
            FileNotFoundError: If ``prepared.csv`` is missing.
            KeyError: If the CSV has no ``cleaned_description`` column.
        """
        df = pd.read_csv(os.path.join(self.config.data_path, "prepared.csv"))
        # read_csv yields NaN (a float) for empty cells. Replace those with ""
        # and coerce the rest to str so the embedder only ever receives text;
        # rows are kept (not dropped) so embedding i still belongs to CSV row i.
        descriptions = [
            "" if pd.isna(text) else str(text) for text in df["cleaned_description"]
        ]
        embedding = self.load_hf_embedding()
        movie_embedding = np.array(embedding.embed_documents(descriptions))
        # NOTE(review): config.model_path is not used; the output location is
        # fixed to root_dir/movie_embeddings.pkl — confirm this is intended.
        embeddings_path = os.path.join(self.config.root_dir, "movie_embeddings.pkl")
        with open(embeddings_path, "wb") as f:
            pickle.dump(movie_embedding, f)

In [21]:
# Run the model-trainer stage end to end.
# Loads the YAML configuration (default file paths) and creates the artifacts root.
config = ConfigurationManager()
# Creates the stage's root_dir and returns its settings as a ModelTrainerConfig.
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
# Reads prepared.csv, embeds the "cleaned_description" column, and pickles the
# resulting array to <root_dir>/movie_embeddings.pkl.
model_trainer.train()

[2026-02-15 12:26:40,582: INFO: common: YAML file 'config/config.yaml' read successfully.]
[2026-02-15 12:26:40,587: INFO: common: YAML file 'params.yaml' read successfully.]
[2026-02-15 12:26:40,589: INFO: common: Directory 'artifacts' created successfully or already exists.]
[2026-02-15 12:26:40,593: INFO: common: Directory 'artifacts/model_trainer' created successfully or already exists.]
[2026-02-15 12:26:41,138: INFO: SentenceTransformer: Use pytorch device_name: cuda:0]
[2026-02-15 12:26:41,139: INFO: SentenceTransformer: Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2]
