In [1]:
import os
import json
from dotenv import load_dotenv

from huggingface_hub import hf_hub_download
import transformers
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [2]:
class ConfigManager:
    def __init__(self, json_config_path: str):
        self.json_config_path = json_config_path
        self.json_config = None

        self._load_config_json()

    def _load_config_json(self):
        try:
            with open(self.json_config_path, "r") as file_obj:
                self.json_config = json.load(file_obj)
        except FileNotFoundError:
            raise FileNotFoundError(f"Configuration file not found: {self.json_config_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Error decoding JSON file: {e}")

    def get_config(self, keys: str | list, default=None):
        if isinstance(keys, str):
            keys = [keys]

        data = self.json_config
        for key in keys:
            if isinstance(data, dict):
                data = data.get(key, default)
            else:
                return default
        return data

In [3]:
# Load variables from a .env file (if one is found) into the process
# environment so that os.getenv() calls in later cells can read them.
# The return value is bound to `_` only so it is not echoed as cell output.
_ = load_dotenv()

In [4]:
# Parse config.json (relative to the current working directory) once; the
# constructor raises FileNotFoundError / ValueError if the file is missing
# or is not valid JSON. Later cells read settings via get_config().
config_manager = ConfigManager("config.json")

In [5]:
# Download the model file named in config.json from the Hugging Face Hub and
# keep hf_hub_download's return value (presumably the local file path) in
# `model_path`.
# NOTE(review): get_config returns None for a missing key, and that None is
# passed straight through to hf_hub_download -- confirm config.json defines
# both "model_repository_name" and "model_file_name".
model_path = hf_hub_download(
    repo_id=config_manager.get_config("model_repository_name"), 
    filename=config_manager.get_config("model_file_name"),
    local_dir="./model/",
    # os.getenv yields None when HUGGINGFACE_TOKEN is unset; presumably the
    # request is then made without an explicit token -- verify for gated repos.
    token=os.getenv("HUGGINGFACE_TOKEN")
)

llama-2-7b-chat.ggmlv3.q4_0.bin:   5%|5         | 199M/3.79G [00:00<?, ?B/s]

In [None]:
def load_data(data_path: str, verbose: bool = False):
    """Load the PDF files found in a directory.

    Args:
        data_path: Directory handed to ``DirectoryLoader`` as the place to
            search.
        verbose: Forwarded as ``show_progress``; set to ``True`` to have the
            loader report progress while it works.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the files matching
        the ``*.pdf`` glob, each read with ``PyPDFLoader`` (presumably a list
        of LangChain documents -- confirm against the installed version).
    """
    pdf_directory_loader = DirectoryLoader(
        data_path,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
        show_progress=verbose,
    )
    return pdf_directory_loader.load()
