In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
#from langchain.vectorstores import FAISS
from langchain_community.vectorstores import FAISS

from langchain.document_loaders import PyPDFLoader
from glob import glob
from tqdm import tqdm

import yaml

## Load Documents and Build the Vector Store

In [11]:
def load_config(path: str = "./config/config.yaml"):
    """Read the notebook's YAML configuration file and return its parsed content.

    Args:
        path: Location of the YAML file. Defaults to ``./config/config.yaml``
            (resolved against the current working directory), which is the
            file the notebook has always used.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    # Explicit encoding so parsing does not depend on the platform's locale default.
    with open(path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)

# Parsed once here; the helper functions in this cell read their settings
# (text splitter, embeddings, FAISS index store) from this object.
config = load_config()

def load_documents(directory : str):
    """Load every PDF found directly inside ``directory`` and split it into chunks.

    The chunk size and overlap are taken from ``config["TextSplitter"]``.

    Args:
        directory: Folder that contains the ``*.pdf`` files. A trailing
            slash is accepted but no longer required.

    Returns:
        A single flat list holding the chunks returned by
        ``PyPDFLoader.load_and_split`` for each PDF, with files visited in
        sorted path order.
    """
    import os  # local import: this cell runs before the notebook's later `import os`

    text_splitter = RecursiveCharacterTextSplitter(chunk_size = config["TextSplitter"]["chunk_size"],
                                                   chunk_overlap = config["TextSplitter"]["chunk_overlap"])

    # os.path.join inserts the separator only when it is missing, so both
    # "data/" and "data" resolve to the same pattern. Sorting makes the chunk
    # order independent of the order in which the filesystem lists files.
    pdf_paths = sorted(glob(os.path.join(directory, "*.pdf")))

    documents = []
    for item_path in tqdm(pdf_paths):
        loader = PyPDFLoader(item_path)
        documents.extend(loader.load_and_split(text_splitter=text_splitter))

    return documents

def load_embeddings(model_name=config["embeddings"]["name"],
                    model_kwargs=None):
    """Create the HuggingFace embedding function used to build and query the index.

    Args:
        model_name: Name of the embedding model; defaults to
            ``config["embeddings"]["name"]``.
        model_kwargs: Keyword arguments forwarded to ``HuggingFaceEmbeddings``
            as ``model_kwargs``. When omitted, ``{'device': <configured device>}``
            is used, with the device read from ``config["embeddings"]["device"]``.

    Returns:
        A ``HuggingFaceEmbeddings`` instance.
    """
    # Build the default dict per call instead of sharing one mutable default
    # object between every invocation of this function.
    if model_kwargs is None:
        model_kwargs = {'device': config["embeddings"]["device"]}
    return HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)


def load_db(embedding_function, save_path=config["faiss_indexstore"]["save_path"], index_name=config["faiss_indexstore"]["index_name"]):
    """Open a FAISS index that was previously written to disk.

    Args:
        embedding_function: Embeddings object handed to ``FAISS.load_local``.
        save_path: Folder holding the index; defaults to the configured
            ``faiss_indexstore.save_path``.
        index_name: Name of the index inside that folder; defaults to the
            configured ``faiss_indexstore.index_name``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # NOTE(security): allow_dangerous_deserialization=True opts in to loading
    # pickled data from the index folder. Only use this with index folders
    # that were produced locally and are trusted.
    return FAISS.load_local(
        folder_path=save_path,
        index_name=index_name,
        embeddings=embedding_function,
        allow_dangerous_deserialization=True,
    )

def save_db(db, save_path=config["faiss_indexstore"]["save_path"], index_name=config["faiss_indexstore"]["index_name"]):
    """Write a vector store to disk and print where it went.

    Args:
        db: Object exposing ``save_local(folder, index_name)``.
        save_path: Target folder; defaults to the configured
            ``faiss_indexstore.save_path``.
        index_name: Name to store the index under; defaults to the configured
            ``faiss_indexstore.index_name``.
    """
    import os  # local import: this cell runs before the notebook's later `import os`

    db.save_local(save_path, index_name)
    # Join with a path separator so the message is correct whether or not
    # save_path ends in a slash (plain concatenation printed e.g. "faiss_dbbooks").
    print("Saved db to " + os.path.join(save_path, index_name))

In [12]:
# Load and chunk every PDF under data/ (the progress bar counts files, not chunks).
documents = load_documents("data/")

100%|██████████| 3/3 [00:06<00:00,  2.19s/it]


In [13]:
# Uses the model name and device taken from the `embeddings` section of the config.
embedding_function = load_embeddings()

In [14]:
# Embed every chunk and build the FAISS vector store from the results.
db = FAISS.from_documents(documents, embedding_function)

In [15]:
# Persist through save_db so the index is written to the configured
# faiss_indexstore location, which is the same place load_db reads from later,
# instead of a hard-coded folder and index name that can drift from the config.
save_db(db)

In [16]:
# Sanity check: query the freshly built store and print the matching chunks.
print(db.similarity_search("5G Subscribers in Asia"))

[Document(page_content='will soon follow, given the country’s  \nlarge manufacturing base and 5G  \nspectrum availability.\n4G is currently the dominant technology, \nand is expected to account for 81 percent  \nof all subscriptions at the end of 2023. \nMobile subscription growth has flattened \nand is expected to be virtually zero in the \ncoming years. However, the migration from \n2G/3G to 4G continues to look strong up  \nto 2024. From 2025, 5G is expected to be \nthe only growing subscription type.\nDuring the forecast period, there will \ncontinue to be a significant decline in 3G \nsubscriptions, from 14 percent of mobile \nsubscriptions to just 1 percent.\nSouth East Asia and Oceania\n5G subscriptions are forecast to reach \naround 550 million in the region by the \nend of the forecast period. After the initial \ninvestment in 5G infrastructure across the \nSouth East Asian markets of Thailand, \nthe Philippines, Singapore and Malaysia, \nthe focus has now shifted to diversify

## Build the QA Retriever

In [17]:
import sys, os
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
# Extend the module search path so that `constants` can be imported below.
# NOTE(review): the path is relative to the working directory — confirm it
# points at the folder that holds constants.py.
sys.path.append("../../Langchain")
from constants import openai_key

# Publish the key as an environment variable; ChatOpenAI is created later in
# the notebook without an explicit key (presumably it reads this variable).
os.environ['OPENAI_API_KEY'] = openai_key

In [18]:
# Reload the index from disk so the QA chain uses the persisted store.
db = load_db(embedding_function)
# as_retriever takes its search options under `search_kwargs`; a keyword named
# `kwargs` is not a recognised option, so k=7 was never applied.
# With this, the retriever is asked for the 7 closest chunks per query.
qa = RetrievalQA.from_llm(llm = ChatOpenAI(temperature=0.1),
                          retriever = db.as_retriever(search_kwargs={"k": 7}),
                          return_source_documents = False)

  warn_deprecated(


In [19]:
question = "What are the main factors fuelling subscriber growth "
# Calling the chain object directly is deprecated in recent LangChain releases;
# invoke() is the supported entry point. RetrievalQA takes the question under
# the "query" key and returns the answer under "result".
print(qa.invoke({"query": question})['result'])

  warn_deprecated(


The main factors fueling subscriber growth include the increasing adoption of 5G technology, the migration of 3G subscribers to 4G and 5G networks, the push for mobile financial services in certain regions, the growth of Fixed Wireless Access (FWA) solutions, and the overall expansion of smartphone subscriptions.
