# Importing Libraries

In [1]:
%load_ext autoreload
%autoreload 2

In [20]:
import os
import shutil
import sys
sys.path.append("../source/")
sys.path.append("../")

import paths
import webscraper

from dotenv import load_dotenv

load_dotenv("../.env")

import pinecone
from langchain.llms import Replicate
from langchain.vectorstores import Pinecone
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain

REPLICATE_API_TOKEN = os.environ["REPLICATE_API_TOKEN"]
PINECONE_API_TOKEN = os.environ["PINECONE_API_TOKEN"]

pinecone.init(api_key = PINECONE_API_TOKEN, environment = "gcp-starter")

# Populating the Transformed Data Directory with pdf files

In [8]:
webscraper.ScrapeData(StartYear = 2023, EndYear = 2023)

# Populating the Vector Database

In [21]:
#Fetching all .pdf Files in the data/transformed/ Directory
#Then for each one of them we Load, Split and Generate Embeddings
#Ultimately we Push the Embeddings Generated to the Vector Database (aka VectorStore)

for x in os.listdir(paths.TRANSFORMED_DATA_DIR):
    if x.endswith(".pdf"):
        FilePath = str(paths.TRANSFORMED_DATA_DIR / x)
        Loader = PyPDFLoader(FilePath)
        Document = Loader.load()
        
        TextSplitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap = 0)
        Text = TextSplitter.split_documents(Document)
        Embeddings = HuggingFaceEmbeddings() #(model_name = "sentence-transformers/all-MiniLM-L6-v2")
        
        #Loading Documents into the Vector Database

        IndexName = "centralbanksllm"
        Index = pinecone.Index(IndexName)
        VectorDB = Pinecone.from_documents(Text, Embeddings, index_name = IndexName)
        
        #After Loading the Documents' Embeddings to the Vector Store, the File is Moved to a Storage Directory
        
        shutil.move(FilePath, str(paths.STORAGE_DATA_DIR / x))

In [22]:
LLM = Replicate(model = "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", input = {"temperature": 0.75, "max_length": 3000})

Init param `input` is deprecated, please use `model_kwargs` instead.


In [23]:
QA_Chain = ConversationalRetrievalChain.from_llm(LLM, VectorDB.as_retriever(search_kwargs = {'k': 2}), return_source_documents = True)

In [24]:
ChatHistory = []

while True:
    Query = input('Prompt: ')
    
    if Query.lower() in ["exit", "quit", "q"]:
        print('Exiting')
        sys.exit()
        
    Result = QA_Chain({'question': Query, 'chat_history': ChatHistory})
    print('Answer: ' + Result['answer'] + '\n')
    ChatHistory.append((Query, Result['answer']))

Prompt:  how did the Monetary Policy Shift throughout the course of the year 2023?


Answer: According to the provided minutes, the Monetary Policy Shift in 2023 was characterized by a focus on bringing inflation back down to target rates. In the first half of the year, members noted that core inflation remained well above target rates and was proving to be persistent, despite labor market conditions gradually easing. In response, the Reserve Bank took steps to tighten monetary policy, including increasing interest rates. However, as the year progressed, members noted that inflation had declined further than expected in some cases, and that wages growth remained high relative to productivity growth. As a result, the Reserve Bank began to shift its focus towards slower growth in wages to bring inflation back down to target rates. By the end of the year, members noted that while inflation remained above target rates, it had declined further than expected in some cases, and that there were some indications that households real disposable incomes and wealth were starting t

Prompt:  quit


Exiting


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
