In [13]:
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

import pinecone

## Certifications
Please ensure that the below certifications are not uploaded to GitHub

In [14]:
os.environ['OPENAI_API_KEY'] = 'KEY'
os.environ["PINECONE_API_KEY"] = 'KEY'
os.environ["PINECONE_ENV"] = 'ENV'

## Document Loading
Please only load the documents if they have not been previously loaded.
If the Index has already been loaded, please skip to the next section: **Similarity Query**

In [15]:
def load_documents(file_path: str):
    loader = DirectoryLoader(file_path, 
                             glob="**/*.txt", 
                             loader_cls=TextLoader, 
                             show_progress=True)  # Directory uploader for txt documents
    documents = loader.load()  # Load documents
    return documents

In [4]:
path = "../kb/"
documents = load_documents(path)

100%|████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1985.00it/s]


## Split Document Text

In [5]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1719, which is longer than the specified 1000
Created a chunk of size 1270, which is longer than the specified 1000
Created a chunk of size 1094, which is longer than the specified 1000
Created a chunk of size 1839, which is longer than the specified 1000
Created a chunk of size 1087, which is longer than the specified 1000
Created a chunk of size 1015, which is longer than the specified 1000
Created a chunk of size 1076, which is longer than the specified 1000
Created a chunk of size 1021, which is longer than the specified 1000
Created a chunk of size 1051, which is longer than the specified 1000
Created a chunk of size 1084, which is longer than the specified 1000
Created a chunk of size 1438, which is longer than the specified 1000
Created a chunk of size 1041, which is longer than the specified 1000
Created a chunk of size 1201, which is longer than the specified 1000
Created a chunk of size 1341, which is longer than the specified 1000
Created a chunk of s

## Embed Chunks into Vector Storage

In [10]:
embeddings = OpenAIEmbeddings()

In [54]:
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

In [55]:
def create_index(index_name: str):
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(
            name=index_name,
            metric='cosine',
            dimension=1536  
)

In [56]:
index_name = "flourishing-humanity"
create_index(index_name)

In [57]:
vectordb = Pinecone.from_documents(docs, embeddings, index_name=index_name)

## Similarity Query
If you have already created the Index, then please continue from here. 
The following cells aim to perfrom a similarity query within the Vector Database

In [16]:
index_name = "flourishing-humanity"
embeddings = OpenAIEmbeddings()

pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

vectordb = Pinecone.from_existing_index(index_name, embeddings)

In [17]:
query = "Who is the Character growth Manifesto designed for?"
similar_docs = vectordb.similarity_search(query)
print(similar_docs[0].page_content)

The Character Growth Manifesto is designed for anyone committed to personal growth and self-improvement, regardless of age, background, or current stage in life. Whether you're a student, a professional, a parent, or simply someone seeking to better understand yourself and your potential, this monograph will guide you on your journey to self-discovery. By providing actionable strategies and practical advice, The Character Growth Manifesto empowers readers to build a stronger, more resilient character, enabling them to overcome challenges, achieve their goals, and live a more fulfilling, authentic life.


## Initialize Chat Memory

In [18]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

## Vector Store in Combination with OpenAI LLM Model
Please note this uses a Generative LLM model. 
Additional steps in ... notebook will make use of a Chat LLM model. 

In [19]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [20]:
qa = ConversationalRetrievalChain.from_llm(llm, retriever=vectordb.as_retriever(), memory=memory)

In [21]:
query = "What is the Character Growth manifesto?"
result = qa({"question": query})

In [22]:
result["answer"]

'The Character Growth Manifesto is a comprehensive manual and guide to personal growth and self-improvement. It is designed for anyone who is committed to improving themselves and their impact on the world. The manifesto offers practical advice, actionable steps, and evidence-based strategies to help individuals develop a stronger, more resilient character. By following the principles outlined in the manifesto, readers can embark on a transformative journey towards self-discovery, personal growth, and lasting success.'

Applying chat memory.

In [23]:
query = "What is the name of the manual to lead a more fulfilling, authentic life?"
result = qa({"question": query})

In [24]:
result['answer']

'The name of the manual is "The Character Growth Manifesto."'