# Importing Dependencies

In [1]:
import os 

import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
import numpy as np

from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.conversation.memory import ConversationSummaryMemory
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.retrievers.multi_query import MultiQueryRetriever
from sentence_transformers import SentenceTransformer

from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
import time

import nest_asyncio
from fastapi import FastAPI, Body
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

import requests
import threading
import uvicorn

  from tqdm.autonotebook import tqdm, trange


# Setting up API Keys

In [2]:
os.environ['OPENAI_API_KEY'] = 'Set your OpenAI API KEY Here'
os.environ['PINECONE_API_KEY'] = 'Set your Pinecone API KEY Here'

# Embedding method

In [3]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
model_name = 'text-embedding-ada-002'

embed = OpenAIEmbeddings(
    model = model_name,
    openai_api_key = OPENAI_API_KEY
)

  warn_deprecated(


# Connecting to Pinecone DB Index

- If you do not have an index to connect to, go to the end of the notebook and create/populate one

In [4]:
api_key = os.getenv("PINECONE_API_KEY")

pc = Pinecone(api_key=api_key)

index_name = "celestial-7k-db"

index = pc.Index(index_name)
time.sleep(1)

index.describe_index_stats()

vectorstore = PineconeVectorStore.from_existing_index(index_name, embed)

# Setting up the Retrieval QA Bot Class

In [5]:
class RetrievalQABot():

    llm = ChatOpenAI(
        openai_api_key = OPENAI_API_KEY,
        model_name = 'gpt-4o',
        #streaming = True,        # Disabled streaming because get_openai_callback() (Testing method) does not work for prompts that are streamed
        callbacks=[StreamingStdOutCallbackHandler()],
        temperature = 0.5
    )

    prompt_template = """
    You are a very knowledgeable NPC assistant in a video game. Players will come to you and ask questions regarding the gameplay.
    Try to retrieve the answer from the context alone and limit your answer to these documents. 
    If you do not know the answer to their question, just say you don't know. 
    Keep the answer within maximum 2-3 sentences and concise.

    Context: {context}

    Given the following conversation, answer the question.

    Chat History: {chat_history}
    
    Question: {question}
    Answer: 
    """

    prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template = prompt_template,
    )

    rag_chain = RetrievalQA.from_chain_type(
                                    llm = llm,
                                    chain_type = 'stuff',
                                    retriever = MultiQueryRetriever.from_llm(               # Each query will be the equivalent of 3 queries
                                                retriever = vectorstore.as_retriever(),
                                                llm=llm),
                                    #verbose = True,
                                    chain_type_kwargs = {
                                    #"verbose": True,
                                    "prompt": prompt,
                                    "memory": ConversationBufferWindowMemory(
                                        llm = llm,
                                        k = 6,
                                        memory_key = "chat_history",
                                        input_key = "question"),
                                    }
    )
    
    def process_query(self, query):
        result = self.rag_chain.invoke(query)
        return f"Final Answer: {result}"

  warn_deprecated(


# Initializing the bot and querying

In [22]:
bot = RetrievalQABot()
user_input = input("Ask me anything: ")
result = bot.process_query(user_input)
print(result)

Ask me anything:  What game is this ?


Final Answer: {'query': 'What game is this ?', 'result': "I don't know."}


In [141]:
bot = RetrievalQABot()
bot.rag_chain.invoke("What are some good looking biomes ?")

{'query': 'What are some good looking biomes ?',
 'result': 'Some good-looking biomes include the dense forested biome with lush green landscapes and large jungle trees, and the swamp biome characterized by shallow pools of green water with floating lily pads, trees covered with vines, and abundant mushrooms and sugar canes.'}

# Building the chatbot API using FASTAPI

In [11]:
# Necessary to run FastAPI within Jupyter
nest_asyncio.apply()

app = FastAPI()
chatbot = RetrievalQABot()  

class Query(BaseModel):
    text: str

@app.post("/chat")
async def chat(query: Query):
    response = chatbot.process_query(query.text)  
    return {"response": response}

@app.get("/status")
async def health():
    return {"status": "Application is functional !"}

In [12]:
import threading
import uvicorn

def run_api():
    config = uvicorn.Config(app=app, host="localhost", port=8000, log_level="info", reload=True)
    server = uvicorn.Server(config)
    server.run()

# Function to stop the server
def stop_server():
    # You would need a way to reference and stop the running server.
    pass

# Run the server in a thread
thread = threading.Thread(target=run_api)
thread.start()

INFO:     Started server process [42792]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)


INFO:     ::1:56552 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56557 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56558 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56559 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56560 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56561 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56562 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56563 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56564 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56565 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56566 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56567 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56568 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56569 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56570 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56571 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56572 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56573 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56574 - "POST /chat HTTP/1.1" 200 OK
INFO:     ::1:56575 - "POST /ch

# Testing POST Requests on the API

In [None]:
response = requests.post('http://localhost:8000/chat', json={"text": "What is the best tool?"})
print(response.json())

In [11]:
response = requests.post('http://localhost:8000/chat', json={"text": "Imagine me a story about Techton's past"})
print(response.json()) 

{'response': 'Final Answer: {\'query\': "Imagine me a story about Techton\'s past", \'result\': "Techton\'s past is shrouded in mystery, with rumors of a dark past as a warrior before he turned to blacksmithing. Some say he once wielded a legendary sword that held the power to control the elements, but he now uses his skills to forge weapons for the greater good."}'}


In [12]:
response = requests.post('http://localhost:8000/chat', json={"text": "What is the Crystal Nexus?"})
print(response.json()) 

{'response': "Final Answer: {'query': 'What is the Crystal Nexus?', 'result': 'The Crystal of Nexus is a powerful artifact that maintains the balance between the realms in Eldoria. It is sought after by dark forces seeking to manipulate the convergence for their own nefarious purposes.'}"}


# Index Creation and Populating a DB - ONLY If needed

## Data Loading

In [4]:
loader = TextLoader('CelestialAscension_7k.txt', encoding='utf-8')
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 400, chunk_overlap = 20)
docs = text_splitter.split_documents(documents)

In [5]:
# Clean up the document
clean_kb = [doc.page_content for doc in docs if doc.page_content]

In [6]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
model_name = 'text-embedding-ada-002'

embed = OpenAIEmbeddings(
    model = model_name,
    openai_api_key = OPENAI_API_KEY
)

## Index Creation

In [7]:
from pinecone import ServerlessSpec

spec = ServerlessSpec(
    cloud="aws", region="us-east-1"
)

In [8]:
api_key = os.getenv("PINECONE_API_KEY")

pc = Pinecone(api_key=api_key)

In [9]:
import time

index_name = "celestial-ascension-7k-db"
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]

if index_name not in existing_indexes:
    pc.create_index(
        index_name,
        dimension = 1536,  # dimensionality of ada-002
        metric = 'dotproduct',
        spec=spec
    )
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
time.sleep(1)
# view index stats
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## Index population

In [10]:
from tqdm.auto import tqdm

# Assuming clean_kb is already a list of cleaned text strings
batch_size = 100

for i in tqdm(range(0, len(clean_kb), batch_size)):
    i_end = min(len(clean_kb), i + batch_size)
    batch = clean_kb[i:i_end]

    ids = [f'doc_{j}' for j in range(i, i_end)]

    try:
        embeds = embed.embed_documents(batch)

        metadata = [{'text': text} for text in batch]

        upsert_data = zip(ids, embeds, metadata)

        index.upsert(vectors=upsert_data)
    except Exception as e:
        print(f"Failed to embed or upsert documents: {e}")

print(index.describe_index_stats())

  0%|          | 0/2 [00:00<?, ?it/s]

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100}
