In [1]:
import openai
import numpy as np
import faiss
from dotenv import load_dotenv
import os

# Load variables from a local .env file (python-dotenv) into the process
# environment, then read the OpenAI key from the environment so the secret is
# never hard-coded in the notebook. os.getenv returns None when
# OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

## Prepare the document corpus

In [2]:
# In-memory corpus: each string is one retrievable document. The position of a
# string in this list is the id used to map vector-search results back to text.
documents = [
    "Python is a high-level programming language known for its simplicity and readability.",
    "Machine learning is a subset of artificial intelligence that focuses on data and algorithms.",
    "Natural Language Processing (NLP) enables computers to understand human language.",
    "Deep learning models can automatically learn representations from data.",
    "Vector databases are specialized databases for storing and retrieving vector embeddings."
]

## Generate embeddings for the document corpus

In [3]:
def generate_embeddings(texts):
    """Embed a batch of texts with the OpenAI embeddings endpoint.

    Args:
        texts: List of strings to embed in a single request.

    Returns:
        A list of embedding vectors, one per input text, in the same order as
        ``texts``. Empty input yields an empty list without calling the API.
    """
    # The endpoint rejects an empty input, so short-circuit instead of failing.
    if not texts:
        return []

    response = openai.Embedding.create(
        input=texts,
        model="text-embedding-ada-002"
    )

    # Each returned item records the position of its input in an "index"
    # field; order by it (falling back to arrival order) so that result i
    # always belongs to texts[i].
    numbered = sorted(
        enumerate(response['data']),
        key=lambda pair: pair[1].get('index', pair[0]),
    )
    return [item['embedding'] for _, item in numbered]

# Embed the whole corpus in one request. The retrieval step relies on
# document_embeddings[i] being the vector for documents[i].
print("Generating embeddings for documents...")
document_embeddings = generate_embeddings(documents)

Generating embeddings for documents...


## Store embeddings in FAISS vector database

In [4]:
# FAISS consumes float32 matrices: one row per document, one column per
# embedding component.
document_embeddings_array = np.asarray(document_embeddings, dtype='float32')
dimension = document_embeddings_array.shape[1]

# Flat L2 index: stores the vectors as-is and compares by Euclidean distance.
index = faiss.IndexFlatL2(dimension)
index.add(document_embeddings_array)
print(f"Vector database created with {len(documents)} documents")

Vector database created with 5 documents


## Function to retrieve relevant documents

In [5]:
def get_relevant_context(query, k=2):
    """Return the corpus documents closest to ``query`` in embedding space.

    Args:
        query: Text to embed and search with.
        k: Maximum number of documents to return.

    Returns:
        A list of at most ``k`` strings taken from ``documents``, in the order
        the index returned them. Empty when ``k`` is below 1 or the index
        holds no vectors.
    """
    # Asking FAISS for more neighbours than it stores pads the result with
    # label -1, which would silently select documents[-1]. Never request more
    # than the index holds, and skip the embedding call when nothing can match.
    k = min(k, index.ntotal)
    if k < 1:
        return []

    # Generate embedding for the query
    query_embedding = generate_embeddings([query])[0]
    query_array = np.array([query_embedding]).astype('float32')

    # Search in vector database (distances are not needed here)
    _, indices = index.search(query_array, k)

    # Map labels back to text, dropping any -1 "no result" placeholder
    return [documents[i] for i in indices[0] if i >= 0]

## Function to generate chatbot response

In [6]:
def generate_response(query, k=2, model="gpt-3.5-turbo", temperature=0.7):
    """Answer ``query`` with a chat model, grounded in retrieved documents.

    Args:
        query: The user's question.
        k: Number of documents to retrieve as context.
        model: Chat completion model name.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A dict with two keys: ``'answer'`` (the model's reply text) and
        ``'relevant_documents'`` (the list of context strings that were
        placed in the prompt).
    """
    # Get relevant documents
    relevant_context = get_relevant_context(query, k=k)

    # Prepare prompt with context; the retrieved documents are joined with a
    # single space into one context passage.
    prompt = f"""Use the following context to answer the question. 
    If the context doesn't contain relevant information, say so.
    
    Context: {' '.join(relevant_context)}
    
    Question: {query}
    
    Answer:"""

    # Generate response using ChatGPT
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature
    )

    # Return the retrieved documents too, so callers can show which sources
    # the answer was based on.
    return {
        'answer': response.choices[0].message['content'],
        'relevant_documents': relevant_context
    }

## Interactive chatbot interface

In [7]:
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Reads one query per prompt, prints the model's answer followed by the
    documents used as context, and repeats until the user types ``quit``
    (case-insensitive). Blank input is ignored. The loop also ends cleanly
    when input is closed or interrupted.
    """
    print("Chatbot initialized. Type 'quit' to exit.")
    while True:
        try:
            query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt: stop without a
            # traceback.
            break

        if query.lower() == 'quit':
            break
        if not query:
            # Nothing to ask; avoid spending API calls on an empty query.
            continue

        result = generate_response(query)
        print("\nChatbot:", result['answer'])
        print("\nRelevant documents used:")
        for doc in result['relevant_documents']:
            print(f"- {doc}")

# Run the chatbot. The guard is true when this code runs as the main program,
# so the interactive loop starts as soon as this cell executes; importing the
# code as a module does not start it.
if __name__ == "__main__":
    chat()

Chatbot initialized. Type 'quit' to exit.



You:  What is Python?



Chatbot: Python is a high-level programming language known for its simplicity and readability.

Relevant documents used:
- Python is a high-level programming language known for its simplicity and readability.
- Natural Language Processing (NLP) enables computers to understand human language.



You:  Tell me about Naruto Anime



Chatbot: The provided context does not contain relevant information about the Naruto Anime.

Relevant documents used:
- Natural Language Processing (NLP) enables computers to understand human language.
- Python is a high-level programming language known for its simplicity and readability.



You:  quit
