In [145]:
! pip install openai sentence_transformers



In [1]:
import os
import openai
import docx
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone

In [2]:
# Setting up the API key for OpenAI
os.environ['OPENAI_API_KEY'] = "openai-api"
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
# Initialize Pinecone
pc = Pinecone(api_key="pinecone-index-api", environment="")
index = pc.Index("pinecone-index-name", host="pinecone-index-host")

In [4]:
# Loading the Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

In [5]:
# Function to split long documents into smaller chunks
def split_text_into_chunks(plain_text, max_chars=2000):
    text_chunks = []
    current_chunk = ""
    for line in plain_text.split("\n"):
        if len(current_chunk) + len(line) + 1 <= max_chars:
            current_chunk += line + " "
        else:
            text_chunks.append(current_chunk.strip())
            current_chunk = line + " "
    if current_chunk:
        text_chunks.append(current_chunk.strip())
    return text_chunks

In [6]:
# Function to add data to the Pinecone vector database
def add_data(corpus_data):
    id = index.describe_index_stats()['total_vector_count']
    for i in range(len(corpus_data)):
        chunk = corpus_data[i]
        chunk_info = (str(id + i),
                      model.encode(chunk).tolist(),
                      {'context': chunk})
        index.upsert(vectors=[chunk_info])

In [7]:
# This function is responsible for matching the input string with already existing data on the vector database.
def find_match(query, k):
    query_em = model.encode(query).tolist()
    index = pc.Index("pinecone-index-name", host="pinecone-index-host") # Fill out with index name
    #result = index.query(query_em, top_k=k, include_metadata=True)
    result = index.query(vector=query_em, top_k=k, include_metadata=True)
    return [result['matches'][i]['metadata']['context'] for i in range(min(k, len(result['matches'])))]

In [8]:
# Create a prompt
def create_prompt(contexts, query):
    prompt = f"Context: {', '.join(contexts)}\n\nQuestion: {query}\n\nAnswer:"
    return prompt

In [9]:
# Function to generate an answer using GPT-3
def generate_answer(prompt):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # Specify the correct GPT-3 model here
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
        max_tokens=150
    )
    return response.choices[0].message["content"].strip()

In [10]:
# Function to handle user queries
def user_query(query):
    # Find the best matching context based on the query
    best_context = find_match(query, k=1)[0]

    # Create a prompt using the best context and the query
    prompt = create_prompt(best_context, query)

    # Generate an answer using GPT-3
    answer = generate_answer(prompt)

    return answer

In [11]:
# Load the docx file and split it into chunks
def load_and_process_docx(file_path):
    doc = docx.Document(file_path)
    text = ""
    for paragraph in doc.paragraphs:
        text += paragraph.text + "\n"
    chunks = split_text_into_chunks(text)
    add_data(chunks)
    return chunks

In [12]:
# Test the process

docx_file_path = "path-to-knowledge-docx"
chunks = load_and_process_docx(docx_file_path)
print("Document loaded and processed successfully.")

# Now, we can use user_query function to get answers for queries.
# Let's define a sample query and get an answer.
query = "What are the implications of climate change?"
answer = user_query(query)
print("Answer:", answer)

Document loaded and processed successfully.


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


# Suggestions for improvement:
# 1. Error Handling: Add error handling mechanisms to handle potential issues during document loading, chunk splitting, and API calls.
# 2. Model Selection: Experiment with different sentence embedding models to find the one that best suits your specific use case.
# 3. Fine-tuning: Fine-tune the GPT-3 model on your specific task or domain for better answer generation.
# 4. Performance Optimization: Optimize the code for better performance, especially when dealing with large documents or high query volumes.
# 5. Feedback Loop: Implement a feedback loop to continuously improve the system based on user feedback and interaction.
# 6. Security Measures: Ensure proper security measures are in place, especially when dealing with sensitive documents or data.

# Alternative Approach:
# Alternatively, instead of using separate functions for each step, we could encapsulate the entire process into a class.
# This class would handle document loading, chunking, adding data to the vector database, creating prompts, generatin nswers,
# and handling user queries in a more cohesive and organized manner. This would make the code more modular, reusable, and easier to maintain.to maintain.