# In [None]:
import openai
from sentence_transformers import SentenceTransformer
import faiss
import PyPDF2
import os

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be written into the source file; the literal is kept only as a fallback for
# anyone who still edits the key in place.
openai.api_key = os.getenv("OPENAI_API_KEY", "enter your open ai key")

# Sentence-embedding model, created once at import time and shared by every
# function in this module that calls `embedding_model.encode`.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as a single string.

    Pages are joined with a newline so that the last word of one page cannot
    fuse with the first word of the next (which would corrupt whitespace-based
    chunking downstream).

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        The page texts joined by ``"\\n"``; an empty string if the document has
        no pages or no extractable text.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open. `or ""` is a
        # defensive guard: a falsy result (None / "") contributes nothing
        # instead of raising a TypeError during concatenation.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    return "\n".join(page_texts)

def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. Chunking
    stops as soon as a chunk reaches the end of the text, which avoids
    emitting a trailing chunk that is entirely contained in its predecessor.

    Args:
        text: The text to split; words are obtained with ``str.split()``.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        A list of chunk strings (words re-joined by single spaces); an empty
        list when ``text`` contains no words.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``chunk_size`` (the window would not advance).
    """
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            "overlap must satisfy 0 <= overlap < chunk_size "
            f"(got chunk_size={chunk_size}, overlap={overlap})"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # This window already covers the final word; any further window would
        # only repeat words that are part of this chunk.
        if start + chunk_size >= len(words):
            break
    return chunks

def build_vector_index(chunks):
    """Embed ``chunks`` and load the vectors into a flat L2 FAISS index.

    Args:
        chunks: Non-empty list of text chunks to embed with the module-level
            ``embedding_model``.

    Returns:
        A ``(index, embeddings)`` tuple: the populated ``faiss.IndexFlatL2``
        and the array returned by ``embedding_model.encode(chunks)``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index
            and the embedding width cannot be determined.
    """
    if not chunks:
        raise ValueError("cannot build a vector index from an empty list of chunks")

    embeddings = embedding_model.encode(chunks)
    # The index width is taken from the second axis of the encoded array.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings

def retrieve_relevant_chunks(query, chunks, index, embeddings, top_k=3):
    """Return the chunks whose embeddings are nearest to ``query``.

    Args:
        query: The question text; embedded with the module-level
            ``embedding_model``.
        chunks: The list of chunk strings that ``index`` positions refer to.
        index: Search index exposing ``search(vectors, k)``.
        embeddings: Unused here; kept so existing callers keep working.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings in the order reported by the index; an
        empty list when there are no chunks or ``top_k`` is not positive.
    """
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with the label -1, which as a Python index would silently
    # return the *last* chunk as if it were a hit.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode([query])
    _, indices = index.search(query_embedding, k)
    # Drop any label that does not address a real chunk (e.g. -1 padding when
    # the index holds fewer vectors than `chunks` has entries).
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

def generate_answer(query, context, model="gpt-3.5-turbo-instruct"):
    """Ask an OpenAI completion model to answer ``query`` using ``context``.

    Args:
        query: The user's question.
        context: Supporting text that is placed in the prompt ahead of the
            question.
        model: Name of the completion model to call. Defaults to
            ``gpt-3.5-turbo-instruct``, the replacement OpenAI lists for the
            retired ``text-davinci-003``. Chat-only models are not valid here;
            they need the chat completions endpoint instead.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped.
    """
    prompt = f"Answer the question based on the context below:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
    # NOTE(review): `openai.Completion` is the pre-1.0 SDK interface — confirm
    # the installed `openai` version still provides it.
    response = openai.Completion.create(
        model=model,  # `model` supersedes the deprecated `engine` keyword
        prompt=prompt,
        max_tokens=200,
        temperature=0.2
    )
    return response.choices[0].text.strip()

def main(file_path, query):
    """Answer ``query`` from the PDF at ``file_path`` and return the answer.

    The pipeline extracts the PDF text, splits it into chunks, indexes the
    chunk embeddings, retrieves the chunks closest to the query, and passes
    them (joined by single spaces) to ``generate_answer`` as context.

    Args:
        file_path: Path of the PDF document to read.
        query: The question to answer.

    Returns:
        Whatever ``generate_answer`` returns for the query and the retrieved
        context.
    """
    # Document -> chunks.
    passages = chunk_text(extract_text_from_pdf(file_path))

    # Chunks -> vector index.
    vector_index, passage_vectors = build_vector_index(passages)

    # Query -> best-matching chunks.
    top_passages = retrieve_relevant_chunks(
        query, passages, vector_index, passage_vectors
    )

    # Retrieved chunks -> generated answer.
    return generate_answer(query, " ".join(top_passages))

if __name__ == "__main__":
    # Example run: ask one question about one document and print the result.
    user_query = "What are the benefits of using LoRA in fine-tuning?"

    # NOTE(review): a single-space path — presumably a placeholder; point this
    # at a real PDF before running.
    document_path = " "

    print("Answer:", main(document_path, user_query))
