In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from PyPDF2 import PdfReader
from flask import Flask, request, jsonify, render_template_string
from pyngrok import ngrok
import os

In [None]:
# Step 1: Load the Language Model (GPT-Neo 2.7B)
# To try a different causal-LM checkpoint, change the id below
# (for example "EleutherAI/gpt-j-6B" for a larger model).
model_name = "EleutherAI/gpt-neo-2.7B"
# The tokenizer and the model are both loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [None]:
# Step 2: Document Processing Functions
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        file_path: Path (or file-like object) accepted by ``PdfReader``.

    Returns:
        The page texts joined with a newline. Pages that yield no text
        (``None`` or an empty string, e.g. scanned/image-only pages) are
        skipped, so the result is ``""`` when nothing could be extracted.
    """
    reader = PdfReader(file_path)
    # `or ""` guards against extractors that return None for a page with no text layer.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; str.join also avoids repeated string concatenation.
    return "\n".join(page_text for page_text in page_texts if page_text)
def split_text(text, chunk_size=500, chunk_overlap=50):
    """Break ``text`` into overlapping chunks.

    Args:
        text: The string to split.
        chunk_size: Value forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Value forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for ``text``.
    """
    chunker = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    pieces = chunker.split_text(text)
    return pieces

In [None]:
# Step 3: Embedding and Vector Store Functions
# Sentence-embedding model, instantiated once at module level and bound to `embedder`.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def create_embeddings(text_chunks):
    """Encode ``text_chunks`` with the module-level ``embedder`` and return the result."""
    vectors = embedder.encode(text_chunks)
    return vectors

def build_vector_store(embeddings):
    """Build a flat L2 FAISS index containing ``embeddings``.

    Args:
        embeddings: A 2-D array-like of shape (n_vectors, dimension).

    Returns:
        A ``faiss.IndexFlatL2`` with every row of ``embeddings`` added.

    Raises:
        ValueError: If ``embeddings`` is empty or not 2-D (previously this
            surfaced as an opaque ``IndexError`` on ``shape[1]``).
    """
    # FAISS expects a C-contiguous float32 matrix; normalise here so that
    # lists or float64 arrays are accepted as well.
    matrix = np.ascontiguousarray(embeddings, dtype=np.float32)
    if matrix.ndim != 2 or matrix.shape[0] == 0:
        raise ValueError(
            "embeddings must be a non-empty 2-D array of shape (n_vectors, dimension)"
        )
    dimension = matrix.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(matrix)
    return index

def retrieve_relevant_chunks(query, index, text_chunks, k=3):
    """Return up to ``k`` chunks of ``text_chunks`` nearest to ``query``.

    Args:
        query: The question text to embed and search for.
        index: A search index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``; row ``i`` of the index is assumed to
            correspond to ``text_chunks[i]``.
        text_chunks: The chunks that were indexed, in insertion order.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` chunks, best match first. Empty when there
        are no chunks or ``k`` is not positive.
    """
    if not text_chunks or k <= 0:
        return []
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and text_chunks[-1] would silently return the last chunk.
    k = min(k, len(text_chunks))
    query_embedding = np.asarray(embedder.encode([query]), dtype=np.float32)
    _distances, indices = index.search(query_embedding, k)
    # Defensive filter in case the index still reports padding or stale ids.
    return [text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)]

In [None]:
# Step 4: Response Generation Function
def truncate_context(context, max_tokens=1024):
    """Cut ``context`` down to at most ``max_tokens`` tokenizer tokens.

    The text is encoded with truncation enabled and then decoded back to a
    string with special tokens skipped.
    """
    token_ids = tokenizer.encode(
        context,
        truncation=True,
        max_length=max_tokens,
    )
    shortened = tokenizer.decode(token_ids, skip_special_tokens=True)
    return shortened

def generate_response(query, relevant_chunks, max_new_tokens=150, max_context_tokens=1024):
    """Generate an answer to ``query`` grounded in the retrieved chunks.

    Args:
        query: The user's question.
        relevant_chunks: Text chunks retrieved for the question; they are
            joined with newlines and truncated to ``max_context_tokens``.
        max_new_tokens: Maximum number of tokens the model may generate.
        max_context_tokens: Token budget for the context part of the prompt.

    Returns:
        The generated answer text only. The prompt (context and question)
        is stripped from the decoded output.
    """
    context = truncate_context("\n".join(relevant_chunks), max_tokens=max_context_tokens)
    # The retrieved context has to be part of the prompt; otherwise the model
    # answers from its own weights and the retrieval step has no effect.
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer:"
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    # Unpacking `inputs` forwards the attention mask together with the ids.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() returns prompt + continuation for decoder-only models; keep
    # only the continuation so the caller is not handed the context back.
    answer_ids = outputs[0][prompt_length:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

In [None]:
# Step 5: Flask App
app = Flask(__name__)

# Set up ngrok.
# The auth token is a credential and must not be committed with the notebook:
# it is read from the NGROK_AUTH_TOKEN environment variable instead.
# NOTE(review): the token that was previously hardcoded here should be treated
# as leaked and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN")
if not ngrok_auth_token:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set. Export it (or set os.environ['NGROK_AUTH_TOKEN'] "
        "in a private cell) before running this cell."
    )
ngrok.set_auth_token(ngrok_auth_token)
public_url = ngrok.connect(5000).public_url  # Expose the Flask app on port 5000
print(f" * Running on {public_url}")

# Directory where the uploaded document is stored.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

@app.route("/")
def home():
    """Serve the single-page UI: a document upload form and a question box.

    The page posts the selected file to ``/upload`` and the question (as
    JSON) to ``/chat``. The embedded script reports server-side errors
    (the ``error`` field or a non-2xx status) instead of showing
    "undefined", and always hides the loading spinner when a request ends.
    """
    return render_template_string('''
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Document Chat</title>
            <style>
                body {
                    font-family: Arial, sans-serif;
                    background-color: #f4f4f9;
                    margin: 0;
                    padding: 20px;
                    display: flex;
                    justify-content: center;
                    align-items: center;
                    height: 100vh;
                    flex-direction: column;
                }
                .container {
                    background: white;
                    padding: 20px;
                    border-radius: 10px;
                    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
                    max-width: 500px;
                    width: 100%;
                }
                h1 {
                    text-align: center;
                    color: #333;
                }
                .upload-section, .chat-section {
                    margin-bottom: 20px;
                }
                input[type="file"], input[type="text"] {
                    width: 100%;
                    padding: 10px;
                    margin: 10px 0;
                    border: 1px solid #ccc;
                    border-radius: 5px;
                }
                button {
                    width: 100%;
                    padding: 10px;
                    background-color: #007bff;
                    color: white;
                    border: none;
                    border-radius: 5px;
                    cursor: pointer;
                }
                button:hover {
                    background-color: #0056b3;
                }
                .loading {
                    display: none;
                    text-align: center;
                    margin: 20px 0;
                }
                .loading-spinner {
                    border: 4px solid #f3f3f3;
                    border-top: 4px solid #007bff;
                    border-radius: 50%;
                    width: 30px;
                    height: 30px;
                    animation: spin 1s linear infinite;
                    margin: 0 auto;
                }
                @keyframes spin {
                    0% { transform: rotate(0deg); }
                    100% { transform: rotate(360deg); }
                }
                .success {
                    display: none;
                    text-align: center;
                    color: #28a745;
                    margin: 20px 0;
                }
                .response {
                    margin-top: 20px;
                    padding: 10px;
                    background-color: #f8f9fa;
                    border: 1px solid #ddd;
                    border-radius: 5px;
                }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>Document Chat</h1>
                <div class="upload-section">
                    <h2>Upload Your Document</h2>
                    <form id="uploadForm">
                        <input type="file" name="file" id="file" required>
                        <button type="submit">Upload</button>
                    </form>
                </div>
                <div class="chat-section">
                    <h2>Ask a Question</h2>
                    <input type="text" id="query" placeholder="Enter your question">
                    <button onclick="sendQuery()">Send</button>
                    <div class="loading" id="loading">
                        <div class="loading-spinner"></div>
                        <p>Generating response...</p>
                    </div>
                    <div class="success" id="success">
                        <p>✅ Response generated successfully!</p>
                    </div>
                    <div class="response" id="response"></div>
                </div>
            </div>
            <script>
                document.getElementById("uploadForm").onsubmit = async (e) => {
                    e.preventDefault();
                    const formData = new FormData();
                    formData.append("file", document.getElementById("file").files[0]);
                    try {
                        const response = await fetch("/upload", {
                            method: "POST",
                            body: formData
                        });
                        const result = await response.json();
                        // Failures are reported under "error", successes under "message".
                        alert(response.ok ? result.message : (result.error || "Upload failed."));
                    } catch (err) {
                        alert("Upload failed: " + err.message);
                    }
                };

                async function sendQuery() {
                    const query = document.getElementById("query").value;
                    if (!query) {
                        alert("Please enter a question.");
                        return;
                    }

                    const loading = document.getElementById("loading");
                    const success = document.getElementById("success");
                    const output = document.getElementById("response");

                    // Show loading spinner
                    loading.style.display = "block";
                    success.style.display = "none";
                    output.innerText = "";

                    try {
                        const response = await fetch("/chat", {
                            method: "POST",
                            headers: { "Content-Type": "application/json" },
                            body: JSON.stringify({ query, file_path: "uploads/uploaded_file.pdf" })
                        });
                        const result = await response.json();
                        if (!response.ok || result.error) {
                            throw new Error(result.error || ("Request failed with status " + response.status));
                        }

                        // Show the success message only for a real answer
                        success.style.display = "block";
                        output.innerText = result.response;
                    } catch (err) {
                        output.innerText = "Error: " + err.message;
                    } finally {
                        // Hide the spinner whether the request succeeded or failed
                        loading.style.display = "none";
                    }
                }
            </script>
        </body>
        </html>
    ''')

@app.route("/upload", methods=["POST"])
def upload_file():
    """Store the uploaded PDF as ``uploaded_file.pdf`` inside ``UPLOAD_FOLDER``.

    Expects a multipart form with a ``file`` field.

    Returns:
        ``{"message": ...}`` on success, or ``{"error": ...}`` with HTTP 400
        when the field is missing, no file was selected, or the file name
        does not end in ``.pdf``.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files["file"]
    # A form submitted without choosing a file still sends the field, with an empty filename.
    if not file.filename:
        return jsonify({"error": "No file selected"}), 400
    # The upload is later parsed with PdfReader, so reject anything else up front.
    if not file.filename.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files are supported"}), 400
    # The client-supplied name is never used for the path; the file is stored under a fixed name.
    file_path = os.path.join(UPLOAD_FOLDER, "uploaded_file.pdf")
    file.save(file_path)
    return jsonify({"message": "File uploaded successfully"})

@app.route("/chat", methods=["POST"])
def chat():
    """Answer a question about the most recently uploaded document.

    Expects a JSON object with a non-empty string ``query``. Any
    ``file_path`` sent by the client is ignored: the document is always
    read from the fixed upload location, so a request cannot make the
    server open an arbitrary file.

    Returns:
        ``{"response": ...}`` on success, or ``{"error": ...}`` with HTTP 400
        when the body is not a JSON object, the query is missing, no
        document has been uploaded, or the document has no extractable text.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    query = payload.get("query")
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query provided"}), 400
    query = query.strip()

    # Security: never trust a path from the request body; use the server-side upload path.
    file_path = os.path.join(UPLOAD_FOLDER, "uploaded_file.pdf")
    if not os.path.isfile(file_path):
        return jsonify({"error": "No document uploaded yet"}), 400

    # Process the document and generate a response
    text = extract_text_from_pdf(file_path)
    text_chunks = split_text(text)
    # Without chunks there is nothing to embed, and building the index would fail.
    if not text_chunks:
        return jsonify({"error": "No extractable text found in the document"}), 400
    embeddings = create_embeddings(text_chunks)
    index = build_vector_store(embeddings)
    relevant_chunks = retrieve_relevant_chunks(query, index, text_chunks)
    response = generate_response(query, relevant_chunks)

    return jsonify({"response": response})

In [None]:
# Step 6: Run the Flask App
# TODO: add an installation cell and a heading for each code segment
# (this request was pasted after `app.run()` as plain text, which made the cell a syntax error).
if __name__ == "__main__":
    # Serve on port 5000, the port the ngrok tunnel is opened on.
    app.run(port=5000)