# Install Requirements

In [1]:
# Document parsers used by the extract_* helpers below (PDF, DOCX, PPTX).
# NOTE(review): tiktoken is not imported directly by this notebook — presumably the
# remotely loaded tokenizer code needs it; confirm before removing.
!pip install PyMuPDF python-docx python-pptx tiktoken
# NOTE(review): gradio and spaces are not imported by any cell visible here — confirm they are still required.
!pip install gradio spaces
# Pinned release; the captured output below shows it replacing a pre-installed 0.45.0.
!pip install bitsandbytes==0.43.2
# Installed from the repository's default branch, so the resolved commit can differ between runs.
!pip install git+https://github.com/huggingface/accelerate.git
!pip install transformers==4.40.2
# Provides the ngrok tunnel opened in the final cell.
!pip install pyngrok
# !pip install -U transformers/

Collecting bitsandbytes==0.43.2
  Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
  Attempting uninstall: bitsandbytes
    Found existing installation: bitsandbytes 0.45.0
    Uninstalling bitsandbytes-0.45.0:
      Successfully uninstalled bitsandbytes-0.45.0
Successfully installed bitsandbytes-0.43.2
Collecting git+https://github.com/huggingface/accelerate.git
  Cloning https://github.com/huggingface/accelerate.git to /tmp/pip-req-build-zsj2dn87
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/accelerate.git /tmp/pip-req-build-zsj2dn87
  Resolved https://github.com/huggingface/accelerate.git to commit d6d3e03cd4868b1f69d4f334ddd341e4d414c342
  Installing build depende

# Import Libraries

In [2]:
import os
import tempfile
from threading import Thread

import docx
import pymupdf
import torch
from flask import Flask, request, jsonify
from PIL import Image
from pptx import Presentation
from pyngrok import ngrok
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig

# Load The Model

In [4]:
# Checkpoints this notebook knows about; the first entry is the one that gets loaded.
MODEL_LIST = [
    "nikravan/glm-4vq",
]
MODEL_ID = MODEL_LIST[0]

# Optional Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

In [None]:
# Keyword options forwarded to from_pretrained for the model weights.
# Two further options remain disabled: load_in_4bit=True and device_map="auto".
_MODEL_LOAD_OPTIONS = dict(
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,  # permits running model code shipped in the checkpoint repo
    force_download=False,
)

# Load the causal LM and its tokenizer from the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_MODEL_LOAD_OPTIONS)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Put the model in evaluation mode; as the cell's last expression the returned
# module is also what the notebook displays.
model.eval()

# Functions For File Handling

In [10]:
# File extractors
def extract_text(path):
    """Return the entire contents of the plain-text file at ``path``.

    The file is decoded as UTF-8 explicitly instead of with the interpreter's
    locale-dependent default, so the same upload yields the same text on every host.

    Args:
        path: Filesystem path of the text file.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()

def extract_pdf(path):
    """Return the text of every page of the PDF at ``path``, concatenated in page order.

    The document is opened in a ``with`` block so it is closed (and its file
    handle released) even if text extraction fails part-way through.

    Args:
        path: Filesystem path of the PDF file.

    Returns:
        The page texts joined without any separator.
    """
    with pymupdf.open(path) as doc:
        # Join once at the end instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)

def extract_docx(path):
    """Return the text of the Word document at ``path``.

    Each paragraph's text is taken in document order and the paragraphs are
    separated by a blank line.
    """
    document = docx.Document(path)
    return '\n\n'.join(para.text for para in document.paragraphs)

def extract_pptx(path):
    """Return the text found on the slides of the PowerPoint file at ``path``.

    Slides and shapes are visited in order; every shape that exposes a ``text``
    attribute contributes its text followed by a newline.
    """
    presentation = Presentation(path)
    lines = [
        shape.text + "\n"
        for slide in presentation.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(lines)

def mode_load(file):
    """Classify an uploaded file by its extension and load its content.

    The extension is compared case-insensitively. Document extractors in this
    notebook open a filesystem path, whereas ``chat`` passes the upload object
    taken from ``request.files``; such an object is therefore written to a
    temporary file first, and the temporary file is removed afterwards.

    Args:
        file: Upload object exposing ``filename``. If it also exposes
            ``save(path)`` it is spooled to a temporary file for document
            extraction; otherwise it is handed to the extractor unchanged.

    Returns:
        ``("doc", text)`` for pdf/txt/docx/pptx files,
        ``("image", image)`` with an RGB PIL image for png/jpg/jpeg/bmp/tiff/webp,
        ``(None, None)`` for anything else (including a missing filename).
    """
    doc_types = ("pdf", "txt", "docx", "pptx")
    image_types = ("png", "jpg", "jpeg", "bmp", "tiff", "webp")

    # Lower-case so "REPORT.PDF" is handled like "report.pdf"; tolerate a missing name.
    file_type = (file.filename or "").split(".")[-1].lower()

    if file_type in image_types:
        return "image", Image.open(file).convert('RGB')
    if file_type not in doc_types:
        return None, None

    extractor = {
        "pdf": extract_pdf,
        "docx": extract_docx,
        "pptx": extract_pptx,
        "txt": extract_text,
    }[file_type]

    if not hasattr(file, "save"):
        # Not an upload object: pass it straight through to the extractor.
        return "doc", extractor(file)

    # Spool the upload to disk because the extractors expect a path.
    fd, tmp_path = tempfile.mkstemp(suffix="." + file_type)
    os.close(fd)
    try:
        file.save(tmp_path)
        return "doc", extractor(tmp_path)
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not mask the real result/error.
            pass

def generate_response(message, conversation_history, temperature, max_length, top_p, top_k, penalty):
    """Generate the model's reply to ``message`` and return it as one string.

    The prompt is a single user turn built with the tokenizer's chat template.
    Generation runs in a worker thread and its output is collected through a
    ``TextIteratorStreamer``.

    Args:
        message: User prompt text.
        conversation_history: Accepted for interface compatibility; it is not
            used when building the prompt.
        temperature: Sampling temperature. A value of 0 (or below) selects
            greedy decoding, since sampling requires a strictly positive temperature.
        max_length: Passed to ``generate`` as ``max_length``.
        top_p: Nucleus-sampling threshold (only used when sampling).
        top_k: Top-k cutoff (only used when sampling).
        penalty: Passed to ``generate`` as ``repetition_penalty``.

    Returns:
        The generated text with the prompt and special tokens stripped.

    Raises:
        Exception: Any exception raised by ``model.generate`` in the worker
            thread is re-raised here instead of being lost.
    """
    conversation = [{"role": "user", "content": message}]
    model_inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        "max_length": max_length,
        "streamer": streamer,
        "repetition_penalty": penalty,
        # NOTE(review): hard-coded stop-token ids, presumably specific to this
        # checkpoint's vocabulary — confirm against the tokenizer.
        "eos_token_id": [151329, 151336, 151338],
    }
    if temperature > 0:
        generate_kwargs.update(do_sample=True, top_p=top_p, top_k=top_k, temperature=temperature)
    else:
        # Sampling with temperature 0 is rejected by generate; decode greedily instead.
        generate_kwargs["do_sample"] = False
    gen_kwargs = {**model_inputs, **generate_kwargs}

    worker_errors = []

    def _run_generation():
        # Grad mode is thread-local, so no_grad must be entered inside the worker.
        try:
            with torch.no_grad():
                model.generate(**gen_kwargs)
        except Exception as exc:
            worker_errors.append(exc)
            # Unblock the consumer loop immediately instead of waiting for the streamer timeout.
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    pieces = []
    for new_text in streamer:
        pieces.append(new_text)
    thread.join()

    if worker_errors:
        raise worker_errors[0]
    return "".join(pieces)

# Auth. For ngrok API Services

In [11]:
# Stores the ngrok authtoken in ngrok's configuration file. The quoted text is a
# placeholder: replace it with your own token before running, and do not commit the real value.
!ngrok config add-authtoken "Your Auth. Code From ngrock"

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


# Flask API

In [3]:
# Flask application object; the route handlers defined below register themselves on it.
app = Flask(__name__)

In [None]:
@app.route('/')
def index():
    """Serve the single-page chat UI.

    The page holds a settings form (sampling parameters and an optional file),
    a chat box and a message input. Its script posts the form to ``/chat/`` and
    renders the JSON ``response`` field. All chat text is inserted with
    ``textContent`` so user input and model output are never parsed as HTML,
    and request failures are shown in the chat instead of leaving the
    "typing" placeholder on screen.
    """
    return '''
    <!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Smarty Pants Bot</title>
    <style>
        body { font-family: Arial, sans-serif; background-color: #f0f4f8; margin: 0; padding: 0; display: flex; height: 100vh; }
        .container { display: flex; width: 100%; height: 100%; }
        .chat-container { flex: 3; display: flex; flex-direction: column; padding: 20px; background-color: #ffffff; box-shadow: 2px 0 10px rgba(0, 0, 0, 0.1); }
        .chat-title { font-size: 24px; color: #007bff; text-align: center; padding: 15px; margin-bottom: 10px; }
        .chat-box { flex-grow: 1; overflow-y: auto; padding: 10px; margin-bottom: 15px; }
        .message { display: flex; margin-bottom: 10px; }
        .message.user { justify-content: flex-end; }
        .message.ai { justify-content: flex-start; }
        .message.loading { justify-content: flex-start; font-style: italic; color: #888; }
        .bubble { max-width: 70%; padding: 10px; border-radius: 10px; font-size: 14px; white-space: pre-wrap; }
        .bubble.user { background-color: #e1ffc7; align-self: flex-end; }
        .bubble.ai { background-color: #e9e9e9; }
        .input-area { display: flex; }
        .input-area input[type="text"] { flex-grow: 1; padding: 10px; border-radius: 5px; border: 1px solid #ccc; }
        .input-area button { padding: 10px 20px; border: none; border-radius: 5px; background-color: #007bff; color: #fff; cursor: pointer; transition: background-color 0.3s; }
        .input-area button:hover { background-color: #0056b3; }
        .sidebar { flex: 1; padding: 20px; background-color: #f7f9fc; box-shadow: -2px 0 10px rgba(0, 0, 0, 0.1); }
        .sidebar h2 { font-size: 18px; color: #333; margin-bottom: 15px; }
        .sidebar .form-group { margin-bottom: 15px; }
        .sidebar label { font-size: 14px; color: #555; display: block; margin-bottom: 5px; }
        .sidebar input[type="number"], .sidebar input[type="file"] { width: 100%; padding: 8px; border: 1px solid #ccc; border-radius: 5px; font-size: 14px; }
        .action-buttons { display: flex; justify-content: space-between; margin-top: 10px; }
        .action-buttons button { padding: 8px 12px; font-size: 14px; cursor: pointer; border-radius: 5px; border: none; transition: background-color 0.3s; }
        .delete-btn { background-color: #ff4d4d; color: white; }
        .delete-btn:hover { background-color: #e60000; }
        .regenerate-btn { background-color: #ffcc00; color: white; }
        .regenerate-btn:hover { background-color: #cc9900; }
    </style>
</head>
<body>
    <div class="container">
        <div class="sidebar">
            <h2>AI Chat Settings</h2>
            <form id="chatForm" enctype="multipart/form-data">
                <div class="form-group">
                    <label for="temperature">Temperature (0.0 to 1.0):</label>
                    <input type="number" step="0.1" id="temperature" name="temperature" value="0.8">
                </div>
                <div class="form-group">
                    <label for="max_length">Max Length:</label>
                    <input type="number" id="max_length" name="max_length" value="4096">
                </div>
                <div class="form-group">
                    <label for="top_p">Top P:</label>
                    <input type="number" step="0.1" id="top_p" name="top_p" value="0.9">
                </div>
                <div class="form-group">
                    <label for="top_k">Top K:</label>
                    <input type="number" id="top_k" name="top_k" value="50">
                </div>
                <div class="form-group">
                    <label for="penalty">Penalty:</label>
                    <input type="number" step="0.1" id="penalty" name="penalty" value="1.0">
                </div>
                <div class="form-group">
                    <label for="file">Upload File (optional):</label>
                    <input type="file" id="file" name="file">
                </div>
            </form>
            <div class="action-buttons">
                <button class="delete-btn" id="deleteChat">Delete Chat</button>
                <button class="regenerate-btn" id="regenerateResponse">Regenerate</button>
            </div>
        </div>
        <div class="chat-container">
            <h1 class="chat-title">Smarty Pants Bot</h1>
            <div class="chat-box" id="chatBox"></div>
            <div class="input-area">
                <input type="text" id="message" name="message" placeholder="Type a message...">
                <button type="submit" form="chatForm">Send</button>
            </div>
        </div>
    </div>

    <script>
        let lastUserMessage = '';  // Variable to store the last user message

        // Append one chat row. The text goes in through textContent, so neither
        // user input nor model output is ever interpreted as HTML.
        function appendMessage(rowClass, bubbleClass, text) {
            const chatBox = document.getElementById('chatBox');
            const row = document.createElement('div');
            row.classList.add('message', rowClass);
            const bubble = document.createElement('div');
            bubble.classList.add('bubble', bubbleClass);
            bubble.textContent = text;
            row.appendChild(bubble);
            chatBox.appendChild(row);
            chatBox.scrollTop = chatBox.scrollHeight; // Auto-scroll to latest message
            return row;
        }

        // Send the form to the server and show the reply, or an error message
        // if the request fails, in place of the loading placeholder.
        async function requestReply(formData, loadingText) {
            const loadingMessage = appendMessage('loading', 'ai', loadingText);
            let replyText;
            try {
                const response = await fetch('/chat/', {
                    method: 'POST',
                    body: formData
                });
                if (!response.ok) {
                    throw new Error('Server returned ' + response.status);
                }
                const data = await response.json();
                if (typeof data.response !== 'string') {
                    throw new Error('Unexpected server reply');
                }
                replyText = data.response;
            } catch (error) {
                replyText = 'Error: ' + error.message;
            }
            loadingMessage.remove();
            appendMessage('ai', 'ai', replyText);
        }

        document.getElementById('chatForm').addEventListener('submit', async function(event) {
            event.preventDefault();
            const formData = new FormData(this);
            const messageInput = document.getElementById('message');
            const message = messageInput.value.trim();

            if (!message) return; // Ignore if no message entered

            // Store the message in lastUserMessage
            lastUserMessage = message;

            // Display user's message
            appendMessage('user', 'user', message);

            // The text input lives outside the form, so add its value explicitly
            formData.append("message", message);

            await requestReply(formData, 'Bot is typing...');

            messageInput.value = ''; // Clear input
        });

        document.getElementById('deleteChat').addEventListener('click', function() {
            document.getElementById('chatBox').innerHTML = ''; // Clear chat
            lastUserMessage = '';  // Reset last user message
        });

        document.getElementById('regenerateResponse').addEventListener('click', async function() {
            if (!lastUserMessage) return; // Ignore if there's no last user message

            const formData = new FormData(document.getElementById('chatForm'));
            formData.append("message", lastUserMessage);

            await requestReply(formData, 'Regenerating response...');
        });
    </script>
</body>
</html>

    '''

def _form_number(name, cast, default):
    """Read numeric form field ``name``; return ``default`` when it is missing or blank.

    Raises ``ValueError`` (from ``cast``) when the field holds non-numeric text.
    """
    raw = request.form.get(name)
    if raw is None or not raw.strip():
        return default
    return cast(raw)


@app.route("/chat/", methods=["POST"])
def chat():
    """Answer one chat message posted as form data.

    Form fields:
        message: Required prompt text.
        temperature, max_length, top_p, top_k, penalty: Optional generation
            settings; missing or blank values fall back to 0.8, 4096, 0.9, 50
            and 1.0 respectively.
        history: Optional repeated field, forwarded to ``generate_response``.
        file: Optional upload. Text extracted from a document is prepended to
            the message; an image or unsupported file does not change the prompt.

    Returns:
        JSON ``{"response": <text>}`` on success, or JSON ``{"error": <text>}``
        with HTTP status 400 when the message is missing or a setting is not numeric.
    """
    message = request.form.get("message")
    if not message or not message.strip():
        return jsonify({"error": "The 'message' field is required."}), 400

    try:
        temperature = _form_number("temperature", float, 0.8)
        max_length = _form_number("max_length", int, 4096)
        top_p = _form_number("top_p", float, 0.9)
        top_k = _form_number("top_k", int, 50)
        penalty = _form_number("penalty", float, 1.0)
    except ValueError:
        return jsonify({"error": "Generation settings must be numeric."}), 400

    history = request.form.getlist("history")

    file = request.files.get("file")
    if file:
        choice, content = mode_load(file)
        if choice == "doc":
            # Put the document text ahead of the user's question.
            message = content + "\n" + message

    response = generate_response(message, history, temperature, max_length, top_p, top_k, penalty)
    return jsonify({"response": response})

# Open an ngrok tunnel to the local Flask port, print its public address,
# then start the development server on that same port.
SERVER_PORT = 5000

public_url = ngrok.connect(SERVER_PORT)
print(f"Public URL: {public_url}")

app.run(port=SERVER_PORT)