In [None]:
%%writefile app.py 
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import json
import streamlit as st

# Logging
import logging
# Only let ERROR-and-above records from the "pdfminer" logger through.
# NOTE(review): pdfminer is not imported in this file; presumably a dependency
# emits these records -- confirm the line is still needed.
logging.getLogger("pdfminer").setLevel(logging.ERROR)

# Load sentence transformer model.
# This module-level name is what search_faiss() uses to embed queries.
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

# Load FAISS index and data.
# paragraph_texts and paragraph_sources are indexed with the labels returned
# by paragraph_index.search(), so all three files must describe the same
# paragraphs in the same order.
paragraph_index = faiss.read_index("paragraph.index")

# Context managers close the files as soon as they are parsed instead of
# leaving the handles open until garbage collection; JSON text is read as
# UTF-8 rather than whatever the platform's locale default happens to be.
with open("all_paragraphs.json", encoding="utf-8") as paragraphs_file:
    paragraph_texts = json.load(paragraphs_file)
with open("paragraph_sources.json", encoding="utf-8") as sources_file:
    paragraph_sources = json.load(sources_file)

# Search function using FAISS for efficient retrieval of relevant paragraphs
def search_faiss(query, k=5):
    """Return up to ``k`` paragraphs nearest to ``query`` in the FAISS index.

    Args:
        query: Text to embed with the module-level sentence-transformer
            ``model`` and look up in ``paragraph_index``.
        k: Number of neighbours to request from the index.

    Returns:
        A list of ``(paragraph_text, paragraph_source, distance)`` tuples in
        the order FAISS returned them. The list holds fewer than ``k``
        entries when the index cannot supply ``k`` neighbours.
    """
    query_vec = model.encode([query]).astype("float32")
    distances, indices = paragraph_index.search(query_vec, k)

    # FAISS fills missing neighbours with label -1. Without this guard a -1
    # would be used as a Python index and silently return the *last*
    # paragraph as if it were a real hit.
    return [
        (paragraph_texts[idx], paragraph_sources[idx], dist)
        for idx, dist in zip(indices[0], distances[0])
        if idx >= 0
    ]

# Load BitNet model for generation
model_id = "microsoft/bitnet-b1.58-2B-4T"


@st.cache_resource
def _load_bitnet():
    """Load the BitNet tokenizer and model, cached across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction, so plain
    module-level state does not survive between questions.
    ``st.cache_resource`` keeps the loaded objects alive in the server
    process, so the weights are read once instead of once per question.

    Returns:
        A ``(tokenizer, llm)`` tuple loaded from ``model_id``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    llm = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
    return tokenizer, llm


def ask_bitnet(query, max_new_tokens=50):
    """Answer ``query`` with BitNet, grounded in paragraphs found via FAISS.

    Args:
        query: The user's question.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded model output with the prompt tokens stripped off.
    """
    # Named `llm` (not `model`) so it cannot be confused with the module-level
    # sentence-transformer `model` that search_faiss() relies on.
    tokenizer, llm = _load_bitnet()

    # Search for relevant context using FAISS
    results = search_faiss(query)
    top_paragraphs = "\n\n".join(text for text, _, _ in results[:5])

    # Format the message to send to BitNet.
    # NOTE(review): instructions, context and question all travel in a single
    # "system" message with no "user" turn; consider moving the question into
    # a user message if answer quality is poor.
    messages = [
        {"role": "system", "content": f"""You are a helpful AI assistant. Use only the provided context to answer the question clearly and concisely.
        <<<
        context: {top_paragraphs}
        >>>

        <<<
        question: {query}
        >>>"""}
    ]

    # Render the chat template to text, then tokenize it onto the model's device
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    chat_input = tokenizer(prompt, return_tensors="pt").to(llm.device)

    # Generate response using BitNet
    chat_outputs = llm.generate(**chat_input, max_new_tokens=max_new_tokens)

    # The output sequence starts with the prompt tokens; decode only what
    # follows them.
    prompt_length = chat_input["input_ids"].shape[-1]
    return tokenizer.decode(chat_outputs[0][prompt_length:], skip_special_tokens=True)


# Streamlit UI: Chat Interface
st.title("AI Assistant for Technical Questions")

# Display instructions
st.write("Ask your technical question below, and the system will provide an answer based on relevant documents.")

# User input: Question from the user
user_input = st.text_input("Your question:", "")

# Button to trigger response
if st.button("Ask"):
    # A whitespace-only question is treated the same as an empty one.
    if user_input.strip():
        with st.spinner("Fetching answer..."):
            answer = ask_bitnet(user_input)
            st.subheader("Answer:")
            st.write(answer)
    else:
        st.error("Please enter a question to get an answer.")


In [None]:
# Launch the Streamlit app written to app.py. The command is not backgrounded,
# so this cell keeps running for as long as the server is up.
!streamlit run app.py

In [None]:
# Install the localtunnel npm package that the tunnel cell invokes through npx.
!npm install localtunnel

In [None]:
# Three commands joined with `&`:
#   1. start the Streamlit app in the background, with its output redirected
#      to /content/logs.txt;
#   2. start localtunnel for port 8501 in the background (presumably the port
#      the Streamlit server listens on -- confirm);
#   3. print this machine's public IPv4 address as reported by
#      ipv4.icanhazip.com.
!streamlit run app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com