In [1]:
!pip install streamlit pyngrok sentence-transformers faiss-cpu transformers langdetect

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m111.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   

In [4]:
!pip install deep-translator langchain-community

Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.30-py3-none-any.whl.metadata (3.0 kB)
Collecting requests<3.0.0,>=2.23.0 (from deep-translator)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading 

------------**********------------------

# FLAN-T5-base: Streamlit RAG chatbot app (written to `streamlitapp3.py`)

In [10]:
%%writefile streamlitapp3.py
import streamlit as st
import re
import base64
import time
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langdetect import detect
from deep_translator import GoogleTranslator
import torch

# -------------------------
# Helpers
# -------------------------
def clean_text(text):
    """Normalize raw document text before it is chunked and embedded.

    Removes square-bracketed spans, parenthesized spans, and every character
    that is not a word character, whitespace, or one of ``. , ? ! ; : ' " -``,
    then collapses whitespace so the result never contains consecutive spaces.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text, stripped of leading and trailing whitespace.
    """
    # Collapse first so a bracket/parenthesis pair split across lines sits on
    # one line; the `.` in the parenthesis pattern does not match newlines.
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\[[^\]]*\]', '', text)
    text = re.sub(r'\(.*?\)', '', text)
    text = re.sub(r'[^\w\s.,?!;:\'\"-]', '', text, flags=re.UNICODE)
    # Collapse again: each removal above can leave two spaces side by side
    # (e.g. "Hello [1] world" -> "Hello  world").
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def detect_language(text, user_selected_lang):
    """Resolve the language code to use for a user message.

    Args:
        text: The message whose language may need to be detected.
        user_selected_lang: Language code chosen in the UI, or ``"auto"`` to
            request detection via ``langdetect.detect``.

    Returns:
        ``user_selected_lang`` unchanged unless it is ``"auto"``; otherwise the
        detected code, falling back to ``"en"`` if detection raises.
    """
    # An explicit choice always wins; detection is only attempted for "auto".
    if user_selected_lang != "auto":
        return user_selected_lang

    try:
        detected = detect(text)
    except Exception:
        # Best effort: fall back to English when detection raises.
        detected = "en"
    return detected

# Language codes used elsewhere in this app that differ from the codes
# GoogleTranslator accepts. The UI offers "zh" and langdetect is documented to
# report "zh-cn"/"zh-tw", while deep-translator lists Chinese as
# "zh-CN"/"zh-TW" -- confirm against the installed library versions.
_TRANSLATOR_CODE_ALIASES = {
    "zh": "zh-CN",
    "zh-cn": "zh-CN",
    "zh-tw": "zh-TW",
}


def _to_translator_code(lang_code):
    """Map an app/langdetect language code to the code passed to GoogleTranslator."""
    if not isinstance(lang_code, str):
        return lang_code
    return _TRANSLATOR_CODE_ALIASES.get(lang_code.lower(), lang_code)


def translate_if_needed(text, src_lang, tgt_lang="en"):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` when they differ.

    Translation is best effort: if the translator raises (unsupported
    language, network failure, ...) or returns nothing, the original text is
    returned so the chat keeps working.

    Args:
        text: Text to translate. Falsy values (``""``, ``None``) are returned
            unchanged.
        src_lang: Source language code, or ``"auto"`` to let the translator
            detect it.
        tgt_lang: Target language code. Defaults to ``"en"``.

    Returns:
        The translated text, or ``text`` unchanged when no translation is
        needed or translation fails.
    """
    if not text:
        return text

    source = _to_translator_code(src_lang)
    target = _to_translator_code(tgt_lang)
    # Compare the normalized codes so "zh" vs "zh-cn" is not treated as a
    # translation between two different languages.
    if source == target:
        return text

    try:
        translated = GoogleTranslator(source=source, target=target).translate(text)
    except Exception:
        return text
    # Guard against an empty/None result so callers never render "None".
    return translated if translated else text

# -------------------------
# Caches
# -------------------------
@st.cache_resource(show_spinner=False)
def setup_vectorstore():
    """Build the FAISS vector store over the knowledge-base text file.

    Loads ``guvi_txt1.txt``, cleans each document with ``clean_text``, splits
    the result into 200-character chunks with a 50-character overlap, embeds
    the chunks with a multilingual MiniLM sentence-transformer, and indexes
    them in FAISS. Decorated with ``st.cache_resource``.

    Returns:
        The FAISS vector store built from the chunks.
    """
    documents = TextLoader("guvi_txt1.txt", encoding="utf-8").load()
    # Clean in place before splitting so chunk boundaries are computed on the
    # normalized text.
    for document in documents:
        document.page_content = clean_text(document.page_content)

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=200,
        chunk_overlap=50,
    ).split_documents(documents)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return FAISS.from_documents(chunks, embedder)

@st.cache_resource(show_spinner=False)
def setup_llm():
    """Create the FLAN-T5-base text2text pipeline wrapped for LangChain.

    The pipeline runs on GPU 0 when CUDA is available and on CPU otherwise,
    and samples with temperature 0.6 and ``max_length=550``. Decorated with
    ``st.cache_resource``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the generation pipeline.
    """
    checkpoint = "google/flan-t5-base"
    # transformers convention: device index 0 = first GPU, -1 = CPU.
    device_index = 0 if torch.cuda.is_available() else -1

    generator = pipeline(
        "text2text-generation",
        model=AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        device=device_index,
        max_length=550,
        do_sample=True,
        temperature=0.6,
    )
    return HuggingFacePipeline(pipeline=generator)

# Build the shared resources once per rerun (both factories are decorated
# with st.cache_resource), then wire them into the retrieval chain.
vectorstore = setup_vectorstore()
llm = setup_llm()

# Retrieve the 8 most similar chunks for each question.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=8))

# Prompt filled with the retrieved context and the (English) question.
prompt_template = PromptTemplate(
    template=(
        "You are a strict Q&A assistant. Extract the exact text from the context that answers the question. "
        "Do NOT paraphrase, add, infer, or assume anything. "
        "If the answer is not in the context, respond with 'I don’t know'.\n\n"
        "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    ),
    input_variables=["context", "question"],
)

# "stuff" chain type with the custom prompt; source documents are returned
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template},
)

# -------------------------
# Chat UI Styling
# -------------------------
# Inject the page-wide stylesheet. The CSS classes defined here (chat-row,
# chat-bubble, user-bubble, bot-bubble, chat-avatar, thinking-dots) are the
# ones referenced by the HTML that display_chat() and bot_thinking() emit.
# unsafe_allow_html=True is passed so the <style> tag is rendered as HTML.
st.markdown("""
    <style>
    .stApp {
        background: linear-gradient(to right, #f0faff, #ffffff);
        font-family: 'Segoe UI', sans-serif;
    }
    .chat-bubble {
        padding: 12px 18px;
        margin: 8px 0;
        border-radius: 18px;
        max-width: 75%;
        font-size: 16px;
        line-height: 1.5;
        word-wrap: break-word;
        display: inline-block;
    }
    .user-bubble {
        background: #e1f5fe;
        color: #000;
        border-bottom-right-radius: 4px;
        float: right;
        clear: both;
    }
    .bot-bubble {
        background: #f1f1f1;
        color: #000;
        border-bottom-left-radius: 4px;
        float: left;
        clear: both;
    }
    .chat-row {
        display: flex;
        align-items: flex-end;
        margin-bottom: 10px;
        width: 100%;
        clear: both;
    }
    .chat-avatar {
        width: 40px;
        height: 40px;
        border-radius: 50%;
        margin: 0 10px;
    }
    .thinking-dots {
        display: inline-block;
        width: 6px;
        height: 6px;
        margin: 0 2px;
        background-color: #555;
        border-radius: 50%;
        animation: blink 1.4s infinite both;
    }
    .thinking-dots:nth-child(2) {
        animation-delay: 0.2s;
    }
    .thinking-dots:nth-child(3) {
        animation-delay: 0.4s;
    }
    @keyframes blink {
        0%, 80%, 100% { opacity: 0; }
        40% { opacity: 1; }
    }
    /* Chat input bar */
    .chat-input-container {
        display: flex;
        align-items: center;
        justify-content: space-between;
        border: 1px solid #ddd;
        border-radius: 25px;
        padding: 5px 10px;
        margin-top: 20px;
        background: #fff;
    }
    .chat-input-container input {
        border: none;
        outline: none;
        flex-grow: 1;
        font-size: 16px;
        padding: 10px;
        border-radius: 20px;
    }
    .chat-input-container button {
        background: #1a73e8;
        color: white;
        border: none;
        padding: 10px 18px;
        border-radius: 20px;
        cursor: pointer;
        font-weight: bold;
    }
    </style>
""", unsafe_allow_html=True)

# Remote avatar images shown next to chat bubbles; used as <img src=...> in
# display_chat() and bot_thinking().
USER_AVATAR = "https://cdn-icons-png.flaticon.com/512/847/847969.png"
BOT_AVATAR = "https://cdn-icons-png.flaticon.com/512/4712/4712109.png"

def display_chat():
    """Render every message in ``st.session_state.history`` as a chat bubble.

    Each history entry is a ``(speaker, message, lang)`` tuple. Entries whose
    speaker is ``"You"`` are drawn right-aligned with the user avatar; all
    others are drawn left-aligned with the bot avatar.

    Message text is HTML-escaped before interpolation: it originates from
    user input and model/translator output, and the markup is rendered with
    ``unsafe_allow_html=True``, so unescaped text could inject arbitrary HTML
    into the page.
    """
    import html  # stdlib; imported locally because only this function needs it

    for speaker, message, _lang in st.session_state.history:
        # Escape first, then turn newlines into <br> so a multi-line message
        # cannot break up the surrounding HTML snippet.
        safe_message = html.escape(str(message)).replace("\n", "<br>")
        if speaker == "You":
            st.markdown(
                f"""
                <div class="chat-row" style="justify-content: flex-end;">
                    <div class="chat-bubble user-bubble">{safe_message}</div>
                    <img src="{USER_AVATAR}" class="chat-avatar"/>
                </div>
                """,
                unsafe_allow_html=True,
            )
        else:
            st.markdown(
                f"""
                <div class="chat-row">
                    <img src="{BOT_AVATAR}" class="chat-avatar"/>
                    <div class="chat-bubble bot-bubble">{safe_message}</div>
                </div>
                """,
                unsafe_allow_html=True,
            )

def bot_thinking():
    """Show an animated "thinking" bubble for the bot.

    Returns:
        The ``st.empty()`` placeholder holding the bubble; call ``.empty()``
        on it to remove the animation.
    """
    slot = st.empty()
    slot.markdown(
        f"""
        <div class="chat-row">
            <img src="{BOT_AVATAR}" class="chat-avatar"/>
            <div class="chat-bubble bot-bubble">
                <span class="thinking-dots"></span>
                <span class="thinking-dots"></span>
                <span class="thinking-dots"></span>
            </div>
        </div>
    """,
        unsafe_allow_html=True,
    )
    return slot

# -------------------------
# Streamlit UI
# -------------------------
# Load the header logo and inline it as a base64 data URI. The path is
# specific to the Colab runtime; if the file is missing or unreadable the
# header is rendered without the logo instead of crashing the whole app.
try:
    with open("/content/guv.png", "rb") as f:
        image_bytes = f.read()
except OSError:
    image_bytes = b""
encoded = base64.b64encode(image_bytes).decode()

# Emit the <img> line only when there is image data. It is spliced in as a
# complete line (including its newline) so the no-logo variant does not leave
# a blank line in the middle of the HTML snippet.
logo_line = (
    f'        <img src="data:image/png;base64,{encoded}" style="width:120px; height:120px; margin-right:15px;"/>\n'
    if encoded
    else ""
)

st.markdown(f"""
     <div style="display: flex; align-items: center; margin-bottom: 10px;">
{logo_line}        <h2 style="margin: 0; font-family: 'Segoe UI', sans-serif;">GUVI Multilingual Chatbot</h2>
    </div>
""", unsafe_allow_html=True)


# Display name -> language code offered in the selector. "auto" asks
# detect_language() to detect the language of each message.
LANG_CHOICES = {
    "Auto Detect": "auto",
    "English": "en",
    "Tamil": "ta",
    "Hindi": "hi",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Chinese": "zh",
}

# The first entry ("Auto Detect") is preselected.
selected_lang_name = st.selectbox("Select language:", list(LANG_CHOICES), index=0)
selected_lang_code = LANG_CHOICES[selected_lang_name]

# Chat history survives Streamlit reruns; entries are (speaker, message, lang).
if "history" not in st.session_state:
    st.session_state["history"] = []

# -------------------------
# Query Processing
# -------------------------
def process_query(q):
    """Answer one user message and append the exchange to the chat history.

    The message is resolved to a language (explicit UI choice or detection),
    translated to English, answered by ``qa_chain``, and the answer is
    translated back to the user's language. A "thinking" animation is shown
    while the chain runs.

    Args:
        q: The raw user message. ``None``, empty, or whitespace-only input is
            ignored.

    Returns:
        None. Results are stored in ``st.session_state.history`` as
        ``("You", q, lang)`` followed by ``("Bot", answer, lang)``.
    """
    if not q or not q.strip():
        return

    user_lang_detected = detect_language(q, selected_lang_code)
    query_in_english = translate_if_needed(q, user_lang_detected, "en")

    # Show bot thinking animation
    thinking_placeholder = bot_thinking()
    try:
        # Get bot answer
        result = qa_chain({"query": query_in_english})
        answer_en = result.get("result", "I don’t know")
        answer_final = translate_if_needed(answer_en, "en", user_lang_detected)
    finally:
        # Always clear the thinking dots, even if the chain or translation
        # raises; otherwise the animation stays on screen next to the error.
        thinking_placeholder.empty()

    # Append Q&A to history.
    # NOTE(review): history is appended in pairs, so history[-1] is the last
    # *bot* message here; this guard therefore skips the append only when the
    # previous bot answer equals the new question. If the intent was to
    # de-duplicate repeated questions, it should look at history[-2] --
    # left unchanged pending confirmation of the intended behavior.
    if not st.session_state.history or st.session_state.history[-1][1] != q:
        st.session_state.history.append(("You", q, user_lang_detected))
        st.session_state.history.append(("Bot", answer_final, user_lang_detected))

def clear_all():
    """Reset the chat by replacing the stored history with an empty list."""
    st.session_state["history"] = []

# -------------------------
# Render Chat + Input Bar
# -------------------------


# Chat input bar (custom form)
# Input bar: a form whose text box is cleared after each submission.
with st.form(key="chat_form", clear_on_submit=True):
    input_col, button_col = st.columns([8, 1])
    user_input = input_col.text_input("Type your message...", label_visibility="collapsed")
    send_btn = button_col.form_submit_button("➤")
    # Only run the pipeline for a real submission with non-empty text.
    if send_btn and user_input:
        process_query(user_input)

# Conversation so far (including any exchange just added above).
display_chat()

st.button("Clear Chat", on_click=clear_all)

# In auto-detect mode, report the language recorded for the latest user message.
if selected_lang_name == "Auto Detect" and st.session_state.history:
    user_langs = [entry[2] for entry in st.session_state.history if entry[0] == "You"]
    last_user_lang = user_langs[-1]
    st.info(f"🌐 Detected language: {last_user_lang.upper()}")


Overwriting streamlitapp3.py


In [11]:
import getpass
import os
import subprocess

from pyngrok import ngrok

STREAMLIT_PORT = 8501

# Kill any existing tunnels
ngrok.kill()

# SECURITY: never hard-code the ngrok authtoken in the notebook. The token that
# was previously committed in this cell must be treated as leaked and revoked
# in the ngrok dashboard. Read it from the environment, or prompt for it so it
# never appears in the saved notebook.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

# Start Streamlit in the background. An argument list avoids going through a
# shell, and the handle lets a later cell stop the server with
# streamlit_proc.terminate().
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", "streamlitapp3.py", "--server.port", str(STREAMLIT_PORT)]
)

# Open a tunnel to the Streamlit port
public_url = ngrok.connect(STREAMLIT_PORT)
print("👉 Your app is live here:", public_url)

👉 Your app is live here: NgrokTunnel: "https://9150e85e5338.ngrok-free.app" -> "http://localhost:8501"
