In [None]:
pip install faiss-cpu sentence-transformers


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [3]:
from sentence_transformers import SentenceTransformer  # 📦 For generating sentence embeddings
import faiss                                            # 📦 Facebook AI Similarity Search (for fast nearest neighbor search)
import numpy as np                                      # 📦 Numerical operations
import pandas as pd                                     # 📦 For loading and working with datasets
from transformers import pipeline                       # 📦 (Optional in this script, but useful for future NLP tasks)

# ==========================
# 🔹 Load Hadith Dataset
# ==========================
df = pd.read_csv("//content//sahih_bukhari_updated.csv")  # 📂 Load Hadiths from a CSV file into a pandas DataFrame

# ==========================
# 🔹 Load Embedding Model
# ==========================
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# 🤖 Load a pre-trained sentence transformer model to convert Hadith text into vector embeddings

# ==========================
# 🔹 Generate Embeddings for Hadith Text
# ==========================
hadith_embeddings = model.encode(df["Text"].tolist())
# 🧠 Convert all Hadith texts into embeddings using the model (each Hadith becomes a vector)

# ==========================
# 🔹 Create & Save FAISS Index
# ==========================
index = faiss.IndexFlatL2(hadith_embeddings.shape[1])
# 🧮 Create a FAISS index using L2 (Euclidean) distance with the same dimension as the embeddings

index.add(np.array(hadith_embeddings, dtype=np.float32))
# ➕ Add the embeddings to the FAISS index

faiss.write_index(index, "hadith_index.faiss")
# 💾 Save the index to disk so it can be reused later for fast Hadith search

print("✅ Hadiths stored in FAISS successfully!")
# ✅ Success message for confirmation


✅ Hadiths stored in FAISS successfully!


**deployment**

In [None]:
!pip install faiss-cpu sentence-transformers transformers gradio huggingface_hub


Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd
from transformers import pipeline
from huggingface_hub import login


In [4]:
#!pip install faiss-cpu sentence-transformers transformers gradio huggingface_hub
# 📦 Uncomment above line to install required libraries if not already installed.

import gradio as gr  # 🖼️ For building web UI
from sentence_transformers import SentenceTransformer  # 🔤 For generating embeddings from text
import faiss  # 🔍 Facebook AI Similarity Search - used for fast vector similarity search
import numpy as np  # 🧮 Numerical operations
import pandas as pd  # 🐼 Data handling and manipulation
from transformers import pipeline  # 🤗 For using pre-trained models like LLMs and translators
from huggingface_hub import login  # 🔐 To authenticate with Hugging Face using an API token

# ==========================
# 🔹 Load Hadith Dataset
# ==========================
df = pd.read_csv("/content/sahih_bukhari_updated.csv")  # 📄 Load CSV containing Hadiths into a pandas DataFrame

# ==========================
# 🔹 Load Embedding Model
# ==========================
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# 🤖 Load a sentence-transformer model to convert Hadith text into vector embeddings

# ==========================
# 🔹 Load FAISS Index
# ==========================
index = faiss.read_index("/content/hadith_index.faiss")
# 📚 Load the precomputed FAISS index file which stores Hadith embeddings for fast search

# ==========================
# 🔹 Load LLM Model for Tafsir Generation
# ==========================
hf_token = "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"  # 🔑 Your Hugging Face access token
login(token=hf_token)  # 🔐 Log into Hugging Face to access private or gated models

llm = pipeline("text-generation", model="google/flan-t5-large", max_length=300)
# 🧠 Load a text-generation pipeline with Google FLAN-T5 model to generate Tafsir explanations

# ==========================
# 🔹 Load Translator Model (English → Urdu)
# ==========================
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
# 🌐 Load a translation model to translate Tafsir (from English to Urdu)

# ==========================
# 🔹 Hadith Search Function
# ==========================
def search_hadith(query):
    # 💬 Encode the user query into vector
    query_embedding = model.encode([query])

    # 🔍 Search top 3 most similar Hadiths using FAISS
    D, I = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    retrieved_hadiths = df.iloc[I[0]]  # 📜 Get top 3 Hadiths from the DataFrame based on indices

    hadith_results = []  # 📦 List to store results

    # 🔁 Loop over each Hadith retrieved
    for _, hadith in retrieved_hadiths.iterrows():
        volume = hadith["Volume"]  # 📚 Volume number of Hadith
        book = hadith["Book"]      # 📘 Book name or ID
        number = hadith["Number"]  # 🔢 Hadith number
        text = hadith["Text"]      # 📝 Actual Hadith text

        # 🧠 Generate Tafsir using LLM
        tafsir_prompt = f"Explain the following Hadith in detail with Tafsir:\n{text}"
        tafsir = llm(tafsir_prompt)[0]["generated_text"]

        # 🌐 Translate Tafsir from English to Urdu
        tafsir_urdu = translator(tafsir)[0]["translation_text"]

        # 🔤 Convert Urdu Tafsir to Roman Urdu
        roman_urdu_prompt = f"Convert the following Urdu text to Roman Urdu:\n{tafsir_urdu}"
        tafsir_roman = llm(roman_urdu_prompt)[0]["generated_text"]

        # 📦 Format result and add to list
        hadith_results.append(
            f"📜 **Volume {volume}, Book {book}, Number {number}**\n📖 {text}\n\n"
            f"🕌 Tafsir in Urdu: {tafsir_urdu}\n"
            f"🔤 Tafsir in Roman Urdu: {tafsir_roman}\n"
        )

    # 🔚 Join all Hadiths and return
    return "\n\n".join(hadith_results)


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cuda:0
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'G

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/816k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/848k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Device set to use cuda:0


In [7]:
import gradio as gr  # 🖼️ Gradio is used for creating interactive UI in Python

# 🔹 Define a Gradio app using Blocks layout with custom CSS for styling
with gr.Blocks(css="""
    body { background: linear-gradient(to bottom, #f3e4c8, #c8a36e); font-family: 'Segoe UI', sans-serif; }

    .gradio-container {
        max-width: 900px;                /* 🧱 Set the width of the app container */
        margin: 0 auto;                  /* 🎯 Center the container */
        background-color: #fffaf0;       /* 🏠 Light background inside box */
        border-radius: 12px;             /* 🎨 Rounded corners */
        box-shadow: 0 0 15px rgba(0,0,0,0.2);  /* 🌫️ Soft shadow effect */
        padding: 20px;                   /* 📏 Inner spacing */
    }

    h1, h2, h3, h4 {
        font-family: 'Georgia', serif;   /* ✒️ Classy font for headings */
        color: #4b2e1e;                  /* 🎨 Dark brown text color */
    }

    input, textarea {
        border: 2px solid #a87b4f;       /* 🟫 Border styling */
        border-radius: 8px;              /* 🎨 Rounded corners */
        padding: 10px;                   /* 🧻 Inner padding */
        background-color: #fff8e7;       /* 🍦 Creamy input background */
        font-size: 16px;
        color: #3e3e3e;                  /* 🖤 Text color */
    }

    button {
        background-color: #4b944b;       /* 🟩 Green button */
        color: white;
        font-size: 16px;
        padding: 10px 15px;
        border-radius: 10px;
        border: none;
    }

    button:hover {
        background-color: #3c7e3c;       /* 🟩 Darker green on hover */
        cursor: pointer;                 /* 🖱️ Pointer cursor on hover */
    }

    .header-box {
        text-align: center;
        background: linear-gradient(to right, #d9a673, #b18152);  /* 🎨 Header gradient */
        padding: 15px;
        border-radius: 10px;
        margin-bottom: 20px;
        color: white;
        box-shadow: 0 0 10px #b18152;     /* ✨ Header shadow */
    }

    .output-box {
        background-color: #fff5e6;       /* ☁️ Light box for results */
        border: 2px solid #e0c49a;       /* 🟫 Light brown border */
        padding: 20px;
        border-radius: 10px;
        color: #4b2e1e;                  /* 🧾 Text color for output */
    }

""") as app:

    # 🔸 Header Section (Title + Description)
    gr.HTML("""
        <div class='header-box'>
            <h1>📖 Hadith Chatbot (Sahih Bukhari)</h1>
            <p style='font-size: 18px;'>🔹 Search Hadith and Get Tafsir in Urdu & Roman Urdu</p>
        </div>
    """)

    # 🔸 Input Textbox
    query_input = gr.Textbox(
        label="Enter Your Query",
        placeholder="E.g., Hadith about honesty..."  # 🔍 Hint for user
    )

    # 🔸 Submit Button
    submit_btn = gr.Button("🔍 Search Hadith")  # 🖱️ Button to trigger search

    # 🔸 Output Section (Markdown format)
    output_text = gr.Markdown("### 📜 Results will appear here", elem_classes="output-box")

    # 🔸 Define interaction: When button is clicked, call `search_hadith` with input and show output
    submit_btn.click(fn=search_hadith, inputs=query_input, outputs=output_text)

# 🚀 Launch the Gradio app with public sharing enabled
app.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5684aa2bee1173e840.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


