In [4]:
import os
import json
import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# === Paths ===
emotion_dir = "emotions"
ibadah_dir = "ibadah"

docs = []
metadata = []

# === Handle Emotions ===
for filename in os.listdir(emotion_dir):
    if filename.endswith(".json"):
        with open(os.path.join(emotion_dir, filename), "r", encoding="utf-8") as f:
            data = json.load(f)
            emotion = data.get("emotion", "unknown")

            for item in data.get("ayahs", {}).get("primary", []):
                docs.append(f"Ayah ({item['reference']}): {item['text_en']}")
                metadata.append({ "type": "ayah", "category": "emotion", "tag": emotion })

            for item in data.get("hadiths", {}).get("primary", []):
                docs.append(f"Hadith: {item['text_en']}")
                metadata.append({ "type": "hadith", "category": "emotion", "tag": emotion })

            for item in data.get("duas", []):
                docs.append(f"Dua: {item['text_en']}")
                metadata.append({ "type": "dua", "category": "emotion", "tag": emotion })

# === Handle Ibadah ===
for filename in os.listdir(ibadah_dir):
    if filename.endswith(".json"):
        with open(os.path.join(ibadah_dir, filename), "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                print(f"⚠️ Skipping invalid or empty file: {filename}")
                continue

            topic = data.get("topic", "unknown")

            for key, value in data.items():
                if isinstance(value, list):
                    for item in value:
                        if isinstance(item, dict):
                            text = item.get("text") or item.get("text_en")
                            if text:
                                docs.append(f"{key.capitalize()}: {text}")
                                metadata.append({ "type": key, "category": "ibadah", "topic": topic })
                        elif isinstance(item, str):
                            docs.append(f"{key.capitalize()}: {item}")
                            metadata.append({ "type": key, "category": "ibadah", "topic": topic })

                elif isinstance(value, dict):
                    for subkey, subvalue in value.items():
                        if isinstance(subvalue, list):
                            for subitem in subvalue:
                                if isinstance(subitem, dict):
                                    text = subitem.get("text") or subitem.get("text_en")
                                    if text:
                                        docs.append(f"{key.capitalize()} - {subkey}: {text}")
                                        metadata.append({ "type": key, "category": "ibadah", "topic": topic })
                                elif isinstance(subitem, str):
                                    docs.append(f"{key.capitalize()} - {subkey}: {subitem}")
                                    metadata.append({ "type": key, "category": "ibadah", "topic": topic })


# === Encode and Index ===
print(f"Encoding {len(docs)} total docs...")
embeddings = model.encode(docs, show_progress_bar=True)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))

# === Save Index + Metadata ===
with open("fiass_indexer.pkl", "wb") as f:
    pickle.dump({
        "index": index,
        "metadata": metadata,
        "docs": docs
    }, f)

print("Indexing completed.")


Encoding 474 total docs...


Batches:   0%|          | 0/15 [00:00<?, ?it/s]

Indexing completed.


In [5]:
from transformers import pipeline

# Initialize emotion detection classifier
classifier = pipeline("text-classification", model="nateraw/bert-base-uncased-emotion")

def detect_emotion(text):
    """
    Detect the primary emotion in a text.
    Returns a tuple: (label, score)
    """
    try:
        result = classifier(text)[0]
        label = result['label']  # e.g., 'sadness'
        score = round(result['score'] * 100, 2)  # e.g., 97.2
        return label, score
    except Exception as e:
        return "unknown", 0.0, f"Emotion detection error: {str(e)}"

Device set to use cpu


In [10]:
import pickle
import numpy as np
import re
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# --- Load FAISS Index ---
with open("fiass_indexer.pkl", "rb") as f:
    data = pickle.load(f)

index = data["index"]
metadata = data["metadata"]
docs = data["docs"]

# --- Embedding Model ---
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# --- LLM Setup (DeepSeek via OpenRouter) ---
llm = ChatOpenAI(
    model="deepseek/deepseek-r1-0528:free",
    openai_api_key="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",
    openai_api_base="https://openrouter.ai/api/v1"
)

# --- Format Retrieved Context ---
def format_context(docs, metadata, indices):
    context_blocks = []
    for idx in indices[0]:
        meta = metadata[idx]
        text = docs[idx]
        block_type = meta.get("type", "").lower()

        if block_type == "ayah":
            block = f"📖 Ayah ({meta.get('reference', 'unknown')}):\n{text}"
        elif block_type == "hadith":
            block = f"🗣 Hadith ({meta.get('source', 'unknown')}):\n{text}"
        elif block_type == "dua":
            block = f"🤲 Dua:\n{text}"
        else:
            block = f"{text}"

        context_blocks.append(block)
    return "\n\n".join(context_blocks)

# --- Classify Query Intent ---
def classify_query(user_input: str) -> str:
    query = user_input.lower()
    if any(w in query for w in ["sad", "depressed", "alone", "hopeless", "anxious", "scared", "heart", "crying"]):
        return "emotional"
    elif any(w in query for w in ["how to pray", "how to fast", "explain wudu", "zakat", "hajj", "umrah", "ibadah", "tahajjud"]):
        return "ibadah"
    elif any(w in query for w in ["fiqh", "is it haram", "halal", "fatwa", "allowed in islam", "permissible"]):
        return "fiqh"
    elif any(w in query for w in ["tafsir", "ayah", "verse", "surah", "explain this ayah", "what does this mean in quran"]):
        return "tafsir"
    elif any(w in query for w in ["prophet", "story of", "life of", "nabi", "messenger", "who was"]):
        return "story"
    else:
        return "general"

# --- Main Deen Buddy Function ---
def deen_buddy(user_input: str, top_k=6):
    try:
        query_type = classify_query(user_input)
        emotion, confidence = "sadness", 0.95  # Replace with actual model if available

        # Embed and search FAISS index
        query_embedding = embedding_model.encode([user_input])
        distances, indices = index.search(np.array(query_embedding), top_k)
        formatted_context = format_context(docs, metadata, indices)

        # === Prompts ===
        if query_type == "emotional":
            prompt = f"""
You are Deen Buddy, a compassionate and wise Islamic friend.

The user is feeling emotionally low (detected: {emotion.upper()}, confidence {confidence:.2f}).

Here is some Islamic guidance for your reference:
{formatted_context}

Now, speak like a close friend — warm, heartfelt, and understanding. Comfort them using beautiful reminders from Qur'an and Hadith. Avoid bullet points or headings. Just speak with love and wisdom.
"""

        elif query_type == "ibadah":
            prompt = f"""
The user wants to learn about an Ibadah topic (e.g., prayer, fasting, tahajjud).

Use the following authentic Islamic material:
{formatted_context}

Respond like a friendly teacher helping someone new to the faith. Be warm, simple, and accurate. Include ayahs and hadiths as needed, but do not use bullet points. Just flow like you're having a natural conversation.
"""

        elif query_type == "fiqh":
            return "This seems like a fiqh-related question. It's best to consult a qualified Mufti or scholar, as fiqh can depend on specific madhabs and contexts. May Allah guide you!"

        elif query_type == "tafsir":
            prompt = f"""
The user asked for explanation of a Qur'anic ayah or surah.

Use the context below if it helps:
{formatted_context}

Explain the ayah clearly and spiritually, based on authentic tafsir. Include the Arabic and a good English translation. No bullets. Explain gently, with wisdom.
"""

        elif query_type == "story":
            prompt = f"""
The user wants to hear a story from the life of the Prophets or companions.

If the context below helps, you may use it:
{formatted_context}

Narrate the story like a loving friend — make it feel real, warm, and spiritually uplifting. Don't list facts. Just flow with emotion and wisdom, using authentic details.
"""

        else:  # General
            prompt = f"""
User asked: "{user_input}"

You are Deen Buddy — a kind and knowledgeable Islamic companion. Use wisdom and gentle speech.

If you can answer from Qur'an or Hadith, do so with references. If not, admit respectfully.

Here is some reference context:
{formatted_context}

Respond naturally, without listing — just a warm, conversational reply.
"""

        # Call LLM
        response = llm.invoke([HumanMessage(content=prompt)])
        return response.content.strip()

    except Exception as e:
        return f"⚠️ Error: {str(e)}"


In [13]:
query = input("Ask something: ")
response = deen_buddy(query)
print("\n💬 Response from Deen Buddy:\n")
print(response)

Ask something:  can you please make me understand the tafsir of verse 3 of surah baqarah?



💬 Response from Deen Buddy:

**Explanation of Sūrah Al-Baqarah (2:2):**

**Arabic:**  
ذَٰلِكَ الْكِتَابُ لَا رَيْبَ ۛ فِيهِ ۛ هُدًى لِّلْمُتَّقِينَ  

**Translation:**  
"This is the Book about which there is no doubt—a guidance for those conscious of Allah."  

---

This profound verse opens the Qur’ān’s second chapter by affirming the divine authenticity of the Book and its purpose as an unwavering source of light. Let us reflect on its layers of meaning:  

1. **"This is the Book about which there is no doubt"**  
   The phrase *"لَا رَيْبَ"* (no doubt) eliminates any ambiguity about the Qur’ān’s origin, truth, or perfection. It is a definitive statement from Allah, transcending human speculation. Scholars like Ibn Kathir emphasize that the Qur’ān’s clarity, consistency, and miraculous nature dispel uncertainty. It is a covenant from the Most Merciful, inviting hearts to submit with certainty.  

2. **"A guidance for the muttaqīn"**  
   The *muttaqīn* (those conscious of Allah) a