# Dataset: LK Hadith Corpus (find it by searching Google for "LK Hadith Corpus GitHub")

In [1]:
# Fetch the LK Hadith Corpus repository via a shell escape; this needs the `git` executable on PATH.
!git clone https://github.com/ShathaTm/LK-Hadith-Corpus.git

'git' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
import pandas as pd
import glob

In [3]:
# Uniform column names applied to every corpus CSV when it is read.
colnames = [
    # Chapter / section metadata
    'Chapter_Number',
    'Chapter_English',
    'Chapter_Arabic',
    'Section_Number',
    'Section_English',
    'Section_Arabic',
    # English text
    'Hadith_number',
    'English_Hadith',
    'English_Isnad',
    'English_Matn',
    # Arabic text
    'Arabic_Hadith',
    'Arabic_Isnad',
    'Arabic_Matn',
    'Arabic_Comment',
    # Grading
    'English_Grade',
    'Arabic_Grade',
]

In [4]:
# `git clone` creates the repository folder in the current working directory,
# so a relative path follows it wherever the notebook runs (the previous
# absolute '/content/...' path presumably only resolved on Colab).
path = 'LK-Hadith-Corpus'
# Recursively collect every CSV below the repository root, in a stable order.
files = sorted(glob.glob(f'{path}/**/*.csv', recursive=True))
# Fail fast: an empty list would otherwise silently produce an empty corpus
# and only surface much later as a confusing embedding/indexing error.
if not files:
  raise FileNotFoundError(
      f"No CSV files found under '{path}'. Did the `git clone` step succeed?")

In [5]:
import re

def clean_text(text):
  """Normalise a piece of text for embedding.

  Lower-cases the input, deletes every character that is not an ASCII letter,
  a digit or whitespace, collapses whitespace runs into a single space and
  trims the ends.

  Args:
    text: The raw text. ``None`` and float NaN (pandas' missing-value marker)
      are treated as empty; any other non-string value is converted with
      ``str()``.

  Returns:
    The cleaned string, possibly empty.
  """
  # Missing values carry no text; NaN is the only float that differs from itself.
  if text is None or (isinstance(text, float) and text != text):
    return ''
  text = str(text).lower()
  # After lower() there are no ASCII capitals left, so a-z is sufficient.
  # Note: punctuation is deleted, not replaced, so "twenty-five" -> "twentyfive".
  text = re.sub(r'[^a-z0-9\s]', '', text)  # removes punctuation
  text = re.sub(r'\s+', ' ', text)         # collapses runs of whitespace
  # Dropping punctuation at either end can leave a stray space behind.
  return text.strip()

In [6]:
# Accumulates one row (list of values) per hadith across all corpus files.
all_hadith = []
for file in files:
  # skiprows=1 discards the file's own first row; `colnames` supplies the
  # column names instead, so every frame has the same columns.
  df = pd.read_csv(file, names=colnames, skiprows=1)
  # Rows without English text would become the literal string "nan" after
  # astype(str) and end up embedded and searchable, so drop them first.
  df = df.dropna(subset=['English_Hadith'])
  # assign() returns a new frame, which avoids writing into a derived frame.
  df = df.assign(Clean_Hadith=df['English_Hadith'].astype(str).apply(clean_text))
  # Keep only the English-side columns plus the cleaned text.
  all_hadith.extend(df[['Chapter_Number', 'Chapter_English', 'Section_Number',
       'Section_English', 'Hadith_number', 'English_Hadith', 'Clean_Hadith', 'English_Grade']].values.tolist())

In [7]:
# Assemble the rows gathered in `all_hadith` into one table with named columns.
hadith_df = pd.DataFrame(
    data=all_hadith,
    columns=[
        'Chapter_Number',
        'Chapter_English',
        'Section_Number',
        'Section_English',
        'Hadith_number',
        'English_Hadith',
        'Clean_Hadith',
        'English_Grade',
    ],
)

In [8]:
# Persist the cleaned corpus; index=False keeps the DataFrame index out of the CSV.
hadith_df.to_csv('cleaned_hadith_data.csv', index=False)

# Hugging Face - Sentence Transformer

In [2]:
!pip install sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Using cached sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
Using cached transformers-4.51.3-py3-none-any.whl (10.4 MB)
Installing collected packages: transformers, sentence-transformers

   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [transformers]
   ---------------------------------------- 0/2 [trans

ERROR: Could not install packages due to an OSError: [Errno 28] No space left on device



In [3]:
from sentence_transformers import SentenceTransformer

ModuleNotFoundError: No module named 'sentence_transformers'

In [11]:
# Load the all-MiniLM-L6-v2 sentence-embedding model by its Hugging Face Hub id
# (the recorded output below shows its files being downloaded).
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every cleaned hadith. Rows of `embeddings` presumably follow hadith_df's
# row order; the search helper relies on that when mapping hits back with iloc.
embeddings = model.encode(hadith_df['Clean_Hadith'].values)

In [13]:
import numpy as np
# Ensure the embeddings are a NumPy array before saving and indexing
# (np.array makes a copy when the input already is one).
embeddings = np.array(embeddings)

In [14]:
# Write the embedding matrix to disk in NumPy's .npy format.
np.save('hadith_embeddings.npy', embeddings)

In [15]:
# Read the embedding matrix back from the .npy file saved above.
embeddings = np.load('hadith_embeddings.npy')

# FAISS

In [16]:
# !pip install faiss-gpu
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [17]:
import faiss

# Vector width, read from the second axis of the embedding matrix.
dimensions = embeddings.shape[1]
# Create an (initially empty) flat index sized for vectors of that width.
faiss_index = faiss.IndexFlatL2(dimensions)  # L2 Distance (Euclidean Distance)

In [18]:
# Add all hadith vectors; note that re-running this cell adds them again.
faiss_index.add(embeddings)

In [19]:
# Save the populated index to disk.
faiss.write_index(faiss_index, 'faiss_index.faiss')

In [20]:
def get_similar_hadith(query, model, faiss_index, count=5, corpus=None):
  """Print the hadith whose embeddings are closest to a free-text query.

  Args:
    query: The question or phrase to search for.
    model: Object with an ``encode(list_of_str)`` method returning the query
      embedding(s), e.g. the SentenceTransformer used to build the index.
    faiss_index: Index with a ``search(embeddings, k)`` method returning
      ``(distances, indices)``, each with one row per query.
    count: Maximum number of hits to print. Values below 1 print nothing.
    corpus: DataFrame with an 'English_Hadith' column whose row positions
      match the index ids. Defaults to the notebook-level ``hadith_df``.

  Returns:
    None. Each hit is printed as its rank, its distance and the English text.
  """
  if corpus is None:
    # Fall back to the notebook-level table the index was built from.
    corpus = hadith_df
  if count < 1:
    return

  query_embedding = model.encode([query])
  distances, indices = faiss_index.search(query_embedding, count)

  rank = 0
  # Only one query was encoded, so only the first result row is relevant.
  for dist, idx in zip(distances[0], indices[0]):
    # FAISS pads with -1 when the index holds fewer than `count` vectors;
    # iloc[-1] would wrongly print the last corpus row as a match.
    if idx < 0:
      continue
    rank += 1
    print(f"Hadith {rank}")
    print(f"Distance: {dist}")
    print(corpus['English_Hadith'].iloc[idx])

In [22]:
# Sample query: print the five nearest hadith for a question about prayers.
get_similar_hadith("How many prayers?", model, faiss_index, 5)

Hadith 1
Distance: 0.6548013091087341
It was narrated that Ibn ‘Abbas said: “Your Prophet (ﷺ) was enjoined to do fifty prayers but he returned to your Lord to make (i.e., reduce) them to five prayers.”
Hadith 2
Distance: 0.7232993245124817
Abu Huraira reported Allah's Messenger (ﷺ) as saying: Prayer said in a congregation is equivalent to twenty-five (prayers) as compared with the prayer said by a single person.
Hadith 3
Distance: 0.7286797761917114
Narrated `Abdullah bin `Umar: Allah's Messenger (ﷺ) said, "The prayer in congregation is twenty seven times superior to the prayer offeredby person alone."
Hadith 4
Distance: 0.7461212873458862
It was narrated from Abu Hurairah that the Messenger of Allah (ﷺ) said: "Praying in congregation is twenty-five portions better than one of you praying alone."
Hadith 5
Distance: 0.7466328740119934
It was narrated from Abu Hurairah that: The Messenger of Allah said: "The prayer in congregation is twenty-five times more virtuous than the prayer of any

# Chatbot Implementation

In [6]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [7]:
from nltk.chat.util import Chat, reflections

In [10]:
# Rule table for nltk's Chat: each entry is [regex, [candidate responses]].
# Rules are tried top to bottom and the first regex matching from the start of
# the input wins, so ORDER MATTERS:
#   1. topic keywords (synonyms sharing a response are merged with alternation;
#      matching is by substring, so "prayers" still hits "prayer"),
#   2. thanks / farewell,
#   3. the generic "hadith" definition, placed after the topics so that
#      "a hadith about prayer" answers about prayer instead of the definition,
#   4. empty input, then the catch-all fallback.
pairs = [
    # --- topic rules ---
    [r"(?i).*intention.*", ["Prophet Muhammad (ﷺ) said: 'Actions are judged by intentions.' (Sahih Bukhari 1)"]],
    [r"(?i).*kindness.*", ["The Prophet (ﷺ) said: 'The best among you are those who are best to others.'"]],
    [r"(?i).*knowledge.*", ["The Messenger of Allah (ﷺ) said: 'Seeking knowledge is an obligation upon every Muslim.' (Ibn Majah)"]],
    [r"(?i).*(?:honesty|truth).*", ["The Prophet (ﷺ) said: 'Truthfulness leads to righteousness, and righteousness leads to Paradise.' (Sahih Bukhari)"]],
    [r"(?i).*parents.*", ["The Prophet (ﷺ) said: 'Paradise lies under the feet of mothers.'"]],
    [r"(?i).*neighbor.*", ["The Prophet (ﷺ) said: 'He is not a believer whose neighbor is not safe from his harm.' (Sahih Bukhari)"]],
    [r"(?i).*(?:prayer|salah).*", ["The Prophet (ﷺ) said: 'The first matter that the slave will be brought to account for on the Day of Judgment is the prayer.'"]],
    [r"(?i).*mercy.*", ["The Prophet (ﷺ) said: 'He who does not show mercy to others will not be shown mercy.' (Sahih Bukhari)"]],
    # "forgive" also covers "forgiveness" (substring match).
    [r"(?i).*forgive.*", ["The Prophet (ﷺ) said: 'Show mercy to those on earth, and the One in the heavens will show mercy to you.'"]],
    [r"(?i).*(?:charity|sadaqa).*", ["The Prophet (ﷺ) said: 'Charity does not decrease wealth.' (Sahih Muslim)"]],
    [r"(?i).*(?:fasting|ramadan).*", ["The Prophet (ﷺ) said: 'Whoever fasts during Ramadan out of sincere faith and hoping to attain Allah's rewards, all his past sins will be forgiven.' (Sahih Bukhari)"]],
    [r"(?i).*(?:character|manners).*", ["The Prophet (ﷺ) said: 'The best among you are those who have the best manners and character.' (Sahih Bukhari)"]],
    [r"(?i).*smile.*", ["The Prophet (ﷺ) said: 'Your smile for your brother is charity.' (Tirmidhi)"]],
    [r"(?i).*(?:patience|sabr).*", ["The Prophet (ﷺ) said: 'Whoever remains patient, Allah will make him patient. Nobody can be given a blessing better and greater than patience.' (Sahih Bukhari)"]],
    [r"(?i).*(?:arrogance|pride).*", ["The Prophet (ﷺ) said: 'No one who has an atom's-weight of arrogance in his heart will enter Paradise.' (Sahih Muslim)"]],
    [r"(?i).*help.*", ["The Prophet (ﷺ) said: 'Allah helps the servant as long as he helps his brother.' (Sahih Muslim)"]],
    [r"(?i).*(?:halal|haram).*", ["The Prophet (ﷺ) said: 'That which is lawful is clear and that which is unlawful is clear.' (Sahih Bukhari)"]],
    [r"(?i).*(?:modesty|haya).*", ["The Prophet (ﷺ) said: 'Modesty brings nothing except good.' (Sahih Bukhari)"]],
    [r"(?i).*(?:envy|jealous).*", ["The Prophet (ﷺ) said: 'Beware of envy, for it devours good deeds just as fire devours wood.' (Abu Dawood)"]],
    # "clean" also covers "cleanliness" (substring match).
    [r"(?i).*clean.*", ["The Prophet (ﷺ) said: 'Cleanliness is half of faith.' (Sahih Muslim)"]],
    [r"(?i).*(?:gratitude|thankful).*", ["The Prophet (ﷺ) said: 'He who does not thank people, does not thank Allah.' (Tirmidhi)"]],
    # --- conversational rules (anchored at the start of the input) ---
    [r"(?i)thank.*", ["You're welcome! Let me know if you want to hear another Hadith."]],
    [r"(?i)bye|exit|quit|khuda hafiz|allah hafiz", ["Khuda Hafiz! May Allah bless you."]],
    # --- generic definition: only reached when no topic rule matched ---
    [r"(?i).*hadith.*", ["Hadith refers to the sayings, actions, and approvals of Prophet Muhammad (ﷺ)."]],
    # --- empty input and final fallback ---
    [r"^\s*$", ["Please type something to ask about Hadith."]],
    [r"(.*)", ["Sorry, I don't have a Hadith about that topic. Try another word like 'kindness', 'prayer', or 'charity'."]],
]


In [12]:
chatbot = Chat(pairs, reflections)

# Inputs that end the session (compared against the whole, lower-cased input).
# Includes the same farewell phrases that the farewell rule in `pairs` answers,
# so saying "khuda hafiz" actually leaves the loop instead of just being echoed.
EXIT_PHRASES = {'bye', 'exit', 'quit', 'khuda hafiz', 'allah hafiz'}

print("Welcome to the Hadith QnA Bot! Type 'bye', 'exit', or 'quit' to leave.")

while True:
    try:
        user_input = input("You: ").strip()
        if user_input.lower() in EXIT_PHRASES:
            print("Bot: Khuda Hafiz! May Allah bless you.")
            break
        # Only failures while producing a reply are recoverable. Errors raised
        # by input() itself (other than Ctrl-C / end of input, handled below)
        # are allowed to propagate: retrying them would loop forever, e.g. when
        # the kernel has no interactive stdin.
        try:
            response = chatbot.respond(user_input)
        except Exception as err:
            print(f"Bot: Sorry, something went wrong ({err!r}). Please try again.")
            continue
        print("Bot:", response)
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input: say goodbye and stop.
        print("\nBot: Khuda Hafiz! May Allah bless you.")
        break

Welcome to the Hadith QnA Bot! Type 'bye', 'exit', or 'quit' to leave.
Bot: Hadith refers to the sayings, actions, and approvals of Prophet Muhammad (ﷺ).
Bot: The Prophet (ﷺ) said: 'Paradise lies under the feet of mothers.'
Bot: Hadith refers to the sayings, actions, and approvals of Prophet Muhammad (ﷺ).
Bot: Sorry, I don't have a Hadith about that topic. Try another word like 'kindness', 'prayer', or 'charity'.
Bot: Khuda Hafiz! May Allah bless you.
