In [None]:
import sqlite3
from urllib.parse import urljoin
from urllib.request import Request, urlopen

import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
URL = "https://sunnah.com/"
# requests waits indefinitely unless a timeout is passed explicitly.
REQUEST_TIMEOUT_SECONDS = 30

# Fetch the landing page and fail loudly on an HTTP error instead of
# parsing an error page as if it were the collection index.
response = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()

# Parse the returned HTML
soup = BeautifulSoup(response.content, 'html.parser')

# The collection links are looked up inside <div class="collections">
table = soup.find('div', class_="collections")
if table is None:
    raise RuntimeError(
        f'No <div class="collections"> found at {URL}; the page layout may have changed'
    )

# Only anchors that carry an href can be followed by the crawl below.
hadith_links = table.find_all('a', href=True)


In [None]:
# Crawl every collection linked from the landing page, then every book of each
# collection, and build `df` with one row per hadith narration.
HADITH_COLUMNS = ['Hadith Collection', 'Book Name', 'Translation']
CRAWL_TIMEOUT_SECONDS = 30  # requests waits indefinitely unless a timeout is given


def _fetch_soup(session, href):
    """Download ``href`` (resolved against ``URL``) and return the parsed page.

    Returns None, after printing a warning, when the request fails or the
    server answers with an HTTP error status, so one bad page does not abort
    the whole crawl.
    """
    # urljoin copes with hrefs that start with "/" (plain concatenation with
    # URL would yield "https://sunnah.com//...") as well as with absolute URLs.
    page_url = urljoin(URL, href)
    try:
        page = session.get(page_url, timeout=CRAWL_TIMEOUT_SECONDS)
        page.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping {page_url}: {exc}")
        return None
    return BeautifulSoup(page.content, 'html.parser')


def _text_or_none(tag):
    """Return the stripped text of a BeautifulSoup tag, or None if the tag is missing."""
    return tag.text.strip() if tag is not None else None


titles = []  # kept for backward compatibility; this cell never fills it

# Rows are accumulated in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
records = []
with requests.Session() as session:  # reuse one connection pool for all pages
    for link in hadith_links:
        collection_title = _text_or_none(link.find('div', class_="english_collection_title"))
        collection_href = link.get('href')
        if collection_title is None or not collection_href:
            continue  # not a collection tile we can label or follow

        collection_page = _fetch_soup(session, collection_href)
        if collection_page is None:
            continue
        book_list = collection_page.find('div', class_="book_titles titles")
        if book_list is None:
            continue  # collection page without a book index

        # Iterate over the book anchors directly. The previous zip() with the
        # "book_range" divs dropped books whenever fewer range divs than anchors
        # were present, and the range value was never stored anyway.
        for book in book_list.find_all('a', href=True):
            book_name = _text_or_none(book.find('div', class_="english english_book_name"))
            if book_name is None:
                continue

            book_page = _fetch_soup(session, book['href'])
            if book_page is None:
                continue

            for narration in book_page.find_all('div', class_="hadithTextContainers"):
                translation = _text_or_none(narration.find('div', class_="text_details"))
                if translation is None:
                    continue  # container without an English text block
                records.append({'Hadith Collection': collection_title,
                                'Book Name': book_name,
                                'Translation': translation})

# Passing the column list keeps the schema stable even if nothing was scraped.
df = pd.DataFrame(records, columns=HADITH_COLUMNS)

In [None]:
# Keep an independent snapshot of the scraped data. Plain assignment would only
# create a second name for the same object, so in-place edits made to `df`
# later (such as adding the 'Category' column) would show up in `df_copy` too.
df_copy = df.copy()
df

Unnamed: 0,Hadith Collection,Book Name,Translation
0,Sahih al-Bukhari,Revelation,"I heard Allah's Messenger (ﷺ) saying, ""The rew..."
1,Sahih al-Bukhari,Revelation,(the mother of the faithful believers) Al-Hari...
2,Sahih al-Bukhari,Revelation,The commencement of the Divine Inspiration to ...
3,Sahih al-Bukhari,Revelation,"""While I was walking, all of a sudden I heard ..."
4,Sahih al-Bukhari,Revelation,Ibn 'Abbas in the explanation of the statement...
...,...,...,...
43858,Bulugh al-Maram,Fasting,The one who is performing I'tikaf does not hav...
43859,Bulugh al-Maram,Fasting,"Ibn ’Umar (RAA) narrated, ‘Some of the compani..."
43860,Bulugh al-Maram,Fasting,"Mu'awiyah bin Abi Sufian (RAA) narrated, ’The ..."
43861,Bulugh al-Maram,Fasting,"O Allah, You are the Pardoner and You love to ..."


In [None]:
# Keywords that signal each of the five pillars. Matching is a
# case-insensitive substring test, so 'give' also matches e.g. 'forgive'.
pillar_keywords = {
    'Shahada': ['faith', 'believe', 'monotheism'],
    'Salat': ['prayer', 'salat'],
    'Zakat': ['charity', 'zakat', 'give'],
    'Sawm': ['fasting', 'sawm', 'Ramadan'],
    'Hajj': ['pilgrimage', 'hajj', 'Mecca']
}


def categorize_translation(text, keywords_by_pillar=pillar_keywords):
    """Return the pillar whose keywords occur in ``text``, or None if none do.

    Parameters
    ----------
    text : str
        English hadith text. Any non-string value (e.g. NaN) yields None.
    keywords_by_pillar : dict[str, list[str]]
        Mapping of pillar name to the keywords that indicate it.

    Returns
    -------
    str or None
        When several pillars match, the one listed last in
        ``keywords_by_pillar`` wins, which is the precedence the original
        row-by-row loop produced.
    """
    if not isinstance(text, str):
        return None
    lowered = text.lower()
    category = None
    for pillar, keywords in keywords_by_pillar.items():
        # Lower-case the keyword as well: the text is lower-cased, so
        # capitalized keywords such as 'Ramadan' or 'Mecca' could never match.
        if any(keyword.lower() in lowered for keyword in keywords):
            category = pillar
    return category


# Assign the whole column in one step so it always exists (even when nothing
# matches) and re-running the cell cannot leave stale labels behind.
df['Category'] = df['Translation'].map(categorize_translation)

In [None]:
# Distinct labels present in the 'Category' column
pd.unique(df['Category'])

array(['Hajj', 'Shahada', 'Zakat', 'Salat', 'Sawm'], dtype=object)

In [None]:
# Show the frame again, now that the categorization cell has added 'Category'.
df

Unnamed: 0,Hadith Collection,Book Name,Translation,Category
0,Sahih al-Bukhari,Revelation,"I heard Allah's Messenger (ﷺ) saying, ""The rew...",Hajj
1,Sahih al-Bukhari,Revelation,(the mother of the faithful believers) Al-Hari...,Shahada
2,Sahih al-Bukhari,Revelation,The commencement of the Divine Inspiration to ...,Hajj
3,Sahih al-Bukhari,Revelation,"""While I was walking, all of a sudden I heard ...",Hajj
4,Sahih al-Bukhari,Revelation,Ibn 'Abbas in the explanation of the statement...,Zakat
...,...,...,...,...
43858,Bulugh al-Maram,Fasting,The one who is performing I'tikaf does not hav...,Hajj
43859,Bulugh al-Maram,Fasting,"Ibn ’Umar (RAA) narrated, ‘Some of the compani...",Hajj
43860,Bulugh al-Maram,Fasting,"Mu'awiyah bin Abi Sufian (RAA) narrated, ’The ...",Hajj
43861,Bulugh al-Maram,Fasting,"O Allah, You are the Pardoner and You love to ...",Hajj


In [None]:
# SQLite file that stores the scraped hadiths (relative to the working directory).
# `sqlite3` itself is imported in the notebook's import cell.
DB_PATH = 'Hadith_database.db'

# sqlite3.connect creates the file when it does not exist, otherwise opens it.
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [None]:
# DDL for the hadiths table: an integer primary key plus four text columns
# (collection, book_name, translation, category). IF NOT EXISTS makes the
# statement safe to run again.
create_hadiths_table_sql = '''
    CREATE TABLE IF NOT EXISTS hadiths (
        id INTEGER PRIMARY KEY,
        collection TEXT,
        book_name TEXT,
        translation TEXT,
        category TEXT
    )
'''

cursor.execute(create_hadiths_table_sql)
conn.commit()

In [None]:
# Bulk-load the DataFrame into the hadiths table.
# NOTE: every run of this cell appends all rows again; it does not deduplicate.
INSERT_HADITH_SQL = 'INSERT INTO hadiths (collection, book_name, translation, category) VALUES (?, ?, ?, ?)'

# Plain tuples in the same column order as the placeholders above.
hadith_rows = df[['Hadith Collection', 'Book Name', 'Translation', 'Category']].itertuples(
    index=False, name=None
)

# One transaction for the whole load: `with conn` commits once on success and
# rolls back on error, instead of paying for a commit after every row and
# leaving a partial load behind if an insert fails.
with conn:
    cursor.executemany(INSERT_HADITH_SQL, hadith_rows)

In [None]:
# Close the SQLite connection opened earlier in the notebook.
conn.close()