<a href="https://colab.research.google.com/github/Ibtisam-a/Integrated-Islamic-Ontology/blob/main/Automatic_Ontology_Integration_of_Islamic_Resources.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **AI for Automated Ontology Integration of Islamic Resources (Qur'an and Hadith)**

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# Installing required libraries


In [None]:
# pandas: spreadsheet loading; rdflib: RDF/OWL graph; openpyxl: .xlsx engine used by pandas.
!pip install pandas rdflib openpyxl

Reading the Datasets

In [None]:
import pandas as pd
# Hadith dataset
# Path to the Excel workbook that holds the Hadith collections.
file_path = 'yourFileName.xlsx'  # Replace with the path to your Hadith file
# First sheet only; kept for a quick look at the data layout.
df = pd.read_excel(file_path)
# Workbook handle needed by Step 10, which walks `excel_file.sheet_names`
# and parses one sheet per Hadith book.
excel_file = pd.ExcelFile(file_path)
# Show the first few rows
#print(df.head())


In [None]:
import pandas as pd
# Quran dataset (df1 is what the Quran loader and the per-chapter verse count read)
# Load the Excel file
file_path = 'yourFileName.xlsx'   # Replace with the path to your Quran file
df1 = pd.read_excel(file_path)

# **Building the Ontology**

In [None]:

# ----------------------
# Step 1: Imports & Graph Initialization
# ----------------------

from rdflib import BNode, Graph, Literal, Namespace, OWL, RDF, RDFS, URIRef

# Create the RDF graph that all later cells add triples to
g = Graph()

# Base namespace under which every ontology term and individual is minted
ns = Namespace("http://IslamicOntology.org/Resources#")
# Prefixes used when the graph is serialized
g.bind("ibs", ns)
g.bind("rdfs", RDFS)

# ----------------------
# Step 2: Define Classes
# ----------------------

# Class hierarchy: class name -> parent class name (None = no declared parent).
classes = {
    "IslamicResources": None,
    "HadithBook": "IslamicResources",
    "Quran": "IslamicResources",
    "QuranChapter": "IslamicResources",
    "Verse": "IslamicResources",
    "QuranicWord": "IslamicResources",
    "QuranicSegment": "IslamicResources",
    "QuranTopic": None,
    "HadithChapter": "IslamicResources",
    "HadithText": "IslamicResources",
    "HadithTopic": None,
}

# Class labels in English and Arabic.
# Keys must match the keys of `classes` exactly, otherwise the class is
# created without labels.
class_labels = {
    "IslamicResources": {"en": "Islamic Resources", "ar": "ÿßŸÑŸÜÿµŸàÿµ ÿßŸÑÿ•ÿ≥ŸÑÿßŸÖŸäÿ©"},
    "Quran": {"en": "Quran", "ar": "ÿßŸÑŸÇÿ±ÿ¢ŸÜ"},
    "HadithBook": {"en":"Hadith Book", "ar": "ŸÉÿ™ÿßÿ® ÿßŸÑÿ≠ÿØŸäÿ´"},
    "QuranChapter": {"en": "Quran Chapter", "ar": "ÿ≥Ÿàÿ±ÿ©"},
    "Verse": {"en": "Quran Verse", "ar": "ÿ¢Ÿäÿ©"},
    "QuranicWord": {"en": "Quran Word", "ar": "ŸÉŸÑŸÖÿ© ŸÇÿ±ÿ¢ŸÜŸäÿ©"},
    # Key fixed from "QuraniSegment", which never matched the class name.
    "QuranicSegment": {"en": "Quran Segment", "ar": "ŸÖŸÇÿ∑ÿπ ŸÇÿ±ÿ¢ŸÜŸä"},
    "QuranTopic": {"en": "Quran Topic", "ar": "ŸÖŸàÿ∂Ÿàÿπ ŸÇÿ±ÿ¢ŸÜŸä"},
    "HadithChapter": {"en": "Hadith Chapter", "ar": "ÿ®ÿßÿ® ÿßŸÑÿ≠ÿØŸäÿ´"},
    "HadithText": {"en": "Hadith Text", "ar": "ŸÜÿµ ÿßŸÑÿ≠ÿØŸäÿ´"},
    "HadithTopic": {"en":"Hadith Topic", "ar": "ŸÖŸàÿ∂Ÿàÿπ ÿßŸÑÿ≠ÿØŸäÿ´"},
}

# Create classes, subclass relationships, and labels
for cls, parent in classes.items():
    cls_uri = ns[cls]
    g.add((cls_uri, RDF.type, OWL.Class))
    if parent:
        g.add((cls_uri, RDFS.subClassOf, ns[parent]))

    # Add English and Arabic labels if available
    labels = class_labels.get(cls)
    if labels:
        g.add((cls_uri, RDFS.label, Literal(labels['en'], lang='en')))
        g.add((cls_uri, RDFS.label, Literal(labels['ar'], lang='ar')))

# Hadith books are modelled as INDIVIDUALS of the HadithBook class (not as classes).
# Mapping: individual local name -> English / Arabic label.
hadith_books_individuals = dict(
    SahihAlBukhari={"en": "Sahih Al-Bukhari", "ar": "ÿµÿ≠Ÿäÿ≠ ÿßŸÑÿ®ÿÆÿßÿ±Ÿä"},
    SahihMuslim={"en": "Sahih Muslim", "ar": "ÿµÿ≠Ÿäÿ≠ ŸÖÿ≥ŸÑŸÖ"},
    SunanAbuDawood={"en": "Sunan Abu Dawood", "ar": "ÿ≥ŸÜŸÜ ÿ£ÿ®Ÿä ÿØÿßŸàÿØ"},
    JamiAtTirmidhi={"en": "Jami' At-Tirmidhi", "ar": "ÿ¨ÿßŸÖÿπ ÿßŸÑÿ™ÿ±ŸÖÿ∞Ÿä"},
    SunanAnNasai={"en": "Sunan An-Nasa'i", "ar": "ÿ≥ŸÜŸÜ ÿßŸÑŸÜÿ≥ÿßÿ¶Ÿä"},
    SunanIbnMajah={"en": "Sunan Ibn Majah", "ar": "ÿ≥ŸÜŸÜ ÿßÿ®ŸÜ ŸÖÿßÿ¨Ÿá"},
)

# Type each book as a HadithBook and attach its two labels (English first, then Arabic).
for ind, lbl in hadith_books_individuals.items():
    ind_uri = ns[ind]
    g.add((ind_uri, RDF.type, ns.HadithBook))
    for lang_code in ("en", "ar"):
        g.add((ind_uri, RDFS.label, Literal(lbl[lang_code], lang=lang_code)))



In [None]:
# ----------------------
## Step 3:  Define Quran and Hadith object properties
# ----------------------

from rdflib.namespace import XSD

def _declare_property(prop, prop_type, label_en, label_ar, domain=None, range_=None):
    """Declare one ontology property in the module-level graph `g`.

    Args:
        prop: URI of the property being declared.
        prop_type: OWL.ObjectProperty or OWL.DatatypeProperty.
        label_en: English rdfs:label text.
        label_ar: Arabic rdfs:label text.
        domain: optional rdfs:domain; omitted from the graph when None.
        range_: optional rdfs:range; omitted from the graph when None.

    Returns:
        `prop`, so a declaration can be written as a single assignment.
    """
    g.add((prop, RDF.type, prop_type))
    if domain is not None:
        g.add((prop, RDFS.domain, domain))
    if range_ is not None:
        g.add((prop, RDFS.range, range_))
    g.add((prop, RDFS.label, Literal(label_en, lang="en")))
    g.add((prop, RDFS.label, Literal(label_ar, lang="ar")))
    return prop


# ===========================
# Object properties: Hadith structure
# ===========================
hasChapter = _declare_property(
    ns.hasChapter, OWL.ObjectProperty,
    "Has chapter", "Ÿäÿ≠ÿ™ŸàŸä ÿπŸÑŸâ ÿ®ÿßÿ®",
    domain=ns.HadithBook, range_=ns.HadithChapter,
)

# Generic containment link shared by the Quran and Hadith hierarchies.
isPartOf = _declare_property(
    ns.isPartOf, OWL.ObjectProperty,
    "is part of", "ÿ¨ÿ≤ÿ° ŸÖŸÜ",
    domain=ns["IslamicResources"], range_=ns["IslamicResources"],
)

discussHadithTopics = _declare_property(
    ns.discussHadithTopics, OWL.ObjectProperty,
    "Discuss Hadith Topics", "ŸäŸÜÿßŸÇÿ¥ ŸÖŸàÿßÿ∂Ÿäÿπ ÿßŸÑÿ≠ÿØŸäÿ´",
    domain=ns.HadithText, range_=ns.HadithTopic,
)

# ===========================
# Object properties: Quran structure
# ===========================
# hasQWord: Verse -> QuranicWord
hasQWord = _declare_property(
    ns.hasQWord, OWL.ObjectProperty,
    "Has Quranic Word", "Ÿäÿ≠ÿ™ŸàŸä ÿπŸÑŸâ ŸÉŸÑŸÖÿ© ŸÇÿ±ÿ¢ŸÜŸäÿ©",
    domain=ns.Verse, range_=ns.QuranicWord,
)

# hasQSegment: QuranicWord -> QuranicSegment
hasQSegment = _declare_property(
    ns.hasQSegment, OWL.ObjectProperty,
    "Has Quranic Segment", "Ÿäÿ≠ÿ™ŸàŸä ÿπŸÑŸâ ŸÖŸÇÿ∑ÿπ ŸÇÿ±ÿ¢ŸÜŸä",
    domain=ns.QuranicWord, range_=ns.QuranicSegment,
)

# ===========================
# Data property: Buckwalter form of a segment
# ===========================
hasBuckwalterSegment = _declare_property(
    ns.hasBuckwalterSegment, OWL.DatatypeProperty,
    "Buckwalter Segment", "ŸÖŸÇÿ∑ÿπ ÿ®ÿßŸÉŸàÿßŸÑÿ™ÿ±",
    domain=ns.QuranicSegment, range_=RDFS.Literal,
)

# ===========================
# Data property: POS (Part of Speech)
# ===========================
hasPOS = _declare_property(
    ns.hasPOS, OWL.DatatypeProperty,
    "Part of Speech", "ÿßŸÑŸàÿ≥ŸÖ ÿßŸÑŸÜÿ≠ŸàŸä",
    domain=ns.QuranicSegment, range_=XSD.string,
)

# discussQuranTopics: Verse -> QuranTopic (the original author flagged the domain for revision)
discussQuranTopics = _declare_property(
    ns.discussQuranTopics, OWL.ObjectProperty,
    "Discuss Quran Topics", "ÿ™ŸÜÿßŸÇÿ¥ ŸÖŸàÿßÿ∂Ÿäÿπ ÿßŸÑŸÇÿ±ÿ¢ŸÜ",
    domain=ns.Verse, range_=ns.QuranTopic,
)

# ===========================
# Symmetric object property: relatedTo (QuranTopic <-> HadithTopic)
# ===========================
# NOTE(review): relatedTo is symmetric but its domain and range are different
# classes; under OWL semantics a reasoner would then type every linked topic as
# both QuranTopic and HadithTopic. Confirm this is intended.
relatedTo = _declare_property(
    ns.relatedTo, OWL.ObjectProperty,
    "Related To", "ŸÖÿ±ÿ™ÿ®ÿ∑ ÿ®ŸÄ",
    domain=ns.QuranTopic, range_=ns.HadithTopic,
)
g.add((relatedTo, RDF.type, OWL.SymmetricProperty))

# ===========================
# Data properties of a HadithText: isnad, matn, grade
# ===========================
# The Arabic labels of hasIsnad and hasMatn were swapped in the earlier
# version (the isnad property carried the matn label and vice versa).
hasIsnad = _declare_property(
    ns.hasIsnad, OWL.DatatypeProperty,
    "English Isnad", "ÿßŸÑÿ•ÿ≥ŸÜÿßÿØ ÿ®ÿßŸÑÿπÿ±ÿ®Ÿäÿ©",
    domain=ns.HadithText, range_=RDFS.Literal,
)

hasMatn = _declare_property(
    ns.hasMatn, OWL.DatatypeProperty,
    "English Matn", "ÿßŸÑŸÖÿ™ŸÜ ÿ®ÿßŸÑÿπÿ±ÿ®Ÿäÿ©",
    domain=ns.HadithText, range_=RDFS.Literal,
)

hasGrade = _declare_property(
    ns.hasGrade, OWL.DatatypeProperty,
    "English Grading", "ÿßŸÑÿ™ÿµŸÜŸäŸÅ ÿ®ÿßŸÑÿπÿ±ÿ®Ÿäÿ©",
    domain=ns.HadithText, range_=RDFS.Literal,
)

# ===========================
# Data properties of a Verse: tafsir texts and concepts
# ===========================
hasTafsirByJalalayn = _declare_property(
    ns.hasTafsirByJalalayn, OWL.DatatypeProperty,
    "Has Tafsir By Jalalayn", "ÿ™ŸÅÿ≥Ÿäÿ± ÿßŸÑÿ¨ŸÑÿßŸÑŸäŸÜ",
    domain=ns.Verse, range_=RDFS.Literal,
)

hasTafsirByMuyasser = _declare_property(
    ns.hasTafsirByMuyasser, OWL.DatatypeProperty,
    "Has Tafsir by Muyasser", "ÿ™ŸÅÿ≥Ÿäÿ± ÿßŸÑŸÖŸäÿ≥ÿ±",
    domain=ns.Verse, range_=RDFS.Literal,
)

hasQuranicConcept = _declare_property(
    ns.hasQuranicConcept, OWL.DatatypeProperty,
    "Has Concept", "ÿßŸÑŸÖŸÅŸáŸàŸÖ",
    domain=ns.Verse, range_=RDFS.Literal,
)

# ===========================
# Count properties (declared without domain/range)
# ===========================
# The URIs are ns["totalChapters"], ns["totalHadiths"] and ns["totalVerses"];
# the Python variable names carry a `_prop` suffix that is NOT part of the URI.
totalChapters_prop = _declare_property(
    ns["totalChapters"], OWL.DatatypeProperty,
    "Total Chapters", "ÿ•ÿ¨ŸÖÿßŸÑŸä ÿßŸÑÿ£ÿ®Ÿàÿßÿ®",
)

totalHadiths_prop = _declare_property(
    ns["totalHadiths"], OWL.DatatypeProperty,
    "Total Hadiths", "ÿ•ÿ¨ŸÖÿßŸÑŸä ÿßŸÑÿ£ÿ≠ÿßÿØŸäÿ´",
)

# for Quran, verses count
totalVerses_prop = _declare_property(
    ns["totalVerses"], OWL.DatatypeProperty,
    "Total Verses", "ÿ•ÿ¨ŸÖÿßŸÑŸä ÿßŸÑÿ¢Ÿäÿßÿ™",
)

# Non-related property (not used in this work)
#unrelatedTo = ns.unrelatedTo
#g.add((unrelatedTo, RDF.type, OWL.ObjectProperty))
#g.add((unrelatedTo, RDF.type, OWL.SymmetricProperty))   # ‚úÖ make it symmetric
#g.add((unrelatedTo, RDFS.domain, ns.QuranTopic))
#g.add((unrelatedTo, RDFS.range, ns.HadithTopic))
#g.add((unrelatedTo, RDFS.label, Literal("Unrelated To", lang="en")))
#g.add((unrelatedTo, RDFS.label, Literal("ÿ∫Ÿäÿ± ŸÖÿ±ÿ™ÿ®ÿ∑ ÿ®ŸÄ", lang="ar")))

In [None]:
# -------------------------
# Step 4: OWL Restrictions (reviewer-proof)
#    Core: someValuesFrom
# -------------------------
def some_values_restriction(prop, filler):
    """Create the anonymous OWL class expression `prop some filler`.

    Adds a blank node to the module-level graph `g`, typed owl:Restriction,
    with owl:onProperty `prop` and owl:someValuesFrom `filler`.

    Args:
        prop: URI of the property the restriction applies to.
        filler: URI of the class the property must have a value from.

    Returns:
        The blank node standing for the restriction, ready to be used as the
        object of an rdfs:subClassOf triple.
    """
    # BNode comes from rdflib; it must be imported at the top of the notebook.
    restriction = BNode()
    for predicate, value in (
        (RDF.type, OWL.Restriction),
        (OWL.onProperty, prop),
        (OWL.someValuesFrom, filler),
    ):
        g.add((restriction, predicate, value))
    return restriction

# --- Containment axioms ---
# Each (part, whole) pair asserts:  part subClassOf (isPartOf some whole)
part_of_axioms = (
    # Quran structure
    ("QuranChapter", "Quran"),
    ("Verse", "QuranChapter"),
    ("QuranicWord", "Verse"),
    ("QuranicSegment", "QuranicWord"),
    # Hadith structure
    ("HadithChapter", "HadithBook"),
    ("HadithText", "HadithChapter"),
)
for part_name, whole_name in part_of_axioms:
    whole_restriction = some_values_restriction(isPartOf, ns[whole_name])
    g.add((ns[part_name], RDFS.subClassOf, whole_restriction))


# --- Disjointness (quality control) ---
# Prevent mixing Quran structure classes with Hadith structure classes
for quran_cls_name, hadith_cls_name in (
    ("QuranChapter", "HadithChapter"),
    ("Verse", "HadithText"),
):
    g.add((ns[quran_cls_name], OWL.disjointWith, ns[hadith_cls_name]))



In [None]:
# -------------------------
#Step 5: Function to add Quran Data
# -------------------------
def add_qurand_to_graph(df1, g, ns):
    """Add the Quran individual, its chapters, verses and verse topics to `g`.

    Every row of `df1` describes one verse. Columns read:
    Chapter_Index, Chapter_English, Chapter_Arabic, Verse_ID, Translation,
    Verse, desc_ByJalalayn, desc_ByMuyasser, Concepts_E, Concepts_A and the
    optional Topic_Index, Topics_E, Topics_A.

    Rows without a Chapter_Index or Verse_ID are skipped, and empty text cells
    are left out of the graph instead of being stored as NaN literals.

    Args:
        df1: pandas DataFrame with the columns listed above.
        g: rdflib Graph that receives the triples.
        ns: rdflib Namespace used to mint URIs.
    """
    def add_text(subject, predicate, value, lang):
        # Empty spreadsheet cells arrive as NaN; do not turn them into literals.
        if pd.notna(value):
            g.add((subject, predicate, Literal(value, lang=lang)))

    quran_uri = URIRef(ns + "Quran")
    g.add((quran_uri, RDF.type, ns.Quran))

    for _, row in df1.iterrows():
        if pd.isna(row['Chapter_Index']) or pd.isna(row['Verse_ID']):
            continue  # nothing to anchor the chapter/verse on

        # str() keeps URI building working when an ID column holds numbers.
        chapter_id = URIRef(ns + str(row['Chapter_Index']))
        g.add((chapter_id, RDF.type, ns.QuranChapter))
        add_text(chapter_id, RDFS.label, row['Chapter_English'], 'en')
        add_text(chapter_id, RDFS.label, row['Chapter_Arabic'], 'ar')
        g.add((chapter_id, ns.isPartOf, quran_uri))

        verse_id = URIRef(ns + str(row['Verse_ID']))
        g.add((verse_id, RDF.type, ns.Verse))
        add_text(verse_id, RDFS.label, row['Translation'], 'en')
        add_text(verse_id, RDFS.label, row['Verse'], 'ar')
        g.add((verse_id, ns.isPartOf, chapter_id))

        # Tafsir texts and concepts are data properties of the verse.
        add_text(verse_id, ns.hasTafsirByJalalayn, row["desc_ByJalalayn"], "ar")
        add_text(verse_id, ns.hasTafsirByMuyasser, row["desc_ByMuyasser"], "ar")
        add_text(verse_id, ns.hasQuranicConcept, row["Concepts_E"], "en")
        add_text(verse_id, ns.hasQuranicConcept, row["Concepts_A"], "ar")

        # Topic columns are optional, hence .get().
        topic_index = row.get('Topic_Index')
        if pd.notna(topic_index):
            topic_uri = URIRef(ns + str(topic_index))
            g.add((topic_uri, RDF.type, ns.QuranTopic))
            add_text(topic_uri, RDFS.label, row.get('Topics_E'), 'en')
            add_text(topic_uri, RDFS.label, row.get('Topics_A'), 'ar')
            g.add((verse_id, ns.discussQuranTopics, topic_uri))



In [None]:
# Add Quran data from df1 (the Quran dataset loaded earlier) to the graph
add_qurand_to_graph(df1, g, ns)

In [None]:
# -------------------------
# Step 6: Function to add Words (based on your dataset)
# -------------------------

def add_words_to_graph(wordDF, g, ns):
    """Add QuranicWord individuals and link each one to its verse.

    Columns read from `wordDF`:
        word: word ID whose first two '-'-separated parts form the ID of the
            verse the word belongs to.
        wordArabic: Arabic surface form, stored as the rdfs:label.

    Rows without a word ID are skipped; a missing Arabic form only omits the
    label.

    Args:
        wordDF: pandas DataFrame with the columns above.
        g: rdflib Graph that receives the triples.
        ns: rdflib Namespace used to mint URIs.
    """
    for _, row in wordDF.iterrows():
        if pd.isna(row['word']):
            continue  # str(NaN) would otherwise mint a bogus "...#nan" word

        word_id_str = str(row['word']).strip()
        word_uri = URIRef(ns + word_id_str)

        # parent Verse ID = first 2 parts of word ID
        verse_id_str = "-".join(word_id_str.split("-")[:2])
        verse_uri = URIRef(ns + verse_id_str)

        # add QuranicWord individual
        g.add((word_uri, RDF.type, ns.QuranicWord))
        if pd.notna(row['wordArabic']):
            g.add((word_uri, RDFS.label, Literal(str(row['wordArabic']).strip(), lang='ar')))

        # link to verse (both directions)
        g.add((verse_uri, ns.hasQWord, word_uri))
        g.add((word_uri, ns.isPartOf, verse_uri))

In [None]:
# --- CALL FUNCTION ---
# NOTE(review): `wordDF` is not assigned anywhere in the visible notebook;
# load the word-level dataset into `wordDF` before running this cell.
add_words_to_graph(wordDF, g, ns)

In [None]:
# -------------------------
# Step 7: Function to add Segments (based on your dataset)
# -------------------------
def add_segments_to_graph(segmdf, g, ns):
    """Add QuranicSegment individuals and link each one to its word.

    Columns read from `segmdf`:
        ID: segment ID whose first three '-'-separated parts form the ID of
            the word the segment belongs to.
        seg: Arabic form of the segment, stored as the rdfs:label.
        buck: Buckwalter form, stored with hasBuckwalterSegment.
        pos: part-of-speech tag, stored with hasPOS as xsd:string.

    Rows without an ID are skipped; other empty cells only omit their triple.

    Args:
        segmdf: pandas DataFrame with the columns above.
        g: rdflib Graph that receives the triples.
        ns: rdflib Namespace used to mint URIs.
    """
    for _, row in segmdf.iterrows():
        if pd.isna(row['ID']):
            continue  # str(NaN) would otherwise mint a bogus "...#nan" segment

        seg_id_str = str(row['ID']).strip()
        seg_uri = URIRef(ns + seg_id_str)

        # parent Word ID = first 3 parts
        word_id_str = "-".join(seg_id_str.split("-")[:3])
        word_uri = URIRef(ns + word_id_str)

        # add Segment individual
        g.add((seg_uri, RDF.type, ns.QuranicSegment))
        if pd.notna(row['seg']):
            g.add((seg_uri, RDFS.label, Literal(str(row['seg']).strip(), lang='ar')))

        # link to word; isPartOf is the declared containment property
        # (the previous `isPartOfWord` was never declared in the ontology)
        g.add((word_uri, ns.hasQSegment, seg_uri))
        g.add((seg_uri, ns.isPartOf, word_uri))

        if pd.notna(row['buck']):
            g.add((seg_uri, ns.hasBuckwalterSegment, Literal(str(row['buck']).strip(), lang='en')))
        if pd.notna(row["pos"]):
            g.add((seg_uri, ns.hasPOS, Literal(row["pos"], datatype=XSD.string)))



In [None]:
# NOTE(review): `segmdf` is not assigned anywhere in the visible notebook;
# load the segment-level dataset into `segmdf` before running this cell.
add_segments_to_graph(segmdf, g, ns)

In [None]:
# -------------------------
# Step 8: Function to count total verses per Surah using Chapter_Index as Surah URI
# -------------------------
# Number of verse rows per chapter, keyed by Chapter_Index.
verses_per_surah = df1.groupby('Chapter_Index')['Verse_ID'].count().to_dict()

# The property is declared as ns["totalVerses"]; `ns.totalVerses_prop` would
# mint a different, undeclared URI ("...#totalVerses_prop").
total_verses_property = ns["totalVerses"]
for chapter_index, total_verses in verses_per_surah.items():
    surah_uri = URIRef(ns + str(chapter_index))
    g.add((surah_uri, total_verses_property, Literal(total_verses)))

In [None]:
# -------------------------
# Step 9: Function to map HadithBooks and add them (based on your dataset and sheets names)
# -------------------------

# Excel sheet name -> local name of the HadithBook individual it describes.
hadith_sheet_to_book = dict(
    SB="SahihAlBukhari",
    MUS="SahihMuslim",
    ad="SunanAbuDawood",
    TIR="JamiAtTirmidhi",
    NES="SunanAnNasai",
    im="SunanIbnMajah",
)


def add_hadithd_book_to_graph(book_class_name, df, g, ns):
    """Add the chapters, hadiths and hadith topics of one Hadith book to `g`.

    Every row of `df` describes one hadith. Columns read:
    Chapter_Index, Chapter_English, Chapter_Arabic, Hadith_Index,
    English_Hadith, Arabic_Hadith, English_Isnad, Arabic_Isnad, English_Matn,
    Arabic_Matn, English_Grade, Arabic_Grade and the optional Topic_Index,
    English_Topic, Arabic_Topic.

    Rows without a Chapter_Index or Hadith_Index are skipped, and empty text
    cells are left out of the graph instead of being stored as NaN literals.

    Args:
        book_class_name: local name of the book resource in `ns`
            (e.g. "SahihAlBukhari"); chapters are linked to it with isPartOf.
        df: pandas DataFrame with the columns listed above.
        g: rdflib Graph that receives the triples.
        ns: rdflib Namespace used to mint URIs.
    """
    def add_text(subject, predicate, value, lang):
        # Empty spreadsheet cells arrive as NaN; do not turn them into literals.
        if pd.notna(value):
            g.add((subject, predicate, Literal(value, lang=lang)))

    book_uri = ns[book_class_name]  # URI of the book the chapters belong to

    for _, row in df.iterrows():
        if pd.isna(row['Chapter_Index']) or pd.isna(row['Hadith_Index']):
            continue  # str(NaN) would otherwise mint bogus "...#nan" nodes

        chapter_uri = URIRef(ns + str(row['Chapter_Index']))
        g.add((chapter_uri, RDF.type, ns.HadithChapter))
        add_text(chapter_uri, RDFS.label, row['Chapter_English'], 'en')
        add_text(chapter_uri, RDFS.label, row['Chapter_Arabic'], 'ar')
        g.add((chapter_uri, ns.isPartOf, book_uri))

        hadith_uri = URIRef(ns + str(row['Hadith_Index']))
        g.add((hadith_uri, RDF.type, ns.HadithText))
        add_text(hadith_uri, RDFS.label, row['English_Hadith'], 'en')
        add_text(hadith_uri, RDFS.label, row['Arabic_Hadith'], 'ar')
        g.add((hadith_uri, ns.isPartOf, chapter_uri))

        # Isnad, matn and grade, each in English and Arabic.
        for predicate, column_stem in (
            (ns.hasIsnad, "Isnad"),
            (ns.hasMatn, "Matn"),
            (ns.hasGrade, "Grade"),
        ):
            add_text(hadith_uri, predicate, row["English_" + column_stem], "en")
            add_text(hadith_uri, predicate, row["Arabic_" + column_stem], "ar")

        # Topic columns are optional, hence .get().
        topic_index = row.get('Topic_Index')
        if pd.notna(topic_index):
            topic_uri = URIRef(ns + str(topic_index))
            g.add((topic_uri, RDF.type, ns.HadithTopic))
            add_text(topic_uri, RDFS.label, row.get('English_Topic'), 'en')
            add_text(topic_uri, RDFS.label, row.get('Arabic_Topic'), 'ar')
            g.add((hadith_uri, ns.discussHadithTopics, topic_uri))

In [None]:
# -------------------------
# Step 10: Function to track chapter and hadith counts
# -------------------------

# Per-book totals, keyed by the local name of the HadithBook individual.
chapter_counts = {}
hadith_counts = {}

# Process Hadith Data & Count Chapters & Hadiths
# `excel_file` is expected to be a pandas.ExcelFile opened on the Hadith
# workbook (one sheet per book); it must exist before this cell runs.
for sheet_name in excel_file.sheet_names:
    mapped_book = hadith_sheet_to_book.get(sheet_name)
    if mapped_book is None:
        print(f"‚ö†Ô∏è Skipped unknown sheet: {sheet_name}")
        continue

    # Separate name so the module-level `df` is not overwritten by the loop.
    book_df = excel_file.parse(sheet_name)
    if book_df.empty:
        continue

    add_hadithd_book_to_graph(mapped_book, book_df, g, ns)

    # Count unique chapters and hadiths per book
    chapter_counts[mapped_book] = book_df['Chapter_Index'].nunique()
    hadith_counts[mapped_book] = book_df['Hadith_Index'].nunique()

# Attach the totals to each HadithBook individual.
# The properties are declared as ns["totalChapters"] / ns["totalHadiths"];
# `ns.totalChapters_prop` / `ns.totalHadiths_prop` would mint different,
# undeclared URIs.
total_chapters_property = ns["totalChapters"]
total_hadiths_property = ns["totalHadiths"]
for book_name, n_chapters in chapter_counts.items():
    g.add((ns[book_name], total_chapters_property, Literal(n_chapters)))
    g.add((ns[book_name], total_hadiths_property, Literal(hadith_counts[book_name])))

In [None]:
# -------------------------
# Step 11: Function to track total Hadith in each chapter
# -------------------------
from rdflib import RDF, RDFS, Literal
from rdflib.namespace import OWL, XSD

# ---- Datatype property: number of hadiths in a HadithChapter (xsd:integer) ----
totalHadithsInChapter = ns["totalHadithsInChapter"]
for declaration in (
    (totalHadithsInChapter, RDF.type, OWL.DatatypeProperty),
    (totalHadithsInChapter, RDFS.domain, ns["HadithChapter"]),
    (totalHadithsInChapter, RDFS.range, XSD.integer),
    (totalHadithsInChapter, RDFS.label, Literal("total hadiths in chapter", lang="en")),
    (totalHadithsInChapter, RDFS.label, Literal("ÿ•ÿ¨ŸÖÿßŸÑŸä ÿßŸÑÿ£ÿ≠ÿßÿØŸäÿ´ ŸÅŸä ÿßŸÑÿ®ÿßÿ®", lang="ar")),
):
    g.add(declaration)

from rdflib import RDF, Literal
from rdflib.namespace import XSD

def set_total_hadiths_in_chapters_from_graph(g, ns):
    """Recompute and store the number of hadiths in every Hadith chapter.

    A hadith is counted for a chapter when the graph holds:
      ?hadith  rdf:type  :HadithText .
      ?hadith  :isPartOf ?chapter .
      ?chapter rdf:type  :HadithChapter .

    Any existing :totalHadithsInChapter value of a chapter is removed and
    replaced by the fresh count, typed xsd:integer.

    Returns:
        dict mapping each HadithChapter individual to its hadith count
        (chapters without hadiths map to 0).
    """
    part_of = ns["isPartOf"]
    total_prop = ns["totalHadithsInChapter"]

    # Start every HadithChapter individual at zero.
    counts = dict.fromkeys(set(g.subjects(RDF.type, ns["HadithChapter"])), 0)

    # Walk the hadiths and credit each chapter they are part of.
    for hadith in g.subjects(RDF.type, ns["HadithText"]):
        for container in g.objects(hadith, part_of):
            if container not in counts:
                continue  # isPartOf target is not a HadithChapter individual
            counts[container] += 1

    # Replace any previously stored total with the new one.
    for chapter, total in counts.items():
        g.remove((chapter, total_prop, None))
        g.add((chapter, total_prop, Literal(total, datatype=XSD.integer)))

    return counts

# --- compute and store the per-chapter totals ---
counts = set_total_hadiths_in_chapters_from_graph(g, ns)

# sanity check: show at most the first ten (chapter, total) pairs
preview = list(counts.items())[:10]
for ch, n in preview:
    print(ch, n)


In [None]:
# -------------------------
# Step 12: Function to add related Quran and Hadith topics
# -------------------------

# Load the table of related topic pairs (columns: QuranTopic_ID, HadithTopic_ID)
df_related = pd.read_excel("yourFileName.xlsx")
#df_unrelated = pd.read_excel("yourFileName.xlsx")

# --- Add related topics ---
for _, row in df_related.iterrows():
    quran_topic_id = row["QuranTopic_ID"]
    hadith_topic_id = row["HadithTopic_ID"]
    if pd.isna(quran_topic_id) or pd.isna(hadith_topic_id):
        continue  # incomplete pair

    # str() keeps URI building working when an ID column holds numbers,
    # matching how the Hadith loader builds its topic URIs.
    q_topic = URIRef(ns + str(quran_topic_id))
    h_topic = URIRef(ns + str(hadith_topic_id))

    # Only the Quran -> Hadith direction is asserted. relatedTo is declared
    # symmetric, so a reasoner (HermiT, Pellet) can infer the reverse link.
    g.add((q_topic, relatedTo, h_topic))

# --- Add unrelated topics ---
#for _, row in df_unrelated.iterrows():
#    if pd.notna(row["QuranTopic_ID"]) and pd.notna(row["HadithTopic_ID"]):
#        q_topic = URIRef(ns + row["QuranTopic_ID"])
#        h_topic = URIRef(ns + row["HadithTopic_ID"])#

 #       g.add((q_topic, unrelatedTo, h_topic))
        # Note: not symmetric by default (we could make it symmetric too if you want)


In [None]:
# -------------------------
# Step 13: Serialize the final ontology (based on the format you seek)
# -------------------------

# Write the whole graph to "fileName.ttl" in Turtle syntax
g.serialize("fileName.ttl", format="turtle")