### 1. Importing the modules

In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import re
from string import punctuation
import SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper2
import sys
import os
import contextlib
import stanza
import logging
from IPython.display import display, Markdown
# Suppress output during the NLP pipeline initialization
@contextlib.contextmanager
def suppress_stdout_stderr():
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
            yield

### 2. Defining Bag of words 

In [2]:
# Bag of Words
def bag_of_words():
    class_list = [['eventName', 'disaster'], ['region', 'place'], ['SubGroupName', 'category']]
    prop_list = [['TypeName', 'type'], ['startYearValue', 'year', 'date'], ['countryName', 'country', 'gpe'], ['GroupName', 'group'], ['magnitude', 'magnitude'], ['totalAffected', 'affect'], ['totalDeaths', 'death', 'die', 'kill'], ['regionName', 'region', 'loc']]
    return class_list, prop_list

### 3. Defining functions for Cleaning Data (Normalize -> tokenize -> stopword removal -> lemmatization)

In [3]:
def clean_data(data):
    text = data
    def normalize(text):
        processed_text = re.sub(f"[{re.escape(punctuation)}]", "", text)
        processed_text = " ". join(processed_text. split())
        return processed_text
    
    def strip_stopwords(tokens):
        stop_words = stopwords.words("english")
        clean_words = []
        for word in tokens:
            if word not in stop_words:
                clean_words.append(word)
        #print(clean_words)
        return clean_words
    def lemmatize(text):
        text = normalize(text)
        lem = WordNetLemmatizer()
        lems = []
        tokens = word_tokenize(text)
        #print(tokens)
        clean_tokens = strip_stopwords(tokens)
        for token in clean_tokens:
            temp = lem.lemmatize(token, 'n')
            lems.append(lem.lemmatize(temp, 'v'))
        return text, tokens, clean_tokens, lems
    text, tokens, clean_tokens, lemmatized_words = lemmatize(text)
    return text, tokens, clean_tokens, lemmatized_words

In [4]:
# text = "Enter your question all of the disasters and    China also the  maybe"
# clean_data(text)

### 4. Named Entity Recognition (NER)

In [5]:
def ner(new_text, lemmatized_words):
    ner_tokens = lemmatized_words
    import stanza
    # nlp = stanza.Pipeline("en")
    logging.getLogger("stanza").setLevel(logging.WARNING)
    with suppress_stdout_stderr():
        nlp = stanza.Pipeline("en", download_method=stanza.DownloadMethod.NONE)
    doc = nlp(new_text)
    if hasattr(doc, 'entities') and len(doc.entities) > 0:
        # print(doc.entities[0].text)
        # print(doc.entities[0].type)
        ner_tokens.append(doc.entities[0].type.lower())
    #     lemmatized_words.append(doc.entities[0].text)
    return ner_tokens, doc

In [6]:
# clean_text = ['Enter', 'question', 'disaster', 'China', 'also', 'maybe']
# text = "Enter your question all of the disasters and    China also the  maybe"
# ner(text, clean_text)

### 5. Ontology Mapping (lemmatized words -> Bag of Words -> Classes and Properties)

In [7]:
def ontology_map(lemmatized_words, cls_lst, prp_lst):
    class_list, prop_list =  cls_lst, prp_lst
    lemmatized_words = lemmatized_words
    # Function to map lemmatized words to ontology classes and properties
    def map_to_ontology(lemmatized_words, class_list, prop_list):
        query_classes = []
        query_properties = []
        
        # Match lemmatized words to classes
        for word in lemmatized_words:
            for class_group in class_list:
                if word.lower() in [cls.lower() for cls in class_group]:
                    query_classes.append(class_group[0])  # Select the canonical class (e.g., 'eventName')
        
        # Match lemmatized words to properties
        for word in lemmatized_words:
            for prop_group in prop_list:
                if word.lower() in [prop.lower() for prop in prop_group]:
                    query_properties.append(prop_group[0])  # Select the canonical property (e.g., 'Type')
        
        return query_classes, query_properties
    
    # Map lemmatized words to ontology
    query_classes, query_properties = map_to_ontology(lemmatized_words, class_list, prop_list)
    return query_classes, query_properties

### 6. Query building

In [8]:
def query_build(query_classes, query_properties, doc):
    import SPARQLWrapper
    query_classes, query_properties, doc = query_classes, query_properties, doc
    # Build the SPARQL query dynamically based on the mapped classes and properties
    # Initialize the SPARQL query
    sparql_query = """
    SELECT """ + " ".join([f"?{cls}" for cls in query_classes] + [f"?{prop}" for prop in query_properties]) + """
    WHERE {
      ?event a <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#DisasterEvent>.
    """
    
    # Add the class and property conditions dynamically
    sparql_query += "\n".join([f"?event <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#{cls}> ?{cls}." for cls in query_classes])
    sparql_query += "\n".join([f"?event <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#{prop}> ?{prop}." for prop in query_properties])
    if hasattr(doc, 'entities') and len(doc.entities) > 0:
        if doc.entities[0].type == "GPE":
            if hasattr(doc, 'entities') and len(doc.entities) > 0:
                cvalue = doc.entities[0].text
                #print(doc.entities[0].type)
                sparql_query += (f"FILTER(?countryName = \"{cvalue}\")")
        # Regular expression to match a year (4 digits)
        year_pattern = r"\b(\d{4})\b"
        # Search for a year in the input string
        match = re.search(year_pattern, text)
        if match:
            # Extracted year
            yvalue = int(match.group(1))
            sparql_query += (f"FILTER(?startYearValue = \"{yvalue}\")")
        if doc.entities[0].type == "LOC":
            lvalue = doc.entities[0].text
            sparql_query += (f"FILTER(?regionName = \"{lvalue}\")")
    sparql_query += """
    } LIMIT 20
    """
    return sparql_query

### 7. Fetching Data from fuseki server

In [9]:
def query_fetch(sparql_query):
    sparql_query = sparql_query
    # Connect to Fuseki server and execute the query
    sparql = SPARQLWrapper2("http://localhost:3030/Disaster_management_dataset/")
    sparql.setQuery(sparql_query)
    data = sparql.query().bindings
    return data

### 8. Creating DataFrame from the results

In [10]:
def create_dataframe(data, query_classes, query_properties):
    data, query_classes, query_properties = data, query_classes, query_properties
    import pandas as pd
    events = []
    for result in data:
        event_data = []
        for cls in query_classes:
            event_data.append(result[f"{cls}"].value if f"{cls}" in result else None)
        for prop in query_properties:
            event_data.append(result[f"{prop}"].value if f"{prop}" in result else None)
        events.append(event_data)
    # Dynamically assign column names for the DataFrame
    columns = []
    for cls in query_classes:
        columns.append(f"{cls}")  # Adding columns for query_classes
    for prop in query_properties:
        columns.append(f"{prop}")  # Adding columns for query_properties    
    
    df = pd.DataFrame(events, columns=columns)
    return df

### 9. Main function

In [11]:
def process_data(text):
    class_list, prop_list = bag_of_words()
    text, tokens, clean_tokens, lemmatized_words =  clean_data(text)
    display(Markdown('**1. Normalized output:**'))
    print(text)
    display(Markdown('**2. Tokenized Output:**'))
    print(tokens)
    display(Markdown('**3.Stopwords removed output:**'))
    print(clean_tokens)
    display(Markdown('**4. Lemmatized output:**'))
    print(lemmatized_words)
    ner_tokens, doc = ner(text, lemmatized_words)
    query_classes, query_properties = ontology_map(ner_tokens, class_list, prop_list)
    sparql_query = query_build(query_classes, query_properties, doc)
    data = query_fetch(sparql_query)
    df = create_dataframe(data, query_classes, query_properties)
    display(Markdown('**5. NER output:**'))
    print(ner_tokens)
    display(Markdown('**6. Bag of Words Mapped output:**'))
    print(query_classes, query_properties)
    display(Markdown('**7. Query output:**'))
    print(sparql_query)
    display(Markdown('**8. Dataframe output:**'))
    print(df)
    # return df

In [12]:
# !pip install SpeechRecognition pyaudio

### 10.A) Audio Input

In [13]:
import speech_recognition as sr
# Initialize recognizer class (for recognizing the speech)
recognizer = sr.Recognizer()
#Microphone as the audio source
with sr.Microphone() as source:
    print("Please say something...")    
    #Recognizer sensitivity
    recognizer.adjust_for_ambient_noise(source, duration=1)    
    #AudioData object
    audio = recognizer.listen(source)    
    print("Recognizing...")   
    #Google's speech recognition
    try:
        text = recognizer.recognize_google(audio)
        print("Your Query: ", text)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand the audio")
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
    df = process_data(text)
    print(df)

Please say something...
Recognizing...
Your Query:  disasters of China and people died


**1. Normalized output:**

disasters of China and people died


**2. Tokenized Output:**

['disasters', 'of', 'China', 'and', 'people', 'died']


**3.Stopwords removed output:**

['disasters', 'China', 'people', 'died']


**4. Lemmatized output:**

['disaster', 'China', 'people', 'die']


**5. NER output:**

['disaster', 'China', 'people', 'die', 'gpe']


**6. Bag of Words Mapped output:**

['eventName'] ['totalDeaths', 'countryName']


**7. Query output:**


    SELECT ?eventName ?totalDeaths ?countryName
    WHERE {
      ?event a <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#DisasterEvent>.
    ?event <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#eventName> ?eventName.?event <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#totalDeaths> ?totalDeaths.
?event <http://www.semanticweb.org/mohdtalhahussain/ontologies/2024/10/Disaster_Management_dataset#countryName> ?countryName.FILTER(?countryName = "China")
    } LIMIT 20
    


**8. Dataframe output:**

          eventName totalDeaths countryName
0         Coal mine          19       China
1             Cargo          11       China
2  Chemical factory          78       China
3  Chemical factory          10       China
4          Building          10       China
5             Cargo          10       China
None


### 10.B) User Interface

In [None]:
display(Markdown('**Enter your question:**'))
text = input()
process_data(text)
# df = process_data(text)
# print(df)

**Enter your question:**