# Classifies intents and recognizes named entities for user queries

In [2]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow_addons.layers import CRF
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from plot_keras_history import plot_history
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.sequence import pad_sequences
from neo4j import GraphDatabase
import re
import string
import pickle
import os
from dotenv import load_dotenv



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [3]:
# Load intent classification model
intent_model = load_model('Trained Models/intent_classification_model.h5', compile=False)
intent_model.load_weights('Trained Models/intent_classification_weights.h5')

learning_rate = 0.0001
nadam = tf.keras.optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
intent_model.compile(loss='categorical_crossentropy', optimizer=nadam, metrics=['accuracy'])

with open(f"Trained Models/tokenizer.pkl", 'rb') as f:
    Tokenizer = pickle.load(f)

    
# Load NER model
ner_model = tf.keras.models.load_model("Trained Models/Ner_BiLSTM_CRF", custom_objects={'CRF': CRF})

with open(f"Trained Models/word2index.pkl", 'rb') as f:
    word2index = pickle.load(f)

with open(f"Trained Models/index2tag.pkl", 'rb') as f:
    index2tag = pickle.load(f)

with open(f"Trained Models/max_sentence.pkl", 'rb') as f:
    MAX_SENTENCE = pickle.load(f)

In [4]:
sentence = "Where is Grand Street located?"

re_tok = re.compile(f"([{string.punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])")
sentence = re_tok.sub(r"  ", sentence).split()

padded_sentence = sentence + [word2index["--PADDING--"]] * (MAX_SENTENCE - len(sentence))
padded_sentence = [word2index.get(w, 0) for w in padded_sentence]

pred = ner_model.predict(np.array([padded_sentence]))
pred = np.argmax(pred, axis=-1)

retval = ""
for w, p in zip(sentence, pred[0]):
    retval = retval + "{:15}: {:5}".format(w, index2tag[p]) + "\n"

print(retval)

Where          : O    
is             : O    
Grand          : B-org
Street         : B-org
located        : O    



In [5]:
# Tokenization and padding process
phrase = "What did Marie Curie discover?"
tokens = Tokenizer.texts_to_sequences([phrase])
tokens = pad_sequences(tokens, maxlen=100)
prediction = intent_model.predict(np.array(tokens))

i,j = np.where(prediction == prediction.max()) #calculates the index of the maximum element of the array across all axis
# i->rows, j->columns
i = int(i)
j = int(j)

print(prediction)
total_possible_outcomes = [
    "scientific contributions",      # Major discoveries and inventions
    "affiliations and locations",    # Institutions and places where she worked
    "awards and recognitions",       # Prizes and honors she received
    "biography",                     # Information about her life and background
    "influences and impact"          # Her influence on science and legacy
]
print("Result:",total_possible_outcomes[j])

[[9.9658465e-01 2.4312774e-03 5.0097728e-06 3.5526366e-06 9.7547309e-04]]
Result: scientific contributions


In [16]:
intents = {
    "scientific contributions": "scientific contributions",
    "affiliations and locations": "affiliations and locations",
    "awards and recognitions": "awards and recognitions",
    "biography": "biography",
    "influences and impact": "influences and impact"
}

def construct_neo4j_query(intent, entity):
    relationship_type = intents.get(intent)
    if not relationship_type:
        raise ValueError("The intent you try to search is unfound")
    
    if intent == "scientific contributions":
        query = f"""
        MATCH (n)-[r:RELATION]->(m)
        WHERE n.title = $entity AND r.type IN ['discoverer', 'named']
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:RELATION]-(m)
        WHERE m.title = $entity AND r.type IN ['discoverer', 'named']
        RETURN n.title AS title
        """
    elif intent == "affiliations and locations":
        query = f"""
        MATCH (n)-[r:RELATION]->(m)
        WHERE n.title = $entity AND r.type IN ['located']
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:RELATION]-(m)
        WHERE m.title = $entity AND r.type IN ['located']
        RETURN n.title AS title
        """
    elif intent == "awards and recognitions":
        query = f"""
        MATCH (n)-[r:`award received`]->(m)
        WHERE n.title = $entity
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:`award received`]-(m)
        WHERE m.title = $entity
        RETURN n.title AS title
        """

    elif intent == "biography":
        query = f"""
        MATCH (n)-[r:RELATION]->(m)
        WHERE n.title = $entity AND r.type IN ['biography']
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:RELATION]-(m)
        WHERE m.title = $entity AND r.type IN ['biography']
        RETURN n.title AS title
        """
    elif intent == "influences and impact":
        query = f"""
        MATCH (n)-[r:RELATION]->(m)
        WHERE n.title = $entity AND r.type IN ['influence', 'impact']
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:RELATION]-(m)
        WHERE m.title = $entity AND r.type IN ['influence', 'impact']
        RETURN n.title AS title
        """
    else:
        raise ValueError("Intent not supported for query construction.")
    
    return query



In [7]:
load_dotenv()

uri = os.getenv('NEO4J_URI')
neo4j_username = os.getenv('NEO4J_USERNAME')
neo4j_password = os.getenv('NEO4J_PASSWORD')

driver = GraphDatabase.driver(uri, auth=(neo4j_username, neo4j_password))


In [8]:
def execute_neo4j_query(query, entity):
    def get_results(tx, query, entity):
        
        print(f"Executing Query: {query} with entity: {entity}")

        result = tx.run(query, entity=entity)
        return [record["title"] for record in result]
    
    with driver.session() as session:
        results = session.execute_read(get_results, query, entity)
    
    return results

def retrieve_information(intent, entity):
    query = construct_neo4j_query(intent, entity)
    results = execute_neo4j_query(query, entity)
    
    if not results:  # If no results, try with lowercase entity
        entity = entity.lower()
        query = construct_neo4j_query(intent, entity)
        results = execute_neo4j_query(query, entity)
    
    return format_response(entity, intent, results)

def format_response(entity, intent, results):
    if not results:
            return f"No information found for '{entity}' regarding '{intent}'."
        
    if intent == "scientific contributions":
            return f"{entity} is known for the following scientific contributions: {', '.join(results)}."
    elif intent == "affiliations and locations":
            return f"{entity} was affiliated with the following institutions and locations: {', '.join(results)}."
    elif intent == "awards and recognitions":
            return f"{entity} received the following awards and recognitions: {', '.join(results)}."
    elif intent == "biography":
            return f"Information about {entity}'s life and background: {', '.join(results)}."
    elif intent == "influences and impact":
            return f"{entity} had the following influences and impact: {', '.join(results)}."
    else:
            return f"The information for {entity} regarding '{intent}' is {', '.join(results)}."

In [12]:
def classify_intent(sentence):
    tokens = Tokenizer.texts_to_sequences([sentence])
    tokens = pad_sequences(tokens, maxlen=100)
    prediction = intent_model.predict(np.array(tokens))

    i, j = np.where(prediction == prediction.max())
    i = int(i)
    j = int(j)

    total_possible_outcomes = [    
    "scientific contributions",      
    "affiliations and locations",  
    "awards and recognitions",      
    "biography",                     
    "influences and impact"    
    ]
    return total_possible_outcomes[j]

def predict_ne(sentence):
    re_tok = re.compile(f"([{string.punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])")
    sentence = re_tok.sub(r"  ", sentence).split()

    # Pad the sentence to the maximum length
    padded_sentence = sentence + [word2index["--PADDING--"]] * (MAX_SENTENCE - len(sentence))
    padded_sentence = [word2index.get(w, 0) for w in padded_sentence]

    # Predict entities using the NER model
    pred = ner_model.predict(np.array([padded_sentence]))
    pred = np.argmax(pred, axis=-1)

    entities = []
    current_entity = []
    current_label = None

    for w, p in zip(sentence, pred[0]):
        tag = index2tag[p]
        if tag.startswith("B-") or (tag.startswith("I-") and current_label != tag[2:]):
            if current_entity and current_label:
                entities.append({'entity': current_label, 'text': " ".join(current_entity)})
            current_entity = [w]
            current_label = tag[2:]
        elif tag.startswith("I-") and current_label == tag[2:]:
            current_entity.append(w)
        else:
            if current_entity and current_label:
                entities.append({'entity': current_label, 'text': " ".join(current_entity)})
            current_entity = []
            current_label = None

    if current_entity and current_label:
        entities.append({'entity': current_label, 'text': " ".join(current_entity)})

    # Return the entities as a list of dictionaries
    return entities


In [22]:
Question = "What awards did Marie Curie receive?"

# Classify intent
intent = classify_intent(Question)
print(f"Intent: {intent}")

# Extract entities
entities = predict_ne(Question)
print(f"Entities: {entities}")

if not entities:
    print("No entities found.")
    response = "No entities found in the provided phrase."
else:
    entity = entities[0]['text']
    print(f"Entity: {entity}")
    
    query = construct_neo4j_query(intent, entity)
    if query is None:
        response = "Failed to construct the query."
    else:
        response = retrieve_information(intent, entity)

print(response)

Intent: awards and recognitions
Entities: [{'entity': 'per', 'text': 'Marie'}, {'entity': 'org', 'text': 'Curie'}]
Entity: Marie
Executing Query: 
        MATCH (n)-[r:`award received`]->(m)
        WHERE n.title = $entity
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:`award received`]-(m)
        WHERE m.title = $entity
        RETURN n.title AS title
         with entity: Marie
Executing Query: 
        MATCH (n)-[r:`award received`]->(m)
        WHERE n.title = $entity
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:`award received`]-(m)
        WHERE m.title = $entity
        RETURN n.title AS title
         with entity: marie
No information found for 'marie' regarding 'awards and recognitions'.


In [17]:
entity = "Marie Curie"
intent = "awards and recognitions"

# Construct the query
query = construct_neo4j_query(intent, entity)

# Execute the query and get results
results = execute_neo4j_query(query, entity)

# Format and print the response
response = format_response(entity, intent, results)
print(response)


Executing Query: 
        MATCH (n)-[r:`award received`]->(m)
        WHERE n.title = $entity
        RETURN m.title AS title
        UNION
        MATCH (n)<-[r:`award received`]-(m)
        WHERE m.title = $entity
        RETURN n.title AS title
         with entity: Marie Curie
Marie Curie received the following awards and recognitions: Nobel Prize in Physics, Nobel Prize in Physics, Physics, and Henri Becquerel, Nobel Prize, Nobel Prize in Chemistry.


In [None]:
# MATCH (m:Entity)-[r:`award received`]->(n:Entity)
# WHERE m.title = 'Marie Curie' AND n.title = 'Nobel Prize in Chemistry'
# RETURN m, r, n

# MATCH (m:Entity)-[r:`award received`]->(n:Entity)
# WHERE m.title = 'Marie Curie'
# RETURN DISTINCT n.title AS Award
