In [1]:
from neo4j import GraphDatabase

def get_neo4j_driver() -> "neo4j.Driver":
    """Create and return a Neo4j driver for the AuraDB instance.

    Connection settings are read from environment variables so that no
    credentials are stored in the notebook:

    * ``NEO4J_URI``      -- optional; defaults to the project's AuraDB URI.
    * ``NEO4J_USER``     -- required.
    * ``NEO4J_PASSWORD`` -- required.

    Returns:
        The driver object produced by ``GraphDatabase.driver``. Sessions are
        opened from it by the caller.

    Raises:
        RuntimeError: if ``NEO4J_USER`` or ``NEO4J_PASSWORD`` is unset or empty.
    """
    import os  # local import keeps this cell self-contained

    # SECURITY: a username/password used to be hardcoded here. Any credential
    # that was ever committed with this notebook should be rotated.
    neo4j_uri = os.environ.get('NEO4J_URI', 'neo4j+s://9d1381c2.databases.neo4j.io:7687')
    neo4j_user = os.environ.get('NEO4J_USER')
    neo4j_password = os.environ.get('NEO4J_PASSWORD')

    missing = [
        name
        for name, value in (('NEO4J_USER', neo4j_user), ('NEO4J_PASSWORD', neo4j_password))
        if not value
    ]
    if missing:
        raise RuntimeError(
            "Missing Neo4j credentials; set environment variable(s): " + ", ".join(missing)
        )

    return GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Create the driver that the query cells below open their sessions from
driver = get_neo4j_driver()

# Let the driver itself confirm that the server can be reached; this raises
# an exception if a connection cannot be established, so the message below
# is only printed on success.
driver.verify_connectivity()

print("Connectivity verified successfully.")


from dash import Dash, dcc, html
import plotly.express as px
import pandas as pd


# Cypher that returns every node in the graph, one node per record.
# NOTE(review): `df` is reassigned by the next cell, so this full-graph read
# looks unused — confirm before keeping it.
query = """
MATCH (n)
RETURN n
"""

# Run the query and convert the records to plain dicts while the session is open
with driver.session() as session:
    result = session.run(query)
    neo4j_data = result.data()

# One DataFrame row per returned record
df = pd.DataFrame(neo4j_data)

Connectivity verified successfully.


In [2]:
import pandas as pd
import plotly.graph_objects as go

# Cypher: one record per matched (recipe, manufacturing-process step, product)
# combination, returning the step, its description and node id, the recipe's
# `ingredients_orig` text and the product name.
# NOTE(review): id() is reported as deprecated on recent Neo4j servers in
# favour of elementId() — confirm the server version before changing.
query = """
MATCH (r:Recipe)-[:HAS_MANUFACTURING_PROCESS]->(mp:ManufacturingProcess)
MATCH (p:Product)-[:HAS_RECIPE]->(r)
RETURN mp.Step AS ManufacturingProcessStep, 
       mp.Description AS ManufacturingProcessDescription,
       id(mp) AS ManufacturingProcessId,
       r.ingredients_orig AS Recipe, 
       p.name AS Product
"""

# Run the query and convert the records to plain dicts while the session is open
with driver.session() as session:
    result = session.run(query)
    neo4j_data = result.data()

# One DataFrame row per record, one column per RETURN alias
df = pd.DataFrame(neo4j_data)

# Keyword groups: a description belongs to a group when it contains every
# keyword of that group. Groups are tried in the order written here.
keyword_groups = {
    'group1': ['water', 'sugar'],
    'group2': ['water', 'and']
}


def check_keywords(description, keywords):
    """Return True when ``description`` contains every keyword.

    Matching is a case-insensitive *substring* test, so a keyword such as
    ``'and'`` is also found inside longer words (e.g. ``'brand'``).

    Args:
        description: Value to inspect; non-null values are converted with ``str``.
        keywords: Iterable of keyword strings that must all be present.

    Returns:
        bool: False for a null description (``None``/``NaN``); otherwise
        whether all keywords occur in the text.
    """
    if pd.isnull(description):
        return False
    # Lower-case the text once instead of once per keyword
    haystack = str(description).lower()
    return all(kw.lower() in haystack for kw in keywords)


def assign_group(description):
    """Return the name of the first group in ``keyword_groups`` whose keywords
    all occur in ``description``, or None when no group matches."""
    return next(
        (
            group_name
            for group_name, keywords in keyword_groups.items()
            if check_keywords(description, keywords)
        ),
        None,
    )

# Label every row with the first keyword group its description matches
# (None when no group matches)
df['Group'] = df['ManufacturingProcessDescription'].apply(assign_group)

# Keep only the rows that received a group label
filtered_df = df[df['Group'].notna()]

# Table header label -> filtered_df column, in display order
table_columns = {
    'Step': 'ManufacturingProcessStep',
    'Description': 'ManufacturingProcessDescription',
    'Id': 'ManufacturingProcessId',
    'Recipe': 'Recipe',
    'Product': 'Product',
    'Group': 'Group',
}

# Single Plotly table trace: one header cell and one data column per entry
fig_data = [go.Table(
    header={'values': list(table_columns)},
    cells={'values': [filtered_df[column] for column in table_columns.values()]},
)]

# Figure-level layout (title only)
fig_layout = go.Layout(title='ManufacturingProcess Grouped by Keyword Combinations')

fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()


In [5]:
import pandas as pd
import plotly.graph_objects as go

# Cypher: one record per matched (recipe, manufacturing-process step, product,
# ingredient) combination, returning the step, its description, the recipe's
# `ingredients_orig` text, the product name and the ingredient name.
query = """
MATCH (r:Recipe)-[:HAS_MANUFACTURING_PROCESS]->(mp:ManufacturingProcess)
MATCH (p:Product)-[:HAS_RECIPE]->(r)
MATCH (r)-[:USES_INGREDIENT]->(i:Ingredient)
RETURN mp.Step AS ManufacturingProcessStep, 
       mp.Description AS ManufacturingProcessDescription,
       r.ingredients_orig AS Recipe, 
       p.name AS Product,
       i.name AS Ingredient
"""

# Run the query and convert the records to plain dicts while the session is open
with driver.session() as session:
    result = session.run(query)
    neo4j_data = result.data()

# One DataFrame row per record, one column per RETURN alias
df = pd.DataFrame(neo4j_data)

# Keyword groups: a description belongs to a group when it contains every
# keyword of that group. Groups are tried in the order written here.
keyword_groups = {
    'group1': ['water', 'sugar'],
    'group2': ['water', 'and']
}


def check_keywords(description, keywords):
    """Return True when ``description`` contains every keyword.

    Matching is a case-insensitive *substring* test, so a keyword such as
    ``'and'`` is also found inside longer words (e.g. ``'brand'``).

    Args:
        description: Value to inspect; non-null values are converted with ``str``.
        keywords: Iterable of keyword strings that must all be present.

    Returns:
        bool: False for a null description (``None``/``NaN``); otherwise
        whether all keywords occur in the text.
    """
    if pd.isnull(description):
        return False
    # Lower-case the text once instead of once per keyword
    haystack = str(description).lower()
    return all(kw.lower() in haystack for kw in keywords)


def assign_group(description):
    """Return the name of the first group in ``keyword_groups`` whose keywords
    all occur in ``description``, or None when no group matches."""
    return next(
        (
            group_name
            for group_name, keywords in keyword_groups.items()
            if check_keywords(description, keywords)
        ),
        None,
    )

# Label every row with the first keyword group its description matches
df['Group'] = df['ManufacturingProcessDescription'].apply(assign_group)


def assign_combined_group(row):
    """Return ``"<Product>_<Ingredient>"`` for ``row``, or None when either
    the Product or the Ingredient value is null."""
    if pd.isnull(row['Product']) or pd.isnull(row['Ingredient']):
        return None
    return f"{row['Product']}_{row['Ingredient']}"


# Row-wise: build the Product/Ingredient label for every row
df['CombinedGroup'] = df.apply(assign_combined_group, axis=1)

# Keep only rows that have both a keyword group and a combined label
filtered_df = df.dropna(subset=['Group', 'CombinedGroup'])

# Table header label -> filtered_df column, in display order
table_columns = {
    'Step': 'ManufacturingProcessStep',
    'Description': 'ManufacturingProcessDescription',
    'Recipe': 'Recipe',
    'Product': 'Product',
    'Ingredient': 'Ingredient',
    'Group': 'Group',
    'CombinedGroup': 'CombinedGroup',
}

# Single Plotly table trace: one header cell and one data column per entry
fig_data = [go.Table(
    header={'values': list(table_columns)},
    cells={'values': [filtered_df[column] for column in table_columns.values()]},
)]

# Figure-level layout (title only)
fig_layout = go.Layout(
    title='ManufacturingProcess Grouped by Keyword and Product-Ingredient Combinations'
)

fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()

In [6]:
import pandas as pd
import plotly.graph_objects as go

# Cypher: one record per matched (recipe, manufacturing-process step, product,
# ingredient, ingredient-research) combination. Ingredients without a
# HAS_RESEARCH relationship produce no record because every clause is a
# plain MATCH.
query = """
MATCH (r:Recipe)-[:HAS_MANUFACTURING_PROCESS]->(mp:ManufacturingProcess)
MATCH (p:Product)-[:HAS_RECIPE]->(r)
MATCH (r)-[:USES_INGREDIENT]->(i:Ingredient)
MATCH (i)-[:HAS_RESEARCH]->(z:IngredientResearch)
RETURN mp.Step AS ManufacturingProcessStep, 
       mp.Description AS ManufacturingProcessDescription,
       r.ingredients_orig AS Recipe, 
       p.name AS Product,
       i.name AS Ingredient,
       z.ingredient_name AS IngredientResearch
"""

# Run the query and convert the records to plain dicts while the session is open
with driver.session() as session:
    result = session.run(query)
    neo4j_data = result.data()

# One DataFrame row per record, one column per RETURN alias
df = pd.DataFrame(neo4j_data)

# Keyword groups: a description belongs to a group when it contains every
# keyword of that group. Groups are tried in the order written here.
keyword_groups = {
    'group1': ['water', 'sugar'],
    'group2': ['water', 'and']
}


def check_keywords(description, keywords):
    """Return True when ``description`` contains every keyword.

    Matching is a case-insensitive *substring* test, so a keyword such as
    ``'and'`` is also found inside longer words (e.g. ``'brand'``).

    Args:
        description: Value to inspect; non-null values are converted with ``str``.
        keywords: Iterable of keyword strings that must all be present.

    Returns:
        bool: False for a null description (``None``/``NaN``); otherwise
        whether all keywords occur in the text.
    """
    if pd.isnull(description):
        return False
    # Lower-case the text once instead of once per keyword
    haystack = str(description).lower()
    return all(kw.lower() in haystack for kw in keywords)


def assign_group(description):
    """Return the name of the first group in ``keyword_groups`` whose keywords
    all occur in ``description``, or None when no group matches."""
    return next(
        (
            group_name
            for group_name, keywords in keyword_groups.items()
            if check_keywords(description, keywords)
        ),
        None,
    )

# Label every row with the first keyword group its description matches
df['Group'] = df['ManufacturingProcessDescription'].apply(assign_group)


def assign_combined_group(row):
    """Return ``"<Product>_<Ingredient>"`` for ``row``, or None when either
    the Product or the Ingredient value is null."""
    if pd.isnull(row['Product']) or pd.isnull(row['Ingredient']):
        return None
    return f"{row['Product']}_{row['Ingredient']}"


# Row-wise: build the Product/Ingredient label for every row
df['CombinedGroup'] = df.apply(assign_combined_group, axis=1)

# Keep only rows that have both a keyword group and a combined label
filtered_df = df.dropna(subset=['Group', 'CombinedGroup'])

# Table header label -> filtered_df column, in display order
table_columns = {
    'Step': 'ManufacturingProcessStep',
    'Description': 'ManufacturingProcessDescription',
    'Recipe': 'Recipe',
    'Product': 'Product',
    'Ingredient': 'Ingredient',
    'IngredientResearch': 'IngredientResearch',
    'Group': 'Group',
    'CombinedGroup': 'CombinedGroup',
}

# Single Plotly table trace: one header cell and one data column per entry
fig_data = [go.Table(
    header={'values': list(table_columns)},
    cells={'values': [filtered_df[column] for column in table_columns.values()]},
)]

# Figure-level layout (title only)
fig_layout = go.Layout(
    title='ManufacturingProcess Grouped by Keyword and Product-Ingredient Combinations'
)

fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()

In [7]:
import pandas as pd
import plotly.graph_objects as go

# Cypher: one record per matched (recipe, manufacturing-process step, product,
# ingredient, ingredient-research) combination. Ingredients without a
# HAS_RESEARCH relationship produce no record because every clause is a
# plain MATCH.
query = """
MATCH (r:Recipe)-[:HAS_MANUFACTURING_PROCESS]->(mp:ManufacturingProcess)
MATCH (p:Product)-[:HAS_RECIPE]->(r)
MATCH (r)-[:USES_INGREDIENT]->(i:Ingredient)
MATCH (i)-[:HAS_RESEARCH]->(z:IngredientResearch)
RETURN mp.Step AS ManufacturingProcessStep, 
       mp.Description AS ManufacturingProcessDescription,
       r.ingredients_orig AS Recipe, 
       p.name AS Product,
       i.name AS Ingredient,
       z.ingredient_name AS IngredientResearch
"""

# Run the query and convert the records to plain dicts while the session is open
with driver.session() as session:
    result = session.run(query)
    neo4j_data = result.data()

# One DataFrame row per record, one column per RETURN alias
df = pd.DataFrame(neo4j_data)

# Keyword groups: a description belongs to a group when it contains every
# keyword of that group. Groups are tried in the order written here.
keyword_groups = {
    'group1': ['water', 'sugar'],
    'group2': ['water', 'and']
}


def check_keywords(description, keywords):
    """Return True when ``description`` contains every keyword.

    Matching is a case-insensitive *substring* test, so a keyword such as
    ``'and'`` is also found inside longer words (e.g. ``'brand'``).

    Args:
        description: Value to inspect; non-null values are converted with ``str``.
        keywords: Iterable of keyword strings that must all be present.

    Returns:
        bool: False for a null description (``None``/``NaN``); otherwise
        whether all keywords occur in the text.
    """
    if pd.isnull(description):
        return False
    # Lower-case the text once instead of once per keyword
    haystack = str(description).lower()
    return all(kw.lower() in haystack for kw in keywords)


def assign_group(description):
    """Return the name of the first group in ``keyword_groups`` whose keywords
    all occur in ``description``, or None when no group matches."""
    return next(
        (
            group_name
            for group_name, keywords in keyword_groups.items()
            if check_keywords(description, keywords)
        ),
        None,
    )

# Label every row with the first keyword group its description matches
df['Group'] = df['ManufacturingProcessDescription'].apply(assign_group)


def assign_combined_group(row):
    """Return ``"<Product>_<IngredientResearch>"`` for ``row``, or None when
    either the Product or the IngredientResearch value is null.

    Only those two columns are read; Ingredient and the description do not
    take part in the label.
    """
    if pd.isnull(row['Product']) or pd.isnull(row['IngredientResearch']):
        return None
    return f"{row['Product']}_{row['IngredientResearch']}"


# Row-wise: build the Product/IngredientResearch label for every row
df['CombinedGroup'] = df.apply(assign_combined_group, axis=1)

# Keep only rows that have both a keyword group and a combined label
filtered_df = df.dropna(subset=['Group', 'CombinedGroup'])

# Table header label -> filtered_df column, in display order
table_columns = {
    'Step': 'ManufacturingProcessStep',
    'Description': 'ManufacturingProcessDescription',
    'Recipe': 'Recipe',
    'Product': 'Product',
    'Ingredient': 'Ingredient',
    'IngredientResearch': 'IngredientResearch',
    'Group': 'Group',
    'CombinedGroup': 'CombinedGroup',
}

# Single Plotly table trace: one header cell and one data column per entry
fig_data = [go.Table(
    header={'values': list(table_columns)},
    cells={'values': [filtered_df[column] for column in table_columns.values()]},
)]

# Figure-level layout (title only).
# NOTE(review): the title says "Product-Ingredient" while CombinedGroup in this
# cell is built from Product and IngredientResearch — confirm the wording.
fig_layout = go.Layout(
    title='ManufacturingProcess Grouped by Keyword and Product-Ingredient Combinations'
)

fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()


In [11]:
import pandas as pd
import spacy

# Load spaCy English model
nlp = spacy.load('en_core_web_sm')

# Each inner list is one keyword group; a description matches when every
# keyword of at least one group appears as a token.
keyword_groups = [['water', 'sugar']]


def filter_descriptions(description):
    """Return True when ``description`` contains all keywords of any group.

    The text is lower-cased and tokenized with spaCy; a keyword counts only
    when it equals a whole token, so it is not found inside a longer token.

    Args:
        description: Value to inspect; non-null values are converted with ``str``.

    Returns:
        bool: False for a null description or when no group matches.
    """
    if pd.isnull(description):
        return False
    # Only the token texts are used, so run the tokenizer alone (make_doc)
    # rather than the whole processing pipeline.
    doc = nlp.make_doc(str(description).lower())
    # A set gives constant-time membership checks for each keyword
    tokens = {token.text for token in doc}
    return any(all(word in tokens for word in group) for group in keyword_groups)

# Keep the rows of df whose description passes the keyword filter.
# NOTE(review): `df` is not created in this cell; it is whatever an earlier
# query cell left behind.
matches = df['ManufacturingProcessDescription'].apply(filter_descriptions)
filtered_df = df[matches]
print(filtered_df[['ManufacturingProcessStep', 'ManufacturingProcessDescription', 'Recipe']])


[W095] Model 'en_core_web_sm' (3.5.0) was trained with spaCy v3.5.0 and may not be 100% compatible with the current version (3.7.2). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate



     ManufacturingProcessStep  \
24                     Step 6   
25                     Step 7   
76                     Step 2   
100                    Step 1   
116                    Step 1   
...                       ...   
2042                   Step 1   
2064                   Step 1   
2082                   Step 1   
2103                   Step 1   
2108                   Step 1   

                        ManufacturingProcessDescription  \
24    Step 6: The sweet soy sauce is made by mixing ...   
25    Step 7: The chilli sauce is made by mixing chi...   
76    Step 2: Combining the fruit juices and purées ...   
100   Step 1: Canola oil, water, sugar, and vinegar ...   
116   Step 1: Preparation of Jelly - Water, sugar, a...   
...                                                 ...   
2042  Step 1: Preparation of Jelly - Water, sugar, a...   
2064  Step 1: Preparation of Jelly - Water, sugar, a...   
2082  Step 1: Preparation of Jelly - Water, sugar, a...   
2103  Step 1:

In [17]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
import nltk  # NOTE(review): repeats the import two lines above
# Download the stop-word corpus that is read through nltk.corpus.stopwords below
nltk.download('stopwords')
import nltk  # NOTE(review): repeats the import above
# Download the 'punkt' tokenizer data; presumably needed by word_tokenize — TODO confirm
nltk.download('punkt')




# Each inner list is one keyword group; a description matches when every
# keyword of at least one group appears among its tokens.
keyword_groups = [['water', 'sugar']]

# English stop words. `stopwords` stays the list NLTK returns; the frozenset
# copy gives constant-time membership tests inside the filter.
stopwords = nltk.corpus.stopwords.words('english')
_stopword_set = frozenset(stopwords)


def filter_descriptions(description):
    """Return True when ``description`` contains all keywords of any group.

    The text is lower-cased, split with ``word_tokenize`` and stripped of
    English stop words; a keyword counts only when it equals a remaining token.

    Args:
        description: Value to inspect; non-null values are converted with ``str``.

    Returns:
        bool: False for a null description or when no group matches.
    """
    if pd.isnull(description):
        return False
    tokens = word_tokenize(str(description).lower())
    filtered_tokens = {word for word in tokens if word not in _stopword_set}
    return any(all(word in filtered_tokens for word in group) for group in keyword_groups)

# Collect step, description and recipe for every row of df whose description
# passes the keyword filter.
# NOTE(review): `df` is not created in this cell; it is whatever an earlier
# query cell left behind.
result = [
    {
        'ManufacturingProcessStep': row['ManufacturingProcessStep'],
        'ManufacturingProcessDescription': row['ManufacturingProcessDescription'],
        'Recipe': row['Recipe'],
    }
    for _, row in df.iterrows()
    if filter_descriptions(row['ManufacturingProcessDescription'])
]

print(result)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\apple\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\apple\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


[{'ManufacturingProcessStep': 'Step 6', 'ManufacturingProcessDescription': 'Step 6: The sweet soy sauce is made by mixing sugar, water, salt, wheat, soy bean, spices, and sesame oil.', 'Recipe': 'Noodles: Wheat Flour (62%), Refined Palm Oil (Contains Antioxidant 319), Salt, Acidity Regulators (451, 501, 500), Thickener (412), Colour (101).<br>Seasoning Powder: Salt, Sugar, Flavour Enhancers (621, 631, 627), Garlic Powder, Onion Powder, Yeast Extract, Artificial Flavour, Pepper, Anti Caking Agent (551).<br>Seasoning Oil: Refined Palm Oil (Contains Antioxidant 319), Onion.<br>Sweet Soy Sauce: Sugar, Water, Salt, Wheat, Soy Bean, Spices, Sesame Oil.<br>Chilli Sauce: Chilli, Water, Sugar, Salt, Tapioca Starch, Acidity Regulators (260, 330), Herbs, Flavour Enhancers (621, 627, 631), Flavours, Preservatives 1211, 223). Fried Onion: Onion, Refined Palm Oil (Contains Antioxidant 319).'}, {'ManufacturingProcessStep': 'Step 7', 'ManufacturingProcessDescription': 'Step 7: The chilli sauce is made

In [14]:
import pandas as pd
import spacy


# Load spaCy English model
nlp = spacy.load('en_core_web_sm')

# Each inner list is one keyword group; a description matches when every
# keyword of at least one group appears as a token.
keyword_groups = [['water', 'sugar']]


def filter_descriptions(description):
    """Return True when ``description`` contains all keywords of any group.

    The text is lower-cased and tokenized with spaCy; a keyword counts only
    when it equals a whole token, so it is not found inside a longer token.

    Args:
        description: Value to inspect; non-null values are converted with ``str``.

    Returns:
        bool: False for a null description or when no group matches.
    """
    if pd.isnull(description):
        return False
    # Only the token texts are used, so run the tokenizer alone (make_doc)
    # rather than the whole processing pipeline.
    doc = nlp.make_doc(str(description).lower())
    # A set gives constant-time membership checks for each keyword
    tokens = {token.text for token in doc}
    return any(all(word in tokens for word in group) for group in keyword_groups)

# Collect step, description and recipe for every row of df whose description
# passes the keyword filter.
# NOTE(review): `df` is not created in this cell; it is whatever an earlier
# query cell left behind.
result = [
    {
        'ManufacturingProcessStep': row['ManufacturingProcessStep'],
        'ManufacturingProcessDescription': row['ManufacturingProcessDescription'],
        'Recipe': row['Recipe'],
    }
    for _, row in df.iterrows()
    if filter_descriptions(row['ManufacturingProcessDescription'])
]

print(result)


[W095] Model 'en_core_web_sm' (3.5.0) was trained with spaCy v3.5.0 and may not be 100% compatible with the current version (3.7.2). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate



[{'ManufacturingProcessStep': 'Step 6', 'ManufacturingProcessDescription': 'Step 6: The sweet soy sauce is made by mixing sugar, water, salt, wheat, soy bean, spices, and sesame oil.', 'Recipe': 'Noodles: Wheat Flour (62%), Refined Palm Oil (Contains Antioxidant 319), Salt, Acidity Regulators (451, 501, 500), Thickener (412), Colour (101).<br>Seasoning Powder: Salt, Sugar, Flavour Enhancers (621, 631, 627), Garlic Powder, Onion Powder, Yeast Extract, Artificial Flavour, Pepper, Anti Caking Agent (551).<br>Seasoning Oil: Refined Palm Oil (Contains Antioxidant 319), Onion.<br>Sweet Soy Sauce: Sugar, Water, Salt, Wheat, Soy Bean, Spices, Sesame Oil.<br>Chilli Sauce: Chilli, Water, Sugar, Salt, Tapioca Starch, Acidity Regulators (260, 330), Herbs, Flavour Enhancers (621, 627, 631), Flavours, Preservatives 1211, 223). Fried Onion: Onion, Refined Palm Oil (Contains Antioxidant 319).'}, {'ManufacturingProcessStep': 'Step 7', 'ManufacturingProcessDescription': 'Step 7: The chilli sauce is made

In [25]:
import pandas as pd
import spacy
import plotly.graph_objects as go

# Load spaCy English model
nlp = spacy.load('en_core_web_sm')

# Each inner list is one keyword group; a description matches when every
# keyword of at least one group appears as a token.
keyword_groups = [['water', 'sugar']]


def filter_descriptions(description):
    """Return True when ``description`` contains all keywords of any group.

    The text is lower-cased and tokenized with spaCy; a keyword counts only
    when it equals a whole token, so it is not found inside a longer token.

    Args:
        description: Value to inspect; non-null values are converted with ``str``.

    Returns:
        bool: False for a null description or when no group matches.
    """
    if pd.isnull(description):
        return False
    # Only the token texts are used, so run the tokenizer alone (make_doc)
    # rather than the whole processing pipeline.
    doc = nlp.make_doc(str(description).lower())
    # A set gives constant-time membership checks for each keyword
    tokens = {token.text for token in doc}
    return any(all(word in tokens for word in group) for group in keyword_groups)

# Columns copied from df into the result, in the order the records are built.
# NOTE(review): `df` is not created in this cell; it must come from an earlier
# query cell that returned Product, Ingredient and IngredientResearch.
result_columns = [
    'ManufacturingProcessStep',
    'ManufacturingProcessDescription',
    'Recipe',
    'Product',
    'Ingredient',
    'IngredientResearch',
]

# One dict per row of df whose description passes the keyword filter
result = [
    {column: row[column] for column in result_columns}
    for _, row in df.iterrows()
    if filter_descriptions(row['ManufacturingProcessDescription'])
]

# Passing `columns` keeps the expected columns even when nothing matched.
# Without it an empty `result` yields a frame with no columns, and the
# result_df['...'] lookups used to build the table raise KeyError.
result_df = pd.DataFrame(result, columns=result_columns)


# Table header label -> result_df column, in display order.
# NOTE(review): the 'product' label is lower-case unlike the other headers —
# confirm whether that is intended.
table_columns = {
    'ManufacturingProcessStep': 'ManufacturingProcessStep',
    'ManufacturingProcessDescription': 'ManufacturingProcessDescription',
    'product': 'Product',
    'Recipe': 'Recipe',
    'Ingredient': 'Ingredient',
    'IngredientResearch': 'IngredientResearch',
}

# Single Plotly table trace: one header cell and one data column per entry
fig_data = [go.Table(
    header={'values': list(table_columns)},
    cells={'values': [result_df[column] for column in table_columns.values()]},
)]

# Figure-level layout (title only)
fig_layout = go.Layout(
    title='ManufacturingProcesses with \'water\' and \'sugar\' in Description'
)

fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()


[W095] Model 'en_core_web_sm' (3.5.0) was trained with spaCy v3.5.0 and may not be 100% compatible with the current version (3.7.2). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate

