In [4]:
from neo4j import GraphDatabase

def get_neo4j_driver() -> GraphDatabase.driver:
    """Establishes and returns a Neo4j session for AuraDB."""
    neo4j_uri = 'neo4j+s://9d1381c2.databases.neo4j.io:7687'
    neo4j_user = 'Shirley'
    neo4j_password = 'Sxl19950312'
    return GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Call the function to get the driver
driver = get_neo4j_driver()

# Open a session
with driver.session() as session:
    # Verify connectivity within the session
    session.run("RETURN 1")

print("Connectivity verified successfully.")


from dash import Dash, dcc, html
import plotly.express as px
import pandas as pd


# Define a Cypher query to retrieve the required data
query = """
MATCH (n)
RETURN n
"""

# Execute the query and process the results
with driver.session() as session:
    result = session.run(query)
    neo4j_data = [record.data() for record in result]

# Converting Neo4j Data to Pandas DataFrame Format
df = pd.DataFrame(neo4j_data)

#print(df)

Connectivity verified successfully.


In [5]:
from neo4j import GraphDatabase
import pandas as pd
import plotly.graph_objects as go


query = """
MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
WITH res, COUNT(DISTINCT p) AS productCount, COLLECT(DISTINCT p.name) AS products, COUNT(DISTINCT i) AS ingredientCount
MATCH (allProducts:Product)
WITH res, productCount, products, ingredientCount, COUNT(allProducts) AS totalProductCount
WITH res, productCount, products, ingredientCount, totalProductCount, toFloat(productCount) / toFloat(totalProductCount) AS frequencyRatio
RETURN res.ingredient_name AS HighFrequencyIngredientResearch, productCount, products, frequencyRatio
ORDER BY frequencyRatio DESC
"""

with driver.session() as session:
    result = session.run(query)
    data = [record.data() for record in result]
    df = pd.DataFrame(data)

# The results are sorted according to the value of the frequency ratio (ratio) from highest to lowest.
df.sort_values(by='frequencyRatio', ascending=False, inplace=True)

# Assign a group name to each group（group）
df['group'] = 'Group ' + (df['HighFrequencyIngredientResearch'].rank(ascending=True, method='dense').astype(int)).astype(str)


fig_data = go.Table(
    header=dict(values=['HighFrequencyIngredientResearch', 'Product Count', 'Products', 'Frequency Ratio', 'Group']),
    cells=dict(values=[df['HighFrequencyIngredientResearch'], df['productCount'], df['products'], df['frequencyRatio'], df['group']])
)


fig_layout = go.Layout(
    title='High Frequency Ingredient Research and Associated Products (Sorted by Frequency Ratio)',
)


fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()

# Storing the results in a new DataFrame
data_lists = fig_data.cells.values
new_df = pd.DataFrame({
    'HighFrequencyIngredientResearch': data_lists[0],
    'Product Count': data_lists[1],
    'Products': data_lists[2],
    'Frequency Ratio': data_lists[3],
    'Group': data_lists[4]
})

print(new_df)

           HighFrequencyIngredientResearch  Product Count  \
0                                    Sugar             66   
1                                     Salt             50   
2                            Vegetable Oil             29   
3                              Wheat Flour             27   
4                                  Flavour             25   
..                                     ...            ...   
334                             Strawberry              1   
335                         Stabiliser 422              1   
336                            Citric Acid              1   
337                         Thickener 1414              1   
338  Clarified Pineapple Juice Concentrate              1   

                                              Products  Frequency Ratio  \
0    [Coca - Cola Classic  Soft Drink Multipack Can...         0.318841   
1    [Indomie Mi Goreng Instant Noodles, Maggi 2 Mi...         0.241546   
2    [Maggi 2 Minute Chicken Flavour Insta

In [9]:
from neo4j import GraphDatabase
import pandas as pd
import plotly.graph_objects as go


query = """
MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
WITH res, COUNT(DISTINCT p) AS productCount, COLLECT(DISTINCT p.name) AS products, COUNT(DISTINCT i) AS ingredientCount
MATCH (allProducts:Product)
WITH res, productCount, products, ingredientCount, COUNT(allProducts) AS totalProductCount
WITH res, productCount, products, ingredientCount, totalProductCount, toFloat(productCount) / toFloat(totalProductCount) AS frequencyRatio
RETURN res.ingredient_name AS HighFrequencyIngredientResearch, productCount, products, frequencyRatio
ORDER BY frequencyRatio DESC
"""

with driver.session() as session:
    result = session.run(query)
    data = [record.data() for record in result]
    df = pd.DataFrame(data)


df.sort_values(by='frequencyRatio', ascending=False, inplace=True)


group_names = [f'Group {i+1}' for i in range(len(df))]
df['group'] = pd.Series(group_names, index=df.index)


fig_data = go.Table(
    header=dict(values=['HighFrequencyIngredientResearch', 'Product Count', 'Products', 'Frequency Ratio', 'Group']),
    cells=dict(values=[df['HighFrequencyIngredientResearch'], df['productCount'], df['products'], df['frequencyRatio'], df['group']])
)


fig_layout = go.Layout(
    title='High Frequency Ingredient Research and Associated Products (Sorted by Frequency Ratio)',
)


fig = go.Figure(data=fig_data, layout=fig_layout)
fig.show()

In [1]:

def get_data(tx):
    query = """
    MATCH (p1:Product)-[:HAS_RECIPE]->(:Recipe)-[:USES_INGREDIENT]->(:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)<-[:HAS_RESEARCH]-(:Ingredient)<-[:USES_INGREDIENT]-(:Recipe)<-[:HAS_RECIPE]-(p2:Product)
    WHERE id(p1) < id(p2)
    RETURN p1.name AS product1, p2.name AS product2, COUNT(DISTINCT res.ingredient_name) AS co_occurrences
    """
    result = tx.run(query)
    data = [(record["product1"], record["product2"], record["co_occurrences"]) for record in result]
    return data


with driver.session() as session: 
    data = session.read_transaction(get_data) 


df = pd.DataFrame(data, columns=['Product1', 'Product2', 'Co-Occurrences'])


co_occurrence_matrix = df.pivot(index='Product1', columns='Product2', values='Co-Occurrences').fillna(0)


clustering = AgglomerativeClustering(n_clusters=5).fit(co_occurrence_matrix)


df['Group'] = clustering.labels_


table_data = go.Table( 
    header=dict(values=['Product1', 'Product2', 'Co-Occurrences', 'Group']), 
    cells=dict(values=[df['Product1'], df['Product2'], df['Co-Occurrences'], df['Group']]) 
) 


layout = go.Layout(title='Product Co-Occurrences Groups') 
fig = go.Figure(data=table_data, layout=layout) 
fig.show()


NameError: name 'driver' is not defined

In [98]:
print("Shape of co-occurrence matrix:", co_occurrence_matrix.shape)
print("Co-occurrence matrix:")
print(co_occurrence_matrix)


Shape of co-occurrence matrix: (0, 0)
Co-occurrence matrix:
Empty DataFrame
Columns: []
Index: []


In [103]:
import plotly.graph_objects as go
import pandas as pd


query = """
MATCH (p:Product)-[:HAS_RECIPE]->(:Recipe)-[:USES_INGREDIENT]->(:Ingredient)-[:HAS_RESEARCH]->(res1:IngredientResearch)
WITH p, COLLECT(DISTINCT res1.ingredient_name) AS ingredientResearchList
WHERE size(ingredientResearchList) >= 2
RETURN ingredientResearchList, COUNT(*) AS frequency
ORDER BY frequency DESC

"""

with driver.session() as session:
    result = session.run(query)
    data = [record.data() for record in result]
    df = pd.DataFrame(data)

import plotly.graph_objects as go




table_data = go.Table(
    header=dict(values=['Frequency', 'Ingredient Research List']),
    cells=dict(values=[df['frequency'], df['ingredientResearchList']])
)


layout = go.Layout(title='High Frequency Ingredient Research and Associated Products (Sorted by Frequency)')
fig = go.Figure(data=table_data, layout=layout)
fig.show()


In [None]:
from neo4j import GraphDatabase


# Define functions to create group nodes
def create_group(tx, group_name):
    tx.run("CREATE (:Group {name: $group_name})", group_name=group_name)

# Define functions to create relationships
def create_relationship(tx, product_name, group_name):
    tx.run("""
    MATCH (p:Product {name: $product_name})
    MATCH (g:Group {name: $group_name})
    CREATE (p)-[:GROUP_TO]->(g)
    """, product_name=product_name, group_name=group_name)

# Extracting grouping information from a DataFrame and saving it to a Neo4j database
with driver.session() as session:
    for group_name, products in new_df.groupby('Group'):
        # Create group nodes
        session.write_transaction(create_group, group_name)
        
        # Create relationships for each product
        for product in products['Products']:
            session.write_transaction(create_relationship, product, group_name)

In [10]:
# Define function to create group nodes
def create_group(tx, group_name):
    tx.run("MERGE (:Group {name: $group_name})", group_name=group_name)

# Define function to create relationships
def create_relationships(tx, group_name, product_names):
    query = """
        MATCH (g:Group {name: $group_name})
        UNWIND $product_names AS product_name
        MATCH (p:Product {name: product_name})
        MERGE (p)-[:GROUPS_TO]->(g)
    """
    tx.run(query, group_name=group_name, product_names=product_names)

# Extracting grouping information from the DataFrame and saving it to the Neo4j database
with driver.session() as session:
    for group_name, group_data in new_df.groupby('Group'):
        # Create group nodes
        session.write_transaction(create_group, group_name)
        
        # Create relationships for products in each group
        product_names = group_data['Products'].explode().tolist()
        session.write_transaction(create_relationships, group_name, product_names)


write_transaction has been renamed to execute_write


write_transaction has been renamed to execute_write



In [18]:
from neo4j import GraphDatabase
import pandas as pd
import networkx as nx
from community import community_louvain
import matplotlib.pyplot as plt
%matplotlib inline



def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data

with driver.session() as session:
    data = session.read_transaction(get_data)

# Constructing a two-part diagram
bipartite_graph = nx.Graph()
products = set([product for product, ingredientResearch in data])
ingredients = set([ingredient for product, ingredient in data])

bipartite_graph.add_nodes_from(products, bipartite=0)
bipartite_graph.add_nodes_from(ingredients, bipartite=1)
bipartite_graph.add_edges_from(data)

# Generate a projection of the product
product_nodes = {node for node, data in bipartite_graph.nodes(data=True) if data['bipartite'] == 0}
product_graph = nx.bipartite.projected_graph(bipartite_graph, product_nodes, multigraph=True)

# Run Louvain's algorithm for community detection
communities = community_louvain.best_partition(product_graph)

# Store the results in a DataFrame
product_groups = pd.DataFrame({'Product': list(products), 'Group': [communities[node] for node in product_graph.nodes()]})
print(product_groups)

import plotly.graph_objects as go


table_data = go.Table(
    header=dict(values=['Product', 'Group']),
    cells=dict(values=[product_groups['Product'], product_groups['Group']])
)


layout = go.Layout(title='Product Groups')


fig = go.Figure(data=table_data, layout=layout)
fig.show()





read_transaction has been renamed to execute_read



                                              Product  Group
0                              Woolworths Apple Juice      0
1                        Indomie Mi Goreng Noodle Cup      7
2     Smith's Crinkle Cut Potato Chips Salt & Vinegar      0
3          Schweppes Soda Water Bottle Classic Mixers      2
4      Schweppes Natural Mineral Water Bottle Natural      3
..                                                ...    ...
98       Golden Circle Drinks Pine Orange Fruit Drink      0
99                                Thins Chips Chicken      7
100  Schweppes Lime Soda Water With Lime Juice Bottle      2
101      Cadbury Dairy Milk Caramello Koala Chocolate      4
102                   Woolworths Tomato Sauce Squeeze      0

[103 rows x 2 columns]


In [4]:
# Grouping products by cluster and linking product names
grouped_products = product_groups.groupby('Group')['Product'].apply(lambda x: ', '.join(x)).reset_index()

# Sort products according to the group's serial number
grouped_products.sort_values(by='Group', inplace=True)

# Create a new DataFrame with each row containing a group and the corresponding products
new_product_groups = pd.DataFrame({'Group': grouped_products['Group'], 'Products': grouped_products['Product']})

# Create the table
table_data = go.Table(
    header=dict(values=['Group', 'Products']),
    cells=dict(values=[new_product_groups['Group'], new_product_groups['Products']])
)


layout = go.Layout(title='Product Groups (Products Sorted by Group Number)')


fig = go.Figure(data=table_data, layout=layout)
fig.show()


In [8]:
from neo4j import GraphDatabase
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans


def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, COLLECT(res.ingredient_name) AS ingredients
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredients"]) for record in result]
    return data

with driver.session() as session:
    data = session.read_transaction(get_data)

# Converting data to DataFrame
df = pd.DataFrame(data, columns=['Product', 'Ingredients'])

# Feature extraction and pre-processing of constituent data
tfidf_vectorizer = TfidfVectorizer()
ingredient_features = tfidf_vectorizer.fit_transform(df['Ingredients'].apply(lambda x: ', '.join(x)))

# Clustering using the K-means algorithm
num_clusters = 10 # Assuming the number of clusters is 5
kmeans = KMeans(n_clusters=num_clusters)
kmeans.fit(ingredient_features)

# Get the cluster labels for each product
product_labels = kmeans.labels_

# Adding Cluster Labels to a DataFrame
df['Cluster'] = product_labels

# Grouping products based on cluster labels
grouped_products = df.groupby('Cluster')['Product'].apply(lambda x: ', '.join(x)).reset_index()

# Create a new DataFrame with each row containing a group and the corresponding products
new_product_groups = pd.DataFrame({'Cluster': grouped_products['Cluster'], 'Products': grouped_products['Product']})

# Create the table
table_data = go.Table(
    header=dict(values=['Cluster', 'Products']),
    cells=dict(values=[new_product_groups['Cluster'], new_product_groups['Products']])
)


layout = go.Layout(title='Product Clusters (Products Sorted by Cluster)')


fig = go.Figure(data=table_data, layout=layout)
fig.show()



read_transaction has been renamed to execute_read





In [23]:
from neo4j import GraphDatabase
import networkx as nx


def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data

with driver.session() as session:
    data = session.read_transaction(get_data)



G = nx.Graph()
G.add_edges_from(data)



communities = list(nx.algorithms.community.label_propagation_communities(G))


product_groups = pd.DataFrame({'Product': list(G.nodes()), 'Group': [idx for idx, com in enumerate(communities) for node in com]})
print(product_groups)

import plotly.graph_objects as go


fig = go.Figure(data=[go.Table(
    header=dict(values=['Product', 'Group']),
    cells=dict(values=[product_groups['Product'], product_groups['Group']])
)])


fig.update_layout(title='Product Groups Detected by Label Propagation')


fig.show()

# Group the product_groups DataFrame by the Group column and concatenate the product names within each group
grouped_products = product_groups.groupby('Group')['Product'].apply(lambda x: ', '.join(x)).reset_index()

# 
fig = go.Figure(data=[go.Table(
    header=dict(values=['Group', 'Products']),
    cells=dict(values=[grouped_products['Group'], grouped_products['Product']])
)])


fig.update_layout(title='Products Grouped by Group')


fig.show()




read_transaction has been renamed to execute_read



                                               Product  Group
0       Coca - Cola Classic  Soft Drink Multipack Cans      0
1                                     Carbonated Water      0
2                                                Sugar      0
3                                 Colouring Agent 150d      0
4                                      Phosphoric Acid      0
..                                                 ...    ...
406                                          Pineapple     36
407                     Dole Pineapple Chunks In Juice     37
408                                    Pineapple Juice     37
409              Clarified Pineapple Juice Concentrate     37
410  Goulburn Valley Two Fruits In Juice Fruit Cups...     37

[411 rows x 2 columns]


In [22]:
from neo4j import GraphDatabase
import networkx as nx
import pandas as pd
from community import community_louvain
import matplotlib.pyplot as plt
%matplotlib inline


def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data

with driver.session() as session:
    data = session.read_transaction(get_data)


G = nx.DiGraph()
G.add_edges_from(data)


undirected_graph = G.to_undirected()


communities = list(nx.algorithms.community.label_propagation_communities(undirected_graph))


product_groups = pd.DataFrame({'Product': list(undirected_graph.nodes()), 'Group': [idx for idx, com in enumerate(communities) for node in com]})
print(product_groups)

import plotly.graph_objects as go

table_data = go.Table(
    header=dict(values=['Product', 'Group']),
    cells=dict(values=[product_groups['Product'], product_groups['Group']])
)

layout = go.Layout(title='Product Groups')

fig = go.Figure(data=table_data, layout=layout)
fig.show()



read_transaction has been renamed to execute_read



                                               Product  Group
0       Coca - Cola Classic  Soft Drink Multipack Cans      0
1                                     Carbonated Water      0
2                                                Sugar      0
3                                 Colouring Agent 150d      0
4                                      Phosphoric Acid      0
..                                                 ...    ...
406                                          Pineapple     36
407                     Dole Pineapple Chunks In Juice     37
408                                    Pineapple Juice     37
409              Clarified Pineapple Juice Concentrate     37
410  Goulburn Valley Two Fruits In Juice Fruit Cups...     37

[411 rows x 2 columns]


In [24]:
# Extract product names from raw data
products = set(product_groups['Product'])

# Group the product_groups DataFrame by the Group column and concatenate the product names in each group
grouped_products = product_groups.groupby('Group')['Product'].apply(lambda x: ', '.join(x)).reset_index()


fig = go.Figure(data=[go.Table(
    header=dict(values=['Group', 'Products']),
    cells=dict(values=[grouped_products['Group'], grouped_products['Product']])
)])


fig.update_layout(title='Products Grouped by Group')

fig.show()


In [32]:
import networkx as nx
import community  



# Community allocation results using Louvain's algorithm
louvain_partition = community.best_partition(G)

# Community allocation results using Label Propagation algorithm
label_propagation_communities = list(nx.algorithms.community.label_propagation_communities(G))

# Construct a mapping of the communities that each node belongs to
louvain_community_mapping = {node: community for node, community in louvain_partition.items()}
label_propagation_mapping = {node: idx for idx, community in enumerate(label_propagation_communities) for node in community}

# Caculate Modularity Score
def calculate_modularity_score(partition_mapping):
    modularity_score = 0
    m = G.number_of_edges()  # total number of sides
    for community in set(partition_mapping.values()):
        # Getting nodes in the community
        community_nodes = [node for node, com in partition_mapping.items() if com == community]
        # Calculate the number of edges in the community
        internal_edges = G.subgraph(community_nodes).number_of_edges()
        # Calculate Modularity Score contributions within the community
        modularity_score += internal_edges / m - (G.subgraph(community_nodes).number_of_nodes() / (2 * m)) ** 2
    return modularity_score

# Calculate Modularity Score using Louvain's Algorithm
louvain_modularity_score = calculate_modularity_score(louvain_community_mapping)
print("Louvain Modularity Score:", louvain_modularity_score)

# Calculate Modularity Score using Label Propagation Algorithm
label_propagation_modularity_score = calculate_modularity_score(label_propagation_mapping)
print("Label Propagation Modularity Score:", label_propagation_modularity_score)


Louvain Modularity Score: 0.7019335248120844
Label Propagation Modularity Score: 0.6312799054792684


In [41]:
from neo4j import GraphDatabase 
import networkx as nx 
import pandas as pd 
from community import community_louvain 
import matplotlib.pyplot as plt 
%matplotlib inline 

def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data

with driver.session() as session: 
    data = session.read_transaction(get_data) 

G = nx.Graph() 


G.add_edges_from(data)

communities = list(nx.algorithms.community.label_propagation_communities(G)) 

product_groups = pd.DataFrame({'Product': list(G.nodes()), 'Group': [idx for idx, com in enumerate(communities) for node in com]}) 
print(product_groups) 

import plotly.graph_objects as go 

table_data = go.Table( 
    header=dict(values=['Product', 'Group']), 
    cells=dict(values=[product_groups['Product'], product_groups['Group']]) 
) 

layout = go.Layout(title='Product Groups') 
fig = go.Figure(data=table_data, layout=layout) 
fig.show()



read_transaction has been renamed to execute_read



                                               Product  Group
0       Coca - Cola Classic  Soft Drink Multipack Cans      0
1                                     Carbonated Water      0
2                                                Sugar      0
3                                 Colouring Agent 150d      0
4                                      Phosphoric Acid      0
..                                                 ...    ...
406                                          Pineapple     36
407                     Dole Pineapple Chunks In Juice     37
408                                    Pineapple Juice     37
409              Clarified Pineapple Juice Concentrate     37
410  Goulburn Valley Two Fruits In Juice Fruit Cups...     37

[411 rows x 2 columns]


In [9]:
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
import plotly.graph_objects as go


def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data


with driver.session() as session: 
    data = session.read_transaction(get_data) 


G = nx.Graph() 

# Add only the product name to the graphic
products = set() 
for record in data:
    product, ingredient_research = record
    if product not in products:
        G.add_node(product)
        products.add(product)
    G.add_edge(product, ingredient_research)

# Detecting communities using hashtag propagation algorithms
communities = list(nx.algorithms.community.label_propagation_communities(G)) 

# Convert results to DataFrame
product_groups = pd.DataFrame({'Product': list(G.nodes()), 'Group': [idx for idx, com in enumerate(communities) for node in com]}) 
print(product_groups) 

# Create a table to display product groupings
table_data = go.Table( 
    header=dict(values=['Product', 'Group']), 
    cells=dict(values=[product_groups['Product'], product_groups['Group']]) 
) 


layout = go.Layout(title='Product Groups') 
fig = go.Figure(data=table_data, layout=layout) 
fig.show()



read_transaction has been renamed to execute_read



                                               Product  Group
0       Coca - Cola Classic  Soft Drink Multipack Cans      0
1                                     Carbonated Water      0
2                                                Sugar      0
3                                 Colouring Agent 150d      0
4                                      Phosphoric Acid      0
..                                                 ...    ...
406                                          Pineapple     36
407                     Dole Pineapple Chunks In Juice     37
408                                    Pineapple Juice     37
409              Clarified Pineapple Juice Concentrate     37
410  Goulburn Valley Two Fruits In Juice Fruit Cups...     37

[411 rows x 2 columns]


In [5]:
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
import plotly.graph_objects as go


def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(r:Recipe)-[:USES_INGREDIENT]->(i:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data


with driver.session() as session: 
    data = session.read_transaction(get_data) 


G = nx.Graph() 

# Add only the product name to the graphic
products = set() 
for record in data:
    product, ingredient_research = record
    G.add_node(product)  # Add product names as nodes

# Add data to the graph, assuming all edges have a weight of 1
G.add_edges_from(data)

# Detecting communities using hashtag propagation algorithms
communities = list(nx.algorithms.community.label_propagation_communities(G)) 

# Convert results to DataFrame
product_groups = pd.DataFrame({'Product': list(G.nodes()), 'Group': [idx for idx, com in enumerate(communities) for node in com]}) 
print(product_groups) 

# Create a table to display product groupings
table_data = go.Table( 
    header=dict(values=['Product', 'Group']), 
    cells=dict(values=[product_groups['Product'], product_groups['Group']]) 
) 


layout = go.Layout(title='Product Groups') 
fig = go.Figure(data=table_data, layout=layout) 
fig.show()

# Extract product names from raw data
products = set(product_groups['Product'])

# Group the product_groups DataFrame by the Group column and concatenate the product names in each group
grouped_products = product_groups.groupby('Group')['Product'].apply(lambda x: ', '.join(x)).reset_index()


fig = go.Figure(data=[go.Table(
    header=dict(values=['Group', 'Products']),
    cells=dict(values=[grouped_products['Group'], grouped_products['Product']])
)])


fig.update_layout(title='Products Grouped by Group')

fig.show()

  data = session.read_transaction(get_data)


                                               Product  Group
0       Coca - Cola Classic  Soft Drink Multipack Cans      0
1            Coca - Cola Classic  Soft Drink Mini Cans      0
2       Coca - Cola Vanilla  Soft Drink Multipack Cans      0
3     Coca - Cola Zero Sugar Soft Drink Multipack Cans      0
4    Coca - Cola Caffeine Free Zero Sugar Soft Drin...      0
..                                                 ...    ...
406                                      Diced Peaches     36
407                                Refined Fruit Juice     37
408                                          Pineapple     37
409                                    Pineapple Juice     37
410              Clarified Pineapple Juice Concentrate     37

[411 rows x 2 columns]


In [10]:

def get_data(tx):
    query = """
        MATCH (p:Product)-[:HAS_RECIPE]->(:Recipe)-[:USES_INGREDIENT]->(:Ingredient)-[:HAS_RESEARCH]->(res:IngredientResearch)
        RETURN p.name AS product, res.ingredient_name AS ingredientResearch
    """
    result = tx.run(query)
    data = [(record["product"], record["ingredientResearch"]) for record in result if record["product"] is not None and record["ingredientResearch"] is not None]
    return data




with driver.session() as session:
    data = session.read_transaction(get_data)

# Create the bipartite graph
bipartite_graph = nx.Graph()

# Add data to a bipartite graph
products = set()  # Used to store product names to avoid duplicate additions
for record in data:
    product, ingredient_research = record
    if product not in products:
        bipartite_graph.add_node(product, bipartite='product')
        products.add(product)
    if ingredient_research not in products:
        bipartite_graph.add_node(ingredient_research, bipartite='ingredientResearch')
        products.add(ingredient_research)
    bipartite_graph.add_edge(product, ingredient_research)

# Detecting communities using modular optimisation algorithms
partition = community.best_partition(bipartite_graph)

# Convert results to DataFrame
product_groups = pd.DataFrame({'Product': list(partition.keys()), 'Group': list(partition.values())})

# Keep only the name of the product and not include the name of the ingredient study
product_groups = product_groups[product_groups['Product'].apply(lambda x: bipartite_graph.nodes[x]['bipartite'] == 'product')]

print(product_groups)

# Create a table to display product groupings
table_data = go.Table(
    header=dict(values=['Product', 'Group']),
    cells=dict(values=[product_groups['Product'], product_groups['Group']])
)


layout = go.Layout(title='Product group')
fig = go.Figure(data=table_data, layout=layout)
fig.show()

# Extract product names from raw data
products = set(product_groups['Product'])

# Group the product_groups DataFrame by the Group column and concatenate the product names in each group
grouped_products = product_groups.groupby('Group')['Product'].apply(lambda x: ', '.join(x)).reset_index()


fig = go.Figure(data=[go.Table(
    header=dict(values=['Group', 'Products']),
    cells=dict(values=[grouped_products['Group'], grouped_products['Product']])
)])


fig.update_layout(title='Products Grouped by Group' )

fig.show()


read_transaction has been renamed to execute_read



NameError: name 'community' is not defined