In [1]:
import pandas as pd
from neo4j import GraphDatabase
from config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

def run_query(query, params={}):
    with driver.session() as session:
        result = session.run(query, params)
        return pd.DataFrame([r.values() for r in result], columns=result.keys())

# 1. Updating nodes

In [None]:
# Clear the entire database
run_query("MATCH (n) DETACH DELETE n")

Database cleared.


In [None]:
# Delete a single node and its relationships based on a property value (e.g., SoilCondition_ID or name)
run_query("""
    MATCH (t:TraitValue {name: $name})
    DETACH DELETE t
    """, name="spring")

In [None]:
# clear property from all matching nodes
run_query("""
          MATCH (n: WeedingStrategy|Plant|PlantTrait|PlantTraitValue|SoilProperty|SoilCondition)
          WHERE n.louvain IS NOT NULL
          REMOVE n.louvain
        """)

# 2. Logical Knowledge

In [None]:
result = run_query("""
    MATCH (ptv:PlantTraitValue)
    OPTIONAL MATCH (p_all:Plant)-[:hasTrait]->(ptv)-[:isInstanceOf]->(pt:PlantTrait)
    WITH pt, ptv, count(p_all) AS total_plants
    WHERE total_plants > 2 AND NOT pt.name IN ['HeightMax']
    MATCH (ws:WeedingStrategy)-[:isEffectiveAgainst]->(p:Plant)-[:hasTrait]->(ptv)
    WITH ws, ptv, total_plants, count(DISTINCT p) AS effective_plants
    WHERE toFloat(effective_plants) / total_plants >= 0.8
    MERGE (ws)-[:targetsTrait]->(ptv)
    RETURN ws.name, ptv.valueName, effective_plants, total_plants
    """)
print(result.data())

[{'ws.name': 'harrow tines', 'ptv.valueName': 40, 'effective_plants': 4, 'total_plants': 4}, {'ws.name': 'crop rotation', 'ptv.valueName': 40, 'effective_plants': 4, 'total_plants': 4}, {'ws.name': 'thermal control', 'ptv.valueName': 'herbaceous', 'effective_plants': 9, 'total_plants': 11}, {'ws.name': 'cultivator', 'ptv.valueName': 'climbing', 'effective_plants': 3, 'total_plants': 3}, {'ws.name': 'cultivator', 'ptv.valueName': 'cordate_root', 'effective_plants': 5, 'total_plants': 5}, {'ws.name': 'shallow cultivation', 'ptv.valueName': 'cordate_root', 'effective_plants': 4, 'total_plants': 5}, {'ws.name': 'turning tillage', 'ptv.valueName': 'all_temp_germination', 'effective_plants': 4, 'total_plants': 5}, {'ws.name': 'cultivator', 'ptv.valueName': 6, 'effective_plants': 4, 'total_plants': 4}, {'ws.name': 'crop rotation', 'ptv.valueName': 6, 'effective_plants': 4, 'total_plants': 4}, {'ws.name': 'crop rotation', 'ptv.valueName': 10, 'effective_plants': 4, 'total_plants': 4}, {'ws.nam

In [None]:
result = run_query("""
    MATCH (p:Plant)-[:isCultivatedAs]->(ctv:CultivationTraitValue)
    WITH p, count(ctv) AS num_cultivation_traits
    WHERE num_cultivation_traits > 0
    SET p.isCrop = true
    RETURN p.commonName, p.isCrop
    """)
print(result.data())

[{'p.commonName': 'Spelt', 'p.isCrop': True}, {'p.commonName': 'Emmer', 'p.isCrop': True}, {'p.commonName': 'Einkorn', 'p.isCrop': True}, {'p.commonName': 'Barley', 'p.isCrop': True}, {'p.commonName': 'Wheat', 'p.isCrop': True}]


In [None]:
run_query("""
          MATCH (ws: WeedingStrategy)-[r:isEffectiveAgainst]->(p:Plant)
          SET r.confidence = toInteger(100) """)

# 3. Link Prediction with Knowledge Graph Embeddings

In [11]:
# graph with context
run_query("""
CALL gds.graph.project('context_graph', 
['WeedingStrategy', 'Plant', 'PlantTrait', 'PlantTraitValue', 'SoilProperty', 'SoilCondition'], 
{
  isEffectiveAgainst: {orientation: 'UNDIRECTED', properties: 'confidence'},
  hasTrait: {orientation: 'UNDIRECTED'},
  prefers: {orientation: 'UNDIRECTED'},
  isInstanceOf: {orientation: 'UNDIRECTED'},
  targetsTrait: {orientation: 'UNDIRECTED'}  // from logical knowledge
})""")

# focused graph
run_query("""
CALL gds.graph.project('focused_graph', 
['WeedingStrategy', 'Plant'], 
{isEffectiveAgainst: {orientation: 'UNDIRECTED', properties: 'confidence'}})
""")

Unnamed: 0,nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
0,"{'Plant': {'label': 'Plant', 'properties': {}}...",{'isEffectiveAgainst': {'aggregation': 'DEFAUL...,focused_graph,66,238,42


In [12]:
run_query("""
CALL gds.fastRP.write('context_graph', {
  writeProperty:'context_fastRP_embedding', 
  embeddingDimension: 64,
  iterationWeights: [1.0, 1.0, 1.0, 1.0]
})""")

run_query("""
CALL gds.fastRP.write('focused_graph', {
  writeProperty:'focused_fastRP_embedding', 
  embeddingDimension: 64,
  iterationWeights: [1.0, 1.0, 1.0, 1.0]
})""")

run_query("""
CALL gds.node2vec.write('context_graph', {
  writeProperty: 'context_node2vec_embedding',
  embeddingDimension: 64,
  walkLength: 80,
  walksPerNode: 10
})""")

run_query("""
CALL gds.node2vec.write('focused_graph', {
  writeProperty: 'focused_node2vec_embedding',
  embeddingDimension: 64,
  walkLength: 80,
  walksPerNode: 10
})""")

Unnamed: 0,nodeCount,nodePropertiesWritten,preProcessingMillis,computeMillis,writeMillis,configuration,lossPerIteration
0,66,66,0,90,19,{'writeProperty': 'focused_node2vec_embedding'...,[183802.77147366334]


In [None]:
# Use your existing embeddings for link prediction via cosine similarity
result_context_fastRP = run_query("""
MATCH (ws:WeedingStrategy), (p:Plant)
WHERE ws.context_fastRP_embedding IS NOT NULL 
AND p.context_fastRP_embedding IS NOT NULL
AND NOT EXISTS((ws)-[:isEffectiveAgainst]-(p))  // Only predict new links
WITH ws, p, gds.similarity.cosine(ws.context_fastRP_embedding, p.context_fastRP_embedding) AS similarity
WHERE similarity > 0.7  // Threshold for similarity
RETURN ws.name as strategy, p.scientificName as plant, similarity
ORDER BY similarity DESC
LIMIT 20
""")

result_focused_fastRP = run_query("""
MATCH (ws:WeedingStrategy), (p:Plant)
WHERE ws.focused_fastRP_embedding IS NOT NULL 
AND p.focused_fastRP_embedding IS NOT NULL
AND NOT EXISTS((ws)-[:isEffectiveAgainst]-(p))  // Only predict new links
WITH ws, p, gds.similarity.cosine(ws.focused_fastRP_embedding, p.focused_fastRP_embedding) AS similarity
WHERE similarity > 0.7  // Threshold for similarity
RETURN ws.name as strategy, p.scientificName as plant, similarity
ORDER BY similarity DESC
LIMIT 20
""")

result_context_node2vec = run_query("""
MATCH (ws:WeedingStrategy), (p:Plant)
WHERE ws.context_node2vec_embedding IS NOT NULL 
AND p.context_node2vec_embedding IS NOT NULL
AND NOT EXISTS((ws)-[:isEffectiveAgainst]-(p))  // Only predict new links
WITH ws, p, gds.similarity.cosine(ws.context_node2vec_embedding, p.context_node2vec_embedding) AS similarity
WHERE similarity > 0.7  // Threshold for similarity
RETURN ws.name as strategy, p.scientificName as plant, similarity
ORDER BY similarity DESC
LIMIT 20
""")

result_focused_node2vec = run_query("""
MATCH (ws:WeedingStrategy), (p:Plant)
WHERE ws.focused_node2vec_embedding IS NOT NULL
AND p.focused_node2vec_embedding IS NOT NULL
AND NOT EXISTS((ws)-[:isEffectiveAgainst]-(p))  // Only predict new links
WITH ws, p, gds.similarity.cosine(ws.focused_node2vec_embedding, p.focused_node2vec_embedding) AS similarity
WHERE similarity > 0.7  // Threshold for similarity
RETURN ws.name as strategy, p.scientificName as plant, similarity
ORDER BY similarity DESC
LIMIT 20
""")



In [21]:
all_results = result_context_fastRP\
    .merge(result_focused_fastRP, on=['strategy', 'plant'], how='outer', suffixes=('_context_fastRP', '_focused_fastRP'))\
    .merge(result_context_node2vec, on=['strategy', 'plant'], how='outer', suffixes=('', '_context_node2vec'))\
    .merge(result_focused_node2vec, on=['strategy', 'plant'], how='outer', suffixes=('', '_focused_node2vec'))
    
all_results['count'] = all_results.count(axis=1) - 2
all_results['max_similarity'] = all_results[['similarity_context_fastRP', 'similarity_focused_fastRP', 'similarity', 'similarity_focused_node2vec']].max(axis=1)
all_results.fillna(0, inplace=True)
all_results = all_results.sort_values(by=['count', 'max_similarity'], ascending=False)

print(all_results)

               strategy                      plant  similarity_context_fastRP  \
16           cultivator      Digitaria sanguinalis                   0.854074   
19           cultivator         Portulaca oleracea                   0.813703   
14        crop rotation             Galium aparine                   0.805922   
22         harrow tines     Amaranthus retroflexus                   0.835194   
53      thermal control         Portulaca oleracea                   0.806796   
33            row width        Polygonum aviculare                   0.000000   
48    stubble treatment         Rumex obtusifolius                   0.000000   
25            row width            Anthemis cotula                   0.000000   
47    stubble treatment              Rumex crispus                   0.000000   
38         seeding rate            Stellaria media                   0.000000   
32            row width       Matricaria discoidea                   0.000000   
13        crop rotation     

In [23]:
all_results = all_results[all_results['count'] >= 2]
relationships_data = all_results[['strategy', 'plant', 'max_similarity']].to_dict('records')

run_query("""
UNWIND $relationships AS rel
MATCH (ws:WeedingStrategy {name: rel.strategy})
MATCH (p:Plant {scientificName: rel.plant})
MERGE (ws)-[r:predictedEffectiveAgainst]->(p)
SET r.confidence = rel.max_similarity
RETURN count(r) as relationships_created
""", {'relationships': relationships_data})

print("Batch insert completed")


Batch insert completed
