In [None]:
!pip install neo4j python-dotenv pandas pyvis pydantic fastapi python-multipart matplotlib pandas

In [None]:
pip install  uvicorn["standard"]

In [140]:
import os
os.environ["NEO4J_DB"]= "neo4j"
NEO4J_DB = os.getenv("NEO4J_DB")

In [188]:
from neo4j import GraphDatabase

# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "bolt://localhost:7687"
AUTH = (NEO4J_DB, "password")

with GraphDatabase.driver(URI, auth=AUTH, database=NEO4J_DB) as driver: # explicit db allows the driver to work more efficiently
        driver.verify_connectivity()
        print("Connection established.")

Connection established.


_____

## Project Activity Schema

In [142]:
from pydantic import BaseModel
from enum import Enum
from typing import Optional, Union

# can either be lead or lag
class RelationEnum(str, Enum):
    finishToStart = "finish-to-start" # a dependent activity cannot start until its predecessor finish (default)
    startToStart = "start-to-start" # a dependent activity cannot start until predencessor has started
    finishToFinish = "finish-to-finish" # a dependent activity cannot finish until its predecessor is finished
    startToFinish = "start-to-finish" # a depended activity cannot finish until its predecessor has started

class Activity(BaseModel):
    earlyStart: Optional[int] = None
    earlyFinish: Optional[int] = None
    latestStart: Optional[int] = None
    latestFinish: Optional[int] = None
    duration: Optional[int]
    name: Optional[str]
    description: Optional[str] = None
    totalFloat: Optional[int] = None
    freeFloat: Optional[int] = None
    independentFloat: Optional[int] = None
    # TODO how does this activity consume time and resources

class Predecessor(BaseModel):
    id: Union[str, int]
    earlyStart: Optional[int]
    earlyFinish: Optional[int]

class Relationship(BaseModel):
    activity: Predecessor
    type: RelationEnum = "finish-to-start"
    duration: Optional[int] = 0


## Resource Allocaion Schema
We can think of introducing geofenced cost of resources like how it is done in products. This data will be used in price analysis for BoQs

### Labour

In [36]:
class SkillEnum(str, Enum):
    plumbing = "plumbing"
    masonry = "masonry"

class RateEnum(str, Enum):
    perHour = "per-hour"
    perDay = "per-day"

class GroupEnum(str, Enum):
    example = "example"

class Job(BaseModel):
    title: str  # e.g., construction manager, resident engineer, driver, site agent etc

class Skill(BaseModel):
    type: SkillEnum # e.g, plumbing, masonry etc
    rate: RateEnum # per hour or per day
    renumeration: float
    total: int
    available: int
    
# to authenticate into the system
class Employee(BaseModel):
    name: str
    password: Optional[str]
    username: Optional[str]
    group: Optional[GroupEnum]
    job: Job

class EmployeSkill(BaseModel):
    skill: Skill
    employee: Employee


### Equiment

In [37]:
class EquipmentSourceEnum(str, Enum):
    purchase = "purchase"
    leasing = "leasing"  # for a period of time may include purchase option
    renting = "renting"  # only when it is available

class Equipment(BaseModel):
    name: str
    description: str
    source: EquipmentSourceEnum
    rate: RateEnum
    cost: float
    transportCost: float = 0

### RawMaterial
Here is where Jumba ecosystem is integrated. Depending on requirements of the project Jumba will be used as a procurement partner since it handles material sourcing and logistics.

In [38]:
class Material(BaseModel):
    name: str
    description: str # include the variance e.g., if cement what type rapid curing or normal curing
    quantity: int
    unitCost: int  # geofenced price

## Resource Allocation Logic
TODO: decide allocate resource when creating an activity or after all activities have been created
### Goal
- Is the resource available, if not what actions to perform to make it available e.g., alert user, or AI agents to create orders for material or equipment and user just confirms for them to be brought to site at specific days 
- What is the cost of allocating the resource required by the activity. ( information be used in price analysis for BoQ)
- A resouce might not meet the requirement so what is the deficit

This allocation should be on a timeline where we release resources e.g., equipment once an activity is done so that is available for the subsequent activities. Advance the resource allocation which have constraints to suggest optimal use of resources to ensure no wastage.

- Each ACTIVITY will have a corresponding ALLOCATION node (store resource allocated metadata i.e., the cost of the allocation depending on geofencing)
- An activity requests for a resouce to this allocation node by a dependency REQUIRES
- Perform DB query to get the resource
    - For materials Jumba's ecosystem will calculate the unitCost, shipping cost and constraints e.g., MoQ for the materials
    - For equipment Jumba can introduce this line of products for leasing, renting or purchasing unitCost and transportation cost
    - Labour on the contractors part a DBMS with the skills available to the contractor and cost of this labour. For manual labour consider introducing an external app like JumbaGo. We will have a labour contractor who adds the skills available to him or her.
- From this allocations will act as saving best practice (a need in civil works for documenting the best practices in implementations)

In [None]:
# check if allocation is full
"""MATCH (n {name: "Genesis"})-[requires:REQUIRES]->(allocation:ALLOCATION) 
MATCH (n)-[allocated:ALLOCATED]->(allocation:ALLOCATION)
RETURN requires.quantity as requiredQuantity, n.quantity as allocatedQuantity"""

In [None]:
# an activity requires a resource (activity is requesting a resource)
"""CREATE (activity:ACTIVITY) ... RETURN ID(activity) as activityID"""

### Allocating Labour

In [None]:
# aget the activity
"""MATCH (activity:ACTIVITY) WHERE ID(activity) = $activityID"""

# from the required resource
# Query DB for the resource (predefined skills the company has) to get the cost of this resource according to geofencing, and if available and amount available
# save it in a variable resource
# update the allocation with the metadata of the resources according to geofencing also constraints of delivering the labour

# create an allocation for the activity if the activity requires resource
"""CREATE (allocation: ALLOCATION {activityID: $activityID})"""
# create a relationship between the activity and allocation
"""(activity)-[requires:REQUIRES {type: labour, name:'plumbing', quantity: 5}]->(allocation)""" # activity expects  ... in the allocation

"""SET allocation.unitCost = $resource.unitCost, allocation.rate = resource.rate  """ # required.quantity x allocaition.unitCost = price analysis for resource

# What has been allocated depending the resource constraints e.g., available_quantity, 
# with MoQ will result to wastage (manufacturer and developer relationship to get exact quantity required)
"""CREATE (allocation) - [:ALLOCATED {quantity: 5}] -> (acitivity)"""

### Allocating material
Query DB for the materials Jumba has and its cost in terms of geofencing. If not available i.e., the cost is not available aleart someone (Jumba and User)

In [None]:
# aget the activity
"""MATCH (activity:ACTIVITY) WHERE ID(activity) = $activityID"""

# from the required resource
# Query DB for the resource (predefined resources that Jumba can deliver within their procurement) to get the cost of this resource according to geofencing, and if available and amount available
# save it in a variable resource
# update the allocation with the metadata of the resources according to geofencing also constraints of delivering the material e.g., MoQ

# create an allocation for the activity if the activity requires resource
"""CREATE (allocation: ALLOCATION {activityID: $activityID})"""
# create a relationship between the activity and allocation
"""(activity)-[requires:REQUIRES {type: material, name:cement, quantity: 5}]->(allocation)""" # activity expects  ... in the allocation

"""
    SET allocation.unitCost = $resource.unitCost, allocation.moq = resource.moq  """ # required.quantity x allocaition.unitCost = price analysis for resource

# What has been allocated depending the resource constraints e.g., available_quantity, 
# with MoQ will result to wastage (manufacturer and developer relationship to get exact quantity required)
"""CREATE (allocation) - [:ALLOCATED {quantity: 5}] -> (acitivity)"""

### Allocating equipment

In [None]:
# aget the activity
"""MATCH (activity:ACTIVITY) WHERE ID(activity) = $activityID"""

# from the required resource
# Query DB for the resource (predefined resources that Jumba can deliver within their procurement) to get the cost of this resource according to geofencing, and if available and amount available
# save it in a variable resource
# update the allocation with the metadata of the resources according to geofencing and constraints

# create an allocation for the activity if the activity requires resource
"""CREATE (allocation: ALLOCATION {activityID: $activityID})"""
# create a relationship between the activity and allocation
"""(activity)-[requires:REQUIRES {type: equiment, name:crane, quantity: 2}]->(allocation)""" # activity expects  ... in the allocation

"""SET allocation.unitCost = $resource.unitCost, allocation.rate = $allocation.rate """ # required.quantity x allocaition.unitCost = price analysis for resource

"""CREATE (allocation) - [:ALLOCATED {quantity: 1}] -> (acitivity)"""

In [144]:
def formatter(records):
    # Loop through results and do something with them
    for record in records:
        return record.data() # obtain record as dict

In [171]:
def resetGraph():
    try:
        with driver.session(database=NEO4J_DB) as session:
            session.run("""MATCH (n:ACTIVITY) SET n += {earlyStart: NULL, earlyFinish: NULL, latestStart: 0, latestFinish: 0}""")
        return
    except Exception as e:
        raise e

## Start Node

Create a start node since we are using activity on node. This will be connecting activities that do not have predecessors

In [189]:
# create the start node
def initiateProjectSchedule():
    try:
        with driver.session(database=NEO4J_DB) as session:
            results = session.run("""
                MERGE (start:START {name: $name, description: $description, duration: 0, earlyStart: 0, earlyFinish: 0, latestStart: 0, latestFinish: 0})
                RETURN start
            """,
            name = "Genesis",
            description = "This marks the start of your project no resources are consumed"
            )
        return formatter(results)
    except Exception as e:
        print(e)
initiateProjectSchedule()

  with driver.session(database=NEO4J_DB) as session:


The result has been consumed. Fetch all needed records before calling Result.consume().


## Activity Node and Edge
Create activity together with dependencies between each other or start node

In [None]:
# create node without calculating early start and early finish
"""CREATE (activity:ACTIVITY {name: $name, description: $description, duration: $duration})
                    WITH activity, $predecessors as batch
                    UNWIND batch as dependency
                    MATCH (predecessor:ACTIVITY)
                    WHERE ID(predecessor) = dependency.activity.id
                    CREATE (activity)<-[:DEPENDS_ON {type: dependency.type, duration: dependency.duration}]-(predecessor)"""

# TODO: create an activity and calculate earlyStart and early finish. User will see in real time time taken to finish project 

In [185]:
# depends on will be the ID of the predecessor activities
def createActivity(activity: Activity, predecessors: list[Relationship] = []):
    
    try:
        with driver.session(database="neo4j") as session:
            if len(predecessors):
                predecessors = [{"activity": vars(obj.activity), "type": obj.type, "duration": obj.duration} for obj in predecessors] # serialize the objects
                # activity has predecessors
                session.run("""
                    CREATE (activity:ACTIVITY {name: $name, description: $description, duration: $duration})
                    WITH activity, $predecessors as batch
                    UNWIND batch as dependency
                    MATCH (predecessor:ACTIVITY)
                    WHERE ID(predecessor) = dependency.activity.id
                    CREATE (activity)<-[:DEPENDS_ON {type: dependency.type, duration: dependency.duration}]-(predecessor)""", predecessors=predecessors, name=activity.name, description=activity.description, duration=activity.duration)

            else:
                # Activity has no predecessor start nodes
                session.run("""
                        MATCH (genesis:START {name: "Genesis"})
                        CREATE (start:ACTIVITY {name: $name, description: $description, duration: $duration, earlyStart: COALESCE(genesis.earlyFinish, 0), earlyFinish: COALESCE(genesis.earlyFinish, 0) + COALESCE($duration, 0)})
                        CREATE (start)<-[:START_ON]-(genesis)
                    """, 
                    name = activity.name,
                    description = activity.description,
                    duration = activity.duration)
    except Exception as e:
        print("ERROR >>>>>>", e)
        raise e

#### An activity without a dependency
The reason we needed a genesis node

In [190]:
# creating the first activity with no dependecy
createActivity(Activity(name="Excavation", description="It is just digging staff", duration=10))

  with driver.session(database="neo4j") as session:


In [191]:
# creating the second activity with no dependecy
createActivity(Activity(name="soil test", description="Confirming strength of soil", duration=3))

  with driver.session(database="neo4j") as session:


### Activity with more tha one predecessor

In [192]:
# has more than one predecessor
predecessors = [
    Relationship(activity=Predecessor(id = 1, earlyStart=0, earlyFinish=10), duration = 0),
    Relationship(activity=Predecessor(id = 2, earlyStart=0, earlyFinish=3), duration = 7),
]
createActivity(Activity(name="Poor foundation", description="placing foundation according to design", duration=2), predecessors=predecessors)

  with driver.session(database="neo4j") as session:


### Activity with 1 predecessor

In [193]:
# has one predecessor
predecessors = [
    Relationship(activity=Predecessor(id = 3, earlyStart=0, earlyFinish=10), duration = 0),
]
createActivity(Activity(name="Curing", description="cure for 7 days", duration=7), predecessors=predecessors)

  with driver.session(database="neo4j") as session:


### Activity with three predecessors

In [194]:
# has more than one predecessor
# predecessors = [
#     Relationship(activity=Predecessor(id = 30, earlyStart=0, earlyFinish=10), duration = 0),
#     Relationship(activity=Predecessor(id = 31, earlyStart=0, earlyFinish=10), duration = 0),
#     Relationship(activity=Predecessor(id = 3, earlyStart=0, earlyFinish=10), duration = 0),
# ]
predecessors = [
    Relationship(activity=Predecessor(id = 4, earlyStart=0, earlyFinish=10), duration = 0),
    Relationship(activity=Predecessor(id = 2, earlyStart=0, earlyFinish=10), duration = 0),
    Relationship(activity=Predecessor(id = 3, earlyStart=0, earlyFinish=10), duration = 0),
]
createActivity(Activity(name="Inspection", description="Inspect the foundation before back filling", duration=1), predecessors=predecessors)

  with driver.session(database="neo4j") as session:


## Graph Traversing

For scheduling constraints between the activities or tasks in the graph must be a Directed Acyclic Graph 

TODO: ensure in UI user always creates an asyclic graph.

There is a direction in the dependency of tasks and due to precedence among activities the graph is acyclic.

For this graph we need to have a Topological ordering such that any directed path in the graph traverses the nodes in an increasing order. 

And for this directed graph we may have more than on topological order if graph some parts are not connected.

TODO: Confirm possibility of more than 1 topological ordering of an acyclic graph

### Forward Pass
To calculate early start and early finish
There is precedence within the activities
ESj = Max(EFpredecessors)
EFj = ESj + tj

- get all activity nodes in array
- calculate topological order according to relationship DEPENDS_ON for each activity (might be expensive for large graphs)
- for each activity node get its predecessors
- value of duration of the relationship
- use formula above to get EF and ES of current activity

In [180]:
resetGraph()

  with driver.session(database=NEO4J_DB) as session:


In [196]:
def forwardPass():
    try:
        with driver.session(database=NEO4J_DB) as session:
            session.run("""
                OPTIONAL MATCH (activity)<-[:DEPENDS_ON]-(predecessor:ACTIVITY)
                WITH activity, COUNT(predecessor) AS incomingDependencies
                OPTIONAL MATCH path = (activity)-[:DEPENDS_ON*]->(dependent:ACTIVITY)
                WITH activity, incomingDependencies, COALESCE(MAX(LENGTH(path)), 0) AS depth
                ORDER BY depth DESC, incomingDependencies ASC
                MATCH (activity:ACTIVITY)                                           // retrieve all tasks in graph
                OPTIONAL MATCH (activity)<-[:DEPENDS_ON]-(predecessor:ACTIVITY)     // for each activity match predecessors if it exists
                WITH activity, COLLECT(predecessor) AS predecessors                 // group all predecessor nodes into a list
                ORDER BY activity.depth ASC                                         // order activities by their depth from topological sort
                WITH activity,
                    REDUCE(maxFinish = 0, pred IN predecessors |                    //  Iterate over each predecessor for forward pass take max of predecessors early finish 
                        CASE 
                            WHEN pred.earlyFinish IS NOT NULL AND pred.earlyFinish > maxFinish THEN pred.earlyFinish  // get max early finish of predecessors
                            ELSE maxFinish 
                        END) AS earlyStart                                          // the maxfinish in the predecessors will be the early start of current node  or 0 if no predecessors acc maxFinish = 0                     
                SET activity.earlyStart = COALESCE(earlyStart, 0),
                    activity.earlyFinish = COALESCE(earlyStart, 0) + activity.duration
                RETURN activity.name AS activityName, activity.earlyStart, activity.earlyFinish
                ORDER BY activity.depth ASC;
            """)
    except Exception as e:
        print(e)
        raise e
forwardPass()

  with driver.session(database=NEO4J_DB) as session:


### Backward Pass
To calculated latest start and latest finish

### 

### Finding critical path

In [None]:
"""MATCH (n:Person {name: 'Anna'})-[:KNOWS]-{1,5}(friend:Person WHERE n.born < friend.born)
RETURN DISTINCT friend.name AS olderConnections"""

### Plan for upcoming events

In [None]:
# 2 hops
"""MATCH (tom:Person {name:'Tom Hanks'})--{2}(colleagues:Person)
RETURN DISTINCT colleagues.name AS name, colleagues.born AS bornIn
ORDER BY bornIn
LIMIT 5"""

In [164]:
from neo4j import GraphDatabase
from pyvis.network import Network


def fetch_network_data():
    query = """
    MATCH (a:ACTIVITY)-[r:DEPENDS_ON]->(b:ACTIVITY)
    RETURN a, b, r
    """
    with driver.session() as session:
        result = session.run(query)
        return [
            (record["a"], record["b"], record["r"])
            for record in result
        ]

# Fetch and visualize
data = fetch_network_data()



In [165]:
data

[(<Node element_id='4:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:5' labels=frozenset({'ACTIVITY'}) properties={'duration': 10, 'name': 'Excavation', 'description': 'Excavation'}>,
  <Node element_id='4:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:7' labels=frozenset({'ACTIVITY'}) properties={'duration': 10, 'name': 'Poor foundation', 'description': 'after excavation'}>,
  <Relationship element_id='5:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:6917534525199220743' nodes=(<Node element_id='4:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:5' labels=frozenset({'ACTIVITY'}) properties={'duration': 10, 'name': 'Excavation', 'description': 'Excavation'}>, <Node element_id='4:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:7' labels=frozenset({'ACTIVITY'}) properties={'duration': 10, 'name': 'Poor foundation', 'description': 'after excavation'}>) type='DEPENDS_ON' properties={'duration': 2, 'type': 'finish-to-start'}>),
 (<Node element_id='4:cde3a7fa-b1e1-4510-9a57-f1852e2d74e0:6' labels=frozenset({'ACTIVITY'}) properties={'duration':

In [166]:
def visualize_network(data):
    from pyvis.network import Network

    def safe_id(node_id):
        """Ensure node IDs are valid types for pyvis."""
        if not isinstance(node_id, (str, int)):
            return str(node_id)
        return node_id

    # Initialize the network
    net = Network(height="750px", width="100%", directed=True)

    if not data:
        raise ValueError("No data provided to visualize the network.")

    # Process data
    for a, b, r in data:
        if a and b and r:
            # Add nodes with safe IDs
            net.add_node(safe_id(a["id"]), label=a["name"], title=f"Duration: {a.get('duration', 'N/A')}")
            net.add_node(safe_id(b["id"]), label=b["name"], title=f"Duration: {b.get('duration', 'N/A')}")
            # Add edge
            net.add_edge(
                safe_id(a["id"]),
                safe_id(b["id"]),
                title=f"{r.get('type', 'FS')} (Lead: {r.get('lead', 0)}, Lag: {r.get('lag', 0)})"
            )
        else:
            print("Skipping incomplete data:", a, b, r)

    # Debugging
    print(f"Nodes: {len(net.get_nodes())}")
    print(f"Edges: {len(net.get_edges())}")

    if not net.get_nodes():
        raise ValueError("No nodes were added to the graph. Verify input data.")
    if not net.get_edges():
        raise ValueError("No edges were added to the graph. Verify relationships.")

    # Save and show the network
    net.write_html("project_network.html")
    print("Network visualization saved as 'project_network.html'")


In [167]:
visualize_network(data)

Nodes: 1
Edges: 3
Network visualization saved as 'project_network.html'
