# ADAM Optimizer

A common optimizer used in machine learning is ADAM. We have created the components for it in a separate file, ADAM.json, and will use that to showcase it here.

In [1]:
import sys
#sys.path.append("../../pybdp")
#from src import pybdp
import pybdp
from IPython.display import Markdown
from pprint import pprint

# Start with an empty project; the spaces, blocks, processors, wires and
# systems defined in the cells below are all registered on this object.
project = pybdp.create_empty_project()

## High Level

At a high level, the ADAM algorithm initializes some optimization parameters and then loops, updating theta, the parameters we are optimizing.

In [2]:
# theta is the only space the top-level optimizer works with
project.add_space(
    id="theta",
    name="theta",
    description="The model parameters",
)

# The block fixes the interface: theta as domain, theta as codomain
project.add_block(
    id="Parameter Optimization Block",
    name="Parameter Optimization Block",
    description="The block for parameter optimization",
    domain=["theta"],
    codomain=["theta"],
)

# ADAM is registered as a processor whose parent is that block
project.add_processor(
    id="ADAM",
    name="ADAM",
    description="The ADAM optimization algorithm",
    parent_id="Parameter Optimization Block",
)

# Render the processor, then list its ports and terminals
processor = project.processors_map["ADAM"]
processor.display_mermaid_graphic()
print("Ports:", processor.ports, sep="\n")
print("Terminals:", processor.terminals, sep="\n")

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph G0[ADAM - Parameter Optimization Block Block]
direction LR
X0[ADAM]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
end
XX0P0[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[theta]
end
X0 o--o XX0T0[theta]
end

```

Ports:
[< Space ID: theta Name: theta >]
Terminals:
[< Space ID: theta Name: theta >]


## Initialization and Updating

The next step is to break down what happens in our subsystem. There are two components that run sequentially:

1. Initializing the variables for ADAM
2. Running the update loop

We add them below as a subsystem, first creating the system itself.

In [3]:
# State spaces for ADAM; each space uses the same string for its id and name
for space_id, space_description in [
    ("m", "First moment vector"),
    ("v", "Second moment vector"),
    ("t", "The current timestep"),
]:
    project.add_space(id=space_id, name=space_id, description=space_description)

# Initialization block: takes theta, emits theta together with m, v and t
project.add_block(
    id="Parameter Initialization",
    name="Parameter Initialization",
    description="The block for parameter initialization",
    domain=["theta"],
    codomain=["theta", "m", "v", "t"],
)

# Update-loop block: takes theta, m, v and t, emits only theta
project.add_block(
    id="Optimization Update Loop",
    name="Optimization Update Loop",
    description="The block for the update loop of optimization",
    domain=["theta", "m", "v", "t"],
    codomain=["theta"],
)

# One processor per block
project.add_processor(
    id="ADAM Initialization",
    name="ADAM Initialization",
    description="Initialiazes the ADAM state variables",
    parent_id="Parameter Initialization",
)
project.add_processor(
    id="ADAM Update Loop",
    name="ADAM Update Loop",
    description="Loops through the ADAM updates",
    parent_id="Optimization Update Loop",
)

# Show which wires could connect the initializer and the update loop
adam_initialization = project.processors_map["ADAM Initialization"]
adam_update_loop = project.processors_map["ADAM Update Loop"]
pprint(adam_initialization.find_potential_wires(adam_update_loop))

{'Ports': [{'Parent': 'theta',
            'Source': {'Index': 0, 'Processor': 'ADAM Update Loop'},
            'Target': {'Index': 0, 'Processor': 'ADAM Initialization'}}],
 'Terminals': [{'Parent': 'theta',
                'Source': {'Index': 0, 'Processor': 'ADAM Initialization'},
                'Target': {'Index': 0, 'Processor': 'ADAM Update Loop'}},
               {'Parent': 'm',
                'Source': {'Index': 1, 'Processor': 'ADAM Initialization'},
                'Target': {'Index': 1, 'Processor': 'ADAM Update Loop'}},
               {'Parent': 'v',
                'Source': {'Index': 2, 'Processor': 'ADAM Initialization'},
                'Target': {'Index': 2, 'Processor': 'ADAM Update Loop'}},
               {'Parent': 't',
                'Source': {'Index': 3, 'Processor': 'ADAM Initialization'},
                'Target': {'Index': 3, 'Processor': 'ADAM Update Loop'}}]}


In [4]:
# Remember which wire ids already exist so the wires created in this cell can
# be picked out afterwards, instead of hard-coding their auto-generated ids
# ("W1".."W4"), which only holds when no other wire was added before.
wire_ids_before = {wire.id for wire in project.wires}

# Connect each terminal of the initializer (theta, m, v, t) to the port of the
# update loop at the same index; these are the "Terminals" candidates listed
# by find_potential_wires in the previous cell.
project.add_wires(
    [
        {
            "Parent": space_id,
            "Source": {"Index": index, "Processor": "ADAM Initialization"},
            "Target": {"Index": index, "Processor": "ADAM Update Loop"},
        }
        for index, space_id in enumerate(["theta", "m", "v", "t"])
    ],
    auto_increment=True,
)

# Ids of the wires added above, in the order project.wires yields them
adam_system_wire_ids = [
    wire.id for wire in project.wires if wire.id not in wire_ids_before
]

project.add_system(
    id="ADAM System",
    name="ADAM System",
    processors=["ADAM Initialization", "ADAM Update Loop"],
    wires=adam_system_wire_ids,
    description="The system representing the ADAM algorithm",
)

# NOTE: `processor` holds a system here, not a processor; the variable name is
# kept unchanged from the rest of the notebook.
processor = project.systems_map["ADAM System"]
processor.display_mermaid_graphic()

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GS0[ADAM System]
subgraph G0[ADAM Initialization - Parameter Initialization Block]
direction LR
X0[ADAM Initialization]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
end
XX0P0[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[theta]
XX0T1[m]
XX0T2[v]
XX0T3[t]
end
X0 o--o XX0T0[theta]
X0 o--o XX0T1[m]
X0 o--o XX0T2[v]
X0 o--o XX0T3[t]
end
subgraph G1[ADAM Update Loop - Optimization Update Loop Block]
direction LR
X1[ADAM Update Loop]
subgraph G1P[Ports]
direction TB
XX1P0[theta]
XX1P1[m]
XX1P2[v]
XX1P3[t]
end
XX1P0[theta] o--o X1
XX1P1[m] o--o X1
XX1P2[v] o--o X1
XX1P3[t] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[theta]
end
X1 o--o XX1T0[theta]
end
XX0T0[theta] ---> XX1P0[theta]
XX0T1[m] ---> XX1P1[m]
XX0T2[v] ---> XX1P2[v]
XX0T3[t] ---> XX1P3[t]
end

```

## Attach Subsystem

In [5]:
# Ask the ADAM processor which ports/terminals of the ADAM System could back
# its own ports and terminals; `possible` is reused by the next cell.
adam_processor = project.processors_map["ADAM"]
adam_system = project.systems_map["ADAM System"]
possible = adam_processor.find_potential_subsystems_mappings(adam_system)
pprint(possible)

{'Port Mappings': [[{'Index': 0, 'Processor': 'ADAM Initialization'}]],
 'Terminal Mappings': [[{'Index': 0, 'Processor': 'ADAM Initialization'},
                        {'Index': 0, 'Processor': 'ADAM Update Loop'}]]}


In [6]:
# `possible` holds one list of candidates per port / terminal of ADAM.
# Ports: take the first candidate of each list.
port_mappings = [candidates[0] for candidates in possible["Port Mappings"]]
# Terminals: take the second candidate of each list; per the mapping printed
# by the previous cell this is the theta terminal of "ADAM Update Loop".
terminal_mappings = [candidates[1] for candidates in possible["Terminal Mappings"]]
print("Port Mappings: ", port_mappings)
print("Terminal Mappings: ", terminal_mappings)

# Attach the ADAM System as the subsystem of the ADAM processor
adam_system = project.systems_map["ADAM System"]
project.attach_subsystem(
    project.processors_map["ADAM"],
    adam_system,
    port_mappings,
    terminal_mappings,
)

# Render the plain view first, then the composite view
processor = project.processors_map["ADAM"]
processor.display_mermaid_graphic()
processor.display_mermaid_graphic(composite=True)

Port Mappings:  [{'Processor': 'ADAM Initialization', 'Index': 0}]
Terminal Mappings:  [{'Processor': 'ADAM Update Loop', 'Index': 0}]


```mermaid
---
config:
    layout: elk
---
graph LR
subgraph G0[ADAM - Parameter Optimization Block Block]
direction LR
X0[ADAM]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
end
XX0P0[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[theta]
end
X0 o--o XX0T0[theta]
end

```

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GC0[ADAM - Parameter Optimization Block Block]
direction LR
subgraph GS0[ADAM System]
subgraph G1[ADAM Initialization - Parameter Initialization Block]
direction LR
X1[ADAM Initialization]
subgraph G1P[Ports]
direction TB
XX1P0[theta]
end
XX1P0[theta] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[theta]
XX1T1[m]
XX1T2[v]
XX1T3[t]
end
X1 o--o XX1T0[theta]
X1 o--o XX1T1[m]
X1 o--o XX1T2[v]
X1 o--o XX1T3[t]
end
subgraph G2[ADAM Update Loop - Optimization Update Loop Block]
direction LR
X2[ADAM Update Loop]
subgraph G2P[Ports]
direction TB
XX2P0[theta]
XX2P1[m]
XX2P2[v]
XX2P3[t]
end
XX2P0[theta] o--o X2
XX2P1[m] o--o X2
XX2P2[v] o--o X2
XX2P3[t] o--o X2
subgraph G2T[Terminals]
direction TB
XX2T0[theta]
end
X2 o--o XX2T0[theta]
end
XX1T0[theta] ---> XX2P0[theta]
XX1T1[m] ---> XX2P1[m]
XX1T2[v] ---> XX2P2[v]
XX1T3[t] ---> XX2P3[t]
end
subgraph GC0P[Ports]
direction TB
X1P0[theta]
end
X1P0[theta] --> XX1P0[theta]
subgraph GC0T[Terminals]
direction TB
X1T0[theta]
end
XX2T0[theta] --> X1T0[theta]
end

```

## Update Loop

Now we begin work on the update loop, which has three components:

1. A switch which can take in spaces from either outside the processor or inside the processor
2. A convergence criteria which determines when the loop ends
3. An update step which specifies how parameters are updated

In [7]:
# Switch block: the domain lists m, v, t, theta twice, so the switch can take
# the spaces from either outside or inside the processor; it emits one set.
project.add_block(
    id="Optimization Loop Switch",
    name="Optimization Loop Switch",
    description="The switch for the optimization loop",
    domain=["m", "v", "t", "theta", "m", "v", "t", "theta"],
    codomain=["m", "v", "t", "theta"],
)

# Convergence block: same four spaces in and out
project.add_block(
    id="Convergence Criteria",
    name="Convergence Criteria",
    description="Evaluates whether the optimization process has converged based on moments, timestep, and parameters",
    domain=["m", "v", "t", "theta"],
    codomain=["m", "v", "t", "theta"],
)

# Step block: note that theta comes first here, unlike the two blocks above
project.add_block(
    id="Optimization Step",
    name="Optimization Step",
    description="One step in the optimization process",
    domain=["theta", "m", "v", "t"],
    codomain=["theta", "m", "v", "t"],
)

# One processor per block
project.add_processor(
    id="Theta Convergence Criteria",
    name="Theta Convergence Criteria",
    description="Convergence criteria based only on theta",
    parent_id="Convergence Criteria",
)
project.add_processor(
    id="ADAM Update Step",
    name="ADAM Update Step",
    description="One update step in the ADAM algorithm",
    parent_id="Optimization Step",
)
project.add_processor(
    id="ADAM Switch",
    name="ADAM Switch",
    description="The switch for the ADAM optimization algorithm",
    parent_id="Optimization Loop Switch",
)

# Walk the loop switch -> criteria -> update step -> switch and print the
# potential wires between each processor and the one that follows it.
loop_order = ["ADAM Switch", "Theta Convergence Criteria", "ADAM Update Step"]
for position, first_id in enumerate(loop_order):
    second_id = loop_order[(position + 1) % len(loop_order)]
    print("Potential wires between processors: {} and {}".format(first_id, second_id))
    first_processor = project.processors_map[first_id]
    second_processor = project.processors_map[second_id]
    pprint(first_processor.find_potential_wires(second_processor))
    print()
    print()

Potential wires between processors: ADAM Switch and Theta Convergence Criteria
{'Ports': [{'Parent': 'm',
            'Source': {'Index': 0, 'Processor': 'Theta Convergence Criteria'},
            'Target': {'Index': 0, 'Processor': 'ADAM Switch'}},
           {'Parent': 'm',
            'Source': {'Index': 0, 'Processor': 'Theta Convergence Criteria'},
            'Target': {'Index': 4, 'Processor': 'ADAM Switch'}},
           {'Parent': 'v',
            'Source': {'Index': 1, 'Processor': 'Theta Convergence Criteria'},
            'Target': {'Index': 1, 'Processor': 'ADAM Switch'}},
           {'Parent': 'v',
            'Source': {'Index': 1, 'Processor': 'Theta Convergence Criteria'},
            'Target': {'Index': 5, 'Processor': 'ADAM Switch'}},
           {'Parent': 't',
            'Source': {'Index': 2, 'Processor': 'Theta Convergence Criteria'},
            'Target': {'Index': 2, 'Processor': 'ADAM Switch'}},
           {'Parent': 't',
            'Source': {'Index': 2, 'Pro

In [8]:
def _wire(space_id, source_id, source_index, target_id, target_index):
    """Return one wire spec in the dict layout passed to project.add_wires.

    space_id is the wire's parent space; the source is a terminal index on
    processor source_id and the target is a port index on processor target_id.
    """
    return {
        "Parent": space_id,
        "Source": {"Index": source_index, "Processor": source_id},
        "Target": {"Index": target_index, "Processor": target_id},
    }


loop_wires = [
    # ADAM Switch terminals -> Theta Convergence Criteria ports (same order)
    _wire("m", "ADAM Switch", 0, "Theta Convergence Criteria", 0),
    _wire("v", "ADAM Switch", 1, "Theta Convergence Criteria", 1),
    _wire("t", "ADAM Switch", 2, "Theta Convergence Criteria", 2),
    _wire("theta", "ADAM Switch", 3, "Theta Convergence Criteria", 3),
    # Theta Convergence Criteria terminals -> ADAM Update Step ports
    # (the update step lists theta first, so the indices are permuted)
    _wire("theta", "Theta Convergence Criteria", 3, "ADAM Update Step", 0),
    _wire("m", "Theta Convergence Criteria", 0, "ADAM Update Step", 1),
    _wire("v", "Theta Convergence Criteria", 1, "ADAM Update Step", 2),
    _wire("t", "Theta Convergence Criteria", 2, "ADAM Update Step", 3),
    # ADAM Update Step terminals -> second set of ADAM Switch ports (4-7)
    _wire("m", "ADAM Update Step", 1, "ADAM Switch", 4),
    _wire("v", "ADAM Update Step", 2, "ADAM Switch", 5),
    _wire("t", "ADAM Update Step", 3, "ADAM Switch", 6),
    _wire("theta", "ADAM Update Step", 0, "ADAM Switch", 7),
]

# Remember the existing wire ids so the wires created here can be selected
# afterwards; this replaces slicing project.wires with a hard-coded offset,
# which silently assumed that exactly four wires existed before this cell.
wire_ids_before = {wire.id for wire in project.wires}
project.add_wires(loop_wires, auto_increment=True)
loop_wire_ids = [
    wire.id for wire in project.wires if wire.id not in wire_ids_before
]

project.add_system(
    id="ADAM Optimization Loop System",
    name="ADAM Optimization Loop System",
    processors=["ADAM Switch", "ADAM Update Step", "Theta Convergence Criteria"],
    wires=loop_wire_ids,
    description="The system representing the ADAM optimization loop with convergence checking",
)

# NOTE: `processor` holds a system here, not a processor; the variable name is
# kept unchanged from the rest of the notebook.
processor = project.systems_map["ADAM Optimization Loop System"]
processor.display_mermaid_graphic()

# List how the ports/terminals of "ADAM Update Loop" could map onto the new
# system; the next cell picks from these candidates.
possible = project.processors_map["ADAM Update Loop"].find_potential_subsystems_mappings(
    project.systems_map["ADAM Optimization Loop System"]
)
pprint(possible)

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GS0[ADAM Optimization Loop System]
subgraph G0[ADAM Switch - Optimization Loop Switch Block]
direction LR
X0[ADAM Switch]
subgraph G0P[Ports]
direction TB
XX0P0[m]
XX0P1[v]
XX0P2[t]
XX0P3[theta]
XX0P4[m]
XX0P5[v]
XX0P6[t]
XX0P7[theta]
end
XX0P0[m] o--o X0
XX0P1[v] o--o X0
XX0P2[t] o--o X0
XX0P3[theta] o--o X0
XX0P4[m] o--o X0
XX0P5[v] o--o X0
XX0P6[t] o--o X0
XX0P7[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[m]
XX0T1[v]
XX0T2[t]
XX0T3[theta]
end
X0 o--o XX0T0[m]
X0 o--o XX0T1[v]
X0 o--o XX0T2[t]
X0 o--o XX0T3[theta]
end
subgraph G1[ADAM Update Step - Optimization Step Block]
direction LR
X1[ADAM Update Step]
subgraph G1P[Ports]
direction TB
XX1P0[theta]
XX1P1[m]
XX1P2[v]
XX1P3[t]
end
XX1P0[theta] o--o X1
XX1P1[m] o--o X1
XX1P2[v] o--o X1
XX1P3[t] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[theta]
XX1T1[m]
XX1T2[v]
XX1T3[t]
end
X1 o--o XX1T0[theta]
X1 o--o XX1T1[m]
X1 o--o XX1T2[v]
X1 o--o XX1T3[t]
end
subgraph G2[Theta Convergence Criteria - Convergence Criteria Block]
direction LR
X2[Theta Convergence Criteria]
subgraph G2P[Ports]
direction TB
XX2P0[m]
XX2P1[v]
XX2P2[t]
XX2P3[theta]
end
XX2P0[m] o--o X2
XX2P1[v] o--o X2
XX2P2[t] o--o X2
XX2P3[theta] o--o X2
subgraph G2T[Terminals]
direction TB
XX2T0[m]
XX2T1[v]
XX2T2[t]
XX2T3[theta]
end
X2 o--o XX2T0[m]
X2 o--o XX2T1[v]
X2 o--o XX2T2[t]
X2 o--o XX2T3[theta]
end
XX0T0[m] ---> XX2P0[m]
XX0T1[v] ---> XX2P1[v]
XX0T2[t] ---> XX2P2[t]
XX0T3[theta] ---> XX2P3[theta]
XX2T3[theta] ---> XX1P0[theta]
XX2T0[m] ---> XX1P1[m]
XX2T1[v] ---> XX1P2[v]
XX2T2[t] ---> XX1P3[t]
XX1T1[m] ---> XX0P4[m]
XX1T2[v] ---> XX0P5[v]
XX1T3[t] ---> XX0P6[t]
XX1T0[theta] ---> XX0P7[theta]
end

```

{'Port Mappings': [[{'Index': 3, 'Processor': 'ADAM Switch'}],
                   [{'Index': 0, 'Processor': 'ADAM Switch'}],
                   [{'Index': 1, 'Processor': 'ADAM Switch'}],
                   [{'Index': 2, 'Processor': 'ADAM Switch'}]],
 'Terminal Mappings': [[{'Index': 3, 'Processor': 'ADAM Switch'},
                        {'Index': 0, 'Processor': 'ADAM Update Step'},
                        {'Index': 3,
                         'Processor': 'Theta Convergence Criteria'}]]}


In [9]:
# "ADAM Update Loop" has its ports in the order theta, m, v, t, while the
# first four ports of "ADAM Switch" are m, v, t, theta; the matching switch
# port indices are therefore 3, 0, 1, 2.
port_mappings = [
    {'Index': switch_port, 'Processor': 'ADAM Switch'}
    for switch_port in (3, 0, 1, 2)
]
# Of the three theta terminal candidates printed above, use terminal 3 of the
# convergence criteria as the loop's output.
terminal_mappings = [{'Index': 3, 'Processor': 'Theta Convergence Criteria'}]

# Attach the loop system as the subsystem of the "ADAM Update Loop" processor
loop_system = project.systems_map["ADAM Optimization Loop System"]
project.attach_subsystem(
    project.processors_map["ADAM Update Loop"],
    loop_system,
    port_mappings,
    terminal_mappings,
)

# Render the plain view first, then the composite view
processor = project.processors_map["ADAM Update Loop"]
processor.display_mermaid_graphic()
processor.display_mermaid_graphic(composite=True)

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph G0[ADAM Update Loop - Optimization Update Loop Block]
direction LR
X0[ADAM Update Loop]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
XX0P1[m]
XX0P2[v]
XX0P3[t]
end
XX0P0[theta] o--o X0
XX0P1[m] o--o X0
XX0P2[v] o--o X0
XX0P3[t] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[theta]
end
X0 o--o XX0T0[theta]
end

```

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GC0[ADAM Update Loop - Optimization Update Loop Block]
direction LR
subgraph GS0[ADAM Optimization Loop System]
subgraph G1[ADAM Switch - Optimization Loop Switch Block]
direction LR
X1[ADAM Switch]
subgraph G1P[Ports]
direction TB
XX1P0[m]
XX1P1[v]
XX1P2[t]
XX1P3[theta]
XX1P4[m]
XX1P5[v]
XX1P6[t]
XX1P7[theta]
end
XX1P0[m] o--o X1
XX1P1[v] o--o X1
XX1P2[t] o--o X1
XX1P3[theta] o--o X1
XX1P4[m] o--o X1
XX1P5[v] o--o X1
XX1P6[t] o--o X1
XX1P7[theta] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[m]
XX1T1[v]
XX1T2[t]
XX1T3[theta]
end
X1 o--o XX1T0[m]
X1 o--o XX1T1[v]
X1 o--o XX1T2[t]
X1 o--o XX1T3[theta]
end
subgraph G2[ADAM Update Step - Optimization Step Block]
direction LR
X2[ADAM Update Step]
subgraph G2P[Ports]
direction TB
XX2P0[theta]
XX2P1[m]
XX2P2[v]
XX2P3[t]
end
XX2P0[theta] o--o X2
XX2P1[m] o--o X2
XX2P2[v] o--o X2
XX2P3[t] o--o X2
subgraph G2T[Terminals]
direction TB
XX2T0[theta]
XX2T1[m]
XX2T2[v]
XX2T3[t]
end
X2 o--o XX2T0[theta]
X2 o--o XX2T1[m]
X2 o--o XX2T2[v]
X2 o--o XX2T3[t]
end
subgraph G3[Theta Convergence Criteria - Convergence Criteria Block]
direction LR
X3[Theta Convergence Criteria]
subgraph G3P[Ports]
direction TB
XX3P0[m]
XX3P1[v]
XX3P2[t]
XX3P3[theta]
end
XX3P0[m] o--o X3
XX3P1[v] o--o X3
XX3P2[t] o--o X3
XX3P3[theta] o--o X3
subgraph G3T[Terminals]
direction TB
XX3T0[m]
XX3T1[v]
XX3T2[t]
XX3T3[theta]
end
X3 o--o XX3T0[m]
X3 o--o XX3T1[v]
X3 o--o XX3T2[t]
X3 o--o XX3T3[theta]
end
XX1T0[m] ---> XX3P0[m]
XX1T1[v] ---> XX3P1[v]
XX1T2[t] ---> XX3P2[t]
XX1T3[theta] ---> XX3P3[theta]
XX3T3[theta] ---> XX2P0[theta]
XX3T0[m] ---> XX2P1[m]
XX3T1[v] ---> XX2P2[v]
XX3T2[t] ---> XX2P3[t]
XX2T1[m] ---> XX1P4[m]
XX2T2[v] ---> XX1P5[v]
XX2T3[t] ---> XX1P6[t]
XX2T0[theta] ---> XX1P7[theta]
end
subgraph GC0P[Ports]
direction TB
X1P0[theta]
X1P1[m]
X1P2[v]
X1P3[t]
end
X1P0[theta] --> XX1P3[theta]
X1P1[m] --> XX1P0[m]
X1P2[v] --> XX1P1[v]
X1P3[t] --> XX1P2[t]
subgraph GC0T[Terminals]
direction TB
X1T0[theta]
end
XX3T3[theta] --> X1T0[theta]
end

```

## Update Step

The final thing we need to do is fill out the ADAM update step; we follow the algorithm from the paper.

In [10]:
# Add the spaces
project.add_space(id="g",
                  name="g",
                  description="Gradients")

# Add the blocks: each one declares the input (domain) and output (codomain)
# spaces of one stage of the update step
project.add_block(id="Update Timestep",
                  name="Update Timestep",
                  description="Updates the current timestep value",
                  domain=["t"],
                  codomain=["t"],)
project.add_block(id="Get Gradients",
                  name="Get Gradients",
                  description="Computes the gradients",
                  domain=["theta"],
                  codomain=["g"],)
project.add_block(id="Update Biased First Moment",
                  name="Update Biased First Moment",
                  description="Updates the biased first moment estimate",
                  domain=["m", "g"],
                  codomain=["m"],)
project.add_block(id="Update Biased Second Moment",
                  name="Update Biased Second Moment",
                  description="Updates the biased second moment estimate",
                  domain=["v", "g"],
                  codomain=["v"],)
project.add_block(id="Compute Bias-Corrected First Moment",
                  name="Compute Bias-Corrected First Moment",
                  description="Computes the bias-corrected first moment estimate",
                  domain=["m", "t"],
                  codomain=["m"],)
project.add_block(id="Compute Bias-Corrected Second Moment",
                  name="Compute Bias-Corrected Second Moment",
                  description="Computes the bias-corrected second moment estimate",
                  domain=["v", "t"],
                  codomain=["v"],)
project.add_block(id="Update Theta",
                  name="Update Theta",
                  description="Updates model parameters based on bias-corrected moment estimates",
                  domain=["m", "v", "theta"],
                  codomain=["theta"])

# Add processors: one per block; the descriptions carry the update formulas
project.add_processor(id="Increment Timestep",
                      name="Increment Timestep",
                      description="Increments t by 1",
                      parent_id="Update Timestep")

project.add_processor(id="Get Function Gradients",
                      name="Get Function Gradients",
                      description="Computes the gradients of a function which respect to theta. $g_t$ ← $∇_θf_t(θ_{t−1})$",
                      parent_id="Get Gradients")

project.add_processor(id="Exponential Smoothing First Moment",
                      name="Exponential Smoothing First Moment",
                      description="Updates the first moment estimate using exponential smoothing with beta1 parameter. $m_t$ ← $β_1$ · $m_{t-1}$ + (1 − $β_1$) · $g_t$ ",
                      parent_id="Update Biased First Moment")

project.add_processor(id="Exponential Smoothing Second Moment",
                      name="Exponential Smoothing Second Moment",
                      description="Updates the second moment estimate using exponential smoothing with beta2 parameter. $v_t$ ← $β_2$ · $v_{t-1}$ + (1 − $β_2$) · $g_t^2$",
                      parent_id="Update Biased Second Moment")

project.add_processor(id="Exponential Decay First Moment Bias Correction",
                      name="Exponential Decay First Moment Bias Correction",
                      description="Computes the bias-corrected first moment estimate using exponential decay. $\\hat{m}_t$ ← $m_t$ / (1 − $\\beta_1^t$)",
                      parent_id="Compute Bias-Corrected First Moment")

project.add_processor(id="Exponential Decay Second Moment Bias Correction",
                      name="Exponential Decay Second Moment Bias Correction",
                      description="Computes the bias-corrected second moment estimate using exponential decay. $\\hat{v}_t$ ← $v_t$ / (1 − $\\beta_2^t$)",
                      parent_id="Compute Bias-Corrected Second Moment")

project.add_processor(id="ADAM Theta Update",
                      name="ADAM Theta Update",
                      description="Updates model parameters using ADAM formula: $θ_t$ ← $θ_{t-1} - α · \\hat{m}_t / (\\sqrt{\\hat{v}_t} + ϵ)$",
                      parent_id="Update Theta")

# Remember the existing wire ids so the wires created here can be selected
# afterwards; this replaces the hard-coded ids "W17".."W24", which were only
# right when exactly sixteen wires had been added by the earlier cells.
wire_ids_before = {wire.id for wire in project.wires}

# Add wires
project.add_wires([
    # Bias-corrected moments feed the theta update
    {
        "Parent": "m",
        "Source": {"Processor": "Exponential Decay First Moment Bias Correction", "Index": 0},
        "Target": {"Processor": "ADAM Theta Update", "Index": 0}
    },
    {
        "Parent": "v",
        "Source": {"Processor": "Exponential Decay Second Moment Bias Correction", "Index": 0},
        "Target": {"Processor": "ADAM Theta Update", "Index": 1}
    },
    # The incremented timestep feeds both bias corrections
    {
        "Parent": "t",
        "Source": {"Processor": "Increment Timestep", "Index": 0},
        "Target": {"Processor": "Exponential Decay First Moment Bias Correction", "Index": 1}
    },
    {
        "Parent": "t",
        "Source": {"Processor": "Increment Timestep", "Index": 0},
        "Target": {"Processor": "Exponential Decay Second Moment Bias Correction", "Index": 1}
    },
    # Biased moment estimates feed their bias corrections
    {
        "Parent": "m",
        "Source": {"Processor": "Exponential Smoothing First Moment", "Index": 0},
        "Target": {"Processor": "Exponential Decay First Moment Bias Correction", "Index": 0}
    },
    {
        "Parent": "v",
        "Source": {"Processor": "Exponential Smoothing Second Moment", "Index": 0},
        "Target": {"Processor": "Exponential Decay Second Moment Bias Correction", "Index": 0}
    },
    # The gradients feed both moment updates
    {
        "Parent": "g",
        "Source": {"Processor": "Get Function Gradients", "Index": 0},
        "Target": {"Processor": "Exponential Smoothing First Moment", "Index": 1}
    },
    {
        "Parent": "g",
        "Source": {"Processor": "Get Function Gradients", "Index": 0},
        "Target": {"Processor": "Exponential Smoothing Second Moment", "Index": 1}
    },
], auto_increment=True)

# Ids of the wires added above, in the order project.wires yields them
update_step_wire_ids = [
    wire.id for wire in project.wires if wire.id not in wire_ids_before
]

project.add_system(id="ADAM Update Step System",
                   name="ADAM Update Step System",
                   description="A single iteration of the ADAM optimization algorithm",
                   processors=[
                       "Get Function Gradients",
                       "Exponential Smoothing First Moment",
                       "Exponential Smoothing Second Moment",
                       "Increment Timestep",
                       "Exponential Decay First Moment Bias Correction",
                       "Exponential Decay Second Moment Bias Correction",
                       "ADAM Theta Update"
                   ],
                   wires=update_step_wire_ids)

# NOTE: `processor` holds a system here, not a processor; the variable name is
# kept unchanged from the rest of the notebook.
processor = project.systems_map["ADAM Update Step System"]
processor.display_mermaid_graphic()
processor.display_mermaid_graphic(composite=True)

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GS0[ADAM Update Step System]
subgraph G0[Get Function Gradients - Get Gradients Block]
direction LR
X0[Get Function Gradients]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
end
XX0P0[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[g]
end
X0 o--o XX0T0[g]
end
subgraph G1[Exponential Smoothing First Moment - Update Biased First Moment Block]
direction LR
X1[Exponential Smoothing First Moment]
subgraph G1P[Ports]
direction TB
XX1P0[m]
XX1P1[g]
end
XX1P0[m] o--o X1
XX1P1[g] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[m]
end
X1 o--o XX1T0[m]
end
subgraph G2[Exponential Smoothing Second Moment - Update Biased Second Moment Block]
direction LR
X2[Exponential Smoothing Second Moment]
subgraph G2P[Ports]
direction TB
XX2P0[v]
XX2P1[g]
end
XX2P0[v] o--o X2
XX2P1[g] o--o X2
subgraph G2T[Terminals]
direction TB
XX2T0[v]
end
X2 o--o XX2T0[v]
end
subgraph G3[Increment Timestep - Update Timestep Block]
direction LR
X3[Increment Timestep]
subgraph G3P[Ports]
direction TB
XX3P0[t]
end
XX3P0[t] o--o X3
subgraph G3T[Terminals]
direction TB
XX3T0[t]
end
X3 o--o XX3T0[t]
end
subgraph G4[Exponential Decay First Moment Bias Correction - Compute Bias-Corrected First Moment Block]
direction LR
X4[Exponential Decay First Moment Bias Correction]
subgraph G4P[Ports]
direction TB
XX4P0[m]
XX4P1[t]
end
XX4P0[m] o--o X4
XX4P1[t] o--o X4
subgraph G4T[Terminals]
direction TB
XX4T0[m]
end
X4 o--o XX4T0[m]
end
subgraph G5[Exponential Decay Second Moment Bias Correction - Compute Bias-Corrected Second Moment Block]
direction LR
X5[Exponential Decay Second Moment Bias Correction]
subgraph G5P[Ports]
direction TB
XX5P0[v]
XX5P1[t]
end
XX5P0[v] o--o X5
XX5P1[t] o--o X5
subgraph G5T[Terminals]
direction TB
XX5T0[v]
end
X5 o--o XX5T0[v]
end
subgraph G6[ADAM Theta Update - Update Theta Block]
direction LR
X6[ADAM Theta Update]
subgraph G6P[Ports]
direction TB
XX6P0[m]
XX6P1[v]
XX6P2[theta]
end
XX6P0[m] o--o X6
XX6P1[v] o--o X6
XX6P2[theta] o--o X6
subgraph G6T[Terminals]
direction TB
XX6T0[theta]
end
X6 o--o XX6T0[theta]
end
XX4T0[m] ---> XX6P0[m]
XX5T0[v] ---> XX6P1[v]
XX3T0[t] ---> XX4P1[t]
XX3T0[t] ---> XX5P1[t]
XX1T0[m] ---> XX4P0[m]
XX2T0[v] ---> XX5P0[v]
XX0T0[g] ---> XX1P1[g]
XX0T0[g] ---> XX2P1[g]
end

```

```mermaid
---
config:
    layout: elk
---
graph LR
subgraph GS0[ADAM Update Step System]
subgraph G0[Get Function Gradients - Get Gradients Block]
direction LR
X0[Get Function Gradients]
subgraph G0P[Ports]
direction TB
XX0P0[theta]
end
XX0P0[theta] o--o X0
subgraph G0T[Terminals]
direction TB
XX0T0[g]
end
X0 o--o XX0T0[g]
end
subgraph G1[Exponential Smoothing First Moment - Update Biased First Moment Block]
direction LR
X1[Exponential Smoothing First Moment]
subgraph G1P[Ports]
direction TB
XX1P0[m]
XX1P1[g]
end
XX1P0[m] o--o X1
XX1P1[g] o--o X1
subgraph G1T[Terminals]
direction TB
XX1T0[m]
end
X1 o--o XX1T0[m]
end
subgraph G2[Exponential Smoothing Second Moment - Update Biased Second Moment Block]
direction LR
X2[Exponential Smoothing Second Moment]
subgraph G2P[Ports]
direction TB
XX2P0[v]
XX2P1[g]
end
XX2P0[v] o--o X2
XX2P1[g] o--o X2
subgraph G2T[Terminals]
direction TB
XX2T0[v]
end
X2 o--o XX2T0[v]
end
subgraph G3[Increment Timestep - Update Timestep Block]
direction LR
X3[Increment Timestep]
subgraph G3P[Ports]
direction TB
XX3P0[t]
end
XX3P0[t] o--o X3
subgraph G3T[Terminals]
direction TB
XX3T0[t]
end
X3 o--o XX3T0[t]
end
subgraph G4[Exponential Decay First Moment Bias Correction - Compute Bias-Corrected First Moment Block]
direction LR
X4[Exponential Decay First Moment Bias Correction]
subgraph G4P[Ports]
direction TB
XX4P0[m]
XX4P1[t]
end
XX4P0[m] o--o X4
XX4P1[t] o--o X4
subgraph G4T[Terminals]
direction TB
XX4T0[m]
end
X4 o--o XX4T0[m]
end
subgraph G5[Exponential Decay Second Moment Bias Correction - Compute Bias-Corrected Second Moment Block]
direction LR
X5[Exponential Decay Second Moment Bias Correction]
subgraph G5P[Ports]
direction TB
XX5P0[v]
XX5P1[t]
end
XX5P0[v] o--o X5
XX5P1[t] o--o X5
subgraph G5T[Terminals]
direction TB
XX5T0[v]
end
X5 o--o XX5T0[v]
end
subgraph G6[ADAM Theta Update - Update Theta Block]
direction LR
X6[ADAM Theta Update]
subgraph G6P[Ports]
direction TB
XX6P0[m]
XX6P1[v]
XX6P2[theta]
end
XX6P0[m] o--o X6
XX6P1[v] o--o X6
XX6P2[theta] o--o X6
subgraph G6T[Terminals]
direction TB
XX6T0[theta]
end
X6 o--o XX6T0[theta]
end
XX4T0[m] ---> XX6P0[m]
XX5T0[v] ---> XX6P1[v]
XX3T0[t] ---> XX4P1[t]
XX3T0[t] ---> XX5P1[t]
XX1T0[m] ---> XX4P0[m]
XX2T0[v] ---> XX5P0[v]
XX0T0[g] ---> XX1P1[g]
XX0T0[g] ---> XX2P1[g]
end

```