# Using the Poplar SDK from Julia

This notebook shows a simple example of using functionality from the [Poplar SDK](https://docs.graphcore.ai/projects/poplar-api/en/latest/index.html) in Julia.

In [1]:
using IPUToolkit.Poplar

In [2]:
# Get an IPU device, then build a graph for its target and an empty program sequence
device = Poplar.get_ipu_device()
target = Poplar.DeviceGetTarget(device)
graph = Poplar.Graph(target)
prog = Poplar.ProgramSequence()

# IPU tensors: a 4-element `Float32` constant, two variables created from it with
# `similar`, a 4×4 `FLOAT` variable and a 10-element `INT` variable
c1 = Poplar.GraphAddConstant(graph, Float32[1.0, 1.5, 2.0, 2.5])
v1, v2 = similar(graph, c1, "v1"), similar(graph, c1, "v2")
v3 = Poplar.GraphAddVariable(graph, Poplar.FLOAT(), UInt64[4, 4], "v3")
v4 = Poplar.GraphAddVariable(graph, Poplar.INT(), UInt64[10], "v4")

# Tile mapping: all of `v1` goes on tile 0...
Poplar.GraphSetTileMapping(graph, v1, 0)

# ...element `tile` of `v2` goes on tile `tile`, for tiles 0 to 3...
for tile in UInt64(0):UInt64(3)
    Poplar.GraphSetTileMapping(graph, v2[tile], tile)
end

# ...and the remaining tensors all go on tile 0
for tensor in (v3, v4, c1)
    Poplar.GraphSetTileMapping(graph, tensor, 0)
end

# Copy `c1` to `v1` and print `v1`, then copy `v1` to `v2` and print `v2`
# (which should show the same values as `v1`)
for (src, dst, label) in ((c1, v1, "v1-debug"), (v1, v2, "v2-debug"))
    Poplar.ProgramSequenceAdd(prog, Poplar.ProgramCopy(src, dst))
    Poplar.ProgramSequenceAdd(prog, Poplar.ProgramPrintTensor(label, dst))
end

# Create the handles used later to copy `v3` between CPU and IPU
Poplar.GraphCreateHostWrite(graph, "v3-write", v3)
Poplar.GraphCreateHostRead(graph, "v3-read", v3)

# Copy a slice of `v1` (bounds 0 and 3) into a slice of `v3` (bounds [1, 1] and [2, 4])
v1slice = Poplar.TensorSlice(v1, 0, 3)
v3slice = Poplar.TensorSlice(v3, UInt64[1, 1], UInt64[2, 4])
Poplar.ProgramSequenceAdd(prog, Poplar.ProgramCopy(v1slice, v3slice))

# Read three batches of 10 `Int32`s from the stream `inStream` into `v4`,
# printing the values of `v4` after each batch
inStream = Poplar.GraphAddHostToDeviceFIFO(graph, "v4-input-stream", Poplar.INT(), 10)

for label in ("v4-0", "v4-1", "v4-2")
    Poplar.ProgramSequenceAdd(prog, Poplar.ProgramCopy(inStream, v4))
    Poplar.ProgramSequenceAdd(prog, Poplar.ProgramPrintTensor(label, v4))
end

# Build the engine from the graph and program, with the `debug.instrument`
# option set to "true", and load it onto the device
flags = Poplar.OptionFlags()
Poplar.OptionFlagsSet(flags, "debug.instrument", "true")
engine = Poplar.Engine(graph, prog, flags)
Poplar.EngineLoad(engine, device)

# CPU-side 4×4 array of zeros, written to `v3` through the "v3-write" handle
h3 = fill(zero(Float32), 4, 4)
Poplar.EngineWriteTensor(engine, "v3-write", h3)

# Data to stream to `v4`: the integers 1 to 30 as `Int32`, connected to the input stream
inData = collect(Int32, 1:30)
Poplar.EngineConnectStream(engine, "v4-input-stream", inData)

# Run the engine
Poplar.EngineRun(engine, 0)

# Read IPU tensor `v3` back into CPU array `h3`
Poplar.EngineReadTensor(engine, "v3-read", h3)

# Print the CPU array `h3`, displayed transposed
# NOTE(review): presumably transposed so that rows match the IPU tensor layout — confirm
print("h3 data: ")
display(adjoint(h3))

# Release all devices
Poplar.detach_devices()

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTrying to attach to device 0...
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mSuccessfully attached to device 0


h3 data: 

v1-debug: {1,1.5,2,2.5}
v2-debug: {1,1.5,2,2.5}
v4-0: {1,2,3,4,5,6,7,8,9,10}
v4-1: {11,12,13,14,15,16,17,18,19,20}
v4-2: {21,22,23,24,25,26,27,28,29,30}


4×4 adjoint(::Matrix{Float32}) with eltype Float32:
 0.0  0.0  0.0  0.0
 0.0  1.0  1.5  2.0
 0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0

In [3]:
# Remember to release all attached devices once you are done using them
Poplar.detach_devices()