# 算子融合

In [1]:
import numpy as np
import pytest

import tvm
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import run_opt_pass
import tvm.testing
import tvm.topi.testing

## 简单的例子

In [18]:
def before():
    """Build the unfused Relay function ``squeeze(exp(x + 1))``.

    Returns:
        A ``relay.Function`` with one parameter ``x`` of shape (10, 20)
        whose body is an add -> exp -> squeeze chain.
    """
    data = relay.var("x", shape=(10, 20))
    shifted = relay.add(data, relay.const(1, "float32"))
    body = relay.squeeze(relay.exp(shifted))
    return relay.Function([data], body)

def expected():
    """Build, by hand, the reference graph that the fused result is compared to.

    The add -> exp -> squeeze chain is wrapped in an inner function tagged
    with the ``Primitive`` attribute, and an outer function simply calls it.

    Returns:
        A ``relay.Function`` with one parameter ``x`` of shape (10, 20)
        whose body is a single call to the inner primitive function.
    """
    # Inner function: the whole elementwise chain over its own parameter "p".
    inner_param = relay.var("p", shape=(10, 20))
    chain = relay.squeeze(
        relay.exp(relay.add(inner_param, relay.const(1, "float32")))
    )
    fused = relay.Function([inner_param], chain).with_attr(
        "Primitive", tvm.tir.IntImm("int32", 1)
    )

    # Outer function: forwards its input straight into the inner function.
    outer_param = relay.var("x", shape=(10, 20))
    return relay.Function([outer_param], relay.Call(fused, [outer_param]))

# Passes applied to each side of the comparison.
fuse_pass = transform.FuseOps()
infer_type_pass = transform.InferType()

# Fuse the unfused graph, run type inference on the hand-written reference,
# then require the two results to be structurally equal.
z = before()
zz = run_opt_pass(z, fuse_pass)
after = run_opt_pass(expected(), infer_type_pass)
assert tvm.ir.structural_equal(zz, after)

In [23]:
dshape = (1, 16, 64, 64)
# Keyword sets for the two convolution configurations used in this graph.
conv3x3 = dict(kernel_size=(3, 3), padding=(1, 1), channels=16)
conv1x1 = dict(kernel_size=(1, 1), padding=(0, 0), channels=16)

# Stem: (x + 1) goes into the first 3x3 convolution.
x = relay.var("x", shape=dshape)
x = relay.add(x, relay.const(1, "float32"))
y = relay.nn.conv2d(x, relay.var("w1"), **conv3x3)
# The conv output is used twice, directly and through y1, and both uses meet
# again in the add below (the original note calls this "the next dominator").
y1 = relay.add(relay.const(1, "float32"), y)
y = relay.add(y, y1)
# Second stage: two parallel convolutions read the same tensor y.
z2 = relay.nn.conv2d(y, relay.var("w2"), **conv1x1)
z3 = relay.nn.conv2d(y, relay.var("w3"), **conv3x3)
# The final add joins the two branches.
# NOTE(review): the original comment said "add can only be fused to z1", but
# there is no z1 in this cell; presumably one of z2/z3 is meant - confirm
# against the fused output.
z = relay.add(z2, z3)
# All still-free variables of z become the function's parameters.
f = relay.Function(relay.analysis.free_vars(z), z)
zz = run_opt_pass(f, transform.FuseOps(fuse_opt_level=2))

In [24]:
# Display the Relay function as constructed, before the FuseOps pass is applied.
f

fn (%x: Tensor[(1, 16, 64, 64), float32], %w1, %w2, %w3) {
  %0 = add(%x, 1f);
  %1 = nn.conv2d(%0, %w1, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]);
  %2 = add(1f, %1);
  %3 = add(%1, %2);
  %4 = nn.conv2d(%3, %w2, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1]);
  %5 = nn.conv2d(%3, %w3, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]);
  add(%4, %5)
}

In [25]:
# Display the function returned by running FuseOps(fuse_opt_level=2) on f.
zz

fn (%x: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */, %w1: Tensor[(16, 16, 3, 3), float32] /* ty=Tensor[(16, 16, 3, 3), float32] */, %w2: Tensor[(16, 16, 1, 1), float32] /* ty=Tensor[(16, 16, 1, 1), float32] */, %w3: Tensor[(16, 16, 3, 3), float32] /* ty=Tensor[(16, 16, 3, 3), float32] */) -> Tensor[(1, 16, 64, 64), float32] {
  %3 = fn (%p02: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */, Primitive=1) -> Tensor[(1, 16, 64, 64), float32] {
    add(%p02, 1f /* ty=float32 */) /* ty=Tensor[(1, 16, 64, 64), float32] */
  } /* ty=fn (Tensor[(1, 16, 64, 64), float32]) -> Tensor[(1, 16, 64, 64), float32] */;
  %4 = %3(%x) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %5 = fn (%p01: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */, %p11: Tensor[(16, 16, 3, 3), float32] /* ty=Tensor[(16, 16, 3, 3), float32] */, Primitive=1) -> Tensor[(1, 16, 64, 64), float32] {
    %1 = nn.conv2d(%p01, %p11, padding=[1, 1, 1, 1]