In [4]:
import sys
sys.path.append('../../')
sys.path.append('../')
import time
import wd

In [5]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.fx import subgraph_rewriter, symbolic_trace
import utils
from torch.fx import Proxy, Graph, GraphModule
from torch.fx.passes.utils.matcher_utils import SubgraphMatcher

In [6]:
from torch.profiler import profile, record_function, ProfilerActivity
import time
import torch._dynamo as dynamo


In [7]:
def gen_pattern_replace_and_matcher_for_MLP(traced,
                                                  redency_part_slice,unredency_part_slice,
                                                  key_node_name,match_func = None
                                                ):
  from torch.fx.passes.utils.matcher_utils import SubgraphMatcher


  def _match(match,ori,pat):
    return True 
  env  = utils.get_env(traced)
  target_node = env[key_node_name]
  target_node_mod = utils.get_target_mod(traced,target_node.target)
  shape_info = target_node_mod.weight.data.shape
  class PatternClass(torch.nn.Module):
      def __init__(self):
          super().__init__()
          self.embed = torch.nn.Embedding(1, 1)
          self.embed_output_dim = shape_info[1]
          self.mlp = nn.Linear(shape_info[0],shape_info[1])


      def forward(self,x):
          x = self.embed(x).view(-1,self.embed_output_dim)
          return self.mlp(x)
  pattern = PatternClass()  
  pattern_trace = symbolic_trace(pattern)
  pattern_graph = pattern_trace.graph
  original_graph = traced.graph
  matcher =  SubgraphMatcher(pattern_graph, match_output=False, match_placeholder=False,
                              remove_overlapping_matches=True)
  _matches = matcher.match(original_graph)
  match_filters = [_match if match_func is None else match_func]
  _matches = [
      m for m in _matches
      if all(match_filter(m, original_graph, pattern_graph)
              for match_filter in match_filters)
  ]  
  # 因为在过滤器中做了限制应该只有一个符合要求的
  _matched = _matches[0]
  pattern_env = utils.get_env(pattern_trace)
  node_map = _matched.nodes_map
  
  embed_node = node_map[pattern_env['embed']]
  embed_node_module = utils.get_target_mod(traced,embed_node.target)
  
  linear_node = node_map[pattern_env['mlp']]
  linear_node_module = utils.get_target_mod(traced,linear_node.target)
  linear_node_weight = linear_node_module.weight.data
  linear_node_bias = linear_node_module.bias.data
  
  class ReplacementClass(torch.nn.Module):
    def __init__(self):
      super().__init__()
      self.embed = embed_node_module
      self.embed_dim = self.embed.weight.data.shape[1]
      self.redency_weight_len = self.embed_dim * redency_part_slice[1].stop
      redency_weight = linear_node_weight[:,:self.redency_weight_len]
      unredency_weight = linear_node_weight[:,self.redency_weight_len:]
      self.unredency_weight_len = unredency_weight.shape[1]
      self.redency_linear = nn.Linear(redency_weight.shape[1],redency_weight.shape[0])
      self.redency_linear.weight.data.copy_(redency_weight)
      self.redency_linear.bias.data.copy_(linear_node_bias)

      self.unredency_linear = nn.Linear(unredency_weight.shape[1],unredency_weight.shape[0],bias=False)
      self.unredency_linear.weight.data.copy_(unredency_weight)

      

    def forward(self,x):
      redency_part = x[redency_part_slice] 
      unredency_part = x[unredency_part_slice] 
      return self.redency_linear(self.embed(redency_part).view(-1,self.redency_weight_len)) + self.unredency_linear(self.embed(unredency_part).view(-1,self.unredency_weight_len))
      # return unredency_sum
    
  
  return pattern,ReplacementClass(),_match

In [8]:
def workload_wdl(num_field, prefix,dim = 64,l = [1024,512,256]):
  print(f"now gen workload of wdl with config: dim: {dim}, num_field: {num_field}, prefix: {prefix}")
  wdl_model_ori = wd.WideAndDeepModel([100 for i in range(num_field)],dim,l,0.1)
  ori_traced = symbolic_trace(wdl_model_ori)
  
  wdl_model_modify = wd.WideAndDeepModel([100 for i in range(num_field)],dim,l,0.1)
  modify_traced = symbolic_trace(wdl_model_modify)
  pattern,replace,match = gen_pattern_replace_and_matcher_for_MLP(modify_traced,
                                                                      (0,slice(None,prefix,None)),(slice(None,None,None),slice(prefix,None,None)),
                                                                      "mlp_mlp_0")
  matches = subgraph_rewriter.replace_pattern_with_filters(modify_traced, pattern, replace,[match])
  return ori_traced,modify_traced

In [9]:
def genWorkload(num_field = 34 * 5,prefix = 29 * 5, batch = 4096, dim = 64):
  ori_model_name = f'/home/yssun/pytorch-fm/torchfm/model/test_fx/exp/model_repo/wdl_rate/wdl_{batch}_{num_field}_{prefix}_{dim}_ori.onnx'
  modify_model_name = f'/home/yssun/pytorch-fm/torchfm/model/test_fx/exp/model_repo/wdl_rate/wdl_{batch}_{num_field}_{prefix}_{dim}_modify.onnx'
  ori, modify = workload_wdl(num_field,prefix,dim)
  torch.onnx.export(ori,               # 模型 being run
                  torch.randint(low=0, high=20, size=(batch,num_field), dtype=torch.long),                  # 模型输入 (or a tuple for multiple inputs)
                  ori_model_name,        # 导出文件的文件名
                  export_params=True, # 如果设置为True，则参数也会被导出。注意某些情况下参数可能无法被导出。
                  opset_version=10,   # ONNX版本
                  do_constant_folding=True,  # 是否执行常量折叠以优化模型
                  input_names = ['input'],   # 输入的名称
                  output_names = ['output'], # 输出的名称
                  )
  torch.onnx.export(modify,               # 模型 being run
                  torch.randint(low=0, high=20, size=(batch,num_field), dtype=torch.long),                  # 模型输入 (or a tuple for multiple inputs)
                  modify_model_name,        # 导出文件的文件名
                  export_params=True, # 如果设置为True，则参数也会被导出。注意某些情况下参数可能无法被导出。
                  opset_version=10,   # ONNX版本
                  do_constant_folding=True,  # 是否执行常量折叠以优化模型
                  input_names = ['input'],   # 输入的名称
                  output_names = ['output'], # 输出的名称
                  )

In [10]:
dims= [32]
batches = [1024,2048,4096]
for dim in dims:
  for batch in batches:
    for prefix in [10,20,30,40,50,60,70,80,90,99]:
      genWorkload(num_field=100,prefix=prefix,batch=batch,dim=dim)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 10




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 20




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 30




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 40




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 50




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 60




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 70




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 80




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 90




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 99




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 10




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 20




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 30




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 40




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 50




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 60




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 70




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 80




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 90




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 99




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 10




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 20




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 30




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 40




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 50




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 60




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 70




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 80




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 90




now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 99




In [8]:
def calculate_mean_and_variance_manual(data):
    n = len(data)
    mean = sum(data) / n
    variance = sum((x - mean) ** 2 for x in data) / n
    return mean, variance

In [9]:
def gen_and_test(num_field = 22,prefix = 10, batch = 4096, dim = 32, workload_func = workload_wdl,l = [1024,512,256]):
  def run(model):
    traced_model = torch.jit.trace(model, torch.randint(low=0, high=88, size=(batch,num_field), dtype=torch.long))
    compiled_model = torch.compile(traced_model, backend="inductor")
    compiled_model.eval()
    total_time = []
    t = torch.randint(low=0, high=88, size=(batch ,num_field), dtype=torch.long)

    with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
        with record_function("model_inference"):
          for i in range(100):
              start_time = time.time()  # 开始计时
              with torch.no_grad():
                soutput = compiled_model(t)
              end_time = time.time()  # 结束计时
              
              # 计算并打印函数执行所需的时间
              elapsed_time = end_time - start_time
              total_time.append(elapsed_time * 1000)
    print(calculate_mean_and_variance_manual(total_time))
    return prof
  print(f"now gen workload of wdl with config: dim: {dim}, num_field: {num_field}, prefix: {prefix}, batch :{batch}")
  ori, modify = workload_func(num_field,prefix,dim,l)
  p1 = run(ori)
  p2 = run(modify)
  return p1, p2

In [113]:
gen_and_test(num_field = 22,prefix = 10, batch = 1024, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :1024
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 755 / 1024 (73.7%)
Greatest absolute difference: 0.003040313720703125 at index (52,) (up to 1e-05 allowed)
Greatest relative difference: 0.015587846852296581 at index (342,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:58:50 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:58:50 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:58:50 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.2036423683166504, 0.16318170921181263)


Tensor-likes are not close!

Mismatched elements: 732 / 1024 (71.5%)
Greatest absolute difference: 0.003636598587036133 at index (275,) (up to 1e-05 allowed)
Greatest relative difference: 0.018113487297396026 at index (143,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:58:51 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:58:51 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:58:51 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(2.772352695465088, 0.28006141416767605)


(<torch.profiler.profiler.profile at 0x7ee590f7eaf0>,
 <torch.profiler.profiler.profile at 0x7ee580a35d30>)

In [114]:
p1 , p2 = gen_and_test(num_field = 22,prefix = 10, batch = 2048, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :2048
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 1467 / 2048 (71.6%)
Greatest absolute difference: 0.0032285749912261963 at index (1189,) (up to 1e-05 allowed)
Greatest relative difference: 0.015538992479396517 at index (713,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:00:48 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:00:49 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:00:49 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.9754223823547363, 0.20999803456902555)


Tensor-likes are not close!

Mismatched elements: 1525 / 2048 (74.5%)
Greatest absolute difference: 0.003830850124359131 at index (1666,) (up to 1e-05 allowed)
Greatest relative difference: 0.0213352628943599 at index (135,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:00:49 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:00:50 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:00:50 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.4917831420898438, 0.23876289446889132)


In [59]:
gen_and_test(num_field = 22,prefix = 10, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 2997 / 4096 (73.2%)
Greatest absolute difference: 0.0040725767612457275 at index (653,) (up to 1e-05 allowed)
Greatest relative difference: 0.020869940862464777 at index (3577,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:40:35 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:40:36 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:40:36 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.487597465515137, 0.4443045948164581)


Tensor-likes are not close!

Mismatched elements: 2982 / 4096 (72.8%)
Greatest absolute difference: 0.004334956407546997 at index (1291,) (up to 1e-05 allowed)
Greatest relative difference: 0.01569245939842554 at index (2680,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:40:36 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:40:37 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:40:37 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.460316181182861, 0.8133674435327976)


(<torch.profiler.profiler.profile at 0x7f18a2ef4fd0>,
 <torch.profiler.profiler.profile at 0x7f18a1b82520>)

In [11]:
gen_and_test(num_field = 22,prefix = 10, batch = 8192, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :8192
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 5985 / 8192 (73.1%)
Greatest absolute difference: 0.0049446821212768555 at index (1301,) (up to 1e-05 allowed)
Greatest relative difference: 0.02189776552533188 at index (5419,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-04 21:57:37 4097444:4097444 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-04 21:57:41 4097444:4097444 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-04 21:57:41 4097444:4097444 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(38.919479846954346, 3.2986022377542668)


Tensor-likes are not close!

Mismatched elements: 6181 / 8192 (75.5%)
Greatest absolute difference: 0.00649106502532959 at index (1882,) (up to 1e-05 allowed)
Greatest relative difference: 0.0221152113958787 at index (983,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-04 21:57:42 4097444:4097444 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-04 21:57:46 4097444:4097444 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-04 21:57:46 4097444:4097444 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(44.149370193481445, 4.1975765956067335)


(<torch.profiler.profiler.profile at 0x7f3788c6fc70>,
 <torch.profiler.profiler.profile at 0x7f37802a5b80>)

In [63]:
gen_and_test(num_field = 22,prefix = 10, batch = 1024, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10, batch :1024
now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 765 / 1024 (74.7%)
Greatest absolute difference: 0.004614919424057007 at index (517,) (up to 1e-05 allowed)
Greatest relative difference: 0.02158088646590178 at index (538,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:28 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:29 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:29 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.040942192077637, 1.0354623905413973)


Tensor-likes are not close!

Mismatched elements: 758 / 1024 (74.0%)
Greatest absolute difference: 0.003536522388458252 at index (835,) (up to 1e-05 allowed)
Greatest relative difference: 0.0186929987799294 at index (621,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:29 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:30 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:30 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.847780227661133, 0.28602093507288373)


(<torch.profiler.profiler.profile at 0x7f189b169970>,
 <torch.profiler.profiler.profile at 0x7f189a2a5a00>)

In [64]:
gen_and_test(num_field = 22,prefix = 10, batch = 2048, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10, batch :2048
now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 1518 / 2048 (74.1%)
Greatest absolute difference: 0.0037743449211120605 at index (89,) (up to 1e-05 allowed)
Greatest relative difference: 0.018367423195171893 at index (1660,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:33 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:34 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:34 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.395909786224365, 0.930559502756978)


Tensor-likes are not close!

Mismatched elements: 1536 / 2048 (75.0%)
Greatest absolute difference: 0.003513544797897339 at index (895,) (up to 1e-05 allowed)
Greatest relative difference: 0.01879021455448549 at index (160,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:34 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:35 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:35 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(5.623955726623535, 0.9673388208057077)


(<torch.profiler.profiler.profile at 0x7f18989f9ac0>,
 <torch.profiler.profiler.profile at 0x7f18979ce0d0>)

In [65]:
gen_and_test(num_field = 22,prefix = 10, batch = 4096, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10, batch :4096
now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 3028 / 4096 (73.9%)
Greatest absolute difference: 0.00417935848236084 at index (1416,) (up to 1e-05 allowed)
Greatest relative difference: 0.01922524074980463 at index (346,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:40 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:41 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:41 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(10.4844069480896, 2.8313142539616365)


Tensor-likes are not close!

Mismatched elements: 2918 / 4096 (71.2%)
Greatest absolute difference: 0.004600226879119873 at index (2172,) (up to 1e-05 allowed)
Greatest relative difference: 0.01516691688982047 at index (2404,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:41:41 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:41:42 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:41:42 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(9.043309688568115, 0.5919898647960053)


(<torch.profiler.profiler.profile at 0x7f18969576d0>,
 <torch.profiler.profiler.profile at 0x7f1895101460>)

In [119]:
gen_and_test(num_field = 22,prefix = 10, batch = 8192, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10, batch :8192
now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 5928 / 8192 (72.4%)
Greatest absolute difference: 0.005536407232284546 at index (4654,) (up to 1e-05 allowed)
Greatest relative difference: 0.019040173610070165 at index (7762,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:05:00 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:05:04 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:05:04 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(41.60029888153076, 8.554045692221734)


Tensor-likes are not close!

Mismatched elements: 5925 / 8192 (72.3%)
Greatest absolute difference: 0.004739999771118164 at index (6258,) (up to 1e-05 allowed)
Greatest relative difference: 0.02400387648225035 at index (6389,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:05:05 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:05:09 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:05:09 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(39.74055051803589, 3.954013439368964)


(<torch.profiler.profiler.profile at 0x7ee4c1b8cb50>,
 <torch.profiler.profiler.profile at 0x7ee4b6c487c0>)

In [67]:
gen_and_test(num_field = 34,prefix = 29, batch = 1024, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29, batch :1024
now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 632 / 1024 (61.7%)
Greatest absolute difference: 0.0041487812995910645 at index (652,) (up to 1e-05 allowed)
Greatest relative difference: 0.02340897986087216 at index (785,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:05 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:05 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:05 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.312067985534668, 0.2433223953403285)


Tensor-likes are not close!

Mismatched elements: 661 / 1024 (64.6%)
Greatest absolute difference: 0.003370821475982666 at index (108,) (up to 1e-05 allowed)
Greatest relative difference: 0.018108361088589847 at index (967,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:06 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:06 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:06 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(2.588198184967041, 0.2628381306465144)


(<torch.profiler.profiler.profile at 0x7f1692231f40>,
 <torch.profiler.profiler.profile at 0x7f15ecf55a60>)

In [68]:
gen_and_test(num_field = 34,prefix = 29, batch = 2048, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29, batch :2048
now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 1234 / 2048 (60.3%)
Greatest absolute difference: 0.00350189208984375 at index (1089,) (up to 1e-05 allowed)
Greatest relative difference: 0.014675458921967572 at index (1286,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:10 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:10 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:10 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.810066223144531, 0.5339177952009777)


Tensor-likes are not close!

Mismatched elements: 1222 / 2048 (59.7%)
Greatest absolute difference: 0.004008650779724121 at index (1633,) (up to 1e-05 allowed)
Greatest relative difference: 0.01444969333679053 at index (612,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:11 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:11 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:11 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.6214780807495117, 0.33057470793664834)


(<torch.profiler.profiler.profile at 0x7f15ebcaf940>,
 <torch.profiler.profiler.profile at 0x7f15ea698e80>)

In [69]:
gen_and_test(num_field = 34,prefix = 29, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 2487 / 4096 (60.7%)
Greatest absolute difference: 0.004291653633117676 at index (2919,) (up to 1e-05 allowed)
Greatest relative difference: 0.017515334586813796 at index (2276,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:14 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:15 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:15 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.665987014770508, 1.179806113759696)


Tensor-likes are not close!

Mismatched elements: 2555 / 4096 (62.4%)
Greatest absolute difference: 0.0037918388843536377 at index (3842,) (up to 1e-05 allowed)
Greatest relative difference: 0.016171019369217044 at index (130,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:15 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:16 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:16 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(5.302414894104004, 0.36174186427615496)


(<torch.profiler.profiler.profile at 0x7f15e90b80d0>,
 <torch.profiler.profiler.profile at 0x7f15e7d794f0>)

In [71]:
gen_and_test(num_field = 34,prefix = 29, batch = 8192, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29, batch :8192
now gen workload of wdl with config: dim: 32, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 5161 / 8192 (63.0%)
Greatest absolute difference: 0.003899604082107544 at index (5450,) (up to 1e-05 allowed)
Greatest relative difference: 0.017327888696440148 at index (7825,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:41 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:45 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:45 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(39.0883994102478, 0.32793937822930275)


Tensor-likes are not close!

Mismatched elements: 5068 / 8192 (61.9%)
Greatest absolute difference: 0.00429224967956543 at index (1141,) (up to 1e-05 allowed)
Greatest relative difference: 0.018540683782305785 at index (5543,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:46 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:49 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:49 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(38.56184244155884, 0.8216893548876669)


(<torch.profiler.profiler.profile at 0x7f15e36c7400>,
 <torch.profiler.profiler.profile at 0x7f15e2d85490>)

In [72]:
gen_and_test(num_field = 34,prefix = 29, batch = 1024, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29, batch :1024
now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 653 / 1024 (63.8%)
Greatest absolute difference: 0.004015624523162842 at index (371,) (up to 1e-05 allowed)
Greatest relative difference: 0.017516655956836845 at index (335,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:42:59 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:42:59 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:42:59 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.675490856170654, 0.18287486142867238)


Tensor-likes are not close!

Mismatched elements: 668 / 1024 (65.2%)
Greatest absolute difference: 0.00396236777305603 at index (198,) (up to 1e-05 allowed)
Greatest relative difference: 0.0177633753113608 at index (141,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:43:00 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:43:00 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:43:00 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(2.6479506492614746, 0.2702182822474697)


(<torch.profiler.profiler.profile at 0x7f15e1784160>,
 <torch.profiler.profiler.profile at 0x7f15e050ad90>)

In [73]:
gen_and_test(num_field = 34,prefix = 29, batch = 2048, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29, batch :2048
now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 1288 / 2048 (62.9%)
Greatest absolute difference: 0.004005521535873413 at index (1004,) (up to 1e-05 allowed)
Greatest relative difference: 0.016975887096265872 at index (225,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:43:02 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:43:03 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:43:03 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.289128303527832, 0.24572321692630794)


Tensor-likes are not close!

Mismatched elements: 1293 / 2048 (63.1%)
Greatest absolute difference: 0.005026429891586304 at index (1144,) (up to 1e-05 allowed)
Greatest relative difference: 0.01877565652894662 at index (719,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:43:04 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:43:04 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:43:04 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.480829238891602, 2.4907019098009187)


(<torch.profiler.profiler.profile at 0x7f15df0312e0>,
 <torch.profiler.profiler.profile at 0x7f15ddc5d370>)

In [125]:
gen_and_test(num_field = 34,prefix = 29, batch = 4096, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29, batch :4096
now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 2541 / 4096 (62.0%)
Greatest absolute difference: 0.004085838794708252 at index (1613,) (up to 1e-05 allowed)
Greatest relative difference: 0.019560641183476547 at index (3781,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:49:32 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:49:33 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:49:33 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(17.62883424758911, 1.125506435863599)


Tensor-likes are not close!

Mismatched elements: 2495 / 4096 (60.9%)
Greatest absolute difference: 0.0039688050746917725 at index (3484,) (up to 1e-05 allowed)
Greatest relative difference: 0.017356889156664557 at index (3093,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:49:34 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:49:35 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:49:35 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.2017083168029785, 0.5914286411723424)


(<torch.profiler.profiler.profile at 0x7ee3f23b57f0>,
 <torch.profiler.profiler.profile at 0x7ee3ecc7e400>)

In [126]:
gen_and_test(num_field = 34,prefix = 29, batch = 8192, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29, batch :8192
now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29


Tensor-likes are not close!

Mismatched elements: 5137 / 8192 (62.7%)
Greatest absolute difference: 0.004293560981750488 at index (6213,) (up to 1e-05 allowed)
Greatest relative difference: 0.017037666149717346 at index (3356,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:49:46 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:49:51 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:49:51 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(49.608635902404785, 9.0556711637646)


Tensor-likes are not close!

Mismatched elements: 5178 / 8192 (63.2%)
Greatest absolute difference: 0.004384338855743408 at index (502,) (up to 1e-05 allowed)
Greatest relative difference: 0.019114987922120137 at index (5891,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 20:49:52 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 20:49:56 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 20:49:56 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(38.37266445159912, 1.6489793447135526)


(<torch.profiler.profiler.profile at 0x7ee3dc6bcbe0>,
 <torch.profiler.profiler.profile at 0x7ee3c6e548e0>)

In [85]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 1024, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50, batch :1024
now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 413 / 1024 (40.3%)
Greatest absolute difference: 0.0037986040115356445 at index (722,) (up to 1e-05 allowed)
Greatest relative difference: 0.01686529998809134 at index (689,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:46:28 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:46:29 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:46:29 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.054248809814453, 0.40336101565117133)


Tensor-likes are not close!

Mismatched elements: 400 / 1024 (39.1%)
Greatest absolute difference: 0.0026208162307739258 at index (880,) (up to 1e-05 allowed)
Greatest relative difference: 0.011245099151133923 at index (109,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:46:29 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:46:30 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:46:30 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.847621917724609, 0.5601550469691574)


(<torch.profiler.profiler.profile at 0x7f0b40a3abb0>,
 <torch.profiler.profiler.profile at 0x7f0afdc042b0>)

In [87]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 2048, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50, batch :2048
now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 833 / 2048 (40.7%)
Greatest absolute difference: 0.003035306930541992 at index (581,) (up to 1e-05 allowed)
Greatest relative difference: 0.018317699675414745 at index (337,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:00 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:01 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:01 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(9.582388401031494, 0.8977245890662289)


Tensor-likes are not close!

Mismatched elements: 768 / 2048 (37.5%)
Greatest absolute difference: 0.0033698081970214844 at index (1540,) (up to 1e-05 allowed)
Greatest relative difference: 0.015526391748114874 at index (1610,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:01 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:02 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:02 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.105839252471924, 0.6370660905474779)


(<torch.profiler.profiler.profile at 0x7f046c319070>,
 <torch.profiler.profiler.profile at 0x7f042e6b9100>)

In [88]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 1531 / 4096 (37.4%)
Greatest absolute difference: 0.003214538097381592 at index (69,) (up to 1e-05 allowed)
Greatest relative difference: 0.01784427609993351 at index (1681,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:06 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:09 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:09 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(23.621885776519775, 2.8635076167290663)


Tensor-likes are not close!

Mismatched elements: 1530 / 4096 (37.4%)
Greatest absolute difference: 0.003350973129272461 at index (923,) (up to 1e-05 allowed)
Greatest relative difference: 0.02065894855833375 at index (2905,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:09 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:10 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:10 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(12.352263927459717, 1.4731369473963696)


(<torch.profiler.profiler.profile at 0x7f03b7fde370>,
 <torch.profiler.profiler.profile at 0x7f03a35e9df0>)

In [90]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 8192, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50, batch :8192
now gen workload of wdl with config: dim: 32, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 3081 / 8192 (37.6%)
Greatest absolute difference: 0.003699362277984619 at index (5824,) (up to 1e-05 allowed)
Greatest relative difference: 0.01683641843341576 at index (3263,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:43 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:49 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:49 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(58.56094837188721, 17.97192176147746)


Tensor-likes are not close!

Mismatched elements: 3062 / 8192 (37.4%)
Greatest absolute difference: 0.003034055233001709 at index (4150,) (up to 1e-05 allowed)
Greatest relative difference: 0.020027614112782095 at index (493,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:47:50 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:47:55 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:47:55 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(49.843153953552246, 8.862448495642639)


(<torch.profiler.profiler.profile at 0x7f02d569b520>,
 <torch.profiler.profiler.profile at 0x7f0029652dc0>)

In [91]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 1024, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50, batch :1024
now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 388 / 1024 (37.9%)
Greatest absolute difference: 0.002737373113632202 at index (222,) (up to 1e-05 allowed)
Greatest relative difference: 0.02248258783196468 at index (742,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:48:14 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:48:15 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:48:15 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(8.898494243621826, 0.48498895295665534)


Tensor-likes are not close!

Mismatched elements: 366 / 1024 (35.7%)
Greatest absolute difference: 0.0036386847496032715 at index (638,) (up to 1e-05 allowed)
Greatest relative difference: 0.014777513614496295 at index (389,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:48:15 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:48:16 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:48:16 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.4845967292785645, 1.0025864613169233)


(<torch.profiler.profiler.profile at 0x7effe078dee0>,
 <torch.profiler.profiler.profile at 0x7eff933d1160>)

In [94]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 2048, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50, batch :2048
now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 792 / 2048 (38.7%)
Greatest absolute difference: 0.003103971481323242 at index (820,) (up to 1e-05 allowed)
Greatest relative difference: 0.019858454475669095 at index (1884,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:48:59 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:49:01 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:49:01 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.985862255096436, 1.6997760825404384)


Tensor-likes are not close!

Mismatched elements: 752 / 2048 (36.7%)
Greatest absolute difference: 0.0025399327278137207 at index (671,) (up to 1e-05 allowed)
Greatest relative difference: 0.01735196844629005 at index (603,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:49:02 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:49:03 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:49:03 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(9.209728240966797, 1.3893481389004592)


(<torch.profiler.profiler.profile at 0x7efe3a3d5370>,
 <torch.profiler.profiler.profile at 0x7efb892ee0d0>)

In [95]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 4096, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50, batch :4096
now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 1524 / 4096 (37.2%)
Greatest absolute difference: 0.0033547282218933105 at index (1765,) (up to 1e-05 allowed)
Greatest relative difference: 0.01669834144688696 at index (951,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:49:14 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:49:18 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:49:18 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(41.269328594207764, 2.1940995587726775)


Tensor-likes are not close!

Mismatched elements: 1558 / 4096 (38.0%)
Greatest absolute difference: 0.004715681076049805 at index (1777,) (up to 1e-05 allowed)
Greatest relative difference: 0.01794240815960295 at index (1194,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:49:19 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:49:22 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:49:22 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(25.81106185913086, 1.1863294677368685)


(<torch.profiler.profiler.profile at 0x7efb36bce280>,
 <torch.profiler.profiler.profile at 0x7efafe1be8e0>)

In [98]:
gen_and_test(num_field = 22 * 5,prefix = 10 * 5, batch = 8192, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50, batch :8192
now gen workload of wdl with config: dim: 64, num_field: 110, prefix: 50


Tensor-likes are not close!

Mismatched elements: 3197 / 8192 (39.0%)
Greatest absolute difference: 0.0033190250396728516 at index (6607,) (up to 1e-05 allowed)
Greatest relative difference: 0.018155756457700546 at index (3470,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:50:42 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:50:52 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:50:52 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(100.08099555969238, 1.8495043437724235)


Tensor-likes are not close!

Mismatched elements: 3067 / 8192 (37.4%)
Greatest absolute difference: 0.004911541938781738 at index (596,) (up to 1e-05 allowed)
Greatest relative difference: 0.021591550989508563 at index (821,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:50:53 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:51:00 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:51:00 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(64.87936735153198, 21.671167053563057)


(<torch.profiler.profiler.profile at 0x7ef9e812c430>,
 <torch.profiler.profiler.profile at 0x7ef6f31d03a0>)

In [99]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 1024, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145, batch :1024
now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 324 / 1024 (31.6%)
Greatest absolute difference: 0.0037370026111602783 at index (353,) (up to 1e-05 allowed)
Greatest relative difference: 0.019284062101097607 at index (511,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:51:41 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:51:42 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:51:42 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(8.380768299102783, 0.21736192763341933)


Tensor-likes are not close!

Mismatched elements: 329 / 1024 (32.1%)
Greatest absolute difference: 0.0032735466957092285 at index (772,) (up to 1e-05 allowed)
Greatest relative difference: 0.020725420401324385 at index (490,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:51:43 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:51:43 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:51:43 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(3.720364570617676, 0.2454618609363024)


(<torch.profiler.profiler.profile at 0x7ef6b0321190>,
 <torch.profiler.profiler.profile at 0x7ef6681b8e80>)

In [100]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 2048, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145, batch :2048
now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 593 / 2048 (29.0%)
Greatest absolute difference: 0.003600180149078369 at index (237,) (up to 1e-05 allowed)
Greatest relative difference: 0.014440619117076841 at index (1848,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:51:51 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:51:53 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:51:53 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(17.54427671432495, 0.3537450225564953)


Tensor-likes are not close!

Mismatched elements: 664 / 2048 (32.4%)
Greatest absolute difference: 0.005089759826660156 at index (1511,) (up to 1e-05 allowed)
Greatest relative difference: 0.01889447815493623 at index (1674,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:51:54 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:51:54 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:51:54 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(5.324833393096924, 0.3238763059187022)


(<torch.profiler.profiler.profile at 0x7ef61ada38b0>,
 <torch.profiler.profiler.profile at 0x7ef5dd0d7340>)

In [102]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 1284 / 4096 (31.3%)
Greatest absolute difference: 0.003814190626144409 at index (2234,) (up to 1e-05 allowed)
Greatest relative difference: 0.018878545037736397 at index (1018,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:52:16 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:52:19 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:52:19 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(27.346034049987793, 1.0112193788017976)


Tensor-likes are not close!

Mismatched elements: 1247 / 4096 (30.4%)
Greatest absolute difference: 0.0034676194190979004 at index (1884,) (up to 1e-05 allowed)
Greatest relative difference: 0.01665599826474294 at index (222,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:52:20 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:52:22 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:52:22 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.587905883789062, 0.846977029732443)


(<torch.profiler.profiler.profile at 0x7ef2ab32d760>,
 <torch.profiler.profiler.profile at 0x7ef26312fe20>)

In [103]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 8192, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145, batch :8192
now gen workload of wdl with config: dim: 32, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 2614 / 8192 (31.9%)
Greatest absolute difference: 0.003866732120513916 at index (1754,) (up to 1e-05 allowed)
Greatest relative difference: 0.02325055359753698 at index (5468,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:52:38 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:52:46 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:52:46 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(79.08278703689575, 9.497569267722383)


Tensor-likes are not close!

Mismatched elements: 2607 / 8192 (31.8%)
Greatest absolute difference: 0.00432056188583374 at index (2868,) (up to 1e-05 allowed)
Greatest relative difference: 0.017885696430067988 at index (7818,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:52:46 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:52:51 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:52:51 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(41.850173473358154, 5.967429348601172)


(<torch.profiler.profiler.profile at 0x7ef22a67b100>,
 <torch.profiler.profiler.profile at 0x7ef1d80cb520>)

In [105]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 1024, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145, batch :1024
now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 339 / 1024 (33.1%)
Greatest absolute difference: 0.0031284689903259277 at index (750,) (up to 1e-05 allowed)
Greatest relative difference: 0.014662294887849316 at index (213,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:54:40 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:54:42 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:54:42 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(19.639668464660645, 0.7801486965718141)


Tensor-likes are not close!

Mismatched elements: 325 / 1024 (31.7%)
Greatest absolute difference: 0.004178285598754883 at index (1016,) (up to 1e-05 allowed)
Greatest relative difference: 0.023285555783639098 at index (899,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:54:42 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:54:43 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:54:43 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(4.769694805145264, 0.5406786605988145)


(<torch.profiler.profiler.profile at 0x7ef10a169520>,
 <torch.profiler.profiler.profile at 0x7eee5f0ea160>)

In [107]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 2048, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145, batch :2048
now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 671 / 2048 (32.8%)
Greatest absolute difference: 0.0038806498050689697 at index (1356,) (up to 1e-05 allowed)
Greatest relative difference: 0.015182208382317583 at index (162,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:55:27 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:55:30 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:55:30 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(32.25517749786377, 1.5305161241258247)


Tensor-likes are not close!

Mismatched elements: 655 / 2048 (32.0%)
Greatest absolute difference: 0.0049974918365478516 at index (1601,) (up to 1e-05 allowed)
Greatest relative difference: 0.022012335656227403 at index (946,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:55:31 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:55:32 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:55:32 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.018165588378906, 0.503697799422298)


(<torch.profiler.profiler.profile at 0x7eed8bf0f4c0>,
 <torch.profiler.profiler.profile at 0x7eed43e5a790>)

In [110]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 4096, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145, batch :4096
now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 1278 / 4096 (31.2%)
Greatest absolute difference: 0.004263877868652344 at index (1295,) (up to 1e-05 allowed)
Greatest relative difference: 0.01820466727415332 at index (3683,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:56:15 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:56:21 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:56:21 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(58.50992202758789, 22.911699498990856)


Tensor-likes are not close!

Mismatched elements: 1304 / 4096 (31.8%)
Greatest absolute difference: 0.004256784915924072 at index (1358,) (up to 1e-05 allowed)
Greatest relative difference: 0.021239103699626387 at index (293,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:56:22 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:56:23 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:56:23 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(10.812389850616455, 1.9480543919314641)


(<torch.profiler.profiler.profile at 0x7ee98c405880>,
 <torch.profiler.profiler.profile at 0x7ee939d1c520>)

In [112]:
gen_and_test(num_field = 34 * 5,prefix = 29 * 5, batch = 8192, dim = 64)

now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145, batch :8192
now gen workload of wdl with config: dim: 64, num_field: 170, prefix: 145


Tensor-likes are not close!

Mismatched elements: 2537 / 8192 (31.0%)
Greatest absolute difference: 0.0046485066413879395 at index (7690,) (up to 1e-05 allowed)
Greatest relative difference: 0.01985108832420935 at index (933,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:57:22 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:57:33 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:57:33 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(112.00615644454956, 117.66273853783656)


Tensor-likes are not close!

Mismatched elements: 2589 / 8192 (31.6%)
Greatest absolute difference: 0.004388868808746338 at index (5582,) (up to 1e-05 allowed)
Greatest relative difference: 0.016579765174645704 at index (4045,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-03 19:57:34 1908423:1908423 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 19:57:39 1908423:1908423 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 19:57:39 1908423:1908423 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(49.85299348831177, 3.565654843549737)


(<torch.profiler.profiler.profile at 0x7ee86bde95b0>,
 <torch.profiler.profiler.profile at 0x7ee81ea7e820>)

In [65]:

wdl_model = wd.WideAndDeepModel([100 for i in range(22)],32,[1024,512,256],0.1)
traced = symbolic_trace(wdl_model)

In [66]:
ori, modify = workload_wdl(22,10)

now gen workload of wdl with config: dim: 64, num_field: 22, prefix: 10


In [39]:
type(ori)

torch.fx.graph_module.GraphModule.__new__.<locals>.GraphModuleImpl

In [40]:
traced_model = torch.jit.trace(ori, torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))


Tensor-likes are not close!

Mismatched elements: 2981 / 4096 (72.8%)
Greatest absolute difference: 0.004326552152633667 at index (2561,) (up to 1e-05 allowed)
Greatest relative difference: 0.019436159845888815 at index (2609,) (up to 1e-05 allowed)
  _check_trace(


In [56]:
%timeit -n 1 -r 30  output = traced_model(torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))


12.9 ms ± 3.49 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [47]:
traced_model = torch.jit.trace(modify, torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))


Tensor-likes are not close!

Mismatched elements: 3030 / 4096 (74.0%)
Greatest absolute difference: 0.004077315330505371 at index (3957,) (up to 1e-05 allowed)
Greatest relative difference: 0.021345988140694876 at index (65,) (up to 1e-05 allowed)
  _check_trace(


In [74]:
%timeit -n 1 -r 30  output = traced_model(torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))


RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
<eval_with_key>.34 from /home/yssun/pytorch-fm/torchfm/model/test_fx/exp/../../wd.py:22 in forward(6): forward
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/nn/modules/module.py(1522): _slow_forward
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/nn/modules/module.py(1541): _call_impl
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/nn/modules/module.py(1532): _wrapped_call_impl
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/fx/graph_module.py(304): __call__
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/fx/graph_module.py(737): call_wrapped
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/jit/_trace.py(1088): trace_module
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/torch/jit/_trace.py(820): trace
/tmp/ipykernel_3938445/35299551.py(1): <module>
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/interactiveshell.py(3526): run_code
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/interactiveshell.py(3466): run_ast_nodes
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/interactiveshell.py(3284): run_cell_async
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/async_helpers.py(129): _pseudo_sync_runner
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/interactiveshell.py(3079): _run_cell
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/IPython/core/interactiveshell.py(3024): run_cell
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/zmqshell.py(549): run_cell
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/ipkernel.py(429): do_execute
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/kernelbase.py(767): execute_request
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/kernelbase.py(429): dispatch_shell
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/kernelbase.py(523): process_one
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/kernelbase.py(534): dispatch_queue
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/asyncio/events.py(80): _run
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/asyncio/base_events.py(1905): _run_once
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/asyncio/base_events.py(601): run_forever
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/tornado/platform/asyncio.py(195): start
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel/kernelapp.py(701): start
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/traitlets/config/application.py(1075): launch_instance
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/site-packages/ipykernel_launcher.py(17): <module>
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/runpy.py(87): _run_code
/home/yssun/miniconda3/envs/deepctr-torch/lib/python3.9/runpy.py(197): _run_module_as_main
RuntimeError: The size of tensor a (22) must match the size of tensor b (34) at non-singleton dimension 1


In [68]:
interp = utils.ProfilingInterpreter(ori)
interp.run(torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))
print(interp.summary(True))

total true time 25.59208869934082 ms
total time: 25.890588760375977 ms
Op type        Op                     Average runtime (ms)    Pct total runtime
-------------  -------------------  ----------------------  -------------------
call_module    mlp_mlp_0                         9.8083              37.8837
call_module    mlp_mlp_2                         3.25298             12.5643
call_module    embedding_embedding               3.01933             11.6619
call_module    mlp_mlp_3                         2.33793              9.03005
call_module    mlp_mlp_1                         1.52564              5.89265
call_function  add                               1.27602              4.92849
call_module    mlp_mlp_6                         1.13153              4.37045
call_function  add_1                             0.720024             2.78103
call_module    mlp_mlp_5                         0.534058             2.06275
call_module    linear_fc                         0.52166              

In [73]:
%timeit -n 1 -r 30 with torch.no_grad(): output = modify(torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))

27.9 ms ± 3.61 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [71]:
interp = utils.ProfilingInterpreter(modify)
interp.run(torch.randint(low=0, high=88, size=(4096,22), dtype=torch.long))
print(interp.summary(True))

total true time 17.078638076782227 ms
total time: 17.4102783203125 ms
Op type        Op                   Average runtime (ms)    Pct total runtime
-------------  -----------------  ----------------------  -------------------
call_module    unredency_linear                5.36466             30.8132
call_module    mlp_mlp_3                       3.5882              20.6097
call_module    mlp_mlp_6                       1.33348              7.65913
call_module    mlp_mlp_2                       1.20568              6.92512
call_module    mlp_mlp_5                       0.722408             4.14932
call_module    embed_1                         0.691414             3.9713
call_function  add_3                           0.654697             3.76041
call_module    mlp_mlp_1                       0.566959             3.25646
call_module    mlp_mlp_8                       0.531673             3.05379
call_module    linear_fc                       0.447273             2.56902
call_function  ad

In [57]:
ori, modify = workload_wdl(34,29)

now gen workload of wdl with config: dim: 64, num_field: 34, prefix: 29


In [58]:
traced_model = torch.jit.trace(ori, torch.randint(low=0, high=88, size=(4096,34), dtype=torch.long))


Tensor-likes are not close!

Mismatched elements: 2613 / 4096 (63.8%)
Greatest absolute difference: 0.004204422235488892 at index (1427,) (up to 1e-05 allowed)
Greatest relative difference: 0.016725226649988593 at index (547,) (up to 1e-05 allowed)
  _check_trace(


In [60]:
%timeit -n 1 -r 30  output = traced_model(torch.randint(low=0, high=88, size=(4096,34), dtype=torch.long))


19.3 ms ± 2.51 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [61]:
traced_model = torch.jit.trace(modify, torch.randint(low=0, high=88, size=(4096,34), dtype=torch.long))


Tensor-likes are not close!

Mismatched elements: 2490 / 4096 (60.8%)
Greatest absolute difference: 0.00400996208190918 at index (2898,) (up to 1e-05 allowed)
Greatest relative difference: 0.01629094860381473 at index (1725,) (up to 1e-05 allowed)
  _check_trace(


In [64]:
%timeit -n 1 -r 30  output = traced_model(torch.randint(low=0, high=88, size=(4096,34), dtype=torch.long))


13.8 ms ± 7.42 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [8]:
gen_and_test(num_field = 100,prefix = 10, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 10, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 10


Tensor-likes are not close!

Mismatched elements: 1566 / 4096 (38.2%)
Greatest absolute difference: 0.004455745220184326 at index (2393,) (up to 1e-05 allowed)
Greatest relative difference: 0.019936104650030425 at index (1413,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:14:44 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:14:46 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:14:46 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(26.067006587982178, 19.1510434142117)


Tensor-likes are not close!

Mismatched elements: 1650 / 4096 (40.3%)
Greatest absolute difference: 0.00403904914855957 at index (2577,) (up to 1e-05 allowed)
Greatest relative difference: 0.02018615143973462 at index (3747,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:14:47 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:14:50 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:14:50 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(24.900972843170166, 1.3302087181443767)


(<torch.profiler.profiler.profile at 0x7f254750b310>,
 <torch.profiler.profiler.profile at 0x7f24cc2274f0>)

In [17]:
gen_and_test(num_field = 100,prefix = 20, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 20, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 20


Tensor-likes are not close!

Mismatched elements: 1651 / 4096 (40.3%)
Greatest absolute difference: 0.0039713382720947266 at index (378,) (up to 1e-05 allowed)
Greatest relative difference: 0.019091199539618176 at index (2522,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:24 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:26 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:26 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.67973279953003, 1.0849808369300717)


Tensor-likes are not close!

Mismatched elements: 1605 / 4096 (39.2%)
Greatest absolute difference: 0.003366798162460327 at index (3689,) (up to 1e-05 allowed)
Greatest relative difference: 0.016472350362389414 at index (1286,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:27 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:28 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:28 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(18.238983154296875, 0.864354277291568)


(<torch.profiler.profiler.profile at 0x7f232bcfdc40>,
 <torch.profiler.profiler.profile at 0x7f231e10b130>)

In [16]:
gen_and_test(num_field = 100,prefix = 30, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 30, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 30


Tensor-likes are not close!

Mismatched elements: 1672 / 4096 (40.8%)
Greatest absolute difference: 0.006113529205322266 at index (1567,) (up to 1e-05 allowed)
Greatest relative difference: 0.020976524688257496 at index (3334,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:18 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:20 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:20 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(19.904286861419678, 1.688579978525695)


Tensor-likes are not close!

Mismatched elements: 1613 / 4096 (39.4%)
Greatest absolute difference: 0.0040863752365112305 at index (2269,) (up to 1e-05 allowed)
Greatest relative difference: 0.022751832695967422 at index (2640,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:20 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:22 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:22 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(16.637537479400635, 1.3512566771339607)


(<torch.profiler.profiler.profile at 0x7f234006e8e0>,
 <torch.profiler.profiler.profile at 0x7f2338d86dc0>)

In [15]:
gen_and_test(num_field = 100,prefix = 40, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 40, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 40


Tensor-likes are not close!

Mismatched elements: 1682 / 4096 (41.1%)
Greatest absolute difference: 0.00382271409034729 at index (1540,) (up to 1e-05 allowed)
Greatest relative difference: 0.02211447155755267 at index (742,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:12 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:14 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:14 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(20.639586448669434, 1.226675518068987)


Tensor-likes are not close!

Mismatched elements: 1620 / 4096 (39.6%)
Greatest absolute difference: 0.0037133991718292236 at index (3918,) (up to 1e-05 allowed)
Greatest relative difference: 0.017600844298210596 at index (3515,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:16 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:17 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:17 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(12.92841911315918, 0.6068356231935468)


(<torch.profiler.profiler.profile at 0x7f24a84b3b50>,
 <torch.profiler.profiler.profile at 0x7f234d819fd0>)

In [14]:
gen_and_test(num_field = 100,prefix = 50, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 50, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 50


Tensor-likes are not close!

Mismatched elements: 1651 / 4096 (40.3%)
Greatest absolute difference: 0.0038703083992004395 at index (522,) (up to 1e-05 allowed)
Greatest relative difference: 0.01735371815888012 at index (360,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:07 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:09 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:09 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(20.556962490081787, 0.8857728145983401)


Tensor-likes are not close!

Mismatched elements: 1615 / 4096 (39.4%)
Greatest absolute difference: 0.0036076605319976807 at index (1555,) (up to 1e-05 allowed)
Greatest relative difference: 0.017785871534745576 at index (1153,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:10 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:11 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:11 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(12.306792736053467, 1.3778062876269814)


(<torch.profiler.profiler.profile at 0x7f2368d86a00>,
 <torch.profiler.profiler.profile at 0x7f23624ef100>)

In [13]:
gen_and_test(num_field = 100,prefix = 60, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 60, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 60


Tensor-likes are not close!

Mismatched elements: 1651 / 4096 (40.3%)
Greatest absolute difference: 0.004994094371795654 at index (2430,) (up to 1e-05 allowed)
Greatest relative difference: 0.020447587005213038 at index (4,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:02 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:04 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:04 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(23.962390422821045, 0.5046486380422266)


Tensor-likes are not close!

Mismatched elements: 1689 / 4096 (41.2%)
Greatest absolute difference: 0.004037082195281982 at index (524,) (up to 1e-05 allowed)
Greatest relative difference: 0.019295802035897272 at index (1214,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:16:05 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:16:06 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:16:06 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(11.043789386749268, 1.643301379198192)


(<torch.profiler.profiler.profile at 0x7f238447d4f0>,
 <torch.profiler.profiler.profile at 0x7f2377068d00>)

In [18]:
gen_and_test(num_field = 100,prefix = 70, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 70, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 70


Tensor-likes are not close!

Mismatched elements: 1634 / 4096 (39.9%)
Greatest absolute difference: 0.003493666648864746 at index (2801,) (up to 1e-05 allowed)
Greatest relative difference: 0.015632367016009705 at index (2507,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:14 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:16 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:16 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(23.23714256286621, 1.6776125875367143)


Tensor-likes are not close!

Mismatched elements: 1678 / 4096 (41.0%)
Greatest absolute difference: 0.004688680171966553 at index (1536,) (up to 1e-05 allowed)
Greatest relative difference: 0.0178786671660291 at index (3290,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:17 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:18 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:18 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(9.00054931640625, 0.9987564634684531)


(<torch.profiler.profiler.profile at 0x7f2316aa59a0>,
 <torch.profiler.profiler.profile at 0x7f2309534640>)

In [19]:
gen_and_test(num_field = 100,prefix = 80, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 80, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 80


Tensor-likes are not close!

Mismatched elements: 1660 / 4096 (40.5%)
Greatest absolute difference: 0.0036106109619140625 at index (2050,) (up to 1e-05 allowed)
Greatest relative difference: 0.01957697309877473 at index (3573,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:30 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:32 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:32 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.360960006713867, 3.6617048039261135)


Tensor-likes are not close!

Mismatched elements: 1617 / 4096 (39.5%)
Greatest absolute difference: 0.00388413667678833 at index (255,) (up to 1e-05 allowed)
Greatest relative difference: 0.019172057702368798 at index (2887,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:34 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:35 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:35 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.939450740814209, 0.8209464084018236)


(<torch.profiler.profiler.profile at 0x7f22fbdc1ca0>,
 <torch.profiler.profiler.profile at 0x7f22f4848730>)

In [20]:
gen_and_test(num_field = 100,prefix = 90, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 90, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 90


Tensor-likes are not close!

Mismatched elements: 1601 / 4096 (39.1%)
Greatest absolute difference: 0.003227740526199341 at index (228,) (up to 1e-05 allowed)
Greatest relative difference: 0.01610973706502481 at index (697,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:35 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:38 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:38 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.208510398864746, 1.0696661693373244)


Tensor-likes are not close!

Mismatched elements: 1626 / 4096 (39.7%)
Greatest absolute difference: 0.0037603378295898438 at index (461,) (up to 1e-05 allowed)
Greatest relative difference: 0.015071350386513624 at index (2446,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:38 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:39 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:39 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(6.792166233062744, 1.0899073198459064)


(<torch.profiler.profiler.profile at 0x7f22e7359850>,
 <torch.profiler.profiler.profile at 0x7f22dfdb9880>)

In [21]:
gen_and_test(num_field = 100,prefix = 99, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 99, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 100, prefix: 99


Tensor-likes are not close!

Mismatched elements: 1627 / 4096 (39.7%)
Greatest absolute difference: 0.0038437247276306152 at index (2776,) (up to 1e-05 allowed)
Greatest relative difference: 0.015229304932848759 at index (1862,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:40 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:42 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:42 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(21.565823554992676, 0.42311978038469533)


Tensor-likes are not close!

Mismatched elements: 1628 / 4096 (39.7%)
Greatest absolute difference: 0.003881394863128662 at index (3828,) (up to 1e-05 allowed)
Greatest relative difference: 0.02001201582697765 at index (217,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-06 22:23:43 1631212:1631212 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-06 22:23:43 1631212:1631212 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-06 22:23:43 1631212:1631212 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(5.613346099853516, 0.31016451280265755)


(<torch.profiler.profiler.profile at 0x7f22d25daa60>,
 <torch.profiler.profiler.profile at 0x7f22c50faf40>)

In [10]:
def gen_and_test_profile(num_field = 22,prefix = 10, batch = 4096, dim = 32, workload_func = workload_wdl,l = [1024,512,256]):
  def run(model):
    traced_model = torch.jit.trace(model, torch.randint(low=0, high=88, size=(batch,num_field), dtype=torch.long))
    compiled_model = torch.compile(traced_model, backend="inductor")
    compiled_model.eval()
    total_time = []
    t = torch.randint(low=0, high=88, size=(batch ,num_field), dtype=torch.long)

    with profile(activities=[ProfilerActivity.CPU],  record_shapes=True,on_trace_ready=torch.profiler.tensorboard_trace_handler('./log_wdl_modify') ,with_stack=True ) as prof:
        with record_function("model_inference"):
          for i in range(100):
              start_time = time.time()  # 开始计时
              with torch.no_grad():
                soutput = compiled_model(t)
              end_time = time.time()  # 结束计时
              
              # 计算并打印函数执行所需的时间
              elapsed_time = end_time - start_time
              total_time.append(elapsed_time * 1000)
    print(calculate_mean_and_variance_manual(total_time))
    return prof
  print(f"now gen workload of wdl with config: dim: {dim}, num_field: {num_field}, prefix: {prefix}, batch :{batch}")
  ori, modify = workload_func(num_field,prefix,dim,l)
  p1 = run(ori)
  p2 = run(modify)
  return p1, p2

In [13]:
gen_and_test_profile(num_field = 22,prefix = 10, batch = 1024, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :1024
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 721 / 1024 (70.4%)
Greatest absolute difference: 0.0037140846252441406 at index (367,) (up to 1e-05 allowed)
Greatest relative difference: 0.015452480478401046 at index (701,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:08:36 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:08:37 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:08:37 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(5.275027751922607, 0.6495660241569112)


Tensor-likes are not close!

Mismatched elements: 756 / 1024 (73.8%)
Greatest absolute difference: 0.005004376173019409 at index (754,) (up to 1e-05 allowed)
Greatest relative difference: 0.02146991363127327 at index (493,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:08:37 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:08:38 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:08:38 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(10.729498863220215, 1.5112201601596098)


(<torch.profiler.profiler.profile at 0x7fb0c01ecfd0>,
 <torch.profiler.profiler.profile at 0x7fb0580cf460>)

In [14]:
gen_and_test_profile(num_field = 22,prefix = 10, batch = 2048, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :2048
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 1440 / 2048 (70.3%)
Greatest absolute difference: 0.003741443157196045 at index (311,) (up to 1e-05 allowed)
Greatest relative difference: 0.017083407840963287 at index (775,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:08:46 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:08:47 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:08:47 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(7.576448917388916, 1.4887320131208526)


Tensor-likes are not close!

Mismatched elements: 1495 / 2048 (73.0%)
Greatest absolute difference: 0.002955496311187744 at index (1144,) (up to 1e-05 allowed)
Greatest relative difference: 0.014663245952207967 at index (619,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:08:48 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:08:50 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:08:50 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(15.046193599700928, 0.9352638957068393)


(<torch.profiler.profiler.profile at 0x7faf07bba0d0>,
 <torch.profiler.profiler.profile at 0x7faf05686550>)

In [15]:
gen_and_test_profile(num_field = 22,prefix = 10, batch = 4096, dim = 32)

now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10, batch :4096
now gen workload of wdl with config: dim: 32, num_field: 22, prefix: 10


Tensor-likes are not close!

Mismatched elements: 2916 / 4096 (71.2%)
Greatest absolute difference: 0.004583805799484253 at index (2336,) (up to 1e-05 allowed)
Greatest relative difference: 0.016851423039309716 at index (3936,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:09:12 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:09:13 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:09:13 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(13.041141033172607, 3.176675242565352)


Tensor-likes are not close!

Mismatched elements: 2924 / 4096 (71.4%)
Greatest absolute difference: 0.005351066589355469 at index (2990,) (up to 1e-05 allowed)
Greatest relative difference: 0.02312557945766021 at index (1214,) (up to 1e-05 allowed)
  _check_trace(
STAGE:2025-02-08 16:09:14 4142457:4142457 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-08 16:09:16 4142457:4142457 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-08 16:09:16 4142457:4142457 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


(20.12953758239746, 1.1175145084507676)


(<torch.profiler.profiler.profile at 0x7facce01d340>,
 <torch.profiler.profiler.profile at 0x7faccc48a040>)