Skip to content

reshape fails with RuntimeError: old_rfactor.size() == new_rfactor.size() INTERNAL ASSERT FAILED at "/workspace/Fuser/csrc/dynamic_transform.cpp":652 #1691

@nikitaved

Description

@nikitaved

The following code

import torch                                                                                                           
from nvfuser import FusionDefinition, DataType                                                                         
                                                                                                                       
def nvfuser_fusion_id9(fd : FusionDefinition) -> None :
    """Populate *fd* with the fusion that reproduces the reported failure.

    Two float tensor inputs are each reduced to a 1-D tensor, both results
    are reshaped to shape ``[4]``, multiplied together, and the sum of the
    product over its only axis is registered as the single fusion output.

    Args:
        fd: The fusion definition that receives the inputs, ops and output.
    """
    # 3-D float input whose leading extent is fixed to 1; the two -1 extents
    # are presumably dynamic (resolved from the runtime input) -- confirm
    # against the nvfuser frontend docs.
    T0 = fd.define_tensor(shape=[1, -1, -1], contiguity=[None, True, True], dtype=DataType.Float, is_cpu=False, stride_order=[2, 1, 0])
    # 2-D float input with both extents given as -1.
    T1 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True], dtype=DataType.Float, is_cpu=False, stride_order=[1, 0])
    # Reduce each input down to one remaining axis (keepdim=False drops the
    # reduced axes): T1 over axis 1, T0 over axes 1 and 0.
    T2 = fd.ops.sum(T1, axes=[1], keepdim=False, dtype=DataType.Null)
    T3 = fd.ops.sum(T0, axes=[1, 0], keepdim=False, dtype=DataType.Null)
    # Reshape the first reduction result to the constant shape [4].
    S4 = fd.define_scalar(4, dtype=DataType.Int)
    V5 = fd.define_vector([S4], dtype=DataType.Int)
    T6 = fd.ops.reshape(T2, new_shape=V5)
    # Same reshape for the second reduction result, built from its own
    # scalar/vector pair rather than reusing S4/V5.
    S7 = fd.define_scalar(4, dtype=DataType.Int)
    V8 = fd.define_vector([S7], dtype=DataType.Int)
    T9 = fd.ops.reshape(T3, new_shape=V8)
    # Element-wise product of the two reshaped tensors, then a full
    # reduction over the single remaining axis.
    T10 = fd.ops.mul(T6, T9)
    T11 = fd.ops.sum(T10, axes=[0], keepdim=False, dtype=DataType.Null)
    fd.add_output(T11)
                                                                                                                       
# Record the fusion: the definition function is called inside the
# FusionDefinition context.
with FusionDefinition() as fd:
    nvfuser_fusion_id9(fd)

# Concrete CUDA inputs matching the two define_tensor calls: a (1, 3, 4) view
# for the 3-D input and a (4, 3) view for the 2-D input, each taken over a
# fresh 12-element random buffer.
inputs = [
    torch.randn((12,), dtype=torch.float32, device='cuda:0').as_strided((1, 3, 4), (12, 4, 1)),
    torch.randn((12,), dtype=torch.float32, device='cuda:0').as_strided((4, 3), (3, 1)),
]
# Per this report, running the script fails with the
# "old_rfactor.size() == new_rfactor.size()" internal assert.
fd.execute(inputs)

fails with

RuntimeError: old_rfactor.size() == new_rfactor.size() INTERNAL ASSERT FAILED at "/workspace/Fuser/csrc/dynamic_transform.cpp":652, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues. Concretized reshape rfactor size does not match symbolic rfactor

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions