Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WASM] Implement movemask routine #131980

Open
Validark opened this issue Mar 19, 2025 · 1 comment
Open

[WASM] Implement movemask routine #131980

Validark opened this issue Mar 19, 2025 · 1 comment

Comments

@Validark
Copy link

Validark commented Mar 19, 2025

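Currently, LLVM's WebAssembly backend does not recognize the movemask pattern (bitcasting the result of a vector comparison to an integer bitmask). Godbolt link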

export fn z(x: @Vector(64, u8)) u64 {
    return @bitCast(x == @as(@Vector(64, u8), @splat(' ')));
}

LLVM IR:

define dso_local i64 @z(<64 x i8> %0) local_unnamed_addr {
Entry:
  %1 = icmp eq <64 x i8> %0, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

Emitted assembly:

z:
        local.get       0
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       1
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       3
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       2
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       4
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       5
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       7
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       6
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       12
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       13
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       15
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       14
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       8
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       9
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       11
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       10
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       65535
        i32.and 
        local.get       16
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       17
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       19
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       18
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       20
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       21
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       23
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       22
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       28
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       29
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       31
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       30
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       24
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       25
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       27
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       26
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       16
        i32.shl 
        i32.or  
        i64.extend_i32_u
        local.get       32
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       33
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       35
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       34
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       36
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       37
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       39
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       38
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       44
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       45
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       47
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       46
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       40
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       41
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       43
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       42
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       65535
        i32.and 
        local.get       48
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       49
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       51
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       50
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       52
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       53
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       55
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       54
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       60
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       61
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       63
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       62
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       56
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       57
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       59
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       58
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       16
        i32.shl 
        i32.or  
        i64.extend_i32_u
        i64.const       32
        i64.shl 
        i64.or  
        end_function

I believe the instructions that should be used for this lowering were introduced here:
WebAssembly/simd#201

@llvmbot
Copy link
Member

llvmbot commented Mar 19, 2025

@llvm/issue-subscribers-backend-webassembly

Author: Niles Salter (Validark)

Currently LLVM does not recognize movemasks. [Godbolt link](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:'export+fn+z(x:+@Vector(64,+u8))+u64+%7B%0A++++return+@bitCast(x+%3D%3D+@as(@Vector(64,+u8),+@splat(!'+!')))%3B%0A%7D%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:50,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-target+wasm64-linux',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),header:(),k:50,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
export fn z(x: @Vector(64, u8)) u64 {
    return @bitCast(x == @as(@Vector(64, u8), @splat(' ')));
}

LLVM IR:

define dso_local i64 @<!-- -->z(&lt;64 x i8&gt; %0) local_unnamed_addr {
Entry:
  %1 = icmp eq &lt;64 x i8&gt; %0, &lt;i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32&gt;
  %2 = bitcast &lt;64 x i1&gt; %1 to i64
  ret i64 %2
}

Emitted assembly:

z:
        local.get       0
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       1
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       3
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       2
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       4
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       5
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       7
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       6
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       12
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       13
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       15
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       14
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       8
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       9
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       11
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       10
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       65535
        i32.and 
        local.get       16
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       17
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       19
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       18
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       20
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       21
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       23
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       22
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       28
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       29
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       31
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       30
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       24
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       25
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       27
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       26
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       16
        i32.shl 
        i32.or  
        i64.extend_i32_u
        local.get       32
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       33
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       35
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       34
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       36
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       37
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       39
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       38
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       44
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       45
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       47
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       46
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       40
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       41
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       43
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       42
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       65535
        i32.and 
        local.get       48
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       49
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       51
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       50
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        local.get       52
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       53
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       55
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       54
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       4
        i32.shl 
        i32.or  
        local.get       60
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       61
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       63
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       62
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       12
        i32.shl 
        local.get       56
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        local.get       57
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       1
        i32.shl 
        i32.or  
        local.get       59
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       3
        i32.shl 
        local.get       58
        i32.const       255
        i32.and 
        i32.const       32
        i32.eq  
        i32.const       2
        i32.shl 
        i32.or  
        i32.or  
        i32.const       8
        i32.shl 
        i32.or  
        i32.or  
        i32.const       16
        i32.shl 
        i32.or  
        i64.extend_i32_u
        i64.const       32
        i64.shl 
        i64.or  
        end_function

I believe the instructions that should be used for this lowering were introduced here:
WebAssembly/simd#201

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants