diff --git a/kernel/x86_64/cscal_microk_bulldozer-2.c b/kernel/x86_64/cscal_microk_bulldozer-2.c index 3abffc4cfa..31451aa6cb 100644 --- a/kernel/x86_64/cscal_microk_bulldozer-2.c +++ b/kernel/x86_64/cscal_microk_bulldozer-2.c @@ -116,11 +116,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -208,11 +208,11 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -285,11 +285,11 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -330,11 +330,11 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", diff --git a/kernel/x86_64/cscal_microk_haswell-2.c b/kernel/x86_64/cscal_microk_haswell-2.c index 0a4eb683c2..a04a4c4aba 100644 --- a/kernel/x86_64/cscal_microk_haswell-2.c +++ b/kernel/x86_64/cscal_microk_haswell-2.c @@ -116,11 +116,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"0", "1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -208,9 +208,9 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 : "cc", // "0", "1", "%xmm0", "%xmm1", "%xmm2", "%xmm3", @@ -285,9 +285,9 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 : "cc", //"%0", "%1", "%xmm0", "%xmm1", "%xmm2", "%xmm3", @@ -329,12 +329,12 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" - : - : - "r" (n), // 0 - "r" (x), // 1 + : + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"0", "1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", diff --git a/kernel/x86_64/cscal_microk_steamroller-2.c b/kernel/x86_64/cscal_microk_steamroller-2.c index 8346e17483..e8073d485e 100644 --- a/kernel/x86_64/cscal_microk_steamroller-2.c +++ b/kernel/x86_64/cscal_microk_steamroller-2.c @@ -117,11 +117,11 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"0", "1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -208,12 +208,12 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" + : + "+r" (n), // 0 + "+r" (x) // 1 : - : - "r" (n), // 0 - "r" (x), // 1 "r" (alpha) // 2 - : "cc", //"0", "1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -286,11 +286,11 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -331,11 +331,11 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"0", "1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", diff --git a/kernel/x86_64/dscal_microk_bulldozer-2.c b/kernel/x86_64/dscal_microk_bulldozer-2.c index de53b0bc4b..096662781e 100644 --- a/kernel/x86_64/dscal_microk_bulldozer-2.c +++ b/kernel/x86_64/dscal_microk_bulldozer-2.c @@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n1), // 0 - "r" (x), // 1 + "+r" (n1), // 0 + "+r" (x) // 1 + : "r" (alpha), // 2 "r" (n2) // 3 : "cc", @@ -188,9 +188,9 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n1), // 0 - "r" (x), // 1 + "+r" (n1), // 0 + "+r" (x) // 1 + : "r" (alpha), // 2 "r" (n2) // 3 : "cc", diff --git a/kernel/x86_64/dscal_microk_haswell-2.c b/kernel/x86_64/dscal_microk_haswell-2.c index e732a27181..77ed59a4e3 100644 --- a/kernel/x86_64/dscal_microk_haswell-2.c +++ b/kernel/x86_64/dscal_microk_haswell-2.c @@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n1), // 0 - "r" (x), // 1 + "+r" (n1), // 0 + "+r" (x) // 1 + : "r" (alpha), // 2 "r" (n2) // 3 : "cc", @@ -187,10 +187,10 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" + : + "+r" (n1), // 0 + "+r" (x) // 1 : - : - "r" (n1), // 0 - "r" (x), // 1 "r" (alpha), // 2 "r" (n2) // 3 : "cc", diff --git a/kernel/x86_64/dscal_microk_sandy-2.c b/kernel/x86_64/dscal_microk_sandy-2.c index 8d855072b4..9982b8e587 100644 --- a/kernel/x86_64/dscal_microk_sandy-2.c +++ b/kernel/x86_64/dscal_microk_sandy-2.c @@ -122,9 +122,9 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n1), // 0 - "r" (x), // 1 + "+r" (n1), // 0 + "+r" (x) // 1 + : "r" (alpha), // 2 "r" (n2) // 3 : "cc", @@ -187,10 +187,10 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" + : + "+r" (n1), // 0 + "+r" (x) // 1 : - : - "r" (n1), // 0 - "r" (x), // 1 "r" (alpha), // 2 "r" (n2) // 3 : "cc", diff --git a/kernel/x86_64/zscal_microk_bulldozer-2.c b/kernel/x86_64/zscal_microk_bulldozer-2.c index 03882d6b66..5e733ffdae 100644 --- a/kernel/x86_64/zscal_microk_bulldozer-2.c +++ b/kernel/x86_64/zscal_microk_bulldozer-2.c @@ -116,11 +116,11 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -208,11 +208,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -285,9 +285,9 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 : "cc", //"%0", "%1", "%xmm0", "%xmm1", "%xmm2", "%xmm3", @@ -329,10 +329,10 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" + : + "+r" (n), // 0 + "+r" (x) // 1 : - : - "r" (n), // 0 - "r" (x), // 1 "r" (alpha) // 2 : "cc", //"%0", "%1", "%xmm0", "%xmm1", "%xmm2", "%xmm3", diff --git a/kernel/x86_64/zscal_microk_haswell-2.c b/kernel/x86_64/zscal_microk_haswell-2.c index d9253c1ed5..8c8f5b75cb 100644 --- a/kernel/x86_64/zscal_microk_haswell-2.c +++ b/kernel/x86_64/zscal_microk_haswell-2.c @@ -116,11 +116,11 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -208,11 +208,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -285,11 +285,11 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -330,11 +330,11 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", diff --git a/kernel/x86_64/zscal_microk_steamroller-2.c b/kernel/x86_64/zscal_microk_steamroller-2.c index 97b07add65..c9267ee0c3 100644 --- a/kernel/x86_64/zscal_microk_steamroller-2.c +++ b/kernel/x86_64/zscal_microk_steamroller-2.c @@ -116,12 +116,12 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" + : + "+r" (n), // 0 + "+r" (x) // 1 : - : - "r" (n), // 0 - "r" (x), // 1 "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -209,11 +209,11 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -286,11 +286,11 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", @@ -331,11 +331,11 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x) "vzeroupper \n\t" : - : - "r" (n), // 0 - "r" (x), // 1 + "+r" (n), // 0 + "+r" (x) // 1 + : "r" (alpha) // 2 - : "cc", //"%0", "%1", + : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11",