Skip to content

Commit

Permalink
work on simd
Browse files Browse the repository at this point in the history
  • Loading branch information
jsorg71 committed Nov 4, 2014
1 parent 649a7ad commit 61f6e92
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 108 deletions.
2 changes: 0 additions & 2 deletions include/rfxcodec_encode.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#ifndef __RFXCODEC_ENCODE_H
#define __RFXCODEC_ENCODE_H

#define RFX_USE_ACCEL 0

#define RFX_FORMAT_BGRA 0
#define RFX_FORMAT_RGBA 1
#define RFX_FORMAT_BGR 2
Expand Down
3 changes: 2 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ rfxencode_rlgr1.o rfxencode_rlgr3.o
#OBJS += cpuid_amd64.o rfxrlgr1_amd64.o rfxrlgr3_amd64.o rfxdwt_amd64_sse2.o

CFLAGS = $(PROFIL) -g -O2 -Wall -fPIC -I../include
#-DRFX_USE_ACCEL
#-DRFX_USE_ACCEL_X86
#-DRFX_USE_ACCEL_AMD64

LDFLAGS =

Expand Down
5 changes: 4 additions & 1 deletion src/amd64/funcs_amd64.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ amd64 asm files
int
cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx);
int
dwt_shift_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
dwt_shift_amd64_sse2(const int *quantization_values, uint8 *data,
sint16 *dwt_buffer1, sint16 *dwt_buffer);
int
diff_rlgr1_amd64(sint16 *co, int num_co, uint8 *dst, int dst_bytes);
int
diff_rlgr3_amd64(sint16 *co, int num_co, uint8 *dst, int dst_bytes);

Expand Down
3 changes: 2 additions & 1 deletion src/amd64/rfxdwt_amd64_sse2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ section .data
%endmacro

;int
;dwt_shift_amd64_sse2(const int* qtable, sint8* src, sint16* dst, sint16* temp)
;dwt_shift_amd64_sse2(const int *quantization_values, uint8 *data,
; sint16 *dwt_buffer1, sint16 *dwt_buffer);

PROC dwt_shift_amd64_sse2
; save registers
Expand Down
138 changes: 53 additions & 85 deletions src/rfxencode.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,103 +28,40 @@
#include "rfxconstants.h"
#include "rfxencode_tile.h"

/******************************************************************************/
/*
 * Execute the x86 CPUID instruction for leaf `func` (sub-leaf 0) and store
 * the resulting register values through eax, ebx, ecx and edx.
 *
 * All four outputs are zeroed first, so when the inline assembly is compiled
 * out (non-GNU compiler or non-x86 target) the caller sees every feature bit
 * as clear.
 */
static void
cpuid(int func, int *eax, int *ebx, int *ecx, int *edx)
{
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#ifdef __GNUC__
#if defined(__x86_64__)
    *eax = func;
    /* Let the compiler manage rbx directly.  Saving and restoring it with
       32-bit moves would lose the upper half of the 64-bit register, which
       the compiler expects to be preserved.  ecx goes in as 0 (set above)
       so the sub-leaf selector is deterministic. */
    __asm volatile
    (
        "cpuid;"
        : "+a" (*eax), "=b" (*ebx), "+c" (*ecx), "=d" (*edx)
    );
#elif defined(__i386__)
    *eax = func;
    /* ebx is kept out of the operand list and preserved by hand through edi
       (it may be reserved as the PIC register in 32-bit builds); the ebx
       result is returned through esi instead.  ecx goes in as 0 (set above)
       so the sub-leaf selector is deterministic. */
    __asm volatile
    (
        "mov %%ebx, %%edi;"
        "cpuid;"
        "mov %%ebx, %%esi;"
        "mov %%edi, %%ebx;"
        : "+a" (*eax), "=S" (*ebx), "+c" (*ecx), "=d" (*edx)
        : : "edi"
    );
#endif
#endif
}

#if 0
inline unsigned int get_cpu_feature_flags()
{
unsigned int features;

__asm
{
// Save registers
push eax
push ebx
push ecx
push edx

// Get the feature flags (eax=1) from edx
mov eax, 1
cpuid
mov features, edx

// Restore registers
pop edx
pop ecx
pop ebx
pop eax
}

return features;
}

#define cpuid(func,a,b,c,d)\
asm {\
mov eax, func\
cpuid\
mov a, eax\
mov b, ebx\
mov c, ecx\
mov d, edx\
}

#ifdef RFX_USE_ACCEL_X86
#include "x86/funcs_x86.h"
#endif

// http://softpixel.com/~cwright/programming/simd/cpuid.php

#define SSE4_1_FLAG 0x080000
#define SSE4_2_FLAG 0x100000

/*
Function 0x80000001:
bit (edx) feature
22 AMD MMX Extensions
30 3DNow!2
31 3DNow!
*/

#if 0
#define cpuid(_func, _ax, _bx, _cx, _dx) \
__asm volatile ("cpuid": \
"=a" (_ax), "=b" (_bx), "=c" (_cx), "=d" (_dx) : "a" (_func));
#ifdef RFX_USE_ACCEL_AMD64
#include "amd64/funcs_amd64.h"
#endif

/******************************************************************************/
void *
rfxcodec_encode_create(int width, int height, int format, int flags)
{
struct rfxencode *enc;
int ax, bx, cx, dx;
int ax;
int bx;
int cx;
int dx;

enc = (struct rfxencode *) malloc(sizeof(struct rfxencode));
if (enc == 0)
{
return 0;
}
memset(enc, 0, sizeof(struct rfxencode));
cpuid(1, &ax, &bx, &cx, &dx);
#if defined(RFX_USE_ACCEL_X86)
cpuid_x86(1, 0, &ax, &bx, &cx, &dx);
#elif defined(RFX_USE_ACCEL_AMD64)
cpuid_amd64(1, 0, &ax, &bx, &cx, &dx);
#else
ax = 0;
bx = 0;
cx = 0;
dx = 0;
#endif
if (dx & (1 << 26)) /* SSE 2 */
{
printf("rfxcodec_encode_create: got sse2\n");
Expand All @@ -150,7 +87,16 @@ rfxcodec_encode_create(int width, int height, int format, int flags)
printf("rfxcodec_encode_create: got popcnt\n");
enc->got_popcnt = 1;
}
cpuid(0x80000001, &ax, &bx, &cx, &dx);
#if defined(RFX_USE_ACCEL_X86)
cpuid_x86(0x80000001, 0, &ax, &bx, &cx, &dx);
#elif defined(RFX_USE_ACCEL_AMD64)
cpuid_amd64(0x80000001, 0, &ax, &bx, &cx, &dx);
#else
ax = 0;
bx = 0;
cx = 0;
dx = 0;
#endif
if (cx & (1 << 5)) /* lzcnt */
{
printf("rfxcodec_encode_create: got lzcnt\n");
Expand Down Expand Up @@ -205,8 +151,24 @@ rfxcodec_encode_create(int width, int height, int format, int flags)
}
else
{
#if defined(RFX_USE_ACCEL) && RFX_USE_ACCEL
enc->rfx_encode = rfx_encode_component_x86_sse4; /* rfxencode_tile.c */
#if defined(RFX_USE_ACCEL_X86)
if (enc->mode == RLGR3)
{
enc->rfx_encode = rfx_encode_component_rlgr3_x86_sse2; /* rfxencode_tile.c */
}
else
{
enc->rfx_encode = rfx_encode_component_rlgr1_x86_sse2; /* rfxencode_tile.c */
}
#elif defined(RFX_USE_ACCEL_AMD64)
if (enc->mode == RLGR3)
{
enc->rfx_encode = rfx_encode_component_rlgr3_amd64_sse2; /* rfxencode_tile.c */
}
else
{
enc->rfx_encode = rfx_encode_component_rlgr1_amd64_sse2; /* rfxencode_tile.c */
}
#else
if (enc->mode == RLGR3)
{
Expand All @@ -218,7 +180,13 @@ rfxcodec_encode_create(int width, int height, int format, int flags)
}
#endif
}
return enc;
if (ax == 0)
{
}
if (bx == 0)
{
}
return enc;
}

/******************************************************************************/
Expand Down
78 changes: 71 additions & 7 deletions src/rfxencode_tile.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@
#include "rfxencode_rlgr1.h"
#include "rfxencode_rlgr3.h"

#ifdef RFX_USE_ACCEL_X86
#include "x86/funcs_x86.h"
#endif

#ifdef RFX_USE_ACCEL_AMD64
#include "amd64/funcs_amd64.h"
#endif

#define LLOG_LEVEL 1
#define LLOGLN(_level, _args) \
do { if (_level < LLOG_LEVEL) { printf _args ; printf("\n"); } } while (0)
Expand Down Expand Up @@ -199,14 +207,32 @@ rfx_encode_component_rlgr3(struct rfxencode *enc, const int *quantization_values

/******************************************************************************/
int
rfx_encode_component_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size)
rfx_encode_component_rlgr1_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size)
{
LLOGLN(10, ("rfx_encode_component_rlgr1_x86_sse2:"));
#if defined(RFX_USE_ACCEL_X86)
if (dwt_shift_x86_sse2(quantization_values, data, enc->dwt_buffer1,
enc->dwt_buffer) != 0)
{
return 1;
}
*size = diff_rlgr1_x86(enc->dwt_buffer1, 4096, buffer, buffer_size);
#endif
return 0;
}

/******************************************************************************/
int
rfx_encode_component_rlgr3_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size)
{
LLOGLN(10, ("rfx_encode_component_x86_sse2:"));
#if defined(RFX_USE_ACCEL) && RFX_USE_ACCEL
/* put asm calls here */
LLOGLN(10, ("rfx_encode_component_rlgr3_x86_sse2:"));
#if defined(RFX_USE_ACCEL_X86)
if (dwt_shift_x86_sse2(quantization_values, data, enc->dwt_buffer1,
enc->dwt_buffer) != 0)
{
Expand All @@ -217,6 +243,44 @@ rfx_encode_component_x86_sse2(struct rfxencode *enc,
return 0;
}

/******************************************************************************/
/*
 * Encode one component through the amd64 SSE2 path, RLGR1 variant.
 *
 * With RFX_USE_ACCEL_AMD64 defined:
 *   1. dwt_shift_amd64_sse2() is run on `data` with `quantization_values`,
 *      using enc->dwt_buffer1 and enc->dwt_buffer as its two sint16 buffers.
 *   2. diff_rlgr1_amd64() is run over enc->dwt_buffer1 with a coefficient
 *      count of 4096 (presumably one 64x64 tile - confirm), with `buffer`
 *      and `buffer_size` passed as the destination and its size.
 *   3. *size receives the value returned by diff_rlgr1_amd64().
 *
 * Returns 0 on success.  Returns 1 if dwt_shift_amd64_sse2() reports a
 * non-zero result, or if the amd64 acceleration was not compiled in; in both
 * of those cases *size is left untouched.
 */
int
rfx_encode_component_rlgr1_amd64_sse2(struct rfxencode *enc,
                                      const int *quantization_values,
                                      uint8 *data,
                                      uint8 *buffer, int buffer_size, int *size)
{
    LLOGLN(10, ("rfx_encode_component_rlgr1_amd64_sse2:"));
#if defined(RFX_USE_ACCEL_AMD64)
    if (dwt_shift_amd64_sse2(quantization_values, data, enc->dwt_buffer1,
                             enc->dwt_buffer) != 0)
    {
        return 1;
    }
    *size = diff_rlgr1_amd64(enc->dwt_buffer1, 4096, buffer, buffer_size);
    return 0;
#else
    /* Nothing was encoded and *size was never written, so report failure
       instead of a success the caller could act on. */
    return 1;
#endif
}

/******************************************************************************/
/*
 * Encode one component through the amd64 SSE2 path, RLGR3 variant.
 *
 * With RFX_USE_ACCEL_AMD64 defined:
 *   1. dwt_shift_amd64_sse2() is run on `data` with `quantization_values`,
 *      using enc->dwt_buffer1 and enc->dwt_buffer as its two sint16 buffers.
 *   2. diff_rlgr3_amd64() is run over enc->dwt_buffer1 with a coefficient
 *      count of 4096 (presumably one 64x64 tile - confirm), with `buffer`
 *      and `buffer_size` passed as the destination and its size.
 *   3. *size receives the value returned by diff_rlgr3_amd64().
 *
 * Returns 0 on success.  Returns 1 if dwt_shift_amd64_sse2() reports a
 * non-zero result, or if the amd64 acceleration was not compiled in; in both
 * of those cases *size is left untouched.
 */
int
rfx_encode_component_rlgr3_amd64_sse2(struct rfxencode *enc,
                                      const int *quantization_values,
                                      uint8 *data,
                                      uint8 *buffer, int buffer_size, int *size)
{
    LLOGLN(10, ("rfx_encode_component_rlgr3_amd64_sse2:"));
#if defined(RFX_USE_ACCEL_AMD64)
    if (dwt_shift_amd64_sse2(quantization_values, data, enc->dwt_buffer1,
                             enc->dwt_buffer) != 0)
    {
        return 1;
    }
    *size = diff_rlgr3_amd64(enc->dwt_buffer1, 4096, buffer, buffer_size);
    return 0;
#else
    /* Nothing was encoded and *size was never written, so report failure
       instead of a success the caller could act on. */
    return 1;
#endif
}

/******************************************************************************/
int
rfx_encode_rgb(struct rfxencode *enc, char *rgb_data,
Expand Down
28 changes: 19 additions & 9 deletions src/rfxencode_tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,25 @@ rfx_encode_component_rlgr3(struct rfxencode *enc,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_component_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_component_amd64_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
rfx_encode_component_rlgr1_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_component_rlgr3_x86_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_component_rlgr1_amd64_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_component_rlgr3_amd64_sse2(struct rfxencode *enc,
const int *quantization_values,
uint8 *data,
uint8 *buffer, int buffer_size, int *size);
int
rfx_encode_rgb(struct rfxencode *enc, char *rgb_data,
int width, int height, int stride_bytes,
Expand Down
5 changes: 4 additions & 1 deletion src/x86/funcs_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ x86 asm files
int
cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx);
int
dwt_shift_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
dwt_shift_x86_sse2(const int *quantization_values, uint8 *data,
sint16 *dwt_buffer1, sint16 *dwt_buffer);
int
diff_rlgr1_x86(sint16 *co, int num_co, uint8 *dst, int dst_bytes);
int
diff_rlgr3_x86(sint16 *co, int num_co, uint8 *dst, int dst_bytes);

Expand Down
3 changes: 2 additions & 1 deletion src/x86/rfxdwt_x86_sse2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ section .data
%endmacro

;int
;dwt_shift_x86_sse2(const int* qtable, sint8* src, sint16* dst, sint16* temp)
;dwt_shift_x86_sse2(const int *quantization_values, uint8 *data,
; sint16 *dwt_buffer1, sint16 *dwt_buffer);

PROC dwt_shift_x86_sse2
push ebx
Expand Down

0 comments on commit 61f6e92

Please sign in to comment.