Permalink
Browse files

Optimization of AMR NB and WB decoders for MIPS

AMR NB and WB decoders are optimized for MIPS architecture.
Appropriate Makefiles are changed accordingly.

Cnfigure script is changed in order to support optimizations.
 Optimizations are enabled by default when compiling is done for
  mips architecture.
 Appropriate cflags are automatically set.
 Support for several mips CPUs is added in configure script.

New ffmpeg options are added for disabling optimizations.

The FFMPEG option --disable-mipsfpu disables MIPS floating point
 optimizations.
The FFMPEG option --disable-mips32r2 disables MIPS32R2
 optimizations.
The FFMPEG option --disable-mipsdspr1 disables MIPS DSP ASE R1
 optimizations.
The FFMPEG option --disable-mipsdspr2 disables MIPS DSP ASE R2
 optimizations.

Signed-off-by: Nedeljko Babic <nbabic@mips.com>
Reviewed-by: Vitor Sessak <vitor1001@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
  • Loading branch information...
1 parent 751dcd9 commit 3827a86eacd04d9d7b356f769be553f7b8cca361 @nbabic nbabic committed with michaelni Jun 4, 2012
View
@@ -70,6 +70,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \
ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \
ALTIVEC-OBJS ARMV5TE-OBJS ARMV6-OBJS ARMVFP-OBJS MMI-OBJS \
MMX-OBJS NEON-OBJS VIS-OBJS YASM-OBJS \
+ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MIPS32R2-OBJS \
OBJS TESTOBJS
define RESET
View
@@ -4,6 +4,10 @@ OBJS-$(HAVE_ARMVFP) += $(ARMVFP-OBJS) $(ARMVFP-OBJS-yes)
OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes)
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
+OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes)
+OBJS-$(HAVE_MIPS32R2) += $(MIPS32R2-OBJS) $(MIPS32R2-OBJS-yes)
+OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS) $(MIPSDSPR1-OBJS-yes)
+OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes)
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
View
@@ -268,6 +268,10 @@ Optimization options (experts only):
--disable-neon disable NEON optimizations
--disable-vis disable VIS optimizations
--disable-yasm disable use of yasm assembler
+ --disable-mips32r2 disable MIPS32R2 optimizations
+ --disable-mipsdspr1 disable MIPS DSP ASE R1 optimizations
+ --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
+ --disable-mipsfpu disable floating point MIPS optimizations
--postproc-version=V build libpostproc version V.
Where V can be '$ALT_PP_VER_MAJOR.$ALT_PP_VER_MINOR.$ALT_PP_VER_MICRO' or 'current'. [$postproc_version_default]
@@ -1149,6 +1153,10 @@ ARCH_EXT_LIST='
ssse3
vfpv3
vis
+ mipsfpu
+ mips32r2
+ mipsdspr1
+ mipsdspr2
'
HAVE_LIST_PUB='
@@ -1368,6 +1376,10 @@ armvfp_deps="arm"
neon_deps="arm"
vfpv3_deps="armvfp"
+mipsfpu_deps="mips"
+mips32r2_deps="mips"
+mipsdspr1_deps="mips"
+mipsdspr2_deps="mips"
mmi_deps="mips"
altivec_deps="ppc"
@@ -2567,6 +2579,28 @@ elif enabled mips; then
cpuflags="-march=$cpu"
+ case $cpu in
+ 24kc)
+ disable mipsfpu
+ disable mipsdspr1
+ disable mipsdspr2
+ ;;
+ 24kf*)
+ disable mipsdspr1
+ disable mipsdspr2
+ ;;
+ 24kec|34kc|1004kc)
+ disable mipsfpu
+ disable mipsdspr2
+ ;;
+ 24kef*|34kf*|1004kf*)
+ disable mipsdspr2
+ ;;
+ 74kc)
+ disable mipsfpu
+ ;;
+ esac
+
elif enabled avr32; then
case $cpu in
@@ -2942,6 +2976,15 @@ elif enabled mips; then
check_asm loongson '"dmult.g $1, $2, $3"'
enabled mmi && check_asm mmi '"lq $2, 0($2)"'
+ enabled mips32r2 && add_cflags "-mips32r2" &&
+ check_asm mips32r2 '"rotr $t0, $t1, 1"'
+ enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" &&
+ check_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"'
+ enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" &&
+ check_asm mipsdspr2 '"absq_s.qb $t0, $t1"'
+ enabled mipsfpu && add_cflags "-mhard-float" &&
+ check_asm mipsfpu '"madd.d $f0, $f2, $f4, $f6"'
+
elif enabled ppc; then
@@ -3541,6 +3584,10 @@ if enabled arm; then
fi
if enabled mips; then
echo "MMI enabled ${mmi-no}"
+ echo "MIPS FPU enabled ${mipsfpu-no}"
+ echo "MIPS32R2 enabled ${mips32r2-no}"
+ echo "MIPS DSP R1 enabled ${mipsdspr1-no}"
+ echo "MIPS DSP R2 enabled ${mipsdspr2-no}"
fi
if enabled ppc; then
echo "AltiVec enabled ${altivec-no}"
@@ -142,3 +142,12 @@ void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
samples[0] -= tilt * *mem;
*mem = new_tilt_mem;
}
+
+void ff_acelp_filter_init(ACELPFContext *c)
+{
+ c->acelp_interpolatef = ff_acelp_interpolatef;
+ c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function;
+
+ if(HAVE_MIPSFPU)
+ ff_acelp_filter_init_mips(c);
+}
View
@@ -25,6 +25,39 @@
#include <stdint.h>
+typedef struct ACELPFContext {
+ /**
+ * Floating point version of ff_acelp_interpolate()
+ */
+ void (*acelp_interpolatef)(float *out, const float *in,
+ const float *filter_coeffs, int precision,
+ int frac_pos, int filter_length, int length);
+
+ /**
+ * Apply an order 2 rational transfer function in-place.
+ *
+ * @param out output buffer for filtered speech samples
+ * @param in input buffer containing speech data (may be the same as out)
+ * @param zero_coeffs z^-1 and z^-2 coefficients of the numerator
+ * @param pole_coeffs z^-1 and z^-2 coefficients of the denominator
+ * @param gain scale factor for final output
+ * @param mem intermediate values used by filter (should be 0 initially)
+ * @param n number of samples (should be a multiple of eight)
+ */
+ void (*acelp_apply_order_2_transfer_function)(float *out, const float *in,
+ const float zero_coeffs[2],
+ const float pole_coeffs[2],
+ float gain,
+ float mem[2], int n);
+
+}ACELPFContext;
+
+/**
+ * Initialize ACELPFContext.
+ */
+void ff_acelp_filter_init(ACELPFContext *c);
+void ff_acelp_filter_init_mips(ACELPFContext *c);
+
/**
* low-pass Finite Impulse Response filter coefficients.
*
@@ -260,3 +260,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed *in, int size)
} while (x < size && repeats);
}
}
+
+void ff_acelp_vectors_init(ACELPVContext *c)
+{
+ c->weighted_vector_sumf = ff_weighted_vector_sumf;
+
+ if(HAVE_MIPSFPU)
+ ff_acelp_vectors_init_mips(c);
+}
View
@@ -25,6 +25,30 @@
#include <stdint.h>
+typedef struct ACELPVContext {
+ /**
+ * float implementation of weighted sum of two vectors.
+ * @param[out] out result of addition
+ * @param in_a first vector
+ * @param in_b second vector
+ * @param weight_coeff_a first vector weight coefficient
+ * @param weight_coeff_a second vector weight coefficient
+ * @param length vectors length (should be a multiple of two)
+ *
+ * @note It is safe to pass the same buffer for out and in_a or in_b.
+ */
+ void (*weighted_vector_sumf)(float *out, const float *in_a, const float *in_b,
+ float weight_coeff_a, float weight_coeff_b,
+ int length);
+
+}ACELPVContext;
+
+/**
+ * Initialize ACELPVContext.
+ */
+void ff_acelp_vectors_init(ACELPVContext *c);
+void ff_acelp_vectors_init_mips(ACELPVContext *c);
+
/** Sparse representation for the algebraic codebook (fixed) vector */
typedef struct {
int n;
View
@@ -136,6 +136,11 @@ typedef struct AMRContext {
float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples
+ ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs
+ ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+ CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs
+ CELPMContext celpm_ctx; ///< context for fixed point math operations
+
} AMRContext;
/** Double version of ff_weighted_vector_sumf() */
@@ -171,6 +176,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
avcodec_get_frame_defaults(&p->avframe);
avctx->coded_frame = &p->avframe;
+ ff_acelp_filter_init(&p->acelpf_ctx);
+ ff_acelp_vectors_init(&p->acelpv_ctx);
+ ff_celp_filter_init(&p->celpf_ctx);
+ ff_celp_math_init(&p->celpm_ctx);
+
return 0;
}
@@ -214,15 +224,16 @@ static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf,
* Interpolate the LSF vector (used for fixed gain smoothing).
* The interpolation is done over all four subframes even in MODE_12k2.
*
+ * @param[in] ctx The Context
* @param[in,out] lsf_q LSFs in [0,1] for each subframe
* @param[in] lsf_new New LSFs in [0,1] for subframe 4
*/
-static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
+static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
{
int i;
for (i = 0; i < 4; i++)
- ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
+ ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
0.25 * (3 - i), 0.25 * (i + 1),
LP_FILTER_ORDER);
}
@@ -266,7 +277,7 @@ static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER],
ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
if (update)
- interpolate_lsf(p->lsf_q, lsf_q);
+ interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER);
}
@@ -329,7 +340,7 @@ static void lsf2lsp_3(AMRContext *p)
ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
// store data for computing the next frame's LSFs
- interpolate_lsf(p->lsf_q, lsf_q);
+ interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));
ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER);
@@ -395,7 +406,8 @@ static void decode_pitch_vector(AMRContext *p,
/* Calculate the pitch vector by interpolating the past excitation at the
pitch lag using a b60 hamming windowed sinc function. */
- ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int,
+ p->acelpf_ctx.acelp_interpolatef(p->excitation,
+ p->excitation + 1 - pitch_lag_int,
ff_b60_sinc, 6,
pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0),
10, AMR_SUBFRAME_SIZE);
@@ -780,12 +792,12 @@ static int synthesis(AMRContext *p, float *lpc,
for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
p->pitch_vector[i] *= 0.25;
- ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
+ p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE);
// emphasize pitch vector contribution
if (p->pitch_gain[4] > 0.5 && !overflow) {
- float energy = ff_dot_productf(excitation, excitation,
+ float energy = p->celpm_ctx.dot_productf(excitation, excitation,
AMR_SUBFRAME_SIZE);
float pitch_factor =
p->pitch_gain[4] *
@@ -800,7 +812,8 @@ static int synthesis(AMRContext *p, float *lpc,
AMR_SUBFRAME_SIZE);
}
- ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE,
+ p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
+ AMR_SUBFRAME_SIZE,
LP_FILTER_ORDER);
// detect overflow
@@ -846,10 +859,11 @@ static void update_state(AMRContext *p)
/**
* Get the tilt factor of a formant filter from its transfer function
*
+ * @param p The Context
* @param lpc_n LP_FILTER_ORDER coefficients of the numerator
* @param lpc_d LP_FILTER_ORDER coefficients of the denominator
*/
-static float tilt_factor(float *lpc_n, float *lpc_d)
+static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d)
{
float rh0, rh1; // autocorrelation at lag 0 and 1
@@ -859,11 +873,12 @@ static float tilt_factor(float *lpc_n, float *lpc_d)
hf[0] = 1.0;
memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE,
+ p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf,
+ AMR_TILT_RESPONSE,
LP_FILTER_ORDER);
- rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE);
- rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);
+ rh0 = p->celpm_ctx.dot_productf(hf, hf, AMR_TILT_RESPONSE);
+ rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);
// The spec only specifies this check for 12.2 and 10.2 kbit/s
// modes. But in the ref source the tilt is always non-negative.
@@ -883,7 +898,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
int i;
float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input
- float speech_gain = ff_dot_productf(samples, samples,
+ float speech_gain = p->celpm_ctx.dot_productf(samples, samples,
AMR_SUBFRAME_SIZE);
float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter
@@ -904,16 +919,16 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
}
memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
+ p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE,
sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
+ p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
pole_out + LP_FILTER_ORDER,
AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
- ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out,
+ ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out,
AMR_SUBFRAME_SIZE);
ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE,
@@ -990,7 +1005,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
p->fixed_gain[4] =
ff_amr_set_fixed_gain(fixed_gain_factor,
- ff_dot_productf(p->fixed_vector, p->fixed_vector,
+ p->celpm_ctx.dot_productf(p->fixed_vector, p->fixed_vector,
AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE,
p->prediction_error,
energy_mean[p->cur_frame_mode], energy_pred_fac);
@@ -1034,7 +1049,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
update_state(p);
}
- ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros,
+ p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out,
+ buf_out, highpass_zeros,
highpass_poles,
highpass_gain * AMR_SAMPLE_SCALE,
p->high_pass_mem, AMR_BLOCK_SIZE);
@@ -1045,7 +1061,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
* for fixed_gain_smooth.
* The specification has an incorrect formula: the reference decoder uses
* qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */
- ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
+ p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
0.84, 0.16, LP_FILTER_ORDER);
*got_frame_ptr = 1;
Oops, something went wrong.

0 comments on commit 3827a86

Please sign in to comment.