Skip to content

Commit

Permalink
AE: fix passthrough for truehd
Browse files Browse the repository at this point in the history
  • Loading branch information
FernetMenta committed Nov 30, 2015
1 parent 02f0b62 commit adfa2b8
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 5 deletions.
5 changes: 4 additions & 1 deletion xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp
Expand Up @@ -74,7 +74,10 @@ void CEngineStats::AddSamples(int samples, std::list<CActiveAEStream*> &streams)
std::deque<CSampleBuffer*>::iterator itBuf;
for(itBuf=(*it)->m_processingSamples.begin(); itBuf!=(*it)->m_processingSamples.end(); ++itBuf)
{
delay += (float)(*itBuf)->pkt->nb_samples / (*itBuf)->pkt->config.sample_rate;
if (m_pcmOutput)
delay += (float)(*itBuf)->pkt->nb_samples / (*itBuf)->pkt->config.sample_rate;
else
delay += m_sinkFormat.m_streamInfo.GetDuration() / 1000;

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 2, 2015

That's already seconds isn't it? Why the / 1000?

This comment has been minimized.

Copy link
@FernetMenta

FernetMenta Dec 3, 2015

Author Owner

getDuration is in ms

}
delay += (*it)->m_resampleBuffers->GetDelay();
(*it)->m_bufferedTime = delay;
Expand Down
16 changes: 15 additions & 1 deletion xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAESink.cpp
Expand Up @@ -895,7 +895,21 @@ unsigned int CActiveAESink::OutputSamples(CSampleBuffer* samples)

if (m_requestedFormat.m_dataFormat == AE_FMT_RAW && m_needIecPack && samples->pool)
{
m_packer->Pack(m_sinkFormat.m_streamInfo, buffer[0], frames);
if (m_sinkFormat.m_streamInfo.m_type == CAEStreamInfo::STREAM_TYPE_TRUEHD)
{
int offset;
int len;
m_packer->GetBuffer();
for (int i=0; i<24; i++)
{
offset = i*2560;
len = (*(buffer[0] + offset+2560-2) << 8) + *(buffer[0] + offset+2560-1);
m_packer->Pack(m_sinkFormat.m_streamInfo, buffer[0] + offset, len);
}
}
else
m_packer->Pack(m_sinkFormat.m_streamInfo, buffer[0], frames);

unsigned int size = m_packer->GetSize();
packBuffer = m_packer->GetBuffer();
buffer = &packBuffer;
Expand Down
30 changes: 27 additions & 3 deletions xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp
Expand Up @@ -263,13 +263,37 @@ unsigned int CActiveAEStream::AddData(uint8_t* const *data, unsigned int offset,
}
copied += minFrames;

bool rawPktComplete = false;
{
CSingleLock lock(*m_statsLock);
m_currentBuffer->pkt->nb_samples += minFrames;
m_bufferedTime += (double)minFrames / m_currentBuffer->pkt->config.sample_rate;
if (m_format.m_dataFormat != AE_FMT_RAW)
{
m_currentBuffer->pkt->nb_samples += minFrames;
m_bufferedTime += (double)minFrames / m_currentBuffer->pkt->config.sample_rate;
}
else
{
if (m_format.m_streamInfo.m_type == CAEStreamInfo::STREAM_TYPE_TRUEHD)
{
m_currentBuffer->pkt->nb_samples += 2560;
uint8_t highByte = (minFrames >> 8) & 0xFF;
uint8_t lowByte = minFrames & 0xFF;
memcpy(m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-2, &highByte, 1);

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

I don't understand this highByte / lowByte derived from minFrames. The 2560 is the TRUEHD_FRAME_OFFSET in order to build up 24 frames.

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

Something else: Could you use = instead of the memcpy ? :-)

This comment has been minimized.

Copy link
@FernetMenta

FernetMenta Dec 3, 2015

Author Owner

you can't cast any adress to a multi byte variable like int. the only reliable way it to write bytes. i used memcopy to be consistent with the rest of this block.

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

Thx. That makes sense, yes!

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

I think I am now confused heavily. As we write single bytes - and as layout might be a problem depending on the type / architecture wouldn't then a simple memset also work? Or is the representation as unsigned char that memset wants again problematic?

or alternatively?

uint8_t high =  (minFrames >> 8) & 0xFF;
uint8_t  low = minFrames & 0xFF;
uint8_t * p;
p = (uint8_t *) m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-2;
*p = high;
p++; // we could do this one by ++ in one command
*p = low;

which would result in:

uint8_t * p;
p = (uint8_t *) m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-2;
*p = (uint8_t) ((minFrames >> 8) & 0xFF);
p++; // we could do this one by ++ in one command
*p = (uint8_t) (minFrames & 0xFF);

or even:

*((uint8_t *) m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-2) = (uint8_t) ((minFrames >> 8) & 0xFF);
*((uint8_t *) m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-1) = (uint8_t) (minFrames & 0xFF);

via @opdenkamp :-)

This comment has been minimized.

Copy link
@opdenkamp

opdenkamp via email Dec 3, 2015

This comment has been minimized.

Copy link
@MrMC

MrMC Dec 3, 2015

thx opdenkamp :) and, since I'm also a comment freak. some comments as to why even do this, would be nice for those that stumble along long after the original authors are gone.

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

I will sent a PR if @FernetMenta is okay with our investigation: fritsch@e091f4e

This comment has been minimized.

Copy link
@FernetMenta

FernetMenta Dec 3, 2015

Author Owner

what exactly is the issue here?

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

The issue is, that a memcpy is very expensive, if it's used for two times one byte. As the original issue was the memory to take care of the memory organization, the PR I sent you will fix both. Memory alignment, but also save a whole lot of cycles ...

Remember this is done 24 times for every TrueHD package.

This comment has been minimized.

Copy link
@FernetMenta

FernetMenta Dec 3, 2015

Author Owner

I am not aware that a memcpy would be expensive even if you copy just a single byte. What is the source of your information?

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

Here, have a look: http://sprunge.us/jiTQ

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

And here what the compiler can do: http://sprunge.us/RZHG

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

Code:

#include <string.h>
#include <stdio.h>
int main()
{

 char buffer[1024];
 char src = 5;
 for (int i = 0; i < sizeof(buffer); ++i)
 {
   buffer[i] = src;
 }
 return buffer[0];

}

Assembly:

GAS LISTING /tmp/ccR9oO8K.s             page 1


   1                    .file   "memcpy.c"
   2                    .section    .text.unlikely,"ax",@progbits
   3                .LCOLDB0:
   4                    .section    .text.startup,"ax",@progbits
   5                .LHOTB0:
   6                    .p2align 4,,15
   7                    .globl  main
   9                main:
  10                .LFB47:
  11                    .cfi_startproc
  12 0000 4881EC18      subq    $1048, %rsp
  12      040000
  13                    .cfi_def_cfa_offset 1056
  14 0007 488D9424      leaq    1024(%rsp), %rdx
  14      00040000 
  15 000f 64488B04      movq    %fs:40, %rax
  15      25280000 
  15      00
  16 0018 48898424      movq    %rax, 1032(%rsp)
  16      08040000 
  17 0020 31C0          xorl    %eax, %eax
  18 0022 4889E0        movq    %rsp, %rax
  19                    .p2align 4,,10
  20 0025 0F1F00        .p2align 3
  21                .L2:
  22 0028 C60005        movb    $5, (%rax)
  23 002b 4883C001      addq    $1, %rax
  24 002f 4839D0        cmpq    %rdx, %rax
  25 0032 75F4          jne .L2
  26 0034 488B8C24      movq    1032(%rsp), %rcx
  26      08040000 
  27 003c 6448330C      xorq    %fs:40, %rcx
  27      25280000 
  27      00
  28 0045 0FBE0424      movsbl  (%rsp), %eax
  29 0049 7508          jne .L7
  30 004b 4881C418      addq    $1048, %rsp
  30      040000
  31                    .cfi_remember_state
  32                    .cfi_def_cfa_offset 8
  33 0052 C3            ret
  34                .L7:
  35                    .cfi_restore_state
  36 0053 E8000000      call    __stack_chk_fail
  36      00
  37                    .cfi_endproc
  38                .LFE47:
  40                    .section    .text.unlikely
  41                .LCOLDE0:
  42                    .section    .text.startup
  43                .LHOTE0:
  44                    .ident  "GCC: (Ubuntu 5.2.1-22ubuntu2) 5.2.1 20151010"
  45                    .section    .note.GNU-stack,"",@progbits
GAS LISTING /tmp/ccR9oO8K.s            page 2


DEFINED SYMBOLS
                            *ABS*:0000000000000000 memcpy.c
     /tmp/ccR9oO8K.s:9      .text.startup:0000000000000000 main

UNDEFINED SYMBOLS
__stack_chk_fail

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

Code:

#include <string.h>
#include <stdio.h>
int main()
{

 char buffer[1024];
 char src = 5;

 for (int i = 0; i < sizeof(buffer); ++i)
 {
   memcpy(buffer + i, &src, 1);
 }
 return buffer[0];

}

Assembly:

GAS LISTING /tmp/cc2B5G9w.s             page 1


   1                    .file   "memcpy.c"
   2                    .section    .text.unlikely,"ax",@progbits
   3                .LCOLDB0:
   4                    .section    .text.startup,"ax",@progbits
   5                .LHOTB0:
   6                    .p2align 4,,15
   7                    .globl  main
   9                main:
  10                .LFB47:
  11                    .cfi_startproc
  12 0000 4881EC18      subq    $1048, %rsp
  12      040000
  13                    .cfi_def_cfa_offset 1056
  14 0007 488D9424      leaq    1024(%rsp), %rdx
  14      00040000 
  15 000f 64488B04      movq    %fs:40, %rax
  15      25280000 
  15      00
  16 0018 48898424      movq    %rax, 1032(%rsp)
  16      08040000 
  17 0020 31C0          xorl    %eax, %eax
  18 0022 4889E0        movq    %rsp, %rax
  19                    .p2align 4,,10
  20 0025 0F1F00        .p2align 3
  21                .L2:
  22 0028 C60005        movb    $5, (%rax)
  23 002b 4883C001      addq    $1, %rax
  24 002f 4839D0        cmpq    %rdx, %rax
  25 0032 75F4          jne .L2
  26 0034 488B8C24      movq    1032(%rsp), %rcx
  26      08040000 
  27 003c 6448330C      xorq    %fs:40, %rcx
  27      25280000 
  27      00
  28 0045 0FBE0424      movsbl  (%rsp), %eax
  29 0049 7508          jne .L7
  30 004b 4881C418      addq    $1048, %rsp
  30      040000
  31                    .cfi_remember_state
  32                    .cfi_def_cfa_offset 8
  33 0052 C3            ret
  34                .L7:
  35                    .cfi_restore_state
  36 0053 E8000000      call    __stack_chk_fail
  36      00
  37                    .cfi_endproc
  38                .LFE47:
  40                    .section    .text.unlikely
  41                .LCOLDE0:
  42                    .section    .text.startup
  43                .LHOTE0:
  44                    .ident  "GCC: (Ubuntu 5.2.1-22ubuntu2) 5.2.1 20151010"
  45                    .section    .note.GNU-stack,"",@progbits
GAS LISTING /tmp/cc2B5G9w.s            page 2


DEFINED SYMBOLS
                            *ABS*:0000000000000000 memcpy.c
     /tmp/cc2B5G9w.s:9      .text.startup:0000000000000000 main

UNDEFINED SYMBOLS
__stack_chk_fail

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

@MrMC @opdenkamp for me @FernetMenta is right - it optimized to the exact same code :-)

This comment has been minimized.

Copy link
@wsnipex

wsnipex Dec 3, 2015

tried this on ARM, when using -O2 there is no difference there either. compiler does its job

This comment has been minimized.

Copy link
@wsnipex

wsnipex Dec 3, 2015

even on O1 there is no diff

This comment has been minimized.

Copy link
@fritsch

fritsch Dec 3, 2015

I asked in ffmpeg channel and got:
21:16 @Nevcairiel any half-decent compiler would hopefully replace the memcpy with a simple assignment in such a case

So - yes we rely on the compiler

memcpy(m_currentBuffer->pkt->data[0]+m_currentBuffer->pkt->nb_samples-1, &lowByte, 1);
m_bufferedTime += m_format.m_streamInfo.GetDuration() / 1000 / 24;
if (m_currentBuffer->pkt->nb_samples / 2560 == 24)
rawPktComplete = true;
}
else
{
m_bufferedTime += m_format.m_streamInfo.GetDuration() / 1000;
m_currentBuffer->pkt->nb_samples += minFrames;
rawPktComplete = true;
}
}
}

if (m_currentBuffer->pkt->nb_samples == m_currentBuffer->pkt->max_nb_samples || m_format.m_dataFormat == AE_FMT_RAW)
if (m_currentBuffer->pkt->nb_samples == m_currentBuffer->pkt->max_nb_samples || rawPktComplete)
{
MsgStreamSample msgData;
msgData.buffer = m_currentBuffer;
Expand Down
1 change: 1 addition & 0 deletions xbmc/cores/AudioEngine/Utils/AEBitstreamPacker.cpp
Expand Up @@ -98,6 +98,7 @@ unsigned int CAEBitstreamPacker::GetSize()
uint8_t* CAEBitstreamPacker::GetBuffer()
{
m_dataSize = 0;
m_trueHDPos = 0;
return m_packedBuffer;
}

Expand Down

6 comments on commit adfa2b8

@MrMC
Copy link

@MrMC MrMC commented on adfa2b8 Dec 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, if you trust all version of compilers on all platforms from now to the future to do the right thing.

@fritsch
Copy link

@fritsch fritsch commented on adfa2b8 Dec 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that was the question. I found it really curious that it seems that gcc uses memcpy itself internally for a simple assignment.

I think there are way, way less optimized compilers for ARM / whatever versions - so that we should go with the simple assignment version that I Prd.

I will test with gcc 4.9 just for fun, too.

@FernetMenta
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't get me wrong. I am not against a direct assignment. just was curious.

@fritsch
Copy link

@fritsch fritsch commented on adfa2b8 Dec 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am now also curious :-)

@MrMC could you compile test on an ARM machine?

Perhaps this code here:

#include <string.h>
#include <stdio.h>
int main()
{

 char buffer[1024];
 char src = 5;

 for (int i = 0; i < 1023; ++i)
 {
   // Remove the following two for test 2
   //buffer[i] = src;
   //buffer[i+1] = buffer[i];
   // Comment these two for test 2
   memcpy(buffer + i, &src, 1);
   memcpy(buffer + i + 1, &src, 1);
 }
 return buffer[0];
}

to assembly?

@MrMC
Copy link

@MrMC MrMC commented on adfa2b8 Dec 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clang (darwin) arm64

test:
(__TEXT,__text) section
_main:
0000000100007ecc stp x28, x27, [sp, #-32]!
0000000100007ed0 stp x29, x30, [sp, #16]
0000000100007ed4 add x29, sp, #16
0000000100007ed8 sub sp, sp, xbmc#1088
0000000100007edc add x8, sp, #32
0000000100007ee0 movz w9, #0x5
0000000100007ee4 adrp x10, 1 ; 0x100007000
0000000100007ee8 ldr x10, [x10] ; literal pool symbol address: ___stack_chk_guard
0000000100007eec ldr x10, [x10]
0000000100007ef0 mov x0, x10
0000000100007ef4 str x10, [x8]
0000000100007ef8 str wzr, [sp, #52]
0000000100007efc strb w9, [sp, #51]
0000000100007f00 str wzr, [sp, #44]
0000000100007f04 str x8, [sp, #24]
0000000100007f08 str x0, [sp, #16]
0000000100007f0c ldr w8, [sp, #44]
0000000100007f10 cmp w8, xbmc#1023
0000000100007f14 b.ge 0x100007f4c
0000000100007f18 add x8, sp, #56
0000000100007f1c ldrsw x9, [sp, #44]
0000000100007f20 add x9, x8, x9
0000000100007f24 ldrb w10, [sp, #51]
0000000100007f28 strb w10, [x9]
0000000100007f2c ldrsw x9, [sp, #44]
0000000100007f30 add x8, x8, x9
0000000100007f34 ldrb w10, [sp, #51]
0000000100007f38 strb w10, [x8, #1]
0000000100007f3c ldr w8, [sp, #44]
0000000100007f40 add w8, w8, #1
0000000100007f44 str w8, [sp, #44]
0000000100007f48 b 0x100007f0c
0000000100007f4c adrp x8, 1 ; 0x100007000
0000000100007f50 ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
0000000100007f54 ldrsb w0, [sp, #56]
0000000100007f58 ldr x8, [x8]
0000000100007f5c ldr x9, [sp, #24]
0000000100007f60 ldr x10, [x9]
0000000100007f64 cmp x8, x10
0000000100007f68 str w0, [sp, #12]
0000000100007f6c b.ne 0x100007f84
0000000100007f70 ldr w0, [sp, #12]
0000000100007f74 sub sp, x29, #16
0000000100007f78 ldp x29, x30, [sp, #16]
0000000100007f7c ldp x28, x27, [sp], #32
0000000100007f80 ret
0000000100007f84 bl 0x100007f88 ; symbol stub for: ___stack_chk_fail
S

@MrMC
Copy link

@MrMC MrMC commented on adfa2b8 Dec 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clang (darwin) armv7

(__TEXT,__text) section
_main:
0000bf50 b580 push {r7, lr}
0000bf52 466f mov r7, sp
0000bf54 f5ad6d82 sub.w sp, sp, #0x410
0000bf58 b081 sub sp, #0x4
0000bf5a 2000 movs r0, #0x0
0000bf5c 2105 movs r1, #0x5
0000bf5e f24002a2 movw r2, #0xa2
0000bf62 f2c00200 movt r2, #0x0
0000bf66 447a add r2, pc
0000bf68 6812 ldr r2, [r2]
0000bf6a 6812 ldr r2, [r2]
0000bf6c f8472c04 str r2, [r7, #-4]
0000bf70 9003 str r0, [sp, #0xc]
0000bf72 f88d100b strb.w r1, [sp, #0xb]
0000bf76 9001 str r0, [sp, #0x4]
0000bf78 f24030ff movw r0, #0x3ff
0000bf7c 9901 ldr r1, [sp, #0x4]
0000bf7e 4281 cmp r1, r0
0000bf80 da0e bge 0xbfa0
0000bf82 a804 add r0, sp, #0x10
0000bf84 9901 ldr r1, [sp, #0x4]
0000bf86 4401 add r1, r0
0000bf88 f89d200b ldrb.w r2, [sp, #0xb]
0000bf8c 700a strb r2, [r1]
0000bf8e 9901 ldr r1, [sp, #0x4]
0000bf90 4408 add r0, r1
0000bf92 f89d100b ldrb.w r1, [sp, #0xb]
0000bf96 7041 strb r1, [r0, #0x1]
0000bf98 9801 ldr r0, [sp, #0x4]
0000bf9a 3001 adds r0, #0x1
0000bf9c 9001 str r0, [sp, #0x4]
0000bf9e e7eb b 0xbf78
0000bfa0 f2400060 movw r0, #0x60
0000bfa4 f2c00000 movt r0, #0x0
0000bfa8 4478 add r0, pc
0000bfaa 6800 ldr r0, [r0]
0000bfac f99d1010 ldrsb.w r1, [sp, #0x10]
0000bfb0 6800 ldr r0, [r0]
0000bfb2 f8572c04 ldr r2, [r7, #-4]
0000bfb6 4290 cmp r0, r2
0000bfb8 9100 str r1, [sp]
0000bfba d104 bne 0xbfc6
0000bfbc 9800 ldr r0, [sp]
0000bfbe f50d6d82 add.w sp, sp, #0x410
0000bfc2 b001 add sp, #0x4
0000bfc4 bd80 pop {r7, pc}
0000bfc6 f000e81a blx 0xbffc

Please sign in to comment.