Skip to content

Commit

Permalink
src/mumble/AudioInput: Fix echo cancellation, added options to dump i…
Browse files Browse the repository at this point in the history
…nput audio data and echo canceller queue state

(Backported from mumble-voip#4167 and adapted to work with 1.3.x)
  • Loading branch information
fedetft authored and Krzmbrzl committed May 26, 2020
1 parent 8b4692a commit f3017c4
Show file tree
Hide file tree
Showing 5 changed files with 264 additions and 88 deletions.
220 changes: 146 additions & 74 deletions src/mumble/AudioInput.cpp
Expand Up @@ -26,6 +26,96 @@ extern "C" {
}
#endif

void Resynchronizer::addMic(short *mic)
{
bool drop = false;
{
std::unique_lock<std::mutex> l(m);
micQueue.push_back(mic);
switch (state) {
case S0: state = S1a; break;
case S1a: state = S2; break;
case S1b: state = S2; break;
case S2: state = S3; break;
case S3: state = S4a; break;
case S4a: state = S5; break;
case S4b: drop = true; break;
case S5: drop = true; break;
}
if (drop) {
delete[] micQueue.front();
micQueue.pop_front();
}
}
if (bDebugPrintQueue) {
if (drop) qWarning("Resynchronizer::addMic(): dropped microphone chunk due to overflow");
printQueue('+');
}
}

// Pairs a speaker chunk with the microphone chunk at the front of the queue
// and advances the state machine.
//
// In states S0 and S1a the speaker chunk is not paired: it is released with
// delete[] and a default-constructed AudioChunk is returned. In every other
// state the front of micQueue is removed and returned together with the
// speaker pointer; neither buffer is released here in that case.
//
// speaker: speaker chunk. It is released with delete[] here when dropped, so
//          it has to come from new[].
// Returns the (mic, speaker) pair, or a default-constructed AudioChunk when
// the speaker chunk was dropped.
AudioChunk Resynchronizer::addSpeaker(short *speaker)
{
    AudioChunk paired;
    bool underflow = false;
    {
        std::unique_lock<std::mutex> guard(m);
        switch (state) {
            case S0:
            case S1a:
                // No state transition here: the speaker chunk is discarded below.
                underflow = true;
                break;
            case S1b:
                state = S0;
                break;
            case S2:
                state = S1b;
                break;
            case S3:
                state = S2;
                break;
            case S4a:
            case S4b:
                state = S3;
                break;
            case S5:
                state = S4b;
                break;
        }
        if (!underflow) {
            paired = AudioChunk(micQueue.front(), speaker);
            micQueue.pop_front();
        }
    }

    if (underflow) {
        delete[] speaker;
    }

    // Diagnostics run after the lock is released; printQueue() acquires it itself.
    if (bDebugPrintQueue) {
        if (underflow) {
            qWarning("Resynchronizer::addSpeaker(): dropped speaker chunk due to underflow");
        }
        printQueue('-');
    }
    return paired;
}

void Resynchronizer::reset()
{
if (bDebugPrintQueue) qWarning("Resetting echo queue");
std::unique_lock<std::mutex> l(m);
state = S0;
while (!micQueue.empty()) {
delete[] micQueue.front();
micQueue.pop_front();
}
}

// Destructor: delegates to reset(), which frees any microphone chunks that are
// still waiting in the queue.
Resynchronizer::~Resynchronizer()
{
    this->reset();
}

void Resynchronizer::printQueue(char who)
{
unsigned int mic;
{
std::unique_lock<std::mutex> l(m);
mic = static_cast<unsigned int>(micQueue.size());
}
std::string line;
line.reserve(32);
line += who;
line += " Echo queue [";
for(unsigned int i = 0; i < 5; i++) line += i < mic ? '#' : ' ';
line += "]\r";
// This relies on \r to retrace always on the same line, can't use qWarining
printf("%s",line.c_str());
fflush(stdout);
}

// Remember that we cannot use static member classes that are not pointers, as the constructor
// for AudioInputRegistrar() might be called before they are initialized, as the constructor
// is called from global initialization.
Expand Down Expand Up @@ -75,7 +165,15 @@ bool AudioInputRegistrar::canExclusive() const {
}

AudioInput::AudioInput() : opusBuffer(g.s.iFramesPerPacket * (SAMPLE_RATE / 100)) {
adjustBandwidth(g.iMaxBandwidth, iAudioQuality, iAudioFrames);
bDebugDumpInput = g.bDebugDumpInput;
resync.bDebugPrintQueue = g.bDebugPrintQueue;
if (bDebugDumpInput) {
outMic.open("raw_microphone_dump", std::ios::binary);
outSpeaker.open("speaker_dump", std::ios::binary);
outProcessed.open("processed_microphone_dump", std::ios::binary);
}

adjustBandwidth(g.iMaxBandwidth, iAudioQuality, iAudioFrames, bAllowLowDelay);

g.iAudioBandwidth = getNetworkBandwidth(iAudioQuality, iAudioFrames);

Expand All @@ -87,9 +185,6 @@ AudioInput::AudioInput() : opusBuffer(g.s.iFramesPerPacket * (SAMPLE_RATE / 100)
cCodec = NULL;
ceEncoder = NULL;

iSampleRate = SAMPLE_RATE;
iFrameSize = SAMPLE_RATE / 100;

#ifdef USE_OPUS
oCodec = g.oCodec;
if (oCodec) {
Expand Down Expand Up @@ -124,13 +219,6 @@ AudioInput::AudioInput() : opusBuffer(g.s.iFramesPerPacket * (SAMPLE_RATE / 100)
sppPreprocess = NULL;
sesEcho = NULL;
srsMic = srsEcho = NULL;
iJitterSeq = 0;
iMinBuffered = 1000;

psMic = new short[iFrameSize];
psClean = new short[iFrameSize];

psSpeaker = NULL;

iEchoChannels = iMicChannels = 0;
iEchoFilled = iMicFilled = 0;
Expand All @@ -141,7 +229,7 @@ AudioInput::AudioInput() : opusBuffer(g.s.iFramesPerPacket * (SAMPLE_RATE / 100)

bResetEncoder = true;

pfMicInput = pfEchoInput = pfOutput = NULL;
pfMicInput = pfEchoInput = NULL;

iBitrate = 0;
dPeakSignal = dPeakSpeaker = dPeakMic = dPeakCleanMic = 0.0;
Expand Down Expand Up @@ -176,9 +264,6 @@ AudioInput::~AudioInput() {
cCodec->celt_encoder_destroy(ceEncoder);
}

foreach(short *buf, qlEchoFrames)
delete [] buf;

if (sppPreprocess)
speex_preprocess_state_destroy(sppPreprocess);
if (sesEcho)
Expand All @@ -189,13 +274,8 @@ AudioInput::~AudioInput() {
if (srsEcho)
speex_resampler_destroy(srsEcho);

delete [] psMic;
delete [] psClean;
delete [] psSpeaker;

delete [] pfMicInput;
delete [] pfEchoInput;
delete [] pfOutput;
}

bool AudioInput::isTransmitting() const {
Expand Down Expand Up @@ -381,15 +461,13 @@ void AudioInput::initializeMixer() {
speex_resampler_destroy(srsEcho);
delete [] pfMicInput;
delete [] pfEchoInput;
delete [] pfOutput;

if (iMicFreq != iSampleRate)
srsMic = speex_resampler_init(1, iMicFreq, iSampleRate, 3, &err);

iMicLength = (iFrameSize * iMicFreq) / iSampleRate;

pfMicInput = new float[iMicLength];
pfOutput = new float[iFrameSize * qMax(1U,iEchoChannels)];

if (iEchoChannels > 0) {
bEchoMulti = g.s.bEchoMulti;
Expand Down Expand Up @@ -447,6 +525,7 @@ void AudioInput::addMic(const void *data, unsigned int nsamp) {
iMicFilled = 0;

// If needed resample frame
float *pfOutput = srsMic ? (float*)alloca(iFrameSize*sizeof(float)) : nullptr;
float *ptr = srsMic ? pfOutput : pfMicInput;

if (srsMic) {
Expand All @@ -455,43 +534,21 @@ void AudioInput::addMic(const void *data, unsigned int nsamp) {
speex_resampler_process_float(srsMic, 0, pfMicInput, &inlen, pfOutput, &outlen);
}

// If echo cancellation is enabled the pointer ends up in the resynchronizer queue
// and may need to outlive this function's frame
short *psMic = iEchoChannels > 0 ? new short[iFrameSize] : (short*)alloca(iFrameSize*sizeof(short));

// Convert float to 16bit PCM
const float mul = 32768.f;
for (int j = 0; j < iFrameSize; ++j)
psMic[j] = static_cast<short>(qBound(-32768.f, (ptr[j] * mul), 32767.f));

// If we have echo chancellation enabled...
// If we have echo cancellation enabled...
if (iEchoChannels > 0) {
short *echo = NULL;

{
QMutexLocker l(&qmEcho);

if (qlEchoFrames.isEmpty()) {
iJitterSeq = 0;
iMinBuffered = 1000;
} else {
// Compensate for drift between the microphone and the echo source
iMinBuffered = qMin(iMinBuffered, qlEchoFrames.count());

if ((iJitterSeq > 100) && (iMinBuffered > 1)) {
iJitterSeq = 0;
iMinBuffered = 1000;
delete [] qlEchoFrames.takeFirst();
}
echo = qlEchoFrames.takeFirst();
}
}

if (echo) {
// We have echo data for the current frame, remember that
delete [] psSpeaker;
psSpeaker = echo;
}
resync.addMic(psMic);
} else {
encodeAudioFrame(AudioChunk(psMic));
}

// Encode and send frame
encodeAudioFrame();
}
}
}
Expand All @@ -506,12 +563,12 @@ void AudioInput::addEcho(const void *data, unsigned int nsamp) {

if (eEchoFormat == SampleFloat) {
for (unsigned int i=0;i<samples;++i)
pfEchoInput[i] = reinterpret_cast<const float *>(data)[i];
pfEchoInput[i + iEchoFilled * iEchoChannels] = reinterpret_cast<const float *>(data)[i];
}
else {
// 16bit PCM -> float
for (unsigned int i=0;i<samples;++i)
pfEchoInput[i] = static_cast<float>(reinterpret_cast<const short *>(data)[i]) * (1.0f / 32768.f);
pfEchoInput[i + iEchoFilled * iEchoChannels] = static_cast<float>(reinterpret_cast<const short *>(data)[i]) * (1.0f / 32768.f);
}
} else {
// Mix echo channels (converts 16bit PCM -> float if needed)
Expand All @@ -535,6 +592,7 @@ void AudioInput::addEcho(const void *data, unsigned int nsamp) {
iEchoFilled = 0;

// Resample if necessary
float *pfOutput = srsEcho ? (float*)alloca(iEchoFrameSize*sizeof(float)) : nullptr;
float *ptr = srsEcho ? pfOutput : pfEchoInput;

if (srsEcho) {
Expand All @@ -547,14 +605,16 @@ void AudioInput::addEcho(const void *data, unsigned int nsamp) {

// float -> 16bit PCM
const float mul = 32768.f;
for (unsigned int j=0;j<iEchoFrameSize;++j)
outbuff[j] = static_cast<short>(ptr[j] * mul);

// Push frame into the echo chancellers jitter buffer
QMutexLocker l(&qmEcho);
for (int j = 0; j < iEchoFrameSize; ++j) {
outbuff[j] = static_cast<short>(qBound(-32768.f, (ptr[j] * mul), 32767.f));
}

iJitterSeq = qMin(iJitterSeq + 1,10000U);
qlEchoFrames.append(outbuff);
auto chunk = resync.addSpeaker(outbuff);
if (!chunk.empty()) {
encodeAudioFrame(chunk);
delete[] chunk.mic;
delete[] chunk.speaker;
}
}
}
}
Expand Down Expand Up @@ -631,6 +691,7 @@ void AudioInput::resetAudioProcessor() {
speex_preprocess_state_destroy(sppPreprocess);
if (sesEcho)
speex_echo_state_destroy(sesEcho);
resync.reset();

sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate);

Expand All @@ -654,7 +715,8 @@ void AudioInput::resetAudioProcessor() {
speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

if (iEchoChannels > 0) {
sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize * 10, 1, bEchoMulti ? iEchoChannels : 1);
int filterSize = iFrameSize * (10 + resync.getNominalLag());
sesEcho = speex_echo_state_init_mc(iFrameSize, filterSize, 1, bEchoMulti ? iEchoChannels : 1);
iArg = iSampleRate;
speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg);
speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho);
Expand Down Expand Up @@ -784,7 +846,7 @@ int AudioInput::encodeCELTFrame(short *psSource, EncodingOutputBuffer& buffer) {
return len;
}

void AudioInput::encodeAudioFrame() {
void AudioInput::encodeAudioFrame(AudioChunk chunk) {
int iArg;
int i;
float sum;
Expand All @@ -807,16 +869,17 @@ void AudioInput::encodeAudioFrame() {
sum=1.0f;
max = 1;
for (i=0;i<iFrameSize;i++) {
sum += static_cast<float>(psMic[i] * psMic[i]);
max = std::max(static_cast<short>(abs(psMic[i])), max);
sum += static_cast<float>(chunk.mic[i] * chunk.mic[i]);
max = std::max(static_cast<short>(abs(chunk.mic[i])), max);
}
dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
dMaxMic = max;

if (psSpeaker && (iEchoChannels > 0)) {
if (chunk.speaker && (iEchoChannels > 0)) {
sum=1.0f;
for (i=0;i<iFrameSize;i++)
sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
for (i = 0; i < iEchoFrameSize; ++i) {
sum += static_cast<float>(chunk.speaker[i] * chunk.speaker[i]);
}
dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
} else {
dPeakSpeaker = 0.0;
Expand All @@ -830,13 +893,13 @@ void AudioInput::encodeAudioFrame() {
if (g.s.bDenoise && denoiseState && (iFrameSize == 480)) {
float denoiseFrames[480];
for (int i = 0; i < 480; i++) {
denoiseFrames[i] = psMic[i];
denoiseFrames[i] = chunk.mic[i];
}

rnnoise_process_frame(denoiseState, denoiseFrames, denoiseFrames);

for (int i = 0; i < 480; i++) {
psMic[i] = denoiseFrames[i];
chunk.mic[i] = denoiseFrames[i];
}
}
#endif
Expand All @@ -846,20 +909,29 @@ void AudioInput::encodeAudioFrame() {
iArg = g.s.iNoiseSuppress - iArg;
speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

if (sesEcho && psSpeaker) {
speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
short psClean[iFrameSize];
if (sesEcho && chunk.speaker) {
speex_echo_cancellation(sesEcho, chunk.mic, chunk.speaker, psClean);
speex_preprocess_run(sppPreprocess, psClean);
psSource = psClean;
} else {
speex_preprocess_run(sppPreprocess, psMic);
psSource = psMic;
speex_preprocess_run(sppPreprocess, chunk.mic);
psSource = chunk.mic;
}

sum=1.0f;
for (i=0;i<iFrameSize;i++)
sum += static_cast<float>(psSource[i] * psSource[i]);
float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

if (bDebugDumpInput) {
outMic.write(reinterpret_cast<const char *>(chunk.mic), iFrameSize * sizeof(short));
if (chunk.speaker) {
outSpeaker.write(reinterpret_cast<const char *>(chunk.speaker), iEchoFrameSize * sizeof(short));
}
outProcessed.write(reinterpret_cast<const char *>(psSource), iFrameSize * sizeof(short));
}

spx_int32_t prob = 0;
speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
Expand Down

0 comments on commit f3017c4

Please sign in to comment.