Skip to content
This repository has been archived by the owner on Feb 23, 2021. It is now read-only.

Commit

Permalink
149 stitch intro and body for articleservice (#151)
Browse files Browse the repository at this point in the history
Augment the Listen /articleservice API to handle stitching intro + article.
  • Loading branch information
tamarahills committed Aug 22, 2018
1 parent 305fc0f commit 55d5078
Show file tree
Hide file tree
Showing 8 changed files with 332 additions and 82 deletions.
21 changes: 20 additions & 1 deletion command/AudioFileHelper.js
Expand Up @@ -14,7 +14,18 @@ class CommandHelper {
summaryOnly ? 'summary' : 'full'
);

if (!await this.checkFileExistence(fileUrl)) {
if (!(await this.checkFileExistence(fileUrl))) {
fileUrl = '';
}

return fileUrl;
}

async getMobileFileLocation(articleId) {
// first check if we have this file in the DB
let fileUrl = await database.getMobileFileLocation(articleId);

if (!(await this.checkFileExistence(fileUrl))) {
fileUrl = '';
}

Expand Down Expand Up @@ -67,6 +78,14 @@ class CommandHelper {
async storeOutroLocation(articleId, outroLocation) {
return await database.storeOutroLocation(articleId, outroLocation);
}

/*
* Mobile file has the stitched intro and the body of the
* file.
*/
async storeMobileLocation(articleId, FileLocation) {
return await database.storeMobileLocation(articleId, FileLocation);
}
}

module.exports = CommandHelper;
92 changes: 71 additions & 21 deletions command/CommandController.js
Expand Up @@ -174,10 +174,11 @@ router.post('/articleservice', VerifyToken, async function(req, res) {
let audioUrl;
if (req.body.article_id) {
// we have a pocket item. do we already have the audio file?
audioUrl = await audioHelper.getAudioFileLocation(
audioUrl = await audioHelper.getMobileFileLocation(
req.body.article_id,
false
);
logger.info('audioUrl: ' + audioUrl);
} else {
logger.info('error: missing article_id');
}
Expand All @@ -186,21 +187,39 @@ router.post('/articleservice', VerifyToken, async function(req, res) {
// if we didn't find it in the DB, create the audio file
if (!audioUrl) {
logger.info('Did not find the audio URL in DB: ' + req.body.article_id);
audioUrl = await buildAudioFromUrl(req.body.url);

if (audioUrl) {
logger.info('built audio');
// Create the body as a local file.
let article = await getPocketArticleTextFromUrl(req.body.url);
if (article) {
// Build the stitched file first
let articleFile = await createAudioFileFromText(`${article.article}`);
let introFile = await createAudioFileFromText(buildIntro(article));
let audioUrl = await buildPocketAudio(introFile, articleFile);
logger.debug('Calling StoreMobileLocation: ' + audioUrl);
await audioHelper.storeMobileLocation(req.body.article_id, audioUrl);
result.url = audioUrl;
// Send it back to the mobile as quick as possible.
logger.info('POST article resp: ' + JSON.stringify(result));
res.status(200).send(JSON.stringify(result));

// Upload the individual parts for use by Alexa later & cleanup.
let introUrl = await polly_tts.postProcessPart(introFile);
let articleUrl = await polly_tts.postProcessPart(articleFile);
await audioHelper.storeIntroLocation(
req.body.article_id,
introUrl,
false
);
await audioHelper.storeAudioFileLocation(
req.body.article_id,
false,
audioUrl
articleUrl,
false
);
}
} else {
result.url = audioUrl;
logger.info('POST article resp: ' + JSON.stringify(result));
res.status(200).send(JSON.stringify(result));
}
result.url = audioUrl;

logger.info('POST article resp: ' + JSON.stringify(result));
res.status(200).send(JSON.stringify(result));
} catch (reason) {
logger.error('Error in /articleservice ' + reason);
const errSpeech = `There was an error processing the article. ${reason}`;
Expand Down Expand Up @@ -386,15 +405,7 @@ async function generateMetaAudio(data, summaryOnly) {
outro = metaAudio.outro_location;
} else {
logger.info('Generating outro for item:' + data.item_id);
articleOptions.formData = {
consumer_key: process.env.POCKET_KEY,
url: data.resolved_url,
images: '0',
videos: '0',
refresh: '0',
output: 'json'
};
const article = JSON.parse(await rp(articleOptions));
let article = await getPocketArticleTextFromUrl(data.resolved_url);
var dateOptions = { year: 'numeric', month: 'long', day: 'numeric' };
let publishedDate = new Date(article.timePublished * 1000);
let dateString =
Expand Down Expand Up @@ -764,6 +775,31 @@ async function searchAndPlayArticle(
}

/*
 * Fetches the article for `url` with getPocketArticleTextFromUrl and
 * passes its `article` field, rendered as a string, to
 * buildAudioFromText. Returns whatever buildAudioFromText returns.
 */
async function buildAudioFromUrl(url) {
  const { article: articleBody } = await getPocketArticleTextFromUrl(url);
  return buildAudioFromText(`${articleBody}`);
}

/*
 * Builds the spoken intro text for an article.
 *
 * article: object read for `title`, `publisher`, `host` and
 *          `timePublished` (multiplied by 1000 before being handed to
 *          Date, so it is treated as seconds).
 *
 * returns one of:
 *   "<title>, published by <host>, on <date>"
 *   "<title>, published on <date>"
 *   "<title>, published by <host>."
 *   "<title>."
 */
function buildIntro(article) {
  // The host is only read out when the article has a publisher AND a host.
  // Checking publisher alone produced "published by undefined" whenever
  // the host was missing.
  const attribution =
    article.publisher && article.host ? `, published by ${article.host}` : '';

  // The case where date is not available.
  if (!article.timePublished) {
    return `${article.title}${attribution}.`;
  }

  const dateOptions = { year: 'numeric', month: 'long', day: 'numeric' };
  const publishedDate = new Date(article.timePublished * 1000);
  const dateString = publishedDate.toLocaleDateString('en-US', dateOptions);

  return attribution
    ? `${article.title}${attribution}, on ${dateString}`
    : `${article.title}, published on ${dateString}`;
}

async function getPocketArticleTextFromUrl(url) {
articleOptions.formData = {
consumer_key: process.env.POCKET_KEY,
url,
Expand All @@ -775,7 +811,17 @@ async function buildAudioFromUrl(url) {
logger.info('Getting article from pocket API: ' + url);
const article = JSON.parse(await rp(articleOptions));
logger.info('Returned article from pocket API: ' + article.title);
return buildAudioFromText(`${article.article}`);
return article;
}

/*
 * Cleans and chunks `textString` with texttools, logs the chunks, and
 * hands them to polly_tts.synthesizeSpeechFile.
 *
 * textString: the text to synthesize.
 * voiceType:  voice passed to polly_tts; defaults to the POLLY_VOICE
 *             environment variable, or 'Salli' when that is unset.
 *
 * Returns whatever polly_tts.synthesizeSpeechFile returns.
 */
async function createAudioFileFromText(
  textString,
  voiceType = process.env.POLLY_VOICE || 'Salli'
) {
  const chunks = texttools.chunkText(texttools.cleanText(textString));
  logger.debug('chunkText is: ', chunks.length, chunks);
  return polly_tts.synthesizeSpeechFile(chunks, voiceType);
}

async function buildSummaryAudioFromUrl(url) {
Expand Down Expand Up @@ -804,6 +850,10 @@ async function buildAudioFromText(
return polly_tts.getSpeechSynthUrl(chunkText, voiceType);
}

/*
 * Thin wrapper over polly_tts.processPocketAudio: passes the intro and
 * article files through and returns that call's result.
 */
async function buildPocketAudio(introFile, articleFile) {
  const stitchedAudio = polly_tts.processPocketAudio(introFile, articleFile);
  return stitchedAudio;
}

function findBestScoringTitle(searchPhrase, articleMetadataArray) {
return new Promise((resolve, reject) => {
natural.PorterStemmer.attach();
Expand Down
177 changes: 176 additions & 1 deletion command/polly_tts.js
Expand Up @@ -7,6 +7,17 @@ const logger = require('../logger');
const xcodeQueue = require('./xcodeQueue');

var polly_tts = {
/* Sends a chunk of text to be synthesized by Polly.
* text: Text to be synthesized (with ssml tags)
* filenameIndex: an index denoting this chunk of
* text's array index (for later stitching)
* audio_file: the name of the root of the file to
* attach the index to.
* voiceType: name of the Polly voice to synthesize with.
*
* resolves: The name of the new synthesized local file.
* reject: error from Polly.
*/
getPollyChunk: function(text, filenameIndex, audio_file, voiceType) {
return new Promise(function(resolve, reject) {
let rate = process.env.PROSODY_RATE || 'medium';
Expand Down Expand Up @@ -56,6 +67,12 @@ var polly_tts = {
});
},

/* Stitches together an array of local audio
* files using ffmpeg.
*
* resolves: The name of the new stitched file.
* reject: error from ffmpeg
*/
concatAudio: function(parts, audio_file) {
return new Promise((resolve, reject) => {
let filename = './' + audio_file + '.mp3';
Expand All @@ -78,6 +95,123 @@ var polly_tts = {
});
},

/* This is special handling for the Pocket audio file.
* Synthesizes a speech file for an array of text
* chunks.
*
* resolves: The name of the new local audio file
*/
synthesizeSpeechFile(parts, voiceType) {
return new Promise(resolve => {
let audio_file = uuidgen.generate();
let promArray = [];
for (var i = 0; i < parts.length; i++) {
promArray.push(this.getPollyChunk(parts[i], i, audio_file, voiceType));
}

Promise.all(promArray)
.then(function(values) {
logger.debug('resolved the big promise array');
return polly_tts.concatAudio(values, audio_file);
})
.then(function(newAudioFile) {
resolve(newAudioFile);
});
});
},

/* This is special handling for the Pocket audio file.
* It stitches together the intro and outro for the clients.
*
* concat intro + body
* upload stitched file
* resolve stitched file
* ... then the rest can be done after the promise resolves
* fire xcode request to sqs
* handle db writes
* upload intro & body separately for Alexa.
*/
processPocketAudio(introFile, articleFile) {
return new Promise(resolve => {
polly_tts
.concatAudio([introFile, articleFile], uuidgen.generate())
.then(function(audio_file) {
return polly_tts.uploadFile(audio_file);
})
.then(function(audio_url) {
resolve(audio_url);
// Delete the local file now that it's uploaded.
let audio_file = audio_url.substr(audio_url.lastIndexOf('/') + 1);
polly_tts.deleteLocalFiles(audio_file, function(err) {
if (err) {
logger.error('Error removing files ' + err);
} else {
logger.debug('all files removed');
}
});
// Send the stitched file off for transcoding.
xcodeQueue.add(audio_file);
});
});
},

/*
* This uploads a synthesized file to the
* configured S3 bucket in the environment
* variable POLLY_S3_BUCKET.
*
* resolves: URL of the file
* reject: error
*/
uploadFile: function(newAudioFile) {
return new Promise((resolve, reject) => {
var s3 = new AWS.S3({
apiVersion: '2006-03-01'
});
var bucketParams = {
Bucket: process.env.POLLY_S3_BUCKET,
Key: '',
Body: ''
};

var fileStream = fs.createReadStream(newAudioFile);
fileStream.on('error', function(err) {
logger.error('File Error' + err);
reject('File error:' + err);
return;
});
bucketParams.Body = fileStream;
var path = require('path');
bucketParams.Key = path.basename(newAudioFile);

logger.debug('startupload: ' + Date.now());
s3.upload(bucketParams, function(err, data) {
if (err) {
logger.error('error uploading');
reject('error uploading:' + err);
} else {
logger.debug('Upload Success' + data.Location);
// Return the URL of the Mp3 in the S3 bucket.
resolve(data.Location);
}
});
});
},

/*
* This synthesizes the chunked up file
* and returns a URL of the mp3. Clients
* of this function are the Scout skill,
* mobile app. Not used by the pocket app.
*
* It also queues the final product for
* transcoding to opus format in the S3
* bucket at a later date. All temp files
* used to synthesize the file are deleted
*
* resolves: URL of the file
* reject: error
*/
getSpeechSynthUrl: function(parts, voiceType) {
return new Promise((resolve, reject) => {
let audio_file = uuidgen.generate();
Expand Down Expand Up @@ -137,8 +271,49 @@ var polly_tts = {
});
},

/*
* Takes a local audio file and:
* 1. Uploads to the S3 bucket
* 2. Queues it for transcoding to opus
* 3. Deletes the local file.
* Currently used by the Pocket app as a
* special handling for the case of stitching
* the intro/main article instead of returning
* separate parts.
*
* resolves: URL of the audio file in S3 Bucket.
* reject: error
*/
postProcessPart: function(audio_file) {
return new Promise(resolve => {
polly_tts.uploadFile(audio_file).then(function(audio_url) {
//Put the file in queue for transcoding.
logger.debug('audio_file is: ' + audio_file);
xcodeQueue.add(audio_file.replace(/^.*[\\/]/, ''));
resolve(audio_url);
polly_tts.deleteLocalFiles(audio_file, function(err) {
if (err) {
logger.error('Error removing files ' + err);
} else {
logger.debug('all files removed');
}
});
});
});
},

/*
* Takes a local mp3 file:
* 1. Changes file.mp3 to file*.*
* 2. Searches locally for file*.* files
* 3. Iterates through those files and
* deletes them
* Should only be called after everything has
* been uploaded.
*/
deleteLocalFiles: function(rootFile, callback) {
let files = glob.sync('./' + rootFile + '*.*');
logger.debug('Entering deleteLocalFiles: ' + rootFile);
let files = glob.sync(rootFile.replace('.mp3', '*.*'));
var i = files.length;
files.forEach(function(filepath) {
fs.unlink(filepath, function(err) {
Expand Down
2 changes: 2 additions & 0 deletions command/xcodeQueue.js
Expand Up @@ -11,13 +11,15 @@

var uuidgen = require('node-uuid-generator');
var AWS = require('aws-sdk');
const logger = require('../logger');
AWS.config.update({ region: process.env.AWS_REGION });

// Create an SQS service object
var sqs = new AWS.SQS({ apiVersion: '2012-11-05' });

const xcodeQueue = {
add: function(file) {
logger.debug('XCODE: filename: ' + file);
var jsonBody = {
filename: file,
targetCodec: 'opus 24'
Expand Down

0 comments on commit 55d5078

Please sign in to comment.