Skip to content

Commit

Permalink
Warn about ID3v2 text frame with multiple strings
Browse files Browse the repository at this point in the history
First step to support multiple strings within ID3v2 text
frame.

See
* #10
* Martchus/tageditor#38
  • Loading branch information
Martchus committed Jul 1, 2018
1 parent 262b823 commit 943123a
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 62 deletions.
199 changes: 138 additions & 61 deletions id3/id3v2frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,24 @@ template <class stringtype> int parseGenreIndex(const stringtype &denotation)
return index;
}

/*!
* \brief Returns an std::string instance for the substring parsed using parseSubstring().
*/
string stringFromSubstring(tuple<const char *, size_t, const char *> substr)
{
return string(get<0>(substr), get<1>(substr));
}

/*!
* \brief Returns an std::u16string instance for the substring parsed using parseSubstring().
*/
u16string wideStringFromSubstring(tuple<const char *, size_t, const char *> substr, TagTextEncoding encoding)
{
u16string res(reinterpret_cast<u16string::const_pointer>(get<0>(substr)), get<1>(substr) / 2);
TagValue::ensureHostByteOrder(res, encoding);
return res;
}

/*!
* \brief Parses a frame from the stream read using the specified \a reader.
*
Expand Down Expand Up @@ -226,85 +244,148 @@ void Id3v2Frame::parse(BinaryReader &reader, uint32 version, uint32 maximalSize,
reader.read(buffer.get(), m_dataSize);
}

// -> get tag value depending of field type
// read tag value depending on frame ID/type
if (Id3v2FrameIds::isTextFrame(id())) {
// frame contains text
TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast<byte>(*buffer.get()), diag); // the first byte stores the encoding
if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition))
|| (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) {
// the track number or the disk number frame
try {
if (characterSize(dataEncoding) > 1) {
value().assignPosition(PositionInSet(parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
} else {
value().assignPosition(PositionInSet(parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag)));
// parse text encoding byte
TagTextEncoding dataEncoding = parseTextEncodingByte(static_cast<byte>(*buffer.get()), diag);

// parse string values (since ID3v2.4 a text frame may contain multiple strings)
std::vector<TagValue> additionalValues;
const char *currentOffset = buffer.get() + 1;
for (size_t currentIndex = 1; currentIndex < m_dataSize;) {
// determine the next substring
const auto substr(parseSubstring(currentOffset, m_dataSize - currentIndex, dataEncoding, false, diag));

// handle case when string is empty
if (!get<1>(substr)) {
if (currentIndex == 1) {
value().clearDataAndMetadata();
}
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context);
currentIndex = static_cast<size_t>(get<2>(substr) - buffer.get());
currentOffset = get<2>(substr);
continue;
}

} else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) {
// frame contains length
try {
string milliseconds;
if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) {
const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian
? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef))
: convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef));
milliseconds = string(convertedStringData.first.get(), convertedStringData.second);
} else { // Latin-1 or UTF-8
milliseconds = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
// determine the TagValue instance to store the value
TagValue *const value = [&] {
if (this->value().isEmpty()) {
return &this->value();
}
additionalValues.emplace_back();
return &additionalValues.back();
}();

// apply further parsing for some text frame types (eg. convert track number to PositionInSet)
if ((version >= 3 && (id() == Id3v2FrameIds::lTrackPosition || id() == Id3v2FrameIds::lDiskPosition))
|| (version < 3 && (id() == Id3v2FrameIds::sTrackPosition || id() == Id3v2FrameIds::sDiskPosition))) {
// parse the track number or the disk number frame
try {
if (characterSize(dataEncoding) > 1) {
value->assignPosition(PositionInSet(wideStringFromSubstring(substr, dataEncoding)));
} else {
value->assignPosition(PositionInSet(stringFromSubstring(substr)));
}
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of track/disk position frame is not numeric and will be ignored.", context);
}
value().assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber<double>(milliseconds)));
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context);
}

} else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) {
// genre/content type
int genreIndex;
if (characterSize(dataEncoding) > 1) {
const auto genreDenotation = parseWideString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
genreIndex = parseGenreIndex(genreDenotation);
} else if ((version >= 3 && id() == Id3v2FrameIds::lLength) || (version < 3 && id() == Id3v2FrameIds::sLength)) {
// parse frame contains length
try {
const auto milliseconds = [&] {
if (dataEncoding == TagTextEncoding::Utf16BigEndian || dataEncoding == TagTextEncoding::Utf16LittleEndian) {
const auto parsedStringRef = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
const auto convertedStringData = dataEncoding == TagTextEncoding::Utf16BigEndian
? convertUtf16BEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef))
: convertUtf16LEToUtf8(get<0>(parsedStringRef), get<1>(parsedStringRef));
return string(convertedStringData.first.get(), convertedStringData.second);
} else { // Latin-1 or UTF-8
return stringFromSubstring(substr);
}
}();
value->assignTimeSpan(TimeSpan::fromMilliseconds(stringToNumber<double>(milliseconds)));
} catch (const ConversionException &) {
diag.emplace_back(DiagLevel::Warning, "The value of the length frame is not numeric and will be ignored.", context);
}

} else if ((version >= 3 && id() == Id3v2FrameIds::lGenre) || (version < 3 && id() == Id3v2FrameIds::sGenre)) {
// parse genre/content type
const auto genreIndex = [&] {
if (characterSize(dataEncoding) > 1) {
return parseGenreIndex(wideStringFromSubstring(substr, dataEncoding));
} else {
return parseGenreIndex(stringFromSubstring(substr));
}
}();
if (genreIndex != -1) {
// genre is specified as ID3 genre number
value->assignStandardGenreIndex(genreIndex);
} else {
// genre is specified as string
value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
} else {
const auto genreDenotation = parseString(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
genreIndex = parseGenreIndex(genreDenotation);
// store any other text frames as-is
value->assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}
if (genreIndex != -1) {
// genre is specified as ID3 genre number
value().assignStandardGenreIndex(genreIndex);

currentIndex = static_cast<size_t>(get<2>(substr) - buffer.get());
currentOffset = get<2>(substr);
}

// add warning about additional values
if (!additionalValues.empty()) {
// format list of values
const auto valuesString = [&]() -> string {
if (additionalValues.size() == 1) {
return argsToString("value \"", additionalValues.front().toString(TagTextEncoding::Utf8), "\" is ignored.");
}
string valuesString = "values";
for (auto value = additionalValues.cbegin(), end = additionalValues.cend() - 1; value != end; ++value) {
valuesString += ' ';
valuesString += '\"';
valuesString += value->toString(TagTextEncoding::Utf8);
valuesString += '\"';
if (value != end) {
valuesString += ',';
}
}
valuesString += " and \"";
valuesString += additionalValues.back().toString(TagTextEncoding::Utf8);
valuesString += "\" are ignored.";
return valuesString;
}();

// emplace diag message
if (version < 4) {
diag.emplace_back(
DiagLevel::Warning, argsToString("Multiple strings found though the tag is pre-ID3v2.4. Additional ", valuesString), context);
additionalValues.clear();
} else {
// genre is specified as string
// string might be null terminated
const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
diag.emplace_back(DiagLevel::Warning,
argsToString("Multiple strings found. This is not supported so far. Hence the additional ", valuesString), context);
}
} else {
// any other text frame
const auto substr = parseSubstring(buffer.get() + 1, m_dataSize - 1, dataEncoding, false, diag);
value().assignData(get<0>(substr), get<1>(substr), TagDataType::Text, dataEncoding);
}

} else if (version >= 3 && id() == Id3v2FrameIds::lCover) {
// frame stores picture
// parse picture frame
byte type;
parsePicture(buffer.get(), m_dataSize, value(), type, diag);
setTypeInfo(type);

} else if (version < 3 && id() == Id3v2FrameIds::sCover) {
// frame stores legacy picutre
// parse legacy picutre
byte type;
parseLegacyPicture(buffer.get(), m_dataSize, value(), type, diag);
setTypeInfo(type);

} else if (((version >= 3 && id() == Id3v2FrameIds::lComment) || (version < 3 && id() == Id3v2FrameIds::sComment))
|| ((version >= 3 && id() == Id3v2FrameIds::lUnsynchronizedLyrics) || (version < 3 && id() == Id3v2FrameIds::sUnsynchronizedLyrics))) {
// comment frame or unsynchronized lyrics frame (these two frame types have the same structure)
// parse comment frame or unsynchronized lyrics frame (these two frame types have the same structure)
parseComment(buffer.get(), m_dataSize, value(), diag);

} else {
// unknown frame
// parse unknown/unsupported frame
value().assignData(buffer.get(), m_dataSize, TagDataType::Undefined);
}
}
Expand Down Expand Up @@ -609,7 +690,7 @@ byte Id3v2Frame::makeTextEncodingByte(TagTextEncoding textEncoding)
}

/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* This method ensures that byte order marks and termination characters for the specified \a encoding are omitted.
* It might add a waring if the substring is not terminated.
Expand Down Expand Up @@ -691,29 +772,25 @@ tuple<const char *, size_t, const char *> Id3v2Frame::parseSubstring(
}

/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* Same as Id3v2Frame::parseSubstring() but returns the substring as string object.
*/
string Id3v2Frame::parseString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
{
const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
return string(get<0>(substr), get<1>(substr));
return stringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag));
}

/*!
* \brief Parses a substring in the specified \a buffer.
* \brief Parses a substring from the specified \a buffer.
*
* Same as Id3v2Frame::parseSubstring() but returns the substring as u16string object
*
* \remarks Converts byte order to match host byte order (otherwise it wouldn't make much sense to use the resulting u16string).
*/
u16string Id3v2Frame::parseWideString(const char *buffer, size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag)
{
const auto substr = parseSubstring(buffer, dataSize, encoding, addWarnings, diag);
u16string res(reinterpret_cast<u16string::const_pointer>(get<0>(substr)), get<1>(substr) / 2);
TagValue::ensureHostByteOrder(res, encoding);
return res;
return wideStringFromSubstring(parseSubstring(buffer, dataSize, encoding, addWarnings, diag), encoding);
}

/*!
Expand Down
2 changes: 1 addition & 1 deletion id3/id3v2frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class TAG_PARSER_EXPORT Id3v2Frame : public TagField<Id3v2Frame> {

// parsing helper
TagTextEncoding parseTextEncodingByte(byte textEncodingByte, Diagnostics &diag);
std::tuple<const char *, size_t, const char *> parseSubstring(
std::tuple<const char *, std::size_t, const char *> parseSubstring(
const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);
std::string parseString(const char *buffer, std::size_t maxSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);
std::u16string parseWideString(const char *buffer, std::size_t dataSize, TagTextEncoding &encoding, bool addWarnings, Diagnostics &diag);
Expand Down
6 changes: 6 additions & 0 deletions scripts/download_testfiles.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ download_custom \
'MPlayer samples' \
'wget -r -np -R index.html* http://samples.mplayerhq.hu/A-codecs/lossless'

mkdir -p misc && pushd misc
download_custom \
'ID3v2.4 with multiple strings' \
'wget https://trac.ffmpeg.org/raw-attachment/ticket/6949/multiple_id3v2_4_values.mp3'
popd

# convert FLAC files for FLAC tests with ffmpeg
mkdir -p flac
[[ ! -f flac/test.flac ]] && ffmpeg -i mtx-test-data/alac/othertest-itunes.m4a -c:a flac flac/test.flac # raw FLAC stream
Expand Down
1 change: 1 addition & 0 deletions tests/overall.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class OverallTests : public TestFixture {
void checkMp4Constraints();

void checkMp3Testfile1();
void checkMp3Testfile2();
void checkMp3TestMetaData();
void checkMp3PaddingConstraints();

Expand Down
62 changes: 62 additions & 0 deletions tests/overallmp3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,67 @@ void OverallTests::checkMp3Testfile1()
CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Information);
}

/*!
* \brief Checks "misc/multiple_id3v2_4_values.mp3" (from https://trac.ffmpeg.org/ticket/6949).
*/
void OverallTests::checkMp3Testfile2()
{
CPPUNIT_ASSERT_EQUAL(ContainerFormat::MpegAudioFrames, m_fileInfo.containerFormat());
const auto tracks = m_fileInfo.tracks();
CPPUNIT_ASSERT_EQUAL(1_st, tracks.size());
for (const auto &track : tracks) {
CPPUNIT_ASSERT_EQUAL(MediaType::Audio, track->mediaType());
CPPUNIT_ASSERT_EQUAL(GeneralMediaFormat::Mpeg1Audio, track->format().general);
CPPUNIT_ASSERT_EQUAL(static_cast<unsigned char>(SubFormats::Mpeg1Layer3), track->format().sub);
CPPUNIT_ASSERT_EQUAL(static_cast<uint16>(2), track->channelCount());
CPPUNIT_ASSERT_EQUAL(static_cast<byte>(MpegChannelMode::Stereo), track->channelConfig());
CPPUNIT_ASSERT_EQUAL(44100u, track->samplingFrequency());
CPPUNIT_ASSERT_EQUAL(20, track->duration().seconds());
}
const auto tags = m_fileInfo.tags();
switch (m_tagStatus) {
case TagStatus::Original:
CPPUNIT_ASSERT(!m_fileInfo.id3v1Tag());
CPPUNIT_ASSERT_EQUAL(1_st, m_fileInfo.id3v2Tags().size());
CPPUNIT_ASSERT_EQUAL(1_st, tags.size());
for (const auto &tag : tags) {
switch (tag->type()) {
case TagType::Id3v1Tag:
CPPUNIT_FAIL("no ID3v1 tag expected");
case TagType::Id3v2Tag:
CPPUNIT_ASSERT_EQUAL(TagTextEncoding::Utf8, tag->value(KnownField::Title).dataEncoding());
CPPUNIT_ASSERT_EQUAL("Infinite (Original Mix)"s, tag->value(KnownField::Title).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("B-Front"s, tag->value(KnownField::Artist).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Infinite"s, tag->value(KnownField::Album).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Hardstyle"s, tag->value(KnownField::Genre).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Lavf57.83.100"s, tag->value(KnownField::EncoderSettings).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("Roughstate"s, tag->value(KnownField::RecordLabel).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL("2017"s, tag->value(KnownField::RecordDate).toString(TagTextEncoding::Utf8));
CPPUNIT_ASSERT_EQUAL(1, tag->value(KnownField::TrackPosition).toPositionInSet().position());
CPPUNIT_ASSERT(tag->value(KnownField::Length).toTimeSpan().isNull());
CPPUNIT_ASSERT(tag->value(KnownField::Lyricist).isEmpty());
break;
default:;
}
}
CPPUNIT_ASSERT_GREATEREQUAL(2_st, m_diag.size());
CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[0].level());
CPPUNIT_ASSERT_EQUAL(DiagLevel::Warning, m_diag[1].level());
CPPUNIT_ASSERT_EQUAL("parsing TCON frame"s, m_diag[1].context());
CPPUNIT_ASSERT_EQUAL(
"Multiple strings found. This is not supported so far. Hence the additional values \"Test\", \"Example\", and \"Hard Dance\" are ignored."s,
m_diag[1].message());
break;
case TagStatus::TestMetaDataPresent:
checkMp3TestMetaData();
break;
case TagStatus::Removed:
CPPUNIT_ASSERT_EQUAL(0_st, tracks.size());
}

CPPUNIT_ASSERT(m_diag.level() <= DiagLevel::Warning);
}

/*!
* \brief Checks whether test meta data for MP3 files has been applied correctly.
*/
Expand Down Expand Up @@ -213,6 +274,7 @@ void OverallTests::testMp3Parsing()
m_fileInfo.setForceFullParse(false);
m_tagStatus = TagStatus::Original;
parseFile(TestUtilities::testFilePath("mtx-test-data/mp3/id3-tag-and-xing-header.mp3"), &OverallTests::checkMp3Testfile1);
parseFile(TestUtilities::testFilePath("misc/multiple_id3v2_4_values.mp3"), &OverallTests::checkMp3Testfile2);
}

#ifdef PLATFORM_UNIX
Expand Down

0 comments on commit 943123a

Please sign in to comment.