Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better SplitLongLinesInSubtitle #3370

Merged
merged 3 commits into from Feb 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
187 changes: 98 additions & 89 deletions libse/Forms/SplitLongLinesHelper.cs
Expand Up @@ -43,105 +43,114 @@ public static bool QualifiesForSplit(string text, int singleLineMaxCharacters, i
public static Subtitle SplitLongLinesInSubtitle(Subtitle subtitle, int totalLineMaxCharacters, int singleLineMaxCharacters)
{
var splittedSubtitle = new Subtitle(subtitle);
splittedSubtitle.Paragraphs.Clear();
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle);
for (int i = 0; i < subtitle.Paragraphs.Count; i++)

// calculate gaps
var halfMinGaps = Configuration.Settings.General.MinimumMillisecondsBetweenLines / 2.0;
var halfMinGapsMood = halfMinGaps + Configuration.Settings.General.MinimumMillisecondsBetweenLines % 2;

const int FirstLine = 0;
const int SecondLine = 1;

for (int i = splittedSubtitle.Paragraphs.Count - 1; i >= 0; i--)
{
bool added = false;
var p = subtitle.GetParagraphOrDefault(i);
if (p?.Text != null)
var oldParagraph = splittedSubtitle.Paragraphs[i];

// don't split into two paragraphs if the text can be balanced
var text = Utilities.AutoBreakLine(oldParagraph.Text, language);
if (!QualifiesForSplit(text, singleLineMaxCharacters, totalLineMaxCharacters))
{
if (QualifiesForSplit(p.Text, singleLineMaxCharacters, totalLineMaxCharacters))
{
var text = Utilities.AutoBreakLine(p.Text, language);
if (!QualifiesForSplit(text, singleLineMaxCharacters, totalLineMaxCharacters))
{
var newParagraph = new Paragraph(p) { Text = text };
splittedSubtitle.Paragraphs.Add(newParagraph);
added = true;
}
else
{
if (text.Contains(Environment.NewLine))
{
var arr = text.SplitToLines();
if (arr.Count == 2)
{
var minMsBtwnLnBy2 = Configuration.Settings.General.MinimumMillisecondsBetweenLines / 2;
int spacing1 = minMsBtwnLnBy2;
int spacing2 = minMsBtwnLnBy2;
if (Configuration.Settings.General.MinimumMillisecondsBetweenLines % 2 == 1)
{
spacing2++;
}

double duration = p.Duration.TotalMilliseconds / 2.0;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is really wrong: we can't assume both lines will have the same number of characters. This problem is handled in this PR.

Correct one should be:
d1 = (td / (at - nl)) * l1

  • d1: duration for paragraph one
  • td: total duration
  • at: length of all text (tags removed)
  • l1: length of first line
  • nl: Environment.NewLine.Length

Cleaner and easy to read/understand

var newParagraph1 = new Paragraph(p);
var newParagraph2 = new Paragraph(p);
newParagraph1.Text = Utilities.AutoBreakLine(arr[0], language);
newParagraph1.EndTime.TotalMilliseconds = p.StartTime.TotalMilliseconds + duration - spacing1;
newParagraph2.Text = Utilities.AutoBreakLine(arr[1], language);
newParagraph2.StartTime.TotalMilliseconds = newParagraph1.EndTime.TotalMilliseconds + spacing2;

string p1 = HtmlUtil.RemoveHtmlTags(newParagraph1.Text);
var len = p1.Length - 1;
if (p1.Length > 0 && (p1[len] == '.' || p1[len] == '!' || p1[len] == '?' || p1[len] == ':' || p1[len] == ')' || p1[len] == ']' || p1[len] == '♪'))
{
if (newParagraph1.Text.StartsWith('-') && newParagraph2.Text.StartsWith('-'))
{
newParagraph1.Text = newParagraph1.Text.Remove(0, 1).Trim();
newParagraph2.Text = newParagraph2.Text.Remove(0, 1).Trim();
}
else if (newParagraph1.Text.StartsWith("<i>-", StringComparison.Ordinal) && newParagraph2.Text.StartsWith('-'))
{
newParagraph1.Text = newParagraph1.Text.Remove(3, 1).Trim();
if (newParagraph1.Text.StartsWith("<i> ", StringComparison.Ordinal))
{
newParagraph1.Text = newParagraph1.Text.Remove(3, 1).Trim();
}

newParagraph2.Text = newParagraph2.Text.Remove(0, 1).Trim();
}
}
else
{
if (newParagraph1.Text.EndsWith("</i>", StringComparison.Ordinal))
{
const string post = "</i>";
newParagraph1.Text = newParagraph1.Text.Remove(newParagraph1.Text.Length - post.Length);
}

if (newParagraph2.Text.StartsWith("<i>", StringComparison.Ordinal))
{
const string pre = "<i>";
newParagraph2.Text = newParagraph2.Text.Remove(0, pre.Length);
}
}

var indexOfItalicOpen1 = newParagraph1.Text.IndexOf("<i>", StringComparison.Ordinal);
if (indexOfItalicOpen1 >= 0 && indexOfItalicOpen1 < 10 && newParagraph1.Text.IndexOf("</i>", StringComparison.Ordinal) < 0 &&
newParagraph2.Text.Contains("</i>") && newParagraph2.Text.IndexOf("<i>", StringComparison.Ordinal) < 0)
{
newParagraph1.Text += "</i>";
newParagraph2.Text = "<i>" + newParagraph2.Text;
}

splittedSubtitle.Paragraphs.Add(newParagraph1);
splittedSubtitle.Paragraphs.Add(newParagraph2);
added = true;
}
}
}
}
oldParagraph.Text = text;
continue;
}

// continue if paragraph doesn't contain exactly two lines
var lines = text.SplitToLines();
if (lines.Count != 2)
{
continue; // ignore 3+ lines
}
if (!added)

// calculate milliseconds per char
double millisecondsPerChar = oldParagraph.Duration.TotalMilliseconds / (HtmlUtil.RemoveHtmlTags(text, true).Length - Environment.NewLine.Length);

oldParagraph.Text = lines[FirstLine];

// use optimal time to adjust duration
oldParagraph.EndTime.TotalMilliseconds = oldParagraph.StartTime.TotalMilliseconds + millisecondsPerChar * oldParagraph.Text.Length - halfMinGaps;

// build second paragraph
var newParagraph = new Paragraph(oldParagraph) { Text = lines[SecondLine] };
newParagraph.StartTime.TotalMilliseconds = oldParagraph.EndTime.TotalMilliseconds + halfMinGapsMood;
newParagraph.EndTime.TotalMilliseconds = newParagraph.StartTime.TotalMilliseconds + millisecondsPerChar * newParagraph.Text.Length;

// only remove dash (if dialog) if first line is fully closed
if (IsTextClosed(oldParagraph.Text))
{
splittedSubtitle.Paragraphs.Add(new Paragraph(p));
RemoveInvalidDash(oldParagraph, newParagraph);
}

// handle invalid tags
if (oldParagraph.Text.Contains('<'))
{
oldParagraph.Text = HtmlUtil.FixInvalidItalicTags(oldParagraph.Text);
}
if (newParagraph.Text.Contains('<'))
{
newParagraph.Text = HtmlUtil.FixInvalidItalicTags(newParagraph.Text);
}

oldParagraph.Text = Utilities.AutoBreakLine(oldParagraph.Text, language);
newParagraph.Text = Utilities.AutoBreakLine(newParagraph.Text, language);

// insert new paragraph after the current/old one
splittedSubtitle.Paragraphs.Insert(i + 1, newParagraph);
}

splittedSubtitle.Renumber();
return splittedSubtitle;
}

/// <summary>
/// Removes the leading dialog dash from both paragraphs, but only when both texts
/// start with a dash (optionally preceded by a leading html tag).
/// </summary>
/// <param name="p1">First paragraph; its text is updated in place.</param>
/// <param name="p2">Second paragraph; its text is updated in place.</param>
private static void RemoveInvalidDash(Paragraph p1, Paragraph p2)
{
    // return if not dialog
    if (!StartsWithDash(p1.Text) || !StartsWithDash(p2.Text))
    {
        return;
    }

    p1.Text = RemoveLeadingDialogDash(p1.Text);
    p2.Text = RemoveLeadingDialogDash(p2.Text);
}

/// <summary>
/// Removes the dash found by StartsWithDash, plus any whitespace directly after it.
/// </summary>
/// <param name="text">Text for which StartsWithDash returned true.</param>
/// <returns>The text without its leading dialog dash; unchanged if no dash is at the expected position.</returns>
private static string RemoveLeadingDialogDash(string text)
{
    // StartsWithDash accepts a dash either at position 0 or directly after the first '>'.
    // Look in exactly those places instead of using IndexOf('-'), which could match a dash
    // inside the leading tag itself (e.g. an attribute value) and corrupt the tag.
    int dashIdx = text.StartsWith('-') ? 0 : text.IndexOf('>') + 1;
    if (dashIdx >= text.Length || text[dashIdx] != '-')
    {
        return text;
    }

    return text.Substring(0, dashIdx) + text.Substring(dashIdx + 1).TrimStart();
}

/// <summary>
/// Tells whether the text begins with a dash, either as its very first character or
/// as the character directly after the first '>' when the text starts with an html tag.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>True when a dash is found at one of those two positions.</returns>
private static bool StartsWithDash(string text)
{
    const char Dash = '-';

    if (text.LineStartsWithHtmlTag(true, true))
    {
        // position of the first character after the leading tag
        int afterTagIdx = text.IndexOf('>') + 1;

        // nothing follows the tag => no dash
        return afterTagIdx != text.Length && text[afterTagIdx] == Dash;
    }

    return text.StartsWith(Dash);
}

/// <summary>
/// Tells whether the text is "closed", i.e. its last visible character is one of
/// . ! ? : ) ] or ♪.
/// </summary>
/// <param name="text">Text to inspect; may contain html tags.</param>
/// <returns>
/// True when the last character after tag removal is a closing character;
/// false for null/empty text or text with nothing left once tags are removed.
/// </returns>
private static bool IsTextClosed(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // Strip tags first: this handles text that ends with any of the supported html tags.
    string textNoTags = HtmlUtil.RemoveHtmlTags(text);

    // Tag-only text (e.g. "<i></i>") leaves nothing to inspect; indexing it would throw.
    if (string.IsNullOrEmpty(textNoTags))
    {
        return false;
    }

    char lastChar = textNoTags[textNoTags.Length - 1];
    return lastChar == '.' || lastChar == '!' || lastChar == '?' || lastChar == ':' || lastChar == ')' || lastChar == ']' || lastChar == '♪';
}

}
}
91 changes: 91 additions & 0 deletions src/Test/Logic/SplitLongLinesHelperTest.cs
@@ -0,0 +1,91 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Nikse.SubtitleEdit.Core;
using Nikse.SubtitleEdit.Core.Forms;
using System;

namespace Test.Logic
{
/// <summary>
/// Tests for <see cref="SplitLongLinesHelper"/>: re-balancing of long lines, splitting
/// of over-long paragraphs into two, and the distribution of duration between the parts.
/// </summary>
[TestClass]
public class SplitLongLinesHelperTest
{
    // Single line limit taken from the current configuration; set once in the constructor.
    private readonly int _maxLineLength;

    private readonly Subtitle _subtitle;

    /// <summary>
    /// Builds the fixture: five paragraphs, timed back to back with the configured
    /// minimum gap between them and the optimal display time as duration.
    /// </summary>
    public SplitLongLinesHelperTest()
    {
        _maxLineLength = Configuration.Settings.General.SubtitleLineMaximumLength;

        _subtitle = new Subtitle
        {
            Paragraphs =
            {
                new Paragraph { Text = "We have never been to Asia, nor have we visited Africa."},
                new Paragraph { Text = "We have never\r\nbeen to Asia, nor\r\nhave we visited Africa."},
                new Paragraph { Text = "- Foobar.\r\n- Foobar"},
                new Paragraph { Text = "- Sometimes, all you need to do is completely make an ass?\r\n- Of yourself and laugh it off to realise that life isn’t so bad after all."},
                new Paragraph { Text = "Sometimes, all you need to do is completely make an ass\r\nof yourself and laugh it off to realise that life isn’t so bad after all."},
            }
        };

        // build timing
        for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
        {
            var p = _subtitle.Paragraphs[i];
            if (i > 0)
            {
                // start right after the previous paragraph, keeping the minimum gap
                p.StartTime.TotalMilliseconds = _subtitle.Paragraphs[i - 1].EndTime.TotalMilliseconds +
                                                Configuration.Settings.General.MinimumMillisecondsBetweenLines;
            }
            p.EndTime.TotalMilliseconds = p.StartTime.TotalMilliseconds + Utilities.GetOptimalDisplayMilliseconds(p.Text);
        }
    }

    /// <summary>
    /// Texts that can be balanced stay single paragraphs, while the two over-long
    /// paragraphs are each split into two, with duration following text length.
    /// </summary>
    [TestMethod]
    public void SplitLongLinesInSubtitleTest()
    {
        var procSubtitle = SplitLongLinesHelper.SplitLongLinesInSubtitle(_subtitle, _maxLineLength * 2, _maxLineLength);

        // can be balanced => not split
        Assert.AreEqual("We have never been to Asia,\r\nnor have we visited Africa.", procSubtitle.Paragraphs[0].Text);
        Assert.AreEqual("We have never been to Asia,\r\nnor have we visited Africa.", procSubtitle.Paragraphs[1].Text);
        Assert.AreEqual(_subtitle.Paragraphs[2].Text, procSubtitle.Paragraphs[2].Text);

        // the last two source paragraphs are each split in two (indices 3-6 asserted below)
        Assert.AreEqual(_subtitle.Paragraphs.Count + 2, procSubtitle.Paragraphs.Count);

        // too long (dialog)
        Assert.AreEqual(Utilities.AutoBreakLine("Sometimes, all you need to do is completely make an ass?", "en"), procSubtitle.Paragraphs[3].Text);
        Assert.AreEqual(Utilities.AutoBreakLine("Of yourself and laugh it off to realise that life isn’t so bad after all.", "en"), procSubtitle.Paragraphs[4].Text);

        // too long
        Assert.AreEqual("Sometimes, all you need to do is\r\ncompletely make an ass of yourself", procSubtitle.Paragraphs[5].Text);
        Assert.AreEqual("and laugh it off to realise that\r\nlife isn’t so bad after all.", procSubtitle.Paragraphs[6].Text);

        // timing test: the part shown longer must also be the part with more text
        var firstPart = procSubtitle.Paragraphs[5];
        var secondPart = procSubtitle.Paragraphs[6];
        if (firstPart.Duration.TotalMilliseconds > secondPart.Duration.TotalMilliseconds)
        {
            Assert.IsTrue(firstPart.Text.Length > secondPart.Text.Length);
        }
        if (firstPart.Duration.TotalMilliseconds < secondPart.Duration.TotalMilliseconds)
        {
            Assert.IsTrue(firstPart.Text.Length < secondPart.Text.Length);
        }
    }

    /// <summary>
    /// Checks that multiplying the per-character duration by the number of displayed
    /// characters gives back the optimal duration.
    /// </summary>
    [TestMethod]
    public void MillisecondsPerCharTest()
    {
        string text = Utilities.AutoBreakLine("The waves were crashing on the\r\nshore; it was a lovely sight.");
        double optimalDuration = Utilities.GetOptimalDisplayMilliseconds(text);

        // displayed characters: text without tags, line breaks not counted
        double displayCharLen = (HtmlUtil.RemoveHtmlTags(text, true).Length - ((Utilities.GetNumberOfLines(text) - 1) * Environment.NewLine.Length));
        double msPerChar = optimalDuration / displayCharLen;

        // NOTE(review): msPerChar is derived from the same two values, so this only
        // guards against floating-point drift in the round trip.
        const double tolerance = .0001;
        double diff = Math.Abs(optimalDuration - (displayCharLen * msPerChar));
        Assert.IsTrue(diff < tolerance);
    }

}
}
ivandrofly marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions src/Test/Test.csproj
Expand Up @@ -62,6 +62,7 @@
<Compile Include="Logic\Dictionaries\OcrFixReplaceListTest.cs" />
<Compile Include="Logic\Mp4\Mp4Test.cs" />
<Compile Include="Logic\Ocr\MatchesToItalicStringConverterTest.cs" />
<Compile Include="Logic\SplitLongLinesHelperTest.cs" />
<Compile Include="Logic\TarFileTest.cs" />
<Compile Include="Logic\TransportStream\TransportStreamTest.cs" />
<Compile Include="Logic\ParagraphTest.cs" />
Expand Down