New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Better SplitLongLinesInSubtitle #3370
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,105 +43,114 @@ public static bool QualifiesForSplit(string text, int singleLineMaxCharacters, i | |
public static Subtitle SplitLongLinesInSubtitle(Subtitle subtitle, int totalLineMaxCharacters, int singleLineMaxCharacters) | ||
{ | ||
var splittedSubtitle = new Subtitle(subtitle); | ||
splittedSubtitle.Paragraphs.Clear(); | ||
string language = LanguageAutoDetect.AutoDetectGoogleLanguage(subtitle); | ||
for (int i = 0; i < subtitle.Paragraphs.Count; i++) | ||
|
||
// calculate gaps | ||
var halfMinGaps = Configuration.Settings.General.MinimumMillisecondsBetweenLines / 2.0; | ||
var halfMinGapsMood = halfMinGaps + Configuration.Settings.General.MinimumMillisecondsBetweenLines % 2; | ||
|
||
const int FirstLine = 0; | ||
const int SecondLine = 1; | ||
|
||
for (int i = splittedSubtitle.Paragraphs.Count - 1; i >= 0; i--) | ||
{ | ||
bool added = false; | ||
var p = subtitle.GetParagraphOrDefault(i); | ||
if (p?.Text != null) | ||
var oldParagraph = splittedSubtitle.Paragraphs[i]; | ||
|
||
// don't split into two paragraph if it can be balanced | ||
var text = Utilities.AutoBreakLine(oldParagraph.Text, language); | ||
if (!QualifiesForSplit(text, singleLineMaxCharacters, totalLineMaxCharacters)) | ||
{ | ||
if (QualifiesForSplit(p.Text, singleLineMaxCharacters, totalLineMaxCharacters)) | ||
{ | ||
var text = Utilities.AutoBreakLine(p.Text, language); | ||
if (!QualifiesForSplit(text, singleLineMaxCharacters, totalLineMaxCharacters)) | ||
{ | ||
var newParagraph = new Paragraph(p) { Text = text }; | ||
splittedSubtitle.Paragraphs.Add(newParagraph); | ||
added = true; | ||
} | ||
else | ||
{ | ||
if (text.Contains(Environment.NewLine)) | ||
{ | ||
var arr = text.SplitToLines(); | ||
if (arr.Count == 2) | ||
{ | ||
var minMsBtwnLnBy2 = Configuration.Settings.General.MinimumMillisecondsBetweenLines / 2; | ||
int spacing1 = minMsBtwnLnBy2; | ||
int spacing2 = minMsBtwnLnBy2; | ||
if (Configuration.Settings.General.MinimumMillisecondsBetweenLines % 2 == 1) | ||
{ | ||
spacing2++; | ||
} | ||
|
||
double duration = p.Duration.TotalMilliseconds / 2.0; | ||
var newParagraph1 = new Paragraph(p); | ||
var newParagraph2 = new Paragraph(p); | ||
newParagraph1.Text = Utilities.AutoBreakLine(arr[0], language); | ||
newParagraph1.EndTime.TotalMilliseconds = p.StartTime.TotalMilliseconds + duration - spacing1; | ||
newParagraph2.Text = Utilities.AutoBreakLine(arr[1], language); | ||
newParagraph2.StartTime.TotalMilliseconds = newParagraph1.EndTime.TotalMilliseconds + spacing2; | ||
|
||
string p1 = HtmlUtil.RemoveHtmlTags(newParagraph1.Text); | ||
var len = p1.Length - 1; | ||
if (p1.Length > 0 && (p1[len] == '.' || p1[len] == '!' || p1[len] == '?' || p1[len] == ':' || p1[len] == ')' || p1[len] == ']' || p1[len] == '♪')) | ||
{ | ||
if (newParagraph1.Text.StartsWith('-') && newParagraph2.Text.StartsWith('-')) | ||
{ | ||
newParagraph1.Text = newParagraph1.Text.Remove(0, 1).Trim(); | ||
newParagraph2.Text = newParagraph2.Text.Remove(0, 1).Trim(); | ||
} | ||
else if (newParagraph1.Text.StartsWith("<i>-", StringComparison.Ordinal) && newParagraph2.Text.StartsWith('-')) | ||
{ | ||
newParagraph1.Text = newParagraph1.Text.Remove(3, 1).Trim(); | ||
if (newParagraph1.Text.StartsWith("<i> ", StringComparison.Ordinal)) | ||
{ | ||
newParagraph1.Text = newParagraph1.Text.Remove(3, 1).Trim(); | ||
} | ||
|
||
newParagraph2.Text = newParagraph2.Text.Remove(0, 1).Trim(); | ||
} | ||
} | ||
else | ||
{ | ||
if (newParagraph1.Text.EndsWith("</i>", StringComparison.Ordinal)) | ||
{ | ||
const string post = "</i>"; | ||
newParagraph1.Text = newParagraph1.Text.Remove(newParagraph1.Text.Length - post.Length); | ||
} | ||
|
||
if (newParagraph2.Text.StartsWith("<i>", StringComparison.Ordinal)) | ||
{ | ||
const string pre = "<i>"; | ||
newParagraph2.Text = newParagraph2.Text.Remove(0, pre.Length); | ||
} | ||
} | ||
|
||
var indexOfItalicOpen1 = newParagraph1.Text.IndexOf("<i>", StringComparison.Ordinal); | ||
if (indexOfItalicOpen1 >= 0 && indexOfItalicOpen1 < 10 && newParagraph1.Text.IndexOf("</i>", StringComparison.Ordinal) < 0 && | ||
newParagraph2.Text.Contains("</i>") && newParagraph2.Text.IndexOf("<i>", StringComparison.Ordinal) < 0) | ||
{ | ||
newParagraph1.Text += "</i>"; | ||
newParagraph2.Text = "<i>" + newParagraph2.Text; | ||
} | ||
|
||
splittedSubtitle.Paragraphs.Add(newParagraph1); | ||
splittedSubtitle.Paragraphs.Add(newParagraph2); | ||
added = true; | ||
} | ||
} | ||
} | ||
} | ||
oldParagraph.Text = text; | ||
continue; | ||
} | ||
|
||
// continue if paragraph doesn't contain exactly two lines | ||
var lines = text.SplitToLines(); | ||
if (lines.Count != 2) | ||
{ | ||
continue; // ignore 3+ lines | ||
} | ||
if (!added) | ||
|
||
// calculate milliseconds per char | ||
double millisecondsPerChar = oldParagraph.Duration.TotalMilliseconds / (HtmlUtil.RemoveHtmlTags(text, true).Length - Environment.NewLine.Length); | ||
|
||
oldParagraph.Text = lines[FirstLine]; | ||
|
||
// use optimal time to adjust duration | ||
oldParagraph.EndTime.TotalMilliseconds = oldParagraph.StartTime.TotalMilliseconds + millisecondsPerChar * oldParagraph.Text.Length - halfMinGaps; | ||
|
||
// build second paragraph | ||
var newParagraph = new Paragraph(oldParagraph) { Text = lines[SecondLine] }; | ||
newParagraph.StartTime.TotalMilliseconds = oldParagraph.EndTime.TotalMilliseconds + halfMinGapsMood; | ||
newParagraph.EndTime.TotalMilliseconds = newParagraph.StartTime.TotalMilliseconds + millisecondsPerChar * newParagraph.Text.Length; | ||
|
||
// only remove dash (if dialog) if first line is fully closed | ||
if (IsTextClosed(oldParagraph.Text)) | ||
{ | ||
splittedSubtitle.Paragraphs.Add(new Paragraph(p)); | ||
RemoveInvalidDash(oldParagraph, newParagraph); | ||
} | ||
|
||
// handle invalid tags | ||
if (oldParagraph.Text.Contains('<')) | ||
{ | ||
oldParagraph.Text = HtmlUtil.FixInvalidItalicTags(oldParagraph.Text); | ||
} | ||
if (newParagraph.Text.Contains('<')) | ||
{ | ||
newParagraph.Text = HtmlUtil.FixInvalidItalicTags(newParagraph.Text); | ||
} | ||
|
||
oldParagraph.Text = Utilities.AutoBreakLine(oldParagraph.Text, language); | ||
newParagraph.Text = Utilities.AutoBreakLine(newParagraph.Text, language); | ||
|
||
// insert new paragraph after the current/old one | ||
splittedSubtitle.Paragraphs.Insert(i + 1, newParagraph); | ||
} | ||
|
||
splittedSubtitle.Renumber(); | ||
return splittedSubtitle; | ||
} | ||
|
||
/// <summary>
/// Removes the leading dialog dash from both paragraphs, but only when both
/// texts start with a dash according to <c>StartsWithDash</c>.
/// </summary>
/// <param name="p1">First paragraph of the pair; its Text is updated in place.</param>
/// <param name="p2">Second paragraph of the pair; its Text is updated in place.</param>
private static void RemoveInvalidDash(Paragraph p1, Paragraph p2)
{
    // return if not dialog
    if (!StartsWithDash(p1.Text) || !StartsWithDash(p2.Text))
    {
        return;
    }

    p1.Text = RemoveLeadingDash(p1.Text);
    p2.Text = RemoveLeadingDash(p2.Text);
}

/// <summary>
/// Removes the dash that <c>StartsWithDash</c> detected, together with any
/// whitespace that directly follows it. Must only be called for text for which
/// <c>StartsWithDash</c> returned true.
/// </summary>
private static string RemoveLeadingDash(string text)
{
    const char Dash = '-';

    // The detected dash is either the very first character or the character right
    // after the first '>'. Searching for the first dash from position 0 could hit a
    // dash inside the leading tag (e.g. an attribute value) and damage the tag.
    int dashIdx = text[0] == Dash ? 0 : text.IndexOf('>') + 1;
    return text.Substring(0, dashIdx) + text.Substring(dashIdx + 1).TrimStart();
}
|
||
/// <summary>
/// Tells whether the text begins with a dash: either as its first character, or
/// as the character directly after the first '>' when
/// <c>LineStartsWithHtmlTag</c> reports a leading tag.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>true when a dash is found at the expected position; otherwise false.</returns>
private static bool StartsWithDash(string text)
{
    if (text.LineStartsWithHtmlTag(true, true))
    {
        // inspect the character right after the first '>' (if there is one)
        int afterTagIdx = text.IndexOf('>') + 1;
        return afterTagIdx != text.Length && text[afterTagIdx] == '-';
    }

    return text.StartsWith('-');
}
|
||
/// <summary>
/// Tells whether the text, with HTML tags removed, ends with one of the closing
/// characters '.', '!', '?', ':', ')', ']' or '♪'.
/// </summary>
/// <param name="text">Text to inspect; may be null or empty.</param>
/// <returns>
/// true when the last character after tag removal is a closing character;
/// false for null/empty text or when nothing is left after removing tags.
/// </returns>
private static bool IsTextClosed(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // Remove tags first: this handles text that ends with any of the supported html tags.
    string textNoTags = HtmlUtil.RemoveHtmlTags(text);

    // If removing the tags leaves nothing, there is no last character to inspect
    // (indexing an empty string would throw).
    if (string.IsNullOrEmpty(textNoTags))
    {
        return false;
    }

    char lastChar = textNoTags[textNoTags.Length - 1];
    return lastChar == '.' || lastChar == '!' || lastChar == '?' || lastChar == ':' || lastChar == ')' || lastChar == ']' || lastChar == '♪';
}
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
using Microsoft.VisualStudio.TestTools.UnitTesting; | ||
using Nikse.SubtitleEdit.Core; | ||
using Nikse.SubtitleEdit.Core.Forms; | ||
using System; | ||
|
||
namespace Test.Logic | ||
{ | ||
/// <summary> | ||
/// Summary description for SplitLongLinesHelperTest | ||
/// </summary> | ||
[TestClass] | ||
public class SplitLongLinesHelperTest | ||
{ | ||
private int _maxLineLength; | ||
|
||
private readonly Subtitle _subtitle; | ||
|
||
/// <summary>
/// Builds the shared test subtitle: five paragraphs whose start/end times are
/// chained, each starting the configured minimum gap after the previous one
/// and lasting the optimal display time for its text.
/// </summary>
public SplitLongLinesHelperTest()
{
    _maxLineLength = Configuration.Settings.General.SubtitleLineMaximumLength;

    _subtitle = new Subtitle();
    _subtitle.Paragraphs.Add(new Paragraph { Text = "We have never been to Asia, nor have we visited Africa." });
    _subtitle.Paragraphs.Add(new Paragraph { Text = "We have never\r\nbeen to Asia, nor\r\nhave we visited Africa." });
    _subtitle.Paragraphs.Add(new Paragraph { Text = "- Foobar.\r\n- Foobar" });
    _subtitle.Paragraphs.Add(new Paragraph { Text = "- Sometimes, all you need to do is completely make an ass?\r\n- Of yourself and laugh it off to realise that life isn’t so bad after all." });
    _subtitle.Paragraphs.Add(new Paragraph { Text = "Sometimes, all you need to do is completely make an ass\r\nof yourself and laugh it off to realise that life isn’t so bad after all." });

    // build timing
    Paragraph previous = null;
    foreach (var current in _subtitle.Paragraphs)
    {
        if (previous != null)
        {
            // start the configured minimum gap after the previous paragraph ends
            current.StartTime.TotalMilliseconds = previous.EndTime.TotalMilliseconds +
                                                  Configuration.Settings.General.MinimumMillisecondsBetweenLines;
        }

        current.EndTime.TotalMilliseconds = current.StartTime.TotalMilliseconds + Utilities.GetOptimalDisplayMilliseconds(current.Text);
        previous = current;
    }
}
|
||
/// <summary>
/// Runs the splitter over the shared subtitle and checks the resulting texts,
/// that the paragraph count changed, and that the longer of the two paragraphs
/// produced from the last input also has the longer duration.
/// </summary>
[TestMethod]
public void SplitLongLinesInSubtitleTest()
{
    var procSubtitle = SplitLongLinesHelper.SplitLongLinesInSubtitle(_subtitle, _maxLineLength * 2, _maxLineLength);
    var result = procSubtitle.Paragraphs;

    // short enough after re-balancing: no split expected
    const string balanced = "We have never been to Asia,\r\nnor have we visited Africa.";
    Assert.AreEqual(balanced, result[0].Text);
    Assert.AreEqual(balanced, result[1].Text);
    Assert.AreEqual(_subtitle.Paragraphs[2].Text, result[2].Text);

    Assert.AreNotEqual(_subtitle.Paragraphs.Count, result.Count);

    // too long (dialog)
    string expectedDialogFirst = Utilities.AutoBreakLine("Sometimes, all you need to do is completely make an ass?", "en");
    string expectedDialogSecond = Utilities.AutoBreakLine("Of yourself and laugh it off to realise that life isn’t so bad after all.", "en");
    Assert.AreEqual(expectedDialogFirst, result[3].Text);
    Assert.AreEqual(expectedDialogSecond, result[4].Text);

    // too long
    Assert.AreEqual("Sometimes, all you need to do is\r\ncompletely make an ass of yourself", result[5].Text);
    Assert.AreEqual("and laugh it off to realise that\r\nlife isn’t so bad after all.", result[6].Text);

    // timing test
    var firstHalf = result[5];
    var secondHalf = result[6];
    if (firstHalf.Duration.TotalMilliseconds > secondHalf.Duration.TotalMilliseconds)
    {
        Assert.IsTrue(firstHalf.Text.Length > secondHalf.Text.Length);
    }

    if (firstHalf.Duration.TotalMilliseconds < secondHalf.Duration.TotalMilliseconds)
    {
        Assert.IsTrue(firstHalf.Text.Length < secondHalf.Text.Length);
    }
}
|
||
/// <summary>
/// Derives a milliseconds-per-character rate from the optimal display time of a
/// two-line text and checks that multiplying it back by the character count
/// reproduces the optimal duration within a small tolerance.
/// </summary>
[TestMethod]
public void MillisecondsPerCharTest()
{
    const double Tolerance = .0001;

    string brokenText = Utilities.AutoBreakLine("The waves were crashing on the\r\nshore; it was a lovely sight.");
    double optimalMs = Utilities.GetOptimalDisplayMilliseconds(brokenText);

    // character count with tags removed, not counting the line-break characters
    int lineBreakChars = (Utilities.GetNumberOfLines(brokenText) - 1) * Environment.NewLine.Length;
    double visibleChars = HtmlUtil.RemoveHtmlTags(brokenText, true).Length - lineBreakChars;

    double perCharMs = optimalMs / visibleChars;
    double roundTripError = Math.Abs(optimalMs - (visibleChars * perCharMs));

    Assert.IsTrue(roundTripError < Tolerance);
}
|
||
} | ||
} | ||
ivandrofly marked this conversation as resolved.
Show resolved
Hide resolved
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is really wrong: we can't assume both lines will have the same number of characters. This problem is handled in this PR.
Correct one should be:
d1 = (td / at) * (l1 - nl)
It is cleaner and easier to read and understand.