From f7a472153f70bf20cc69abc3f9ba7dd8555a74bd Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sat, 22 Jul 2023 10:24:18 -0400 Subject: [PATCH 01/11] Changes pruning algorithm for audio visualizer. - Selects paragraphs in the visible portion of the visualizer first to ensure you can see the current timeline. - Select paragraphs across the entire timeline before selecting multiple paragraphs that start at the same time. - After visible paragraphs are pruned, add some invisible ones to assist with scrolling (and prune again to limit rendered paragraphs). https://github.com/SubtitleEdit/subtitleedit/issues/7000 --- src/ui/Controls/AudioVisualizer.cs | 70 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index daa8862785..22042bc779 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -431,40 +431,52 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie const double additionalSeconds = 15.0; // Helps when scrolling var startThresholdMilliseconds = (_startPositionSeconds - additionalSeconds) * TimeCode.BaseUnit; var endThresholdMilliseconds = (EndPositionSeconds + additionalSeconds) * TimeCode.BaseUnit; - var displayableParagraphs = new List(); - for (var i = 0; i < subtitle.Paragraphs.Count; i++) - { - var p = subtitle.Paragraphs[i]; - - if (p.StartTime.IsMaxTime) - { - continue; - } - _subtitle.Paragraphs.Add(p); - if (p.EndTime.TotalMilliseconds >= startThresholdMilliseconds && p.StartTime.TotalMilliseconds <= endThresholdMilliseconds) - { - displayableParagraphs.Add(p); - if (displayableParagraphs.Count > 99) - { - break; - } - } + List thresholdParagraphs = subtitle.Paragraphs.Where(p => !p.StartTime.IsMaxTime) + .Where(p => p.EndTime.TotalMilliseconds >= startThresholdMilliseconds && p.StartTime.TotalMilliseconds <= 
endThresholdMilliseconds).ToList(); + _subtitle.Paragraphs.AddRange(thresholdParagraphs); + + double startVisibleMilliseconds = _startPositionSeconds * TimeCode.BaseUnit; + double endVisibleMilliseconds = EndPositionSeconds * TimeCode.BaseUnit; + List visibleParagraphs = thresholdParagraphs.Where( + p => p.EndTime.TotalMilliseconds >= startVisibleMilliseconds && p.StartTime.TotalMilliseconds <= endVisibleMilliseconds).ToList(); + List invisibleParagraphs = thresholdParagraphs.Except(visibleParagraphs).ToList(); + + const int maxDisplayableParagraphs = 100; + IEnumerable displayableParagraphs = visibleParagraphs; + if (visibleParagraphs.Count > maxDisplayableParagraphs) + { + /* + * Group & select is done so that it draws paragraphs across the entire timeline before drawing overlapping paragraphs, up to the display limit. + * Materialize to list so that it can be counted below without pruning twice. + */ + displayableParagraphs = visibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) + .GroupBy(p => Math.Floor(p.StartTime.TotalMilliseconds / 90)) + .SelectMany(group => group, + (group, p) => + { + int index = group.ToList().IndexOf(p); + return new { index, p }; + }) + .OrderBy(items => items.index) + .Select(item => item.p) + .Take(maxDisplayableParagraphs) + .ToList(); } - displayableParagraphs = displayableParagraphs.OrderBy(p => p.StartTime.TotalMilliseconds).ToList(); - var lastStartTime = -1d; - foreach (var p in displayableParagraphs) + if (displayableParagraphs.Count() < maxDisplayableParagraphs && invisibleParagraphs.Count + displayableParagraphs.Count() > maxDisplayableParagraphs) { - if (displayableParagraphs.Count > 30 && - (p.DurationTotalMilliseconds < 0.01 || p.StartTime.TotalMilliseconds - lastStartTime < 90)) - { - continue; - } - - _displayableParagraphs.Add(p); - lastStartTime = p.StartTime.TotalMilliseconds; + // These paragraphs won't be visible in the timeline and are in addition to visible paragraphs, so use simpler pruning 
algorithm to save time. + IEnumerable additionalParagraphs = invisibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) + .GroupBy(p => Math.Floor(p.StartTime.TotalMilliseconds / 90)) + .Select(p => p.First()); + displayableParagraphs = displayableParagraphs.Concat(additionalParagraphs); + } + else + { + displayableParagraphs = displayableParagraphs.Concat(invisibleParagraphs); } + _displayableParagraphs.AddRange(displayableParagraphs.Take(maxDisplayableParagraphs)); var primaryParagraph = subtitle.GetParagraphOrDefault(primarySelectedIndex); if (primaryParagraph != null && !primaryParagraph.StartTime.IsMaxTime) From 1ecc13c67edc297bac36a646878721aa4e16e23c Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sat, 22 Jul 2023 10:24:18 -0400 Subject: [PATCH 02/11] Changes pruning algorithm for audio visualizer. - Selects paragraphs in the visible portion of the visualizer first to ensure you can see the current timeline. - Select paragraphs across the entire timeline before selecting multiple paragraphs that start at the same time. - After visible paragraphs are pruned, add some invisible ones to assist with scrolling (and prune again to limit rendered paragraphs). 
https://github.com/SubtitleEdit/subtitleedit/issues/7000 --- src/ui/Controls/AudioVisualizer.cs | 70 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index daa8862785..152a767ce5 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -431,40 +431,52 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie const double additionalSeconds = 15.0; // Helps when scrolling var startThresholdMilliseconds = (_startPositionSeconds - additionalSeconds) * TimeCode.BaseUnit; var endThresholdMilliseconds = (EndPositionSeconds + additionalSeconds) * TimeCode.BaseUnit; - var displayableParagraphs = new List(); - for (var i = 0; i < subtitle.Paragraphs.Count; i++) - { - var p = subtitle.Paragraphs[i]; - - if (p.StartTime.IsMaxTime) - { - continue; - } - _subtitle.Paragraphs.Add(p); - if (p.EndTime.TotalMilliseconds >= startThresholdMilliseconds && p.StartTime.TotalMilliseconds <= endThresholdMilliseconds) - { - displayableParagraphs.Add(p); - if (displayableParagraphs.Count > 99) - { - break; - } - } + List thresholdParagraphs = subtitle.Paragraphs.Where(p => !p.StartTime.IsMaxTime) + .Where(p => p.EndTime.TotalMilliseconds >= startThresholdMilliseconds && p.StartTime.TotalMilliseconds <= endThresholdMilliseconds).ToList(); + _subtitle.Paragraphs.AddRange(thresholdParagraphs); + + double startVisibleMilliseconds = _startPositionSeconds * TimeCode.BaseUnit; + double endVisibleMilliseconds = EndPositionSeconds * TimeCode.BaseUnit; + List visibleParagraphs = thresholdParagraphs.Where( + p => p.EndTime.TotalMilliseconds >= startVisibleMilliseconds && p.StartTime.TotalMilliseconds <= endVisibleMilliseconds).ToList(); + List invisibleParagraphs = thresholdParagraphs.Except(visibleParagraphs).ToList(); + + const int maxDisplayableParagraphs = 100; + IEnumerable displayableParagraphs = visibleParagraphs; + if 
(visibleParagraphs.Count > maxDisplayableParagraphs) + { + /* + * Group & select is done so that it draws paragraphs across the entire timeline before drawing overlapping paragraphs, up to the display limit. + * Materialize to list so that it can be counted below without pruning twice. + */ + displayableParagraphs = visibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) + .GroupBy(p => Math.Floor(SecondsToXPosition(p.StartTime.TotalSeconds) / 5.0)) + .SelectMany(group => group, + (group, p) => + { + int index = group.ToList().IndexOf(p); + return new { index, p }; + }) + .OrderBy(items => items.index) + .Select(item => item.p) + .Take(maxDisplayableParagraphs) + .ToList(); } - displayableParagraphs = displayableParagraphs.OrderBy(p => p.StartTime.TotalMilliseconds).ToList(); - var lastStartTime = -1d; - foreach (var p in displayableParagraphs) + if (displayableParagraphs.Count() < maxDisplayableParagraphs && invisibleParagraphs.Count + displayableParagraphs.Count() > maxDisplayableParagraphs) { - if (displayableParagraphs.Count > 30 && - (p.DurationTotalMilliseconds < 0.01 || p.StartTime.TotalMilliseconds - lastStartTime < 90)) - { - continue; - } - - _displayableParagraphs.Add(p); - lastStartTime = p.StartTime.TotalMilliseconds; + // These paragraphs won't be visible in the timeline and are in addition to visible paragraphs, so use simpler pruning algorithm to save time. 
+ IEnumerable additionalParagraphs = invisibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) + .GroupBy(p => Math.Floor(SecondsToXPosition(p.StartTime.TotalSeconds) / 5.0)) + .Select(p => p.First()); + displayableParagraphs = displayableParagraphs.Concat(additionalParagraphs); + } + else + { + displayableParagraphs = displayableParagraphs.Concat(invisibleParagraphs); } + _displayableParagraphs.AddRange(displayableParagraphs.Take(maxDisplayableParagraphs)); var primaryParagraph = subtitle.GetParagraphOrDefault(primarySelectedIndex); if (primaryParagraph != null && !primaryParagraph.StartTime.IsMaxTime) From 1a68c8727b0083b6233ca7db57522f044fc84a11 Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Fri, 15 Sep 2023 15:15:45 -0400 Subject: [PATCH 03/11] Rough attempt at paragraph pruning algorithm that prioritizes timeline coverage. Has a lot of debugging statements and is not optimized enough to be ready, but it's enough to save. --- src/ui/Controls/AudioVisualizer.cs | 112 ++++-- src/ui/Logic/DisplayableSubtitleHelper.cs | 443 ++++++++++++++++++++++ src/ui/SubtitleEdit.csproj | 1 + 3 files changed, 517 insertions(+), 39 deletions(-) create mode 100644 src/ui/Logic/DisplayableSubtitleHelper.cs diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 152a767ce5..3bece35c00 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -11,6 +11,7 @@ using System.Threading.Tasks; using System.Windows.Forms; using Nikse.SubtitleEdit.Core.Forms; +using System.Diagnostics; namespace Nikse.SubtitleEdit.Controls { @@ -428,55 +429,46 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie return; } + const int maxDisplayableParagraphs = 100; const double additionalSeconds = 15.0; // Helps when scrolling var startThresholdMilliseconds = (_startPositionSeconds - additionalSeconds) * TimeCode.BaseUnit; var endThresholdMilliseconds = 
(EndPositionSeconds + additionalSeconds) * TimeCode.BaseUnit; - List thresholdParagraphs = subtitle.Paragraphs.Where(p => !p.StartTime.IsMaxTime) - .Where(p => p.EndTime.TotalMilliseconds >= startThresholdMilliseconds && p.StartTime.TotalMilliseconds <= endThresholdMilliseconds).ToList(); - _subtitle.Paragraphs.AddRange(thresholdParagraphs); - double startVisibleMilliseconds = _startPositionSeconds * TimeCode.BaseUnit; double endVisibleMilliseconds = EndPositionSeconds * TimeCode.BaseUnit; - List visibleParagraphs = thresholdParagraphs.Where( - p => p.EndTime.TotalMilliseconds >= startVisibleMilliseconds && p.StartTime.TotalMilliseconds <= endVisibleMilliseconds).ToList(); - List invisibleParagraphs = thresholdParagraphs.Except(visibleParagraphs).ToList(); - const int maxDisplayableParagraphs = 100; - IEnumerable displayableParagraphs = visibleParagraphs; - if (visibleParagraphs.Count > maxDisplayableParagraphs) - { - /* - * Group & select is done so that it draws paragraphs across the entire timeline before drawing overlapping paragraphs, up to the display limit. - * Materialize to list so that it can be counted below without pruning twice. - */ - displayableParagraphs = visibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) - .GroupBy(p => Math.Floor(SecondsToXPosition(p.StartTime.TotalSeconds) / 5.0)) - .SelectMany(group => group, - (group, p) => - { - int index = group.ToList().IndexOf(p); - return new { index, p }; - }) - .OrderBy(items => items.index) - .Select(item => item.p) - .Take(maxDisplayableParagraphs) - .ToList(); - } + double bucketSize = 1 / Configuration.Settings.General.CurrentFrameRate; - if (displayableParagraphs.Count() < maxDisplayableParagraphs && invisibleParagraphs.Count + displayableParagraphs.Count() > maxDisplayableParagraphs) - { - // These paragraphs won't be visible in the timeline and are in addition to visible paragraphs, so use simpler pruning algorithm to save time. 
- IEnumerable additionalParagraphs = invisibleParagraphs.Where(p => p.DurationTotalMilliseconds >= 0.01) - .GroupBy(p => Math.Floor(SecondsToXPosition(p.StartTime.TotalSeconds) / 5.0)) - .Select(p => p.First()); - displayableParagraphs = displayableParagraphs.Concat(additionalParagraphs); - } - else + List displayableParagraphs = new List(); + Dictionary> visibleBuckets = new Dictionary>(); + Dictionary> invisibleBuckets = new Dictionary>(); + + Stopwatch start = Stopwatch.StartNew(); + DisplayableSubtitleHelper helper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15); + + int visibleParagraphsCount = 0; + for (var i = 0; i < subtitle.Paragraphs.Count; i++) { - displayableParagraphs = displayableParagraphs.Concat(invisibleParagraphs); + var p = subtitle.Paragraphs[i]; + + if (p.StartTime.IsMaxTime) + { + continue; + } + + helper.Add(p); } - _displayableParagraphs.AddRange(displayableParagraphs.Take(maxDisplayableParagraphs)); + + List selectedParagraphs = helper.GetParagraphs(100, 20); + _displayableParagraphs.AddRange(selectedParagraphs); + + // TODO: Just assign to displayable paragraphs + //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(visibleBuckets, maxDisplayableParagraphs, visibleParagraphsCount > maxDisplayableParagraphs)); + //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(invisibleBuckets, 20, true)); + //_displayableParagraphs.AddRange(displayableParagraphs); + + start.Stop(); + Console.WriteLine("Prune time (ms): " + start.ElapsedMilliseconds); var primaryParagraph = subtitle.GetParagraphOrDefault(primarySelectedIndex); if (primaryParagraph != null && !primaryParagraph.StartTime.IsMaxTime) @@ -495,6 +487,48 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie } } + private List SelectParagraphsFromBuckets(Dictionary> buckets, int numberOfParagraphs, bool pruneShortParagraphs) + { + foreach (List bucket in buckets.Values) + { + // Sort buckets with longest 
paragraphs first. + bucket.Sort((first, second) => { return (int)(second.DurationTotalSeconds - first.DurationTotalSeconds); }); + } + + List result = new List(); + while (result.Count < numberOfParagraphs && buckets.Count > 0) + { + List keys = buckets.Keys.ToList(); + //// Iterate over keys evenly spread over the timeline + //keys.Sort((a, b) => a % numberOfParagraphs - b % numberOfParagraphs); + foreach (int key in keys) + { + List bucket = buckets[key]; + Paragraph p; + while (bucket.Count > 0 && result.Count < numberOfParagraphs) + { + p = bucket[0]; + bucket.RemoveAt(0); + + if (pruneShortParagraphs && p.DurationTotalMilliseconds < 0.01) + { + continue; + } + + result.Add(p); + break; + } + + if (bucket.Count == 0) + { + buckets.Remove(key); + } + } + } + return result; + } + + public void SetPosition(double startPositionSeconds, Subtitle subtitle, double currentVideoPositionSeconds, int subtitleIndex, ListView.SelectedIndexCollection selectedIndexes) { if (TimeSpan.FromTicks(DateTime.UtcNow.Ticks - _lastMouseWheelScroll).TotalSeconds > 0.25) diff --git a/src/ui/Logic/DisplayableSubtitleHelper.cs b/src/ui/Logic/DisplayableSubtitleHelper.cs new file mode 100644 index 0000000000..32e33b31d7 --- /dev/null +++ b/src/ui/Logic/DisplayableSubtitleHelper.cs @@ -0,0 +1,443 @@ +using Nikse.SubtitleEdit.Core.Common; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Nikse.SubtitleEdit.Logic +{ + internal class DisplayableSubtitleHelper + { + // Map associating a time stamp with a number of paragraphs that start at that time stamp. + private readonly Dictionary _startParagraphCounts = new Dictionary(); + // Map associating a time stamp with a number of paragraphs that end at that time stamp. 
+ private readonly Dictionary _endParagraphCounts = new Dictionary(); + + + private readonly List _paragraphs = new List(); + + + private readonly double _startThresholdMilliseconds; + private readonly double _endThresholdMilliseconds; + + private readonly double _startVisibleMilliseconds; + private readonly double _endVisibleMilliseconds; + + public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds) + { + _startThresholdMilliseconds = startMilliseconds - additionalSeconds * 1000; + _endThresholdMilliseconds = endMilliseconds + additionalSeconds * 1000; + + _startVisibleMilliseconds = startMilliseconds; + _endVisibleMilliseconds = endMilliseconds; + } + + public void Add(Paragraph p) + { + // This should take in all paragraphs and figure out which are important enough to render. + // TODO: Filter out visible / invisible paragraphs. + + if (IsInThreshold(p)) + { + AddStart(p.StartTime.TotalMilliseconds); + AddEnd(p.EndTime.TotalMilliseconds); + _paragraphs.Add(p); + } + } + + private void AddStart(double startMilliseconds) + { + if (_startParagraphCounts.TryGetValue(startMilliseconds, out int startCount)) + { + startCount++; + } + else + { + startCount = 1; + } + + _startParagraphCounts[startMilliseconds] = startCount; + } + + private void AddEnd(double endMilliseconds) + { + if (_endParagraphCounts.TryGetValue(endMilliseconds, out int endCount)) + { + endCount++; + } + else + { + endCount = 1; + } + + _endParagraphCounts[endMilliseconds] = endCount; + } + + private bool IsVisible(Paragraph p) + { + return IsInRange(p, _startVisibleMilliseconds, _endVisibleMilliseconds); + } + + private bool IsInThreshold(Paragraph p) + { + return IsInRange(p, _startThresholdMilliseconds, _endThresholdMilliseconds); + } + + private bool IsInRange(Paragraph p, double start, double end) + { + return p.StartTime.TotalMilliseconds <= end && p.EndTime.TotalMilliseconds >= start; + } + + private double CalculateAverageParagraphCoverage() 
+ { + // Average coverage is average number of layers of paragraphs at any moment of the visible timeline. + // A single paragraph covering the entire visible timeline is equivalent to two layers of paragraphs + // covering one half of the timeline with no paragraphs covering the other half. + double average = 0; + + int numberOfVisibleParagraphs = 0; + + foreach (Paragraph p in _paragraphs) + { + if (IsVisible(p) && p.DurationTotalMilliseconds > 0) + { + numberOfVisibleParagraphs++; + average += CalculateVisibleDurationOfParagraph(p); + } + } + //Console.WriteLine($"Found {numberOfVisibleParagraphs} visible paragraphs with total {average} coverage of timeline."); + + return average; + } + + private double CalculateVisibleDurationOfParagraph(Paragraph p) + { + double startClamped = Math.Max(p.StartTime.TotalMilliseconds, _startVisibleMilliseconds); + double endClamped = Math.Min(p.EndTime.TotalMilliseconds, _endVisibleMilliseconds); + return (endClamped - startClamped) / (_endVisibleMilliseconds - _startVisibleMilliseconds); + } + + private double CalculateCoverageInRange(List currentCoverage, double startRange, double endRange) + { + // TODO: This whole method is ugly, needs refactoring. + CoverageRecord startRecord = new CoverageRecord(startRange); + + int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampRecordComparer()); + double weightedCoverage = 0; + double previousTimestamp = startRange; + double previousNumberOfParagraphs = 0; + + if (startIndex < 0) + { + // Start of range has no record, need to add the overlap from the previous record. + startIndex = ~startIndex; + if (startIndex > 0) + { + if (startIndex >= currentCoverage.Count) + { + // The start index comes after all paragraphs have ended, so there can't be any coverage. 
+ return 0; + } + CoverageRecord r = currentCoverage[startIndex - 1]; + previousTimestamp = startRange; + previousNumberOfParagraphs = r.numberOfParagraphs; + } + else + { + // Start happened before the first record - there is no overlap to count. + startIndex = 0; + previousTimestamp = startRange; + previousNumberOfParagraphs = 0; + } + } + else + { + if (startIndex >= currentCoverage.Count) + { + // The start index comes after all paragraphs have ended, so there can't be any coverage. + return 0; + } + CoverageRecord previousRecord = currentCoverage[startIndex]; + previousTimestamp = previousRecord.timestamp; + previousNumberOfParagraphs = previousRecord.numberOfParagraphs; + } + + + if (startIndex < currentCoverage.Count - 1) + { + int currentIndex = startIndex + 1; + while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp < endRange) + { + CoverageRecord currentRecord = currentCoverage[currentIndex]; + weightedCoverage += previousNumberOfParagraphs * (currentRecord.timestamp - previousTimestamp); + + previousTimestamp = currentRecord.timestamp; + previousNumberOfParagraphs = currentRecord.numberOfParagraphs; + currentIndex++; + } + } + + if (previousTimestamp != endRange) + { + // There was no record exactly matching the end range, so there was a little bit left over. + // It is also possible that no start record matched either, so this is calculating the time between startRange and endRange. 
+ weightedCoverage += previousNumberOfParagraphs * (endRange - previousTimestamp); + } + + return weightedCoverage / (endRange - startRange); + } + + private Paragraph ChooseOneParagaph(double averageCoverage,double currentVisibleCoverage, int lowestCoverage, List candidates, List currentCoverage) + { + + double minimumCoverage = double.MaxValue; + int indexOfMinimum = -1; + Paragraph bestParagraph = null; + + for(var i = 0; i < candidates.Count;i++) + { + Paragraph p = candidates[i]; + // TODO: If existing coverage > averageCoverage, allow invisible paragraph. + double existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); + if (existingCoverage < minimumCoverage) + { + if ((currentVisibleCoverage > averageCoverage / 2 && IsInThreshold(p)) || IsVisible(p)) + { + // Prefer visible paragraphs until the visible range has at least reached the average coverage for the range + minimumCoverage = existingCoverage; + bestParagraph = p; + indexOfMinimum = i; + if (existingCoverage <= lowestCoverage) + { + break; + } + } + } + } + if (bestParagraph != null) + { + candidates.RemoveAt(indexOfMinimum); + } + + return bestParagraph; + } + + private Paragraph FindLeastOverlap(SortedDictionary overlaps, List paragraphs) + { + double lowestAverageOverlap = double.MaxValue; + Paragraph leastOverlappingParagraph = null; + + /* + * This algorithm tries to maximize the percentage of the timeline that is covered with paragraphs, so it will tend to cover everything as evenly as possible. + * Algorithm: + * 1. Select a candidate paragraph + * 2. Calculate average length of time this paragraph overlaps with already selected paragraphs. + * 3. If the number of overlaps is equal to the minimum overlap on the whole timeline, return early (this indicates the paragraph has minimum overlap and is a good candidate). + * 4. If not, try the next paragraph + * 5. 
Any time a paragraph is found, update the current overlaps with the new paragraph + * + * As the paragraph list is sorted with longest paragraph first, this ensures it chooses longest paragraphs first, with each new paragraph overlapping as few others as possible. + * This avoids situations where the displayed paragraphs: + * - Always start and end at the same time (stacked many layers deep). + * - Leave gaps between non-overlapping paragraphs (a problem with buckets and wrong bucket size) + * - Tend to become pruned unpredictably, especially when scrolling the timeline: any paragraphs of equal duration are sorted first-to-last + * + * The only drawback is that to the algorithm, there is no difference in priority between a paragraph that starts and ends at the same time as another paragraph vs. + * one that "straddles" two other paragraphs (assuming the two paragraphs have no gap between them). + * + * The only solution is to hope that paragraphs don't start and end at the exact same time... or to add a condition that starting or ending at the same time as + * another paragraph is less preferred. This should only take effect if there is a tie for least overlap, otherwise it may select a paragraph with no common + * start/end times but creating more overlaps. + * + */ + + /* + * Desired algorithm: + * - Compute average number of subtitles covering each second of the visible timeline (sum all paragraph time within the visible range, divide by visible time) + * - Sort all paragraphs by length + * - Select N paragraphs, choose a candidate, starting at the longest paragraph. For each candidate: + * - Compute average number of subtitles already existing for the length of the candidate. (number of subtitles * duration of overlap / duration of candidate) + * - Candidate must be in the visible range, unless the current amount of subtitle coverage is greater than or equal to the total average paragraph coverage generated in step 1. 
+ * - Candidate with the lowest average overlap wins + * - If there is a tie, the candidate with fewest shared start and end time wins (i.e. prefer to not choose stacked subtitles). + * - After choosing a candidate, update the current paragraph coverage. + * + * Benefits: + * - No explicit choice of visible vs. invisible paragraphs + * - Allows choosing more invisible paragraphs when the visible range is exhausted (without risking setting too high of a limit on visible or invisible paragraphs) + * - Will only choose visible paragraphs until enough of them have been selected to cover the visible range, then allows invisible ones to be chosen if they are the best candidate + * - No special treatment or logic for invisible paragraphs (aside from checking that they are visible) + */ + + foreach (Paragraph p in paragraphs) + { + + double start = p.StartTime.TotalMilliseconds; + double end = p.EndTime.TotalMilliseconds; + + // These are guaranteed to exist because all paragraphs have been added to start / end Dictionaries. 
+ int startOverlap = overlaps[start]; + int endOverlap = overlaps[end]; + + SortedDictionary.KeyCollection keys = overlaps.Keys; + List keyList = keys.ToList(); + + int startIndex = keyList.IndexOf(start); + int endIndex = keyList.IndexOf(end); + + double previousTime = start; + double previousOverlaps = overlaps[start]; + double averageOverlap = 0; + + for (int overlapIndex = startIndex + 1; overlapIndex <= endIndex; overlapIndex++) + { + double currentTime = keyList[overlapIndex]; + int currentOverlaps = overlaps[currentTime]; + double timeDelta = currentTime - previousTime; + averageOverlap += previousOverlaps / timeDelta; + + previousOverlaps = currentOverlaps; + previousTime = currentTime; + } + + if (averageOverlap < lowestAverageOverlap) + { + lowestAverageOverlap = averageOverlap; + leastOverlappingParagraph = p; + } + } + + return leastOverlappingParagraph; + } + + public List GetParagraphs(int limit, int invisibleLimit) + { + //Console.WriteLine($"Getting {limit} paragraphs."); + if (limit >= _paragraphs.Count) + { + return _paragraphs; + } + + List result = new List(); + Dictionary coverageCache = new Dictionary(); + + double averageCoverage = CalculateAverageParagraphCoverage(); + double currentVisibleCoverage = 0; + List records = new List(); + // How to figure out what to remove from cache? + + // Ensure that longer paragraphs are preferred. + _paragraphs.Sort(new ParagraphComparer()); + + int lowestCoverage = 0; + + while (result.Count < limit && _paragraphs.Count > 0) + { + Paragraph selection = ChooseOneParagaph(averageCoverage,currentVisibleCoverage,lowestCoverage, _paragraphs, records); + if (selection != null) + { + result.Add(selection); + UpdateCoverageRecords(records, selection); + if (IsVisible(selection)) + { + double coveragePercent = CalculateVisibleDurationOfParagraph(selection); + currentVisibleCoverage += coveragePercent; + //Console.WriteLine($"Paragraph selected, adding {coveragePercent} to current coverage. 
(for a total of {currentVisibleCoverage})"); + lowestCoverage = FindLowestCoverage(records); + } + } + } + + return result; + } + + private void UpdateCoverageRecords(List records, Paragraph newParagraph) + { + int startIndex = CreateAndGetRecordIndex(records, newParagraph.StartTime.TotalMilliseconds); + int endIndex = CreateAndGetRecordIndex(records,newParagraph.EndTime.TotalMilliseconds); + for (int i = startIndex; i < endIndex; i++) + { + records[i].numberOfParagraphs++; + } + } + + private int FindLowestCoverage(List records) + { + int min = int.MaxValue; + foreach (CoverageRecord record in records) + { + if (record.numberOfParagraphs < min) + { + min = record.numberOfParagraphs; + if (min == 0) + { + return 0; + } + } + } + return min; + } + + private int CreateAndGetRecordIndex(List records, double timestamp) + { + CoverageRecord newRecord = new CoverageRecord(timestamp); + + int recordIndex = records.BinarySearch(newRecord, new TimestampRecordComparer()); + if (recordIndex < 0) + { + recordIndex = ~recordIndex; + records.Insert(recordIndex, newRecord); + } + + return recordIndex; + } + + private class CoverageRecord + { + public double timestamp { get; } + public int numberOfParagraphs { get; set; } + private int numberOfStartMarks; + private int numberOfEndMarks; + + public CoverageRecord(double timestamp) + { + this.timestamp = timestamp; + } + + } + + private class TimestampRecordComparer : IComparer + { + public int Compare(CoverageRecord x, CoverageRecord y) + { + return x.timestamp.CompareTo(y.timestamp); + } + } + + /** + * A comparer for paragraphs, prioritizing those that are: + * 1. Longer + * 2. Have a smaller (earlier) start time. + */ + private class ParagraphComparer : IComparer + { + public int Compare(Paragraph x, Paragraph y) + { + // Calculation is (y.duration - x.duration) so that if x.duration is larger, the difference is < 0 and x comes first. 
+ double lengthComparison = y.DurationTotalMilliseconds - x.DurationTotalMilliseconds; + if (lengthComparison > 0) + { + return 1; + }else if (lengthComparison < 0) + { + return -1; + } + + // Calculation is (x.start - y.start) so that if x comes first, difference is < 0. + return Math.Sign(x.StartTime.TotalMilliseconds - y.StartTime.TotalMilliseconds); + } + } + + + } +} diff --git a/src/ui/SubtitleEdit.csproj b/src/ui/SubtitleEdit.csproj index c8c99a2929..43435fa1fc 100644 --- a/src/ui/SubtitleEdit.csproj +++ b/src/ui/SubtitleEdit.csproj @@ -1483,6 +1483,7 @@ + From 3552ac3b6da5c9a7580d98b8bb8fc0fc98c79d0d Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Fri, 15 Sep 2023 19:35:30 -0400 Subject: [PATCH 04/11] Adds a few optimizations to pruning. Caches coverage calculations to avoid iterating over the coverage records again. The results depend on the subtitles currently visible, but it sometimes cuts prune time from around 70 ms to around 35 ms (about 50% faster) and sometimes has little or no effect (66 ms to 50 ms) or even a bit slower (from 70 ms to 72 ms). It is extremely rare to see this, though. It almost always has a noticeable speed gain from caching. Also keep in mind that this is with the caching code executing first. If the runtime is caching the DisplaySubtitleHelper code, the non-caching version would already be at an advantage but it is still slower. This is still in progress and needs cleaning up. 
--- src/ui/Controls/AudioVisualizer.cs | 23 ++++++--- src/ui/Logic/DisplayableSubtitleHelper.cs | 59 +++++++++++++++++++---- 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 3bece35c00..211090bce2 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -437,14 +437,12 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie double startVisibleMilliseconds = _startPositionSeconds * TimeCode.BaseUnit; double endVisibleMilliseconds = EndPositionSeconds * TimeCode.BaseUnit; - double bucketSize = 1 / Configuration.Settings.General.CurrentFrameRate; - List displayableParagraphs = new List(); Dictionary> visibleBuckets = new Dictionary>(); Dictionary> invisibleBuckets = new Dictionary>(); - Stopwatch start = Stopwatch.StartNew(); - DisplayableSubtitleHelper helper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15); + DisplayableSubtitleHelper cachingHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15,true); + DisplayableSubtitleHelper noCachingHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15, false); int visibleParagraphsCount = 0; for (var i = 0; i < subtitle.Paragraphs.Count; i++) @@ -456,19 +454,28 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie continue; } - helper.Add(p); + cachingHelper.Add(p); + noCachingHelper.Add(p); } + Stopwatch cachingTimer = Stopwatch.StartNew(); + List selectedParagraphs = cachingHelper.GetParagraphs(100, 20); + cachingTimer.Stop(); + + Stopwatch noCachingTimer = Stopwatch.StartNew(); + List noCacheSelectedParagraphs = noCachingHelper.GetParagraphs(100, 20); + noCachingTimer.Stop(); + + Console.WriteLine($"Prune time (ms) - Cache: {cachingTimer.ElapsedMilliseconds}\tNo cache: {noCachingTimer.ElapsedMilliseconds}"); + - List 
selectedParagraphs = helper.GetParagraphs(100, 20); _displayableParagraphs.AddRange(selectedParagraphs); + _displayableParagraphs.AddRange(noCacheSelectedParagraphs); // TODO: Just assign to displayable paragraphs //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(visibleBuckets, maxDisplayableParagraphs, visibleParagraphsCount > maxDisplayableParagraphs)); //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(invisibleBuckets, 20, true)); //_displayableParagraphs.AddRange(displayableParagraphs); - start.Stop(); - Console.WriteLine("Prune time (ms): " + start.ElapsedMilliseconds); var primaryParagraph = subtitle.GetParagraphOrDefault(primarySelectedIndex); if (primaryParagraph != null && !primaryParagraph.StartTime.IsMaxTime) diff --git a/src/ui/Logic/DisplayableSubtitleHelper.cs b/src/ui/Logic/DisplayableSubtitleHelper.cs index 32e33b31d7..08ce6151be 100644 --- a/src/ui/Logic/DisplayableSubtitleHelper.cs +++ b/src/ui/Logic/DisplayableSubtitleHelper.cs @@ -23,13 +23,16 @@ internal class DisplayableSubtitleHelper private readonly double _startVisibleMilliseconds; private readonly double _endVisibleMilliseconds; - public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds) + private bool _useCache; + + public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds, bool useCache) { _startThresholdMilliseconds = startMilliseconds - additionalSeconds * 1000; _endThresholdMilliseconds = endMilliseconds + additionalSeconds * 1000; _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; + _useCache = useCache; } public void Add(Paragraph p) @@ -88,6 +91,11 @@ private bool IsInRange(Paragraph p, double start, double end) return p.StartTime.TotalMilliseconds <= end && p.EndTime.TotalMilliseconds >= start; } + private bool ParagraphsOverlap(Paragraph p1, Paragraph p2) + { + return IsInRange(p1, p2.StartTime.TotalMilliseconds, 
p2.EndTime.TotalMilliseconds); + } + private double CalculateAverageParagraphCoverage() { // Average coverage is average number of layers of paragraphs at any moment of the visible timeline. @@ -187,18 +195,31 @@ private double CalculateCoverageInRange(List currentCoverage, do return weightedCoverage / (endRange - startRange); } - private Paragraph ChooseOneParagaph(double averageCoverage,double currentVisibleCoverage, int lowestCoverage, List candidates, List currentCoverage) + private Paragraph ChooseOneParagaph(double averageCoverage, double currentVisibleCoverage, int lowestCoverage, ICollection candidates, + List currentCoverage, Dictionary coverageCache) { double minimumCoverage = double.MaxValue; - int indexOfMinimum = -1; + //int indexOfMinimum = -1; Paragraph bestParagraph = null; - for(var i = 0; i < candidates.Count;i++) + //for(var i = 0; i < candidates.Count;i++) + foreach (Paragraph p in candidates) { - Paragraph p = candidates[i]; - // TODO: If existing coverage > averageCoverage, allow invisible paragraph. 
- double existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); + //Paragraph p = candidates[i]; + double existingCoverage; + if (_useCache) + { + if (!coverageCache.TryGetValue(p, out existingCoverage)) + { + existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); + coverageCache.Add(p, existingCoverage); + } + } + else + { + existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); + } if (existingCoverage < minimumCoverage) { if ((currentVisibleCoverage > averageCoverage / 2 && IsInThreshold(p)) || IsVisible(p)) @@ -206,7 +227,7 @@ private Paragraph ChooseOneParagaph(double averageCoverage,double currentVisible // Prefer visible paragraphs until the visible range has at least reached the average coverage for the range minimumCoverage = existingCoverage; bestParagraph = p; - indexOfMinimum = i; + //indexOfMinimum = i; if (existingCoverage <= lowestCoverage) { break; @@ -216,7 +237,8 @@ private Paragraph ChooseOneParagaph(double averageCoverage,double currentVisible } if (bestParagraph != null) { - candidates.RemoveAt(indexOfMinimum); + candidates.Remove(bestParagraph); + //candidates.RemoveAt(indexOfMinimum); } return bestParagraph; @@ -328,12 +350,13 @@ public List GetParagraphs(int limit, int invisibleLimit) // Ensure that longer paragraphs are preferred. 
_paragraphs.Sort(new ParagraphComparer()); + LinkedList candidates = new LinkedList(_paragraphs); int lowestCoverage = 0; while (result.Count < limit && _paragraphs.Count > 0) { - Paragraph selection = ChooseOneParagaph(averageCoverage,currentVisibleCoverage,lowestCoverage, _paragraphs, records); + Paragraph selection = ChooseOneParagaph(averageCoverage,currentVisibleCoverage,lowestCoverage, candidates, records, coverageCache); if (selection != null) { result.Add(selection); @@ -345,12 +368,28 @@ public List GetParagraphs(int limit, int invisibleLimit) //Console.WriteLine($"Paragraph selected, adding {coveragePercent} to current coverage. (for a total of {currentVisibleCoverage})"); lowestCoverage = FindLowestCoverage(records); } + if (_useCache) + { + invalidateCache(coverageCache, selection.StartTime.TotalMilliseconds, selection.EndTime.TotalMilliseconds); + } } } return result; } + private void invalidateCache(Dictionary coverageCache, double startRange, double endRange) + { + List keys = coverageCache.Keys.ToList(); + foreach (var key in keys) + { + if (IsInRange(key, startRange, endRange)) + { + coverageCache.Remove(key); + } + } + } + private void UpdateCoverageRecords(List records, Paragraph newParagraph) { int startIndex = CreateAndGetRecordIndex(records, newParagraph.StartTime.TotalMilliseconds); From ddcaa3796f4814d1e4d3e51ecf8ef8718a5bf3d5 Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sat, 16 Sep 2023 16:32:16 -0400 Subject: [PATCH 05/11] Fix a bug in calculating paragraph coverage. Was incorrectly using the previous # of paragraphs and the start range, skipping processing the first record. 
--- src/ui/Controls/AudioVisualizer.cs | 21 ++-- src/ui/Logic/DisplayableSubtitleHelper.cs | 125 ++++++++++++---------- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 211090bce2..096540405d 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -441,10 +441,8 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie Dictionary> visibleBuckets = new Dictionary>(); Dictionary> invisibleBuckets = new Dictionary>(); - DisplayableSubtitleHelper cachingHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15,true); - DisplayableSubtitleHelper noCachingHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15, false); + DisplayableSubtitleHelper paragraphHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15); - int visibleParagraphsCount = 0; for (var i = 0; i < subtitle.Paragraphs.Count; i++) { var p = subtitle.Paragraphs[i]; @@ -454,22 +452,15 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie continue; } - cachingHelper.Add(p); - noCachingHelper.Add(p); + paragraphHelper.Add(p); } - Stopwatch cachingTimer = Stopwatch.StartNew(); - List selectedParagraphs = cachingHelper.GetParagraphs(100, 20); - cachingTimer.Stop(); - - Stopwatch noCachingTimer = Stopwatch.StartNew(); - List noCacheSelectedParagraphs = noCachingHelper.GetParagraphs(100, 20); - noCachingTimer.Stop(); - - Console.WriteLine($"Prune time (ms) - Cache: {cachingTimer.ElapsedMilliseconds}\tNo cache: {noCachingTimer.ElapsedMilliseconds}"); + Stopwatch timer = Stopwatch.StartNew(); + List selectedParagraphs = paragraphHelper.GetParagraphs(50); + timer.Stop(); + Console.WriteLine($"Prune time (ms): {timer.ElapsedMilliseconds}"); _displayableParagraphs.AddRange(selectedParagraphs); - 
_displayableParagraphs.AddRange(noCacheSelectedParagraphs); // TODO: Just assign to displayable paragraphs //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(visibleBuckets, maxDisplayableParagraphs, visibleParagraphsCount > maxDisplayableParagraphs)); diff --git a/src/ui/Logic/DisplayableSubtitleHelper.cs b/src/ui/Logic/DisplayableSubtitleHelper.cs index 08ce6151be..144de8fea6 100644 --- a/src/ui/Logic/DisplayableSubtitleHelper.cs +++ b/src/ui/Logic/DisplayableSubtitleHelper.cs @@ -3,11 +3,15 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; +using System.Windows.Forms; namespace Nikse.SubtitleEdit.Logic { internal class DisplayableSubtitleHelper { + private const double VisibleSelectionRequirement = 0.5; + // Map associating a time stamp with a number of paragraphs that start at that time stamp. private readonly Dictionary _startParagraphCounts = new Dictionary(); // Map associating a time stamp with a number of paragraphs that end at that time stamp. @@ -23,23 +27,17 @@ internal class DisplayableSubtitleHelper private readonly double _startVisibleMilliseconds; private readonly double _endVisibleMilliseconds; - private bool _useCache; - - public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds, bool useCache) + public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds) { _startThresholdMilliseconds = startMilliseconds - additionalSeconds * 1000; _endThresholdMilliseconds = endMilliseconds + additionalSeconds * 1000; _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; - _useCache = useCache; } public void Add(Paragraph p) { - // This should take in all paragraphs and figure out which are important enough to render. - // TODO: Filter out visible / invisible paragraphs. 
- if (IsInThreshold(p)) { AddStart(p.StartTime.TotalMilliseconds); @@ -76,21 +74,25 @@ private void AddEnd(double endMilliseconds) _endParagraphCounts[endMilliseconds] = endCount; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsVisible(Paragraph p) { return IsInRange(p, _startVisibleMilliseconds, _endVisibleMilliseconds); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsInThreshold(Paragraph p) { return IsInRange(p, _startThresholdMilliseconds, _endThresholdMilliseconds); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsInRange(Paragraph p, double start, double end) { return p.StartTime.TotalMilliseconds <= end && p.EndTime.TotalMilliseconds >= start; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ParagraphsOverlap(Paragraph p1, Paragraph p2) { return IsInRange(p1, p2.StartTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); @@ -127,50 +129,69 @@ private double CalculateVisibleDurationOfParagraph(Paragraph p) private double CalculateCoverageInRange(List currentCoverage, double startRange, double endRange) { - // TODO: This whole method is ugly, needs refactoring. - CoverageRecord startRecord = new CoverageRecord(startRange); - int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampRecordComparer()); + if (currentCoverage.Count == 0) + { + // There are no coverage records, so by default the answer is 0. + // Prevents array out-of-bounds exceptions as well. + return 0; + } + + double previousTimestamp; + double previousNumberOfParagraphs; + double weightedCoverage = 0; - double previousTimestamp = startRange; - double previousNumberOfParagraphs = 0; + CoverageRecord startRecord = new CoverageRecord(startRange); + int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampRecordComparer()); if (startIndex < 0) { - // Start of range has no record, need to add the overlap from the previous record. 
+ // Start of range has no record, need to build the information from the record we would have found. startIndex = ~startIndex; if (startIndex > 0) { + // TODO: This has a bug. This portion should be calculating the leading coverage. The record at startIndex is skipped over + // and is calculated incorrectly. Also need to take into account that endRange may be before the record at startIndex. if (startIndex >= currentCoverage.Count) { // The start index comes after all paragraphs have ended, so there can't be any coverage. return 0; } - CoverageRecord r = currentCoverage[startIndex - 1]; - previousTimestamp = startRange; - previousNumberOfParagraphs = r.numberOfParagraphs; + // Any start record that would have existed at startIndex would have the same number of paragraphs + // as the previous record. + previousNumberOfParagraphs = currentCoverage[startIndex - 1].numberOfParagraphs; + if (endRange <= currentCoverage[startIndex].timestamp) + { + // The start and end both happen before the same record. Average coverage over the entire range is trivial. + return previousNumberOfParagraphs; + } + weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].timestamp - startRange); } else { - // Start happened before the first record - there is no overlap to count. - startIndex = 0; - previousTimestamp = startRange; + // startIndex is 0. + // The start range is before the first record - there cannot be any paragraph coverage yet. previousNumberOfParagraphs = 0; } + // We are guaranteed to have at least one item in the array because of the checks above. + previousTimestamp = currentCoverage[startIndex].timestamp; } else { + // The start timestamp matches an existing record, so there is no leading coverage to calculate. if (startIndex >= currentCoverage.Count) { - // The start index comes after all paragraphs have ended, so there can't be any coverage. 
+ // We can't combine with the above check because building the previous record data is + // very different depending on the value of startIndex. return 0; } + // Prepare for calculating coverage between existing recods. CoverageRecord previousRecord = currentCoverage[startIndex]; previousTimestamp = previousRecord.timestamp; previousNumberOfParagraphs = previousRecord.numberOfParagraphs; } - + if (startIndex < currentCoverage.Count - 1) { int currentIndex = startIndex + 1; @@ -195,39 +216,32 @@ private double CalculateCoverageInRange(List currentCoverage, do return weightedCoverage / (endRange - startRange); } - private Paragraph ChooseOneParagaph(double averageCoverage, double currentVisibleCoverage, int lowestCoverage, ICollection candidates, + private Paragraph ChooseOneParagaph(double averageCoverage, double currentVisibleCoverage, int lowestCoverage, List candidates, List currentCoverage, Dictionary coverageCache) { double minimumCoverage = double.MaxValue; - //int indexOfMinimum = -1; + int indexOfMinimum = -1; Paragraph bestParagraph = null; - //for(var i = 0; i < candidates.Count;i++) - foreach (Paragraph p in candidates) + for (var i = 0; i < candidates.Count; i++) { - //Paragraph p = candidates[i]; - double existingCoverage; - if (_useCache) + Paragraph p = candidates[i]; + bool paragraphVisible = IsVisible(p); + // Only consider visible paragraphs until a minimum portion of the visible area is covered. 
+ if (currentVisibleCoverage > averageCoverage * VisibleSelectionRequirement || paragraphVisible) { + double existingCoverage; if (!coverageCache.TryGetValue(p, out existingCoverage)) { existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); coverageCache.Add(p, existingCoverage); } - } - else - { - existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); - } - if (existingCoverage < minimumCoverage) - { - if ((currentVisibleCoverage > averageCoverage / 2 && IsInThreshold(p)) || IsVisible(p)) + if (existingCoverage < minimumCoverage) { - // Prefer visible paragraphs until the visible range has at least reached the average coverage for the range minimumCoverage = existingCoverage; bestParagraph = p; - //indexOfMinimum = i; + indexOfMinimum = i; if (existingCoverage <= lowestCoverage) { break; @@ -237,8 +251,7 @@ private double CalculateCoverageInRange(List currentCoverage, do } if (bestParagraph != null) { - candidates.Remove(bestParagraph); - //candidates.RemoveAt(indexOfMinimum); + candidates.RemoveAt(indexOfMinimum); } return bestParagraph; @@ -332,7 +345,7 @@ private Paragraph FindLeastOverlap(SortedDictionary overlaps, List< return leastOverlappingParagraph; } - public List GetParagraphs(int limit, int invisibleLimit) + public List GetParagraphs(int limit) { //Console.WriteLine($"Getting {limit} paragraphs."); if (limit >= _paragraphs.Count) @@ -346,17 +359,15 @@ public List GetParagraphs(int limit, int invisibleLimit) double averageCoverage = CalculateAverageParagraphCoverage(); double currentVisibleCoverage = 0; List records = new List(); - // How to figure out what to remove from cache? // Ensure that longer paragraphs are preferred. 
_paragraphs.Sort(new ParagraphComparer()); - LinkedList candidates = new LinkedList(_paragraphs); int lowestCoverage = 0; while (result.Count < limit && _paragraphs.Count > 0) { - Paragraph selection = ChooseOneParagaph(averageCoverage,currentVisibleCoverage,lowestCoverage, candidates, records, coverageCache); + Paragraph selection = ChooseOneParagaph(averageCoverage, currentVisibleCoverage, lowestCoverage, _paragraphs, records, coverageCache); if (selection != null) { result.Add(selection); @@ -368,32 +379,35 @@ public List GetParagraphs(int limit, int invisibleLimit) //Console.WriteLine($"Paragraph selected, adding {coveragePercent} to current coverage. (for a total of {currentVisibleCoverage})"); lowestCoverage = FindLowestCoverage(records); } - if (_useCache) - { - invalidateCache(coverageCache, selection.StartTime.TotalMilliseconds, selection.EndTime.TotalMilliseconds); - } + InvalidateCacheForParagraph(coverageCache, selection); } } return result; } - private void invalidateCache(Dictionary coverageCache, double startRange, double endRange) + private void InvalidateCacheForParagraph(Dictionary coverageCache, Paragraph p) { - List keys = coverageCache.Keys.ToList(); - foreach (var key in keys) + List keysToRemove = new List(); + foreach (var key in coverageCache.Keys) { - if (IsInRange(key, startRange, endRange)) + if (ParagraphsOverlap(key, p)) { - coverageCache.Remove(key); + // Assume it is faster to save a few items from a longer list and remove them later than it is + // to copy the entire list up front. 
+ keysToRemove.Add(key); } } + foreach (Paragraph key in keysToRemove) + { + coverageCache.Remove(key); + } } private void UpdateCoverageRecords(List records, Paragraph newParagraph) { int startIndex = CreateAndGetRecordIndex(records, newParagraph.StartTime.TotalMilliseconds); - int endIndex = CreateAndGetRecordIndex(records,newParagraph.EndTime.TotalMilliseconds); + int endIndex = CreateAndGetRecordIndex(records, newParagraph.EndTime.TotalMilliseconds); for (int i = startIndex; i < endIndex; i++) { records[i].numberOfParagraphs++; @@ -467,7 +481,8 @@ public int Compare(Paragraph x, Paragraph y) if (lengthComparison > 0) { return 1; - }else if (lengthComparison < 0) + } + else if (lengthComparison < 0) { return -1; } From cf5ba0e37568aa75519d104e68c9f20b535a023d Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sat, 16 Sep 2023 22:14:07 -0400 Subject: [PATCH 06/11] Minor cache improvement and boundary condition checks. Instead of throwing away cache entries when adding new paragraphs, update them with the newly selected paragraph so they can be re-used. In testing, this improved selection speed by about 5-7 percent (but with a standard deviation of up to 19 percent so there is quite a bit of variation here, sometimes making performance worse). 
--- src/ui/Controls/AudioVisualizer.cs | 3 +- src/ui/Logic/DisplayableSubtitleHelper.cs | 94 +++++++++++------------ 2 files changed, 45 insertions(+), 52 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 096540405d..230f528a87 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -455,12 +455,13 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie paragraphHelper.Add(p); } Stopwatch timer = Stopwatch.StartNew(); - List selectedParagraphs = paragraphHelper.GetParagraphs(50); + List selectedParagraphs = paragraphHelper.GetParagraphs(100); timer.Stop(); Console.WriteLine($"Prune time (ms): {timer.ElapsedMilliseconds}"); _displayableParagraphs.AddRange(selectedParagraphs); + //_displayableParagraphs.AddRange(updatingParagraphs); // TODO: Just assign to displayable paragraphs //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(visibleBuckets, maxDisplayableParagraphs, visibleParagraphsCount > maxDisplayableParagraphs)); diff --git a/src/ui/Logic/DisplayableSubtitleHelper.cs b/src/ui/Logic/DisplayableSubtitleHelper.cs index 144de8fea6..312c295ba2 100644 --- a/src/ui/Logic/DisplayableSubtitleHelper.cs +++ b/src/ui/Logic/DisplayableSubtitleHelper.cs @@ -1,4 +1,5 @@ using Nikse.SubtitleEdit.Core.Common; +using Nikse.SubtitleEdit.Core.NetflixQualityCheck; using System; using System.Collections.Generic; using System.Diagnostics; @@ -12,15 +13,8 @@ internal class DisplayableSubtitleHelper { private const double VisibleSelectionRequirement = 0.5; - // Map associating a time stamp with a number of paragraphs that start at that time stamp. - private readonly Dictionary _startParagraphCounts = new Dictionary(); - // Map associating a time stamp with a number of paragraphs that end at that time stamp. 
- private readonly Dictionary _endParagraphCounts = new Dictionary(); - - private readonly List _paragraphs = new List(); - private readonly double _startThresholdMilliseconds; private readonly double _endThresholdMilliseconds; @@ -40,40 +34,10 @@ public void Add(Paragraph p) { if (IsInThreshold(p)) { - AddStart(p.StartTime.TotalMilliseconds); - AddEnd(p.EndTime.TotalMilliseconds); _paragraphs.Add(p); } } - private void AddStart(double startMilliseconds) - { - if (_startParagraphCounts.TryGetValue(startMilliseconds, out int startCount)) - { - startCount++; - } - else - { - startCount = 1; - } - - _startParagraphCounts[startMilliseconds] = startCount; - } - - private void AddEnd(double endMilliseconds) - { - if (_endParagraphCounts.TryGetValue(endMilliseconds, out int endCount)) - { - endCount++; - } - else - { - endCount = 1; - } - - _endParagraphCounts[endMilliseconds] = endCount; - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsVisible(Paragraph p) { @@ -150,8 +114,6 @@ private double CalculateCoverageInRange(List currentCoverage, do startIndex = ~startIndex; if (startIndex > 0) { - // TODO: This has a bug. This portion should be calculating the leading coverage. The record at startIndex is skipped over - // and is calculated incorrectly. Also need to take into account that endRange may be before the record at startIndex. if (startIndex >= currentCoverage.Count) { // The start index comes after all paragraphs have ended, so there can't be any coverage. 
@@ -166,6 +128,7 @@ private double CalculateCoverageInRange(List currentCoverage, do return previousNumberOfParagraphs; } weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].timestamp - startRange); + previousNumberOfParagraphs = currentCoverage[startIndex].numberOfParagraphs; } else { @@ -195,7 +158,7 @@ private double CalculateCoverageInRange(List currentCoverage, do if (startIndex < currentCoverage.Count - 1) { int currentIndex = startIndex + 1; - while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp < endRange) + while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp <= endRange) { CoverageRecord currentRecord = currentCoverage[currentIndex]; weightedCoverage += previousNumberOfParagraphs * (currentRecord.timestamp - previousTimestamp); @@ -219,6 +182,10 @@ private double CalculateCoverageInRange(List currentCoverage, do private Paragraph ChooseOneParagaph(double averageCoverage, double currentVisibleCoverage, int lowestCoverage, List candidates, List currentCoverage, Dictionary coverageCache) { + if (candidates.Count == 0) + { + return null; + } double minimumCoverage = double.MaxValue; int indexOfMinimum = -1; @@ -231,8 +198,7 @@ private double CalculateCoverageInRange(List currentCoverage, do // Only consider visible paragraphs until a minimum portion of the visible area is covered. 
if (currentVisibleCoverage > averageCoverage * VisibleSelectionRequirement || paragraphVisible) { - double existingCoverage; - if (!coverageCache.TryGetValue(p, out existingCoverage)) + if (!coverageCache.TryGetValue(p, out double existingCoverage)) { existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); coverageCache.Add(p, existingCoverage); @@ -240,8 +206,8 @@ private double CalculateCoverageInRange(List currentCoverage, do if (existingCoverage < minimumCoverage) { minimumCoverage = existingCoverage; - bestParagraph = p; indexOfMinimum = i; + bestParagraph = p; if (existingCoverage <= lowestCoverage) { break; @@ -249,10 +215,12 @@ private double CalculateCoverageInRange(List currentCoverage, do } } } - if (bestParagraph != null) + if (bestParagraph == null) { - candidates.RemoveAt(indexOfMinimum); + bestParagraph = candidates[0]; + indexOfMinimum = 0; } + candidates.RemoveAt(indexOfMinimum); return bestParagraph; } @@ -379,31 +347,43 @@ public List GetParagraphs(int limit) //Console.WriteLine($"Paragraph selected, adding {coveragePercent} to current coverage. (for a total of {currentVisibleCoverage})"); lowestCoverage = FindLowestCoverage(records); } - InvalidateCacheForParagraph(coverageCache, selection); + if (result.Count < limit) + { + UpdateCacheForParagraph(coverageCache, selection); + } } } return result; } - private void InvalidateCacheForParagraph(Dictionary coverageCache, Paragraph p) + private void UpdateCacheForParagraph(Dictionary coverageCache, Paragraph p) { - List keysToRemove = new List(); + coverageCache.Remove(p); + List keysToUpdate = new List(); foreach (var key in coverageCache.Keys) { if (ParagraphsOverlap(key, p)) { // Assume it is faster to save a few items from a longer list and remove them later than it is // to copy the entire list up front. 
- keysToRemove.Add(key); + keysToUpdate.Add(key); } } - foreach (Paragraph key in keysToRemove) + foreach (Paragraph key in keysToUpdate) { - coverageCache.Remove(key); + double overlapMillis = CalculateOverlapLength(key, p); + coverageCache[key] += overlapMillis / key.DurationTotalMilliseconds; } } + private double CalculateOverlapLength(Paragraph p1, Paragraph p2) + { + double overlapStart = Math.Max(p1.StartTime.TotalMilliseconds , p2.StartTime.TotalMilliseconds); + double overlapEnd = Math.Min(p1.EndTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); + return overlapEnd - overlapStart; + } + private void UpdateCoverageRecords(List records, Paragraph newParagraph) { int startIndex = CreateAndGetRecordIndex(records, newParagraph.StartTime.TotalMilliseconds); @@ -439,6 +419,13 @@ private int CreateAndGetRecordIndex(List records, double timesta if (recordIndex < 0) { recordIndex = ~recordIndex; + + if(recordIndex > 0) + { + // Carry over the overlap from the previous item to keep layers correct. + newRecord.numberOfParagraphs = records[recordIndex - 1].numberOfParagraphs; + } + records.Insert(recordIndex, newRecord); } @@ -457,6 +444,11 @@ public CoverageRecord(double timestamp) this.timestamp = timestamp; } + public override string ToString() + { + return $"Record - {timestamp} millis / {numberOfParagraphs} paragraphs"; + } + } private class TimestampRecordComparer : IComparer From 0542a3eb8e5cfa301b1e772ef16de37216e2d237 Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sun, 17 Sep 2023 14:31:46 -0400 Subject: [PATCH 07/11] Documentation in progress, small miscellaneous refactoring. 
--- src/ui/Controls/AudioVisualizer.cs | 2 +- ...elper.cs => DisplayableParagraphHelper.cs} | 326 +++++++++--------- src/ui/SubtitleEdit.csproj | 2 +- 3 files changed, 168 insertions(+), 162 deletions(-) rename src/ui/Logic/{DisplayableSubtitleHelper.cs => DisplayableParagraphHelper.cs} (56%) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 230f528a87..2583810330 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -441,7 +441,7 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie Dictionary> visibleBuckets = new Dictionary>(); Dictionary> invisibleBuckets = new Dictionary>(); - DisplayableSubtitleHelper paragraphHelper = new DisplayableSubtitleHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15); + DisplayableParagraphHelper paragraphHelper = new DisplayableParagraphHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15 * TimeCode.BaseUnit); for (var i = 0; i < subtitle.Paragraphs.Count; i++) { diff --git a/src/ui/Logic/DisplayableSubtitleHelper.cs b/src/ui/Logic/DisplayableParagraphHelper.cs similarity index 56% rename from src/ui/Logic/DisplayableSubtitleHelper.cs rename to src/ui/Logic/DisplayableParagraphHelper.cs index 312c295ba2..7764d24503 100644 --- a/src/ui/Logic/DisplayableSubtitleHelper.cs +++ b/src/ui/Logic/DisplayableParagraphHelper.cs @@ -1,5 +1,4 @@ using Nikse.SubtitleEdit.Core.Common; -using Nikse.SubtitleEdit.Core.NetflixQualityCheck; using System; using System.Collections.Generic; using System.Diagnostics; @@ -9,27 +8,83 @@ namespace Nikse.SubtitleEdit.Logic { - internal class DisplayableSubtitleHelper + /** + * + * + * A class that helps determine which paragraphs should be displayed when there may be too many to + * efficiently render on the timeline at the same time. 
+ * + * + * It assumes that: + * + * It is good to select paragraphs slightly outside the visible area so that there is something to see while scrolling, + * but the timeline will be stationary most of the time, so it is better to select paragraphs that are currently visible. + * It is more useful to have paragraphs that cover a large area of the timeline rather than a small area with 10 paragraphs layered on top of each other. + * There are situtations where paragraphs may overlap, but it is useful to see most or all of them + * (such as dialogue shown at the same time as a paragraph shown next to text in the video). + * More predictable behavior is better - pruning a large paragraph is more noticeable than pruning a small one. + * + * + * + * Therefore, this class aims to maximize the amount of coverage of the timeline. + * Non-overlapping paragraphs are preferred first to prevent a stack of overlapping paragraphs with a large blank space. Paragraphs outside the visible area are only + * choosen once enough visible paragraphs have been chosen, to prevent a blank timeline. This class may select paragraphs that are very close together, if all other preferred + * paragraphs have been chosen already. + * + * + */ + internal class DisplayableParagraphHelper { + /// + /// The percentage of the visible timeline that must be covered by paragraphs before a paragraph outside the visible area may be chosen. + /// + /// Note that this is cumulative: two paragraphs stacked on top of each other count exactly the same as the same two paragraphs with no overlap. + /// + /// private const double VisibleSelectionRequirement = 0.5; + /// + /// Paragraphs that may be chosen when requested later. + /// private readonly List _paragraphs = new List(); + /// + /// The beginning of the invisible area that paragraphs may be chosen from to improve scrolling. + /// private readonly double _startThresholdMilliseconds; + /// + /// The end of the invisible area. 
+ /// private readonly double _endThresholdMilliseconds; + /// + /// The beginning of the visible area of the timeline. + /// private readonly double _startVisibleMilliseconds; + /// + /// The end of the visible area of the timeline. + /// private readonly double _endVisibleMilliseconds; - public DisplayableSubtitleHelper(double startMilliseconds, double endMilliseconds, double additionalSeconds) + /// + /// Creates a new displayable paragraph helper that will choose paragraphs between the start and end time. + /// + /// The start of the visible area of the timeline in milliseconds. + /// The end of the visible area of the timeline in milliseconds. + /// Additional time outside of the visible area to include, to improve rendering while scrolling. + public DisplayableParagraphHelper(double startMilliseconds, double endMilliseconds, double additionalMilliseconds) { - _startThresholdMilliseconds = startMilliseconds - additionalSeconds * 1000; - _endThresholdMilliseconds = endMilliseconds + additionalSeconds * 1000; + _startThresholdMilliseconds = startMilliseconds - additionalMilliseconds; + _endThresholdMilliseconds = endMilliseconds + additionalMilliseconds; _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; } + /// + /// Adds a paragraph to the pool of available paragraphs the helper will choose from. + /// + /// public void Add(Paragraph p) { if (IsInThreshold(p)) @@ -38,60 +93,102 @@ public void Add(Paragraph p) } } + /// + /// Determines whether the paragraph is in the visible area. + /// + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsVisible(Paragraph p) { return IsInRange(p, _startVisibleMilliseconds, _endVisibleMilliseconds); } + /// + /// Determines whether the paragraph is visible in the area just outside the visible area. + /// Note that a paragraph that passes this test may also be in the visible area, so + /// !IsInThreshold(p) is not necessarily the same as IsVisible(p). 
+ /// + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool IsInThreshold(Paragraph p) { return IsInRange(p, _startThresholdMilliseconds, _endThresholdMilliseconds); } + /// + /// Determines whether any portion of a paragraph is within the start and end range. + /// + /// + /// Start time of range, in milliseconds. + /// End time of range, in milliseconds. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool IsInRange(Paragraph p, double start, double end) + private bool IsInRange(Paragraph p, double startMilliseconds, double endMilliseconds) { - return p.StartTime.TotalMilliseconds <= end && p.EndTime.TotalMilliseconds >= start; + return p.StartTime.TotalMilliseconds <= endMilliseconds && p.EndTime.TotalMilliseconds >= startMilliseconds; } + /// + /// Determines whether two paragraphs overlap. + /// + /// + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ParagraphsOverlap(Paragraph p1, Paragraph p2) { return IsInRange(p1, p2.StartTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); } + /** + * Calculates the average number of layers of paragraphs in the visible portion of the timeline. + * Two paragraphs covering the left half of the timeline count the same as a single paragraph covering the whole time. + * This has the benefit of being easier to calculate, and allows coverage percentage to be built up over time (when selecting paragraphs). + */ private double CalculateAverageParagraphCoverage() { - // Average coverage is average number of layers of paragraphs at any moment of the visible timeline. - // A single paragraph covering the entire visible timeline is equivalent to two layers of paragraphs - // covering one half of the timeline with no paragraphs covering the other half. 
double average = 0; - - int numberOfVisibleParagraphs = 0; - foreach (Paragraph p in _paragraphs) { if (IsVisible(p) && p.DurationTotalMilliseconds > 0) { - numberOfVisibleParagraphs++; - average += CalculateVisibleDurationOfParagraph(p); + average += CalculateVisiblePercentOfTimeline(p); } } - //Console.WriteLine($"Found {numberOfVisibleParagraphs} visible paragraphs with total {average} coverage of timeline."); - return average; } - private double CalculateVisibleDurationOfParagraph(Paragraph p) + /// + /// Calculates the percent of the visible timeline that a paragraph is visible. + /// If the paragraph is partially invisible, parts outside the visible area are not considered. + /// + /// + /// + private double CalculateVisiblePercentOfTimeline(Paragraph p) { double startClamped = Math.Max(p.StartTime.TotalMilliseconds, _startVisibleMilliseconds); double endClamped = Math.Min(p.EndTime.TotalMilliseconds, _endVisibleMilliseconds); return (endClamped - startClamped) / (_endVisibleMilliseconds - _startVisibleMilliseconds); } - private double CalculateCoverageInRange(List currentCoverage, double startRange, double endRange) + /// + /// Calculates the amount of paragraph coverage in the given range. + /// + /// Amount of coverage is defined as the number of layers of paragraphs times the duration of the overlap, divided by + /// the duration of the paragraph. + /// This is computed piecewise - a time range that contains a single paragraph for 1 millisecond and no paragraphs + /// for the other 1000 milliseconds will be very close to 0. + /// A time range half covered with one paragraph and half covered with two paragraphs will have a coverage of 1.5. + /// + /// + /// + /// An ordered list indicating how many paragraphs are visible at each location on the timeline. + /// + /// + /// The average amount of paragraph coverage in the given range. This is a floating point number greater than 0. 
+ private double CalculateCoverageInRange(List currentCoverage, double startMillis, double endMillis) { if (currentCoverage.Count == 0) @@ -104,9 +201,10 @@ private double CalculateCoverageInRange(List currentCoverage, do double previousTimestamp; double previousNumberOfParagraphs; + // The sum of number of paragraphs times the duration of the overlap. double weightedCoverage = 0; - CoverageRecord startRecord = new CoverageRecord(startRange); + CoverageRecord startRecord = new CoverageRecord(startMillis); int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampRecordComparer()); if (startIndex < 0) { @@ -122,12 +220,12 @@ private double CalculateCoverageInRange(List currentCoverage, do // Any start record that would have existed at startIndex would have the same number of paragraphs // as the previous record. previousNumberOfParagraphs = currentCoverage[startIndex - 1].numberOfParagraphs; - if (endRange <= currentCoverage[startIndex].timestamp) + if (endMillis <= currentCoverage[startIndex].timestamp) { // The start and end both happen before the same record. Average coverage over the entire range is trivial. return previousNumberOfParagraphs; } - weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].timestamp - startRange); + weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].timestamp - startMillis); previousNumberOfParagraphs = currentCoverage[startIndex].numberOfParagraphs; } else @@ -148,7 +246,6 @@ private double CalculateCoverageInRange(List currentCoverage, do // very different depending on the value of startIndex. return 0; } - // Prepare for calculating coverage between existing recods. 
CoverageRecord previousRecord = currentCoverage[startIndex]; previousTimestamp = previousRecord.timestamp; previousNumberOfParagraphs = previousRecord.numberOfParagraphs; @@ -158,7 +255,7 @@ private double CalculateCoverageInRange(List currentCoverage, do if (startIndex < currentCoverage.Count - 1) { int currentIndex = startIndex + 1; - while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp <= endRange) + while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp <= endMillis) { CoverageRecord currentRecord = currentCoverage[currentIndex]; weightedCoverage += previousNumberOfParagraphs * (currentRecord.timestamp - previousTimestamp); @@ -169,27 +266,32 @@ private double CalculateCoverageInRange(List currentCoverage, do } } - if (previousTimestamp != endRange) + if (previousTimestamp != endMillis) { // There was no record exactly matching the end range, so there was a little bit left over. - // It is also possible that no start record matched either, so this is calculating the time between startRange and endRange. - weightedCoverage += previousNumberOfParagraphs * (endRange - previousTimestamp); + weightedCoverage += previousNumberOfParagraphs * (endMillis - previousTimestamp); } - return weightedCoverage / (endRange - startRange); + return weightedCoverage / (endMillis - startMillis); } - private Paragraph ChooseOneParagaph(double averageCoverage, double currentVisibleCoverage, int lowestCoverage, List candidates, - List currentCoverage, Dictionary coverageCache) + /// + /// Chooses the best paragraph from the list of candidate paragraphs. + /// + /// The total coverage of the visible timeline, to prefer visible paragraphs + /// until the timeline has enough coverage. + /// The current coverage of the visible timeline. If this is high enough, + /// invisible paragraphs may be chosen. 
+ /// The minimum amount of coverage in the + /// + /// + /// + /// + private int GetBestParagraphIndex(double averageCoverage, double currentVisibleCoverage, double lowestCoverage, + List candidates, List currentCoverage, Dictionary coverageCache) { - if (candidates.Count == 0) - { - return null; - } - double minimumCoverage = double.MaxValue; int indexOfMinimum = -1; - Paragraph bestParagraph = null; for (var i = 0; i < candidates.Count; i++) { @@ -207,110 +309,15 @@ private double CalculateCoverageInRange(List currentCoverage, do { minimumCoverage = existingCoverage; indexOfMinimum = i; - bestParagraph = p; if (existingCoverage <= lowestCoverage) { - break; + return indexOfMinimum; } } } } - if (bestParagraph == null) - { - bestParagraph = candidates[0]; - indexOfMinimum = 0; - } - candidates.RemoveAt(indexOfMinimum); - return bestParagraph; - } - - private Paragraph FindLeastOverlap(SortedDictionary overlaps, List paragraphs) - { - double lowestAverageOverlap = double.MaxValue; - Paragraph leastOverlappingParagraph = null; - - /* - * This algorithm tries to maximize the percentage of the timeline that is covered with paragraphs, so it will tend to cover everything as evenly as possible. - * Algorithm: - * 1. Select a candidate paragraph - * 2. Calculate average length of time this paragraph overlaps with already selected paragraphs. - * 3. If the number of overlaps is equal to the minimum overlap on the whole timeline, return early (this indicates the paragraph has minimum overlap and is a good candidate). - * 4. If not, try the next paragraph - * 5. Any time a paragraph is found, update the current overlaps with the new paragraph - * - * As the paragraph list is sorted with longest paragraph first, this ensures it chooses longest paragraphs first, with each new paragraph overlapping as few others as possible. - * This avoids situations where the displayed paragraphs: - * - Always start and end at the same time (stacked many layers deep). 
- * - Leave gaps between non-overlapping paragraphs (a problem with buckets and wrong bucket size) - * - Tend to become pruned unpredictably, especially when scrolling the timeline: any paragraphs of equal duration are sorted first-to-last - * - * The only drawback is that to the algorithm, there is no difference in priority between a paragraph that starts and ends at the same time as another paragraph vs. - * one that "straddles" two other paragraphs (assuming the two paragraphs have no gap between them). - * - * The only solution is to hope that paragraphs don't start and end at the exact same time... or to add a condition that starting or ending at the same time as - * another paragraph is less preferred. This should only take effect if there is a tie for least overlap, otherwise it may select a paragraph with no common - * start/end times but creating more overlaps. - * - */ - - /* - * Desired algorithm: - * - Compute average number of subtitles covering each second of the visible timeline (sum all paragraph time within the visible range, divide by visible time) - * - Sort all paragraphs by length - * - Select N paragraphs, choose a candidate, starting at the longest paragraph. For each candidate: - * - Compute average number of subtitles already existing for the length of the candidate. (number of subtitles * duration of overlap / duration of candidate) - * - Candidate must be in the visible range, unless the current amount of subtitle coverage is greater than or equal to the total average paragraph coverage generated in step 1. - * - Candidate with the lowest average overlap wins - * - If there is a tie, the candidate with fewest shared start and end time wins (i.e. prefer to not choose stacked subtitles). - * - After choosing a candidate, update the current paragraph coverage. - * - * Benefits: - * - No explicit choice of visible vs. 
invisible paragraphs - * - Allows choosing more invisible paragraphs when the visible range is exhausted (without risking setting too high of a limit on visible or invisible paragraphs) - * - Will only choose visible paragraphs until enough of them have been selected to cover the visible range, then allows invisible ones to be chosen if they are the best candidate - * - No special treatment or logic for invisible paragraphs (aside from checking that they are visible) - */ - - foreach (Paragraph p in paragraphs) - { - - double start = p.StartTime.TotalMilliseconds; - double end = p.EndTime.TotalMilliseconds; - - // These are guaranteed to exist because all paragraphs have been added to start / end Dictionaries. - int startOverlap = overlaps[start]; - int endOverlap = overlaps[end]; - - SortedDictionary.KeyCollection keys = overlaps.Keys; - List keyList = keys.ToList(); - - int startIndex = keyList.IndexOf(start); - int endIndex = keyList.IndexOf(end); - - double previousTime = start; - double previousOverlaps = overlaps[start]; - double averageOverlap = 0; - - for (int overlapIndex = startIndex + 1; overlapIndex <= endIndex; overlapIndex++) - { - double currentTime = keyList[overlapIndex]; - int currentOverlaps = overlaps[currentTime]; - double timeDelta = currentTime - previousTime; - averageOverlap += previousOverlaps / timeDelta; - - previousOverlaps = currentOverlaps; - previousTime = currentTime; - } - - if (averageOverlap < lowestAverageOverlap) - { - lowestAverageOverlap = averageOverlap; - leastOverlappingParagraph = p; - } - } - - return leastOverlappingParagraph; + return indexOfMinimum; } public List GetParagraphs(int limit) @@ -326,26 +333,30 @@ public List GetParagraphs(int limit) double averageCoverage = CalculateAverageParagraphCoverage(); double currentVisibleCoverage = 0; - List records = new List(); + List records = new List(limit * 2); // Ensure that longer paragraphs are preferred. 
_paragraphs.Sort(new ParagraphComparer()); - int lowestCoverage = 0; + double lowestCoverage = 0; while (result.Count < limit && _paragraphs.Count > 0) { - Paragraph selection = ChooseOneParagaph(averageCoverage, currentVisibleCoverage, lowestCoverage, _paragraphs, records, coverageCache); - if (selection != null) + int bestParagraphIndex = GetBestParagraphIndex(averageCoverage, currentVisibleCoverage, lowestCoverage, _paragraphs, records, coverageCache); + if (bestParagraphIndex != -1) { + Paragraph selection = _paragraphs[bestParagraphIndex]; + _paragraphs.RemoveAt(bestParagraphIndex); + lowestCoverage = coverageCache[selection]; + result.Add(selection); UpdateCoverageRecords(records, selection); if (IsVisible(selection)) { - double coveragePercent = CalculateVisibleDurationOfParagraph(selection); + double coveragePercent = CalculateVisiblePercentOfTimeline(selection); currentVisibleCoverage += coveragePercent; //Console.WriteLine($"Paragraph selected, adding {coveragePercent} to current coverage. (for a total of {currentVisibleCoverage})"); - lowestCoverage = FindLowestCoverage(records); + //lowestCoverage = 0; // FindLowestCoverage(records); } if (result.Count < limit) { @@ -357,17 +368,29 @@ public List GetParagraphs(int limit) return result; } + /// + /// Given a paragraph that has just been selected, find cached coverage values for paragraphs that overlap with the + /// new paragraph and adjust the value. + /// + /// This is an O(1) operation per cache value, versus an O(n log n) operation to calculate from scratch. + /// However, updating the cache requires scanning through all cached paragraphs (worst case is equal to + /// the number of paragraphs being loaded), while calculating from scratch only requires a binary search + /// followed by a linear search through all paragraphs selected so far. If the cache is really full, updating + /// it can be pretty slow. 
+ /// + /// + /// + /// private void UpdateCacheForParagraph(Dictionary coverageCache, Paragraph p) { + // The paragraph has already been selected, so we no longer need its cache entry. coverageCache.Remove(p); List keysToUpdate = new List(); - foreach (var key in coverageCache.Keys) + foreach (Paragraph key in coverageCache.Keys) { if (ParagraphsOverlap(key, p)) { - // Assume it is faster to save a few items from a longer list and remove them later than it is - // to copy the entire list up front. - keysToUpdate.Add(key); + keysToUpdate.Add(key); } } foreach (Paragraph key in keysToUpdate) @@ -394,23 +417,6 @@ private void UpdateCoverageRecords(List records, Paragraph newPa } } - private int FindLowestCoverage(List records) - { - int min = int.MaxValue; - foreach (CoverageRecord record in records) - { - if (record.numberOfParagraphs < min) - { - min = record.numberOfParagraphs; - if (min == 0) - { - return 0; - } - } - } - return min; - } - private int CreateAndGetRecordIndex(List records, double timestamp) { CoverageRecord newRecord = new CoverageRecord(timestamp); diff --git a/src/ui/SubtitleEdit.csproj b/src/ui/SubtitleEdit.csproj index 43435fa1fc..186a354a57 100644 --- a/src/ui/SubtitleEdit.csproj +++ b/src/ui/SubtitleEdit.csproj @@ -1483,7 +1483,7 @@ - + From 7bbe665e3d65da7394bcf0453fdd2995e5d9463f Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sun, 17 Sep 2023 21:45:15 -0400 Subject: [PATCH 08/11] Improve paragraph pruning by grouping paragraphs into partitions. This reduces the seek time when updating cache entries and can take the average prune time from around 45 ms to 29 or 36 ms on a debug build (depending on the number of partitions). 
--- src/ui/Logic/DisplayableParagraphHelper.cs | 132 +++++++++++++++++++-- 1 file changed, 125 insertions(+), 7 deletions(-) diff --git a/src/ui/Logic/DisplayableParagraphHelper.cs b/src/ui/Logic/DisplayableParagraphHelper.cs index 7764d24503..247dd35741 100644 --- a/src/ui/Logic/DisplayableParagraphHelper.cs +++ b/src/ui/Logic/DisplayableParagraphHelper.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Linq; using System.Runtime.CompilerServices; +using System.Security.Cryptography.X509Certificates; using System.Windows.Forms; namespace Nikse.SubtitleEdit.Logic @@ -48,6 +49,8 @@ internal class DisplayableParagraphHelper /// private readonly List _paragraphs = new List(); + private TimelinePartition paragraphPartition; + /// /// The beginning of the invisible area that paragraphs may be chosen from to improve scrolling. /// @@ -79,6 +82,8 @@ public DisplayableParagraphHelper(double startMilliseconds, double endMillisecon _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; + + paragraphPartition = new TimelinePartition(_startThresholdMilliseconds, _endThresholdMilliseconds, 100); } /// @@ -90,6 +95,7 @@ public void Add(Paragraph p) if (IsInThreshold(p)) { _paragraphs.Add(p); + paragraphPartition.Add(p); } } @@ -251,7 +257,7 @@ private double CalculateCoverageInRange(List currentCoverage, do previousNumberOfParagraphs = previousRecord.numberOfParagraphs; } - + if (startIndex < currentCoverage.Count - 1) { int currentIndex = startIndex + 1; @@ -386,23 +392,41 @@ private void UpdateCacheForParagraph(Dictionary coverageCache // The paragraph has already been selected, so we no longer need its cache entry. 
coverageCache.Remove(p); List keysToUpdate = new List(); - foreach (Paragraph key in coverageCache.Keys) + HashSet partitionedParagraphs = paragraphPartition.GetPartitionedParagraphs(p); + partitionedParagraphs.Remove(p); + //partitionedParagraphs.IntersectWith(coverageCache.Keys); + + foreach (Paragraph key in partitionedParagraphs) { if (ParagraphsOverlap(key, p)) { - keysToUpdate.Add(key); + keysToUpdate.Add(key); } } + + //List realKeysToUpdate = new List(); + //foreach (Paragraph key in coverageCache.Keys) + //{ + // if (ParagraphsOverlap(key, p)) + // { + // realKeysToUpdate.Add(key); + // } + //} + foreach (Paragraph key in keysToUpdate) { - double overlapMillis = CalculateOverlapLength(key, p); - coverageCache[key] += overlapMillis / key.DurationTotalMilliseconds; + if (coverageCache.TryGetValue(key, out double coverage)) + { + double overlapMillis = CalculateOverlapLength(key, p); + coverageCache[key] = coverage + overlapMillis / key.DurationTotalMilliseconds; + } } } + private double CalculateOverlapLength(Paragraph p1, Paragraph p2) { - double overlapStart = Math.Max(p1.StartTime.TotalMilliseconds , p2.StartTime.TotalMilliseconds); + double overlapStart = Math.Max(p1.StartTime.TotalMilliseconds, p2.StartTime.TotalMilliseconds); double overlapEnd = Math.Min(p1.EndTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); return overlapEnd - overlapStart; } @@ -426,7 +450,7 @@ private int CreateAndGetRecordIndex(List records, double timesta { recordIndex = ~recordIndex; - if(recordIndex > 0) + if (recordIndex > 0) { // Carry over the overlap from the previous item to keep layers correct. 
newRecord.numberOfParagraphs = records[recordIndex - 1].numberOfParagraphs; @@ -490,6 +514,100 @@ public int Compare(Paragraph x, Paragraph y) } } + private class TimelinePartition + { + + private double _startMillis; + private double _endMillis; + private int _partitionCount; + + private HashSet[] _partitions; + + public TimelinePartition(double startMillis, double endMillis, int partitionCount) + { + _startMillis = startMillis; + _endMillis = endMillis; + _partitionCount = partitionCount; + + _partitions = new HashSet[_partitionCount]; + } + + public void Add(Paragraph p) + { + PartitionRange insertRange = GetPartitionRange(p); + for (var i = insertRange.StartIndex; i <= insertRange.EndIndex; i++) + { + if (_partitions[i] == null) + { + _partitions[i] = new HashSet(); + } + _partitions[i].Add(p); + } + } + + public HashSet GetPartitionedParagraphs(Paragraph p) + { + PartitionRange range = GetPartitionRange(p); + HashSet result = new HashSet(); + for (var i = range.StartIndex; i < range.EndIndex; i++) + { + HashSet partition = _partitions[i]; + if (partition != null) + { + result.UnionWith(partition); + } + } + return result; + } + + private int GetPartitionNumber(double timestampMillis, bool roundUp) + { + double timeSpan = _endMillis - _startMillis; + double partitionWidth = timeSpan / _partitionCount; + double partitionNumberFraction = (timestampMillis - _startMillis) / partitionWidth; + int partitionNumber; + if (roundUp) + { + partitionNumber = (int)Math.Ceiling(partitionNumberFraction); + } + else + { + partitionNumber = (int)Math.Floor(partitionNumberFraction); + } + + if (partitionNumber < 0) + { + partitionNumber = 0; + } + else if (partitionNumber >= _partitionCount) + { + partitionNumber = _partitionCount - 1; + } + + return partitionNumber; + } + + public PartitionRange GetPartitionRange(Paragraph p) + { + int startPartition = GetPartitionNumber(p.StartTime.TotalMilliseconds, false); + int endPartition = GetPartitionNumber(p.EndTime.TotalMilliseconds, 
true); + return new PartitionRange(startPartition, endPartition); + } + + + public class PartitionRange + { + public PartitionRange(int startIndex, int endIndex) + { + StartIndex = startIndex; + EndIndex = endIndex; + } + public int StartIndex { get; } + public int EndIndex { get; } + } + + } + } } From b7a44c9b382bf11ef1be9894139af862bd6bad12 Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Sun, 17 Sep 2023 22:25:13 -0400 Subject: [PATCH 09/11] Clean up old or unused code in displayable paragraph helper / related portion of audio visualizer. --- src/ui/Controls/AudioVisualizer.cs | 65 +--------------------- src/ui/Logic/DisplayableParagraphHelper.cs | 53 +++++------------- 2 files changed, 17 insertions(+), 101 deletions(-) diff --git a/src/ui/Controls/AudioVisualizer.cs b/src/ui/Controls/AudioVisualizer.cs index 2583810330..aebfb5fb34 100644 --- a/src/ui/Controls/AudioVisualizer.cs +++ b/src/ui/Controls/AudioVisualizer.cs @@ -429,19 +429,11 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie return; } - const int maxDisplayableParagraphs = 100; - const double additionalSeconds = 15.0; // Helps when scrolling - var startThresholdMilliseconds = (_startPositionSeconds - additionalSeconds) * TimeCode.BaseUnit; - var endThresholdMilliseconds = (EndPositionSeconds + additionalSeconds) * TimeCode.BaseUnit; - double startVisibleMilliseconds = _startPositionSeconds * TimeCode.BaseUnit; double endVisibleMilliseconds = EndPositionSeconds * TimeCode.BaseUnit; - List displayableParagraphs = new List(); - Dictionary> visibleBuckets = new Dictionary>(); - Dictionary> invisibleBuckets = new Dictionary>(); - - DisplayableParagraphHelper paragraphHelper = new DisplayableParagraphHelper(startVisibleMilliseconds, endVisibleMilliseconds, 15 * TimeCode.BaseUnit); + DisplayableParagraphHelper paragraphHelper = new DisplayableParagraphHelper( + startVisibleMilliseconds, endVisibleMilliseconds, 15 * 
TimeCode.BaseUnit); for (var i = 0; i < subtitle.Paragraphs.Count; i++) { @@ -454,19 +446,8 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie paragraphHelper.Add(p); } - Stopwatch timer = Stopwatch.StartNew(); List selectedParagraphs = paragraphHelper.GetParagraphs(100); - timer.Stop(); - - Console.WriteLine($"Prune time (ms): {timer.ElapsedMilliseconds}"); - _displayableParagraphs.AddRange(selectedParagraphs); - //_displayableParagraphs.AddRange(updatingParagraphs); - - // TODO: Just assign to displayable paragraphs - //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(visibleBuckets, maxDisplayableParagraphs, visibleParagraphsCount > maxDisplayableParagraphs)); - //displayableParagraphs.AddRange(SelectParagraphsFromBuckets(invisibleBuckets, 20, true)); - //_displayableParagraphs.AddRange(displayableParagraphs); var primaryParagraph = subtitle.GetParagraphOrDefault(primarySelectedIndex); @@ -486,48 +467,6 @@ private void LoadParagraphs(Subtitle subtitle, int primarySelectedIndex, ListVie } } - private List SelectParagraphsFromBuckets(Dictionary> buckets, int numberOfParagraphs, bool pruneShortParagraphs) - { - foreach (List bucket in buckets.Values) - { - // Sort buckets with longest paragraphs first. 
- bucket.Sort((first, second) => { return (int)(second.DurationTotalSeconds - first.DurationTotalSeconds); }); - } - - List result = new List(); - while (result.Count < numberOfParagraphs && buckets.Count > 0) - { - List keys = buckets.Keys.ToList(); - //// Iterate over keys evenly spread over the timeline - //keys.Sort((a, b) => a % numberOfParagraphs - b % numberOfParagraphs); - foreach (int key in keys) - { - List bucket = buckets[key]; - Paragraph p; - while (bucket.Count > 0 && result.Count < numberOfParagraphs) - { - p = bucket[0]; - bucket.RemoveAt(0); - - if (pruneShortParagraphs && p.DurationTotalMilliseconds < 0.01) - { - continue; - } - - result.Add(p); - break; - } - - if (bucket.Count == 0) - { - buckets.Remove(key); - } - } - } - return result; - } - - public void SetPosition(double startPositionSeconds, Subtitle subtitle, double currentVideoPositionSeconds, int subtitleIndex, ListView.SelectedIndexCollection selectedIndexes) { if (TimeSpan.FromTicks(DateTime.UtcNow.Ticks - _lastMouseWheelScroll).TotalSeconds > 0.25) diff --git a/src/ui/Logic/DisplayableParagraphHelper.cs b/src/ui/Logic/DisplayableParagraphHelper.cs index 247dd35741..2dd1a54565 100644 --- a/src/ui/Logic/DisplayableParagraphHelper.cs +++ b/src/ui/Logic/DisplayableParagraphHelper.cs @@ -1,11 +1,7 @@ using Nikse.SubtitleEdit.Core.Common; using System; using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; using System.Runtime.CompilerServices; -using System.Security.Cryptography.X509Certificates; -using System.Windows.Forms; namespace Nikse.SubtitleEdit.Logic { @@ -23,7 +19,8 @@ namespace Nikse.SubtitleEdit.Logic * It is more useful to have paragraphs that cover a large area of the timeline rather than a small area with 10 paragraphs layered on top of each other. * There are situtations where paragraphs may overlap, but it is useful to see most or all of them * (such as dialogue shown at the same time as a paragraph shown next to text in the video). 
- * More predictable behavior is better - pruning a large paragraph is more noticeable than pruning a small one. + * More predictable behavior is better - pruning a large paragraph is more noticeable than pruning a small one, and the visible paragraphs should stay + * constant as much as possible while scrolling. * * * @@ -49,7 +46,7 @@ internal class DisplayableParagraphHelper /// private readonly List _paragraphs = new List(); - private TimelinePartition paragraphPartition; + private TimelinePartition _cachedParagraphPartitions; /// /// The beginning of the invisible area that paragraphs may be chosen from to improve scrolling. @@ -83,7 +80,7 @@ public DisplayableParagraphHelper(double startMilliseconds, double endMillisecon _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; - paragraphPartition = new TimelinePartition(_startThresholdMilliseconds, _endThresholdMilliseconds, 100); + _cachedParagraphPartitions = new TimelinePartition(_startThresholdMilliseconds, _endThresholdMilliseconds, 500); } /// @@ -95,7 +92,7 @@ public void Add(Paragraph p) if (IsInThreshold(p)) { _paragraphs.Add(p); - paragraphPartition.Add(p); + //paragraphPartition.Add(p); } } @@ -310,6 +307,7 @@ private double CalculateCoverageInRange(List currentCoverage, do { existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); coverageCache.Add(p, existingCoverage); + _cachedParagraphPartitions.Add(p); } if (existingCoverage < minimumCoverage) { @@ -328,7 +326,6 @@ private double CalculateCoverageInRange(List currentCoverage, do public List GetParagraphs(int limit) { - //Console.WriteLine($"Getting {limit} paragraphs."); if (limit >= _paragraphs.Count) { return _paragraphs; @@ -361,8 +358,6 @@ public List GetParagraphs(int limit) { double coveragePercent = CalculateVisiblePercentOfTimeline(selection); currentVisibleCoverage += coveragePercent; - //Console.WriteLine($"Paragraph selected, adding 
{coveragePercent} to current coverage. (for a total of {currentVisibleCoverage})"); - //lowestCoverage = 0; // FindLowestCoverage(records); } if (result.Count < limit) { @@ -379,43 +374,25 @@ public List GetParagraphs(int limit) /// new paragraph and adjust the value. /// /// This is an O(1) operation per cache value, versus an O(n log n) operation to calculate from scratch. - /// However, updating the cache requires scanning through all cached paragraphs (worst case is equal to - /// the number of paragraphs being loaded), while calculating from scratch only requires a binary search - /// followed by a linear search through all paragraphs selected so far. If the cache is really full, updating - /// it can be pretty slow. + /// As the cache fills up, the number of values to update increases, making the scan for overlapping paragraphs slower. + /// This is optimized by checking only those paragraphs from the same partition as the newly added paragraph. This + /// is a much smaller set than the set of all cached paragraphs. /// /// /// /// private void UpdateCacheForParagraph(Dictionary coverageCache, Paragraph p) { - // The paragraph has already been selected, so we no longer need its cache entry. - coverageCache.Remove(p); - List keysToUpdate = new List(); - HashSet partitionedParagraphs = paragraphPartition.GetPartitionedParagraphs(p); + HashSet partitionedParagraphs = _cachedParagraphPartitions.GetPartitionedParagraphs(p); + // We don't want to update the cache entry for the selected paragraph on this iteration or any further iterations. 
partitionedParagraphs.Remove(p); - //partitionedParagraphs.IntersectWith(coverageCache.Keys); + coverageCache.Remove(p); foreach (Paragraph key in partitionedParagraphs) { - if (ParagraphsOverlap(key, p)) - { - keysToUpdate.Add(key); - } - } - - //List realKeysToUpdate = new List(); - //foreach (Paragraph key in coverageCache.Keys) - //{ - // if (ParagraphsOverlap(key, p)) - // { - // realKeysToUpdate.Add(key); - // } - //} - - foreach (Paragraph key in keysToUpdate) - { - if (coverageCache.TryGetValue(key, out double coverage)) + // The partition may contain paragraphs that have been selected and evicted from the cache, + // so this isn't guaranteed to exist. + if (ParagraphsOverlap(key, p) && coverageCache.TryGetValue(key, out double coverage)) { double overlapMillis = CalculateOverlapLength(key, p); coverageCache[key] = coverage + overlapMillis / key.DurationTotalMilliseconds; From 8312971eabd8160ba1544241764dbdd4e3bc16dc Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Mon, 18 Sep 2023 13:42:22 -0400 Subject: [PATCH 10/11] Cleanup of displayable paragraph helper. Finish documentation of methods, rename variables, refactor parameter order. Generally tries to be more consistent with itself and C# style guidlelines. --- src/ui/Logic/DisplayableParagraphHelper.cs | 403 +++++++++++++-------- 1 file changed, 247 insertions(+), 156 deletions(-) diff --git a/src/ui/Logic/DisplayableParagraphHelper.cs b/src/ui/Logic/DisplayableParagraphHelper.cs index 2dd1a54565..9c0c24d2f5 100644 --- a/src/ui/Logic/DisplayableParagraphHelper.cs +++ b/src/ui/Logic/DisplayableParagraphHelper.cs @@ -16,17 +16,19 @@ namespace Nikse.SubtitleEdit.Logic * * It is good to select paragraphs slightly outside the visible area so that there is something to see while scrolling, * but the timeline will be stationary most of the time, so it is better to select paragraphs that are currently visible. 
- * It is more useful to have paragraphs that cover a large area of the timeline rather than a small area with 10 paragraphs layered on top of each other. + * It is more useful to have paragraphs that cover a large area of the timeline rather than a small area with 10 paragraphs + * layered on top of each other. * There are situtations where paragraphs may overlap, but it is useful to see most or all of them * (such as dialogue shown at the same time as a paragraph shown next to text in the video). - * More predictable behavior is better - pruning a large paragraph is more noticeable than pruning a small one, and the visible paragraphs should stay - * constant as much as possible while scrolling. + * More predictable behavior is better - pruning a large paragraph is more noticeable than pruning a small one, and the + * visible paragraphs should stay constant as much as possible while scrolling. * * * * Therefore, this class aims to maximize the amount of coverage of the timeline. - * Non-overlapping paragraphs are preferred first to prevent a stack of overlapping paragraphs with a large blank space. Paragraphs outside the visible area are only - * choosen once enough visible paragraphs have been chosen, to prevent a blank timeline. This class may select paragraphs that are very close together, if all other preferred + * Non-overlapping paragraphs are preferred first to prevent a stack of overlapping paragraphs with a large blank space. + * Paragraphs outside the visible area are only choosen once enough visible paragraphs have been chosen, to prevent a blank timeline. + * This class may select paragraphs that are very close together if all other preferred * paragraphs have been chosen already. * * @@ -41,19 +43,24 @@ internal class DisplayableParagraphHelper /// private const double VisibleSelectionRequirement = 0.5; + /// + /// How many partitions to divide processed paragraphs into. 
This helps reduce the number of paragraphs processed each time + /// the cache needs to be updated. The number is somewhat arbitrary, with similar results obtained with both 100 and 1000 + /// partitions (though performance was better with a larger partition count). + /// + private const int NumberOfPartitions = 500; + /// /// Paragraphs that may be chosen when requested later. /// private readonly List _paragraphs = new List(); - private TimelinePartition _cachedParagraphPartitions; - /// /// The beginning of the invisible area that paragraphs may be chosen from to improve scrolling. /// private readonly double _startThresholdMilliseconds; /// - /// The end of the invisible area. + /// The end of the invisible area of the timeline to consider. /// private readonly double _endThresholdMilliseconds; @@ -67,7 +74,7 @@ internal class DisplayableParagraphHelper private readonly double _endVisibleMilliseconds; /// - /// Creates a new displayable paragraph helper that will choose paragraphs between the start and end time. + /// Creates a new displayable paragraph helper that will choose paragraphs between the start and end time, with some additional padding on either side. /// /// The start of the visible area of the timeline in milliseconds. /// The end of the visible area of the timeline in milliseconds. @@ -79,8 +86,6 @@ public DisplayableParagraphHelper(double startMilliseconds, double endMillisecon _startVisibleMilliseconds = startMilliseconds; _endVisibleMilliseconds = endMilliseconds; - - _cachedParagraphPartitions = new TimelinePartition(_startThresholdMilliseconds, _endThresholdMilliseconds, 500); } /// @@ -92,10 +97,79 @@ public void Add(Paragraph p) if (IsInThreshold(p)) { _paragraphs.Add(p); - //paragraphPartition.Add(p); } } + /// + /// Gets up to a maximum number of paragraphs from the displayable paragraph helper. 
Paragraphs retrieved will provide + /// the best overall coverage of the range provided in the constructor, avoiding heavily layered areas until the rest + /// of the timeline is well covered. + /// + /// + /// + public List GetParagraphs(int limit) + { + if (limit >= _paragraphs.Count) + { + return _paragraphs; + } + + // Ensure that longer paragraphs are preferred. + _paragraphs.Sort(new ParagraphComparer()); + + // Remember the average amount of paragraph overlap for the duration of each paragraph. + var coverageCache = new Dictionary(); + + // Improve efficiency of updating cache as the cache grows. + var cachedParagraphPartitions = new TimelineMap( + _startThresholdMilliseconds, _endThresholdMilliseconds, NumberOfPartitions); + + // Remember how many layers of paragraphs exist at the start and end time of each processed paragraph. + // This needs to be a regular list because we need the BinarySearch method that returns the + // proper index for new items and we need to be able to iterate over subsequent entries. + // A SortedList doesn't allow this, and a dictionary is less efficient to iterate over. + var records = new List(limit * 2); + + double totalParagraphCoverage = CalculateAverageParagraphCoverage(); + var currentVisibleCoverage = 0d; + var lowestParagraphOverlap = 0d; + + var result = new List(); + while (result.Count < limit && _paragraphs.Count > 0) + { + int bestParagraphIndex = GetBestParagraphIndex( + _paragraphs, totalParagraphCoverage, currentVisibleCoverage, lowestParagraphOverlap, records, + coverageCache, cachedParagraphPartitions); + + if (bestParagraphIndex == -1) + { + // This shouldn't happen, but choose the first paragraph just in case. 
+ bestParagraphIndex = 0; + } + Paragraph selection = _paragraphs[bestParagraphIndex]; + _paragraphs.RemoveAt(bestParagraphIndex); + lowestParagraphOverlap = coverageCache[selection]; + + result.Add(selection); + UpdateTimestampLayers(records, selection); + // Update running total of coverage when the paragraph is visible. + if (IsVisible(selection)) + { + double coveragePercent = CalculateVisiblePercentOfTimeline(selection); + currentVisibleCoverage += coveragePercent; + } + + // Update cache if this isn't the last loop. + if (result.Count < limit) + { + UpdateCacheForParagraph(selection, coverageCache, cachedParagraphPartitions); + } + } + + return result; + } + + /// /// Determines whether the paragraph is in the visible area. /// @@ -120,6 +194,18 @@ private bool IsInThreshold(Paragraph p) return IsInRange(p, _startThresholdMilliseconds, _endThresholdMilliseconds); } + /// + /// Determines whether two paragraphs overlap. + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool ParagraphsOverlap(Paragraph p1, Paragraph p2) + { + return IsInRange(p1, p2.StartTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); + } + /// /// Determines whether any portion of a paragraph is within the start and end range. /// @@ -134,22 +220,13 @@ private bool IsInRange(Paragraph p, double startMilliseconds, double endMillisec } /// - /// Determines whether two paragraphs overlap. + /// Calculates the average number of layers of paragraphs at any time in the visible portion of the timeline. + /// + /// For example, two paragraphs covering the left half of the timeline count the same as a single paragraph + /// covering the whole timeline. This has the benefit of being easier to calculate, and allows + /// coverage percentage to be built up over time (when selecting paragraphs). 
+ /// /// - /// - /// - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool ParagraphsOverlap(Paragraph p1, Paragraph p2) - { - return IsInRange(p1, p2.StartTime.TotalMilliseconds, p2.EndTime.TotalMilliseconds); - } - - /** - * Calculates the average number of layers of paragraphs in the visible portion of the timeline. - * Two paragraphs covering the left half of the timeline count the same as a single paragraph covering the whole time. - * This has the benefit of being easier to calculate, and allows coverage percentage to be built up over time (when selecting paragraphs). - */ private double CalculateAverageParagraphCoverage() { double average = 0; @@ -188,10 +265,10 @@ private double CalculateVisiblePercentOfTimeline(Paragraph p) /// /// /// An ordered list indicating how many paragraphs are visible at each location on the timeline. - /// - /// - /// The average amount of paragraph coverage in the given range. This is a floating point number greater than 0. - private double CalculateCoverageInRange(List currentCoverage, double startMillis, double endMillis) + /// Start of range, in milliseconds + /// End of range, in milliseconds + /// The average amount of paragraph coverage in the given range. This is a floating point number greater than or equal to 0. + private double CalculateCoverageInRange(List currentCoverage, double startMillis, double endMillis) { if (currentCoverage.Count == 0) @@ -207,8 +284,8 @@ private double CalculateCoverageInRange(List currentCoverage, do // The sum of number of paragraphs times the duration of the overlap. 
double weightedCoverage = 0; - CoverageRecord startRecord = new CoverageRecord(startMillis); - int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampRecordComparer()); + var startRecord = new TimestampLayerEntry(startMillis); + int startIndex = currentCoverage.BinarySearch(startRecord, new TimestampEntryComparer()); if (startIndex < 0) { // Start of range has no record, need to build the information from the record we would have found. @@ -222,14 +299,14 @@ private double CalculateCoverageInRange(List currentCoverage, do } // Any start record that would have existed at startIndex would have the same number of paragraphs // as the previous record. - previousNumberOfParagraphs = currentCoverage[startIndex - 1].numberOfParagraphs; - if (endMillis <= currentCoverage[startIndex].timestamp) + previousNumberOfParagraphs = currentCoverage[startIndex - 1].NumberOfParagraphs; + if (endMillis <= currentCoverage[startIndex].TimestampMillis) { // The start and end both happen before the same record. Average coverage over the entire range is trivial. return previousNumberOfParagraphs; } - weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].timestamp - startMillis); - previousNumberOfParagraphs = currentCoverage[startIndex].numberOfParagraphs; + weightedCoverage = previousNumberOfParagraphs * (currentCoverage[startIndex].TimestampMillis - startMillis); + previousNumberOfParagraphs = currentCoverage[startIndex].NumberOfParagraphs; } else { @@ -238,7 +315,7 @@ private double CalculateCoverageInRange(List currentCoverage, do previousNumberOfParagraphs = 0; } // We are guaranteed to have at least one item in the array because of the checks above. - previousTimestamp = currentCoverage[startIndex].timestamp; + previousTimestamp = currentCoverage[startIndex].TimestampMillis; } else { @@ -249,22 +326,23 @@ private double CalculateCoverageInRange(List currentCoverage, do // very different depending on the value of startIndex. 
return 0; } - CoverageRecord previousRecord = currentCoverage[startIndex]; - previousTimestamp = previousRecord.timestamp; - previousNumberOfParagraphs = previousRecord.numberOfParagraphs; + TimestampLayerEntry previousRecord = currentCoverage[startIndex]; + previousTimestamp = previousRecord.TimestampMillis; + previousNumberOfParagraphs = previousRecord.NumberOfParagraphs; } + // Add weighted overlap for segments in the middle of the range. if (startIndex < currentCoverage.Count - 1) { int currentIndex = startIndex + 1; - while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].timestamp <= endMillis) + while (currentIndex < currentCoverage.Count && currentCoverage[currentIndex].TimestampMillis <= endMillis) { - CoverageRecord currentRecord = currentCoverage[currentIndex]; - weightedCoverage += previousNumberOfParagraphs * (currentRecord.timestamp - previousTimestamp); + TimestampLayerEntry currentRecord = currentCoverage[currentIndex]; + weightedCoverage += previousNumberOfParagraphs * (currentRecord.TimestampMillis - previousTimestamp); - previousTimestamp = currentRecord.timestamp; - previousNumberOfParagraphs = currentRecord.numberOfParagraphs; + previousTimestamp = currentRecord.TimestampMillis; + previousNumberOfParagraphs = currentRecord.NumberOfParagraphs; currentIndex++; } } @@ -281,92 +359,48 @@ private double CalculateCoverageInRange(List currentCoverage, do /// /// Chooses the best paragraph from the list of candidate paragraphs. /// - /// The total coverage of the visible timeline, to prefer visible paragraphs + /// The total coverage of the visible timeline, in order to prefer visible paragraphs /// until the timeline has enough coverage. /// The current coverage of the visible timeline. If this is high enough, /// invisible paragraphs may be chosen. - /// The minimum amount of coverage in the + /// The minimum amount of coverage in the timeline, so that this can exit early if such a minimum is found. 
/// - /// + /// /// /// - private int GetBestParagraphIndex(double averageCoverage, double currentVisibleCoverage, double lowestCoverage, - List candidates, List currentCoverage, Dictionary coverageCache) + private int GetBestParagraphIndex(List candidates, double totalVisibleCoverage, double currentVisibleCoverage, + double coverageThreshold, List timestampLayerCounts, Dictionary coverageCache, TimelineMap partitions) { - double minimumCoverage = double.MaxValue; - int indexOfMinimum = -1; + var currentMinimumCoverage = double.MaxValue; + var bestParagraphIndex = -1; for (var i = 0; i < candidates.Count; i++) { Paragraph p = candidates[i]; bool paragraphVisible = IsVisible(p); // Only consider visible paragraphs until a minimum portion of the visible area is covered. - if (currentVisibleCoverage > averageCoverage * VisibleSelectionRequirement || paragraphVisible) + if (currentVisibleCoverage > totalVisibleCoverage * VisibleSelectionRequirement || paragraphVisible) { if (!coverageCache.TryGetValue(p, out double existingCoverage)) { - existingCoverage = CalculateCoverageInRange(currentCoverage, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); + existingCoverage = CalculateCoverageInRange(timestampLayerCounts, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds); coverageCache.Add(p, existingCoverage); - _cachedParagraphPartitions.Add(p); + partitions.Add(p); } - if (existingCoverage < minimumCoverage) + // A better paragraph has fewer paragraphs already in that location on the timeline. 
+ if (existingCoverage < currentMinimumCoverage) { - minimumCoverage = existingCoverage; - indexOfMinimum = i; - if (existingCoverage <= lowestCoverage) + currentMinimumCoverage = existingCoverage; + bestParagraphIndex = i; + if (existingCoverage <= coverageThreshold) { - return indexOfMinimum; + return bestParagraphIndex; } } } } - return indexOfMinimum; - } - - public List GetParagraphs(int limit) - { - if (limit >= _paragraphs.Count) - { - return _paragraphs; - } - - List result = new List(); - Dictionary coverageCache = new Dictionary(); - - double averageCoverage = CalculateAverageParagraphCoverage(); - double currentVisibleCoverage = 0; - List records = new List(limit * 2); - - // Ensure that longer paragraphs are preferred. - _paragraphs.Sort(new ParagraphComparer()); - - double lowestCoverage = 0; - - while (result.Count < limit && _paragraphs.Count > 0) - { - int bestParagraphIndex = GetBestParagraphIndex(averageCoverage, currentVisibleCoverage, lowestCoverage, _paragraphs, records, coverageCache); - if (bestParagraphIndex != -1) - { - Paragraph selection = _paragraphs[bestParagraphIndex]; - _paragraphs.RemoveAt(bestParagraphIndex); - lowestCoverage = coverageCache[selection]; - - result.Add(selection); - UpdateCoverageRecords(records, selection); - if (IsVisible(selection)) - { - double coveragePercent = CalculateVisiblePercentOfTimeline(selection); - currentVisibleCoverage += coveragePercent; - } - if (result.Count < limit) - { - UpdateCacheForParagraph(coverageCache, selection); - } - } - } - - return result; + return bestParagraphIndex; } /// @@ -379,11 +413,12 @@ public List GetParagraphs(int limit) /// is a much smaller set than the set of all cached paragraphs. 
/// /// - /// /// - private void UpdateCacheForParagraph(Dictionary coverageCache, Paragraph p) + /// + /// + private void UpdateCacheForParagraph(Paragraph p, Dictionary coverageCache, TimelineMap partitions) { - HashSet partitionedParagraphs = _cachedParagraphPartitions.GetPartitionedParagraphs(p); + HashSet partitionedParagraphs = partitions.GetParagraphsNearParagraph(p); // We don't want to update the cache entry for the selected paragraph on this iteration or any further iterations. partitionedParagraphs.Remove(p); coverageCache.Remove(p); @@ -400,7 +435,6 @@ private void UpdateCacheForParagraph(Dictionary coverageCache } } - private double CalculateOverlapLength(Paragraph p1, Paragraph p2) { double overlapStart = Math.Max(p1.StartTime.TotalMilliseconds, p2.StartTime.TotalMilliseconds); @@ -408,21 +442,34 @@ private double CalculateOverlapLength(Paragraph p1, Paragraph p2) return overlapEnd - overlapStart; } - private void UpdateCoverageRecords(List records, Paragraph newParagraph) + /// + /// Updates the timestamp layer list, adding new entries for the paragraph in the correct location. + /// + /// + /// + private void UpdateTimestampLayers(List records, Paragraph addedParagraph) { - int startIndex = CreateAndGetRecordIndex(records, newParagraph.StartTime.TotalMilliseconds); - int endIndex = CreateAndGetRecordIndex(records, newParagraph.EndTime.TotalMilliseconds); - for (int i = startIndex; i < endIndex; i++) + int startIndex = CreateAndGetRecordIndex(records, addedParagraph.StartTime.TotalMilliseconds); + int endIndex = CreateAndGetRecordIndex(records, addedParagraph.EndTime.TotalMilliseconds); + for (var i = startIndex; i < endIndex; i++) { - records[i].numberOfParagraphs++; + records[i].NumberOfParagraphs++; } } - private int CreateAndGetRecordIndex(List records, double timestamp) + /// + /// Creates a new TimestampLayerEntry at the given time if required and inserts into the layer entry list. 
+ /// If a new entry was created, updates the number of paragraphs property to match the previous item. + /// + /// + /// + /// The index of the TimestampLayerEntry that corresponds to this timestamp, either an existing + /// entry or a newly created one. + private int CreateAndGetRecordIndex(List records, double timestamp) { - CoverageRecord newRecord = new CoverageRecord(timestamp); + TimestampLayerEntry newRecord = new TimestampLayerEntry(timestamp); - int recordIndex = records.BinarySearch(newRecord, new TimestampRecordComparer()); + int recordIndex = records.BinarySearch(newRecord, new TimestampEntryComparer()); if (recordIndex < 0) { recordIndex = ~recordIndex; @@ -430,7 +477,7 @@ private int CreateAndGetRecordIndex(List records, double timesta if (recordIndex > 0) { // Carry over the overlap from the previous item to keep layers correct. - newRecord.numberOfParagraphs = records[recordIndex - 1].numberOfParagraphs; + newRecord.NumberOfParagraphs = records[recordIndex - 1].NumberOfParagraphs; } records.Insert(recordIndex, newRecord); @@ -439,38 +486,44 @@ private int CreateAndGetRecordIndex(List records, double timesta return recordIndex; } - private class CoverageRecord + /// + /// A class that stores the number of paragraph layers at an instant in time. 
+ /// + private class TimestampLayerEntry { - public double timestamp { get; } - public int numberOfParagraphs { get; set; } - private int numberOfStartMarks; - private int numberOfEndMarks; + public double TimestampMillis { get; } + public int NumberOfParagraphs { get; set; } - public CoverageRecord(double timestamp) + public TimestampLayerEntry(double milliseconds) { - this.timestamp = timestamp; + TimestampMillis = milliseconds; } public override string ToString() { - return $"Record - {timestamp} millis / {numberOfParagraphs} paragraphs"; + return $"Record - {TimestampMillis} millis / {NumberOfParagraphs} paragraphs"; } } - private class TimestampRecordComparer : IComparer + /// + /// An IComparer that compares TimestampLayerEntry objects solely based on the timestamp. + /// + private class TimestampEntryComparer : IComparer { - public int Compare(CoverageRecord x, CoverageRecord y) + public int Compare(TimestampLayerEntry x, TimestampLayerEntry y) { - return x.timestamp.CompareTo(y.timestamp); + return x.TimestampMillis.CompareTo(y.TimestampMillis); } } - /** - * A comparer for paragraphs, prioritizing those that are: - * 1. Longer - * 2. Have a smaller (earlier) start time. - */ + /// + /// A comparer for paragraphs, prioritizing those that are: + /// + /// Longer + /// Have a smaller (earlier) start time. + /// + /// private class ParagraphComparer : IComparer { public int Compare(Paragraph x, Paragraph y) @@ -491,24 +544,38 @@ public int Compare(Paragraph x, Paragraph y) } } - private class TimelinePartition + /// + /// A class that provides efficient access to a set of paragraphs near another paragraph + /// by slicing up the timeline into equally sized partitions and grouping paragraphs into those partitions. + /// + /// This is helpful when scanning for collisions between a large number of paragraphs, limiting + /// the search to those that are nearby on the timeline.
+ /// + /// + private class TimelineMap { - private double _startMillis; - private double _endMillis; - private int _partitionCount; + private readonly double _startMillis; + private readonly double _endMillis; + private readonly int _partitionCount; - private HashSet[] _partitions; + private readonly double _partitionWidth; + private readonly HashSet[] _partitions; - public TimelinePartition(double startMillis, double endMillis, int partitionCount) + public TimelineMap(double startMillis, double endMillis, int partitionCount) { _startMillis = startMillis; _endMillis = endMillis; _partitionCount = partitionCount; + _partitionWidth = (_endMillis - _startMillis) / _partitionCount; _partitions = new HashSet[_partitionCount]; } + /// + /// Adds a paragraph to the set. + /// + /// public void Add(Paragraph p) { PartitionRange insertRange = GetPartitionRange(p); @@ -522,11 +589,20 @@ public void Add(Paragraph p) } } - public HashSet GetPartitionedParagraphs(Paragraph p) + /// + /// Gets the set of all paragraphs that are near the given paragraph. + /// + /// The paragraphs returned are not guaranteed to overlap the paragraph; + /// they instead occupy at least one slice of the timeline that the paragraph does. + /// + /// + /// + /// + public HashSet GetParagraphsNearParagraph(Paragraph p) { PartitionRange range = GetPartitionRange(p); HashSet result = new HashSet(); - for (var i = range.StartIndex; i < range.EndIndex; i++) + for (var i = range.StartIndex; i <= range.EndIndex; i++) { HashSet partition = _partitions[i]; if (partition != null) @@ -537,11 +613,31 @@ public HashSet GetPartitionedParagraphs(Paragraph p) return result; } + /// + /// Gets a range of partition indices that a paragraph occupies.
+ /// + /// + /// + private PartitionRange GetPartitionRange(Paragraph p) + { + int startPartition = GetPartitionNumber(p.StartTime.TotalMilliseconds, false); + int endPartition = GetPartitionNumber(p.EndTime.TotalMilliseconds, true); + return new PartitionRange(startPartition, endPartition); + } + + /// + /// Calculates which partition a particular timestamp falls into, rounding up or down to + /// assign the correct index to the start or end of a time range. + /// + /// The index returned is guaranteed to be within the partition array. + /// + /// + /// The timestamp, in milliseconds. + /// + /// private int GetPartitionNumber(double timestampMillis, bool roundUp) { - double timeSpan = _endMillis - _startMillis; - double partitionWidth = timeSpan / _partitionCount; - double partitionNumberFraction = (timestampMillis - _startMillis) / partitionWidth; + double partitionNumberFraction = (timestampMillis - _startMillis) / _partitionWidth; int partitionNumber; if (roundUp) { @@ -564,15 +660,10 @@ private int GetPartitionNumber(double timestampMillis, bool roundUp) return partitionNumber; } - public PartitionRange GetPartitionRange(Paragraph p) - { - int startPartition = GetPartitionNumber(p.StartTime.TotalMilliseconds, false); - int endPartition = GetPartitionNumber(p.EndTime.TotalMilliseconds, true); - return new PartitionRange(startPartition, endPartition); - } - - - public class PartitionRange + /// + /// A range of partition indices that can be indexed in the partition array. + /// + private class PartitionRange { public PartitionRange(int startIndex, int endIndex) { From 62d5e2cd3dd74bcc32c0ecd00d9355fa128a56c3 Mon Sep 17 00:00:00 2001 From: JonSchram <7951615+JonSchram@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:23:06 -0400 Subject: [PATCH 11/11] Adds tests for DisplayableParagraphHelper. 
--- .../Logic/DisplayableParagraphHelperTest.cs | 137 ++++++++++++++++++ src/Test/Test.csproj | 1 + src/ui/Logic/DisplayableParagraphHelper.cs | 2 +- 3 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/Test/Logic/DisplayableParagraphHelperTest.cs diff --git a/src/Test/Logic/DisplayableParagraphHelperTest.cs b/src/Test/Logic/DisplayableParagraphHelperTest.cs new file mode 100644 index 0000000000..3a86dee8ab --- /dev/null +++ b/src/Test/Logic/DisplayableParagraphHelperTest.cs @@ -0,0 +1,137 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Nikse.SubtitleEdit.Core.Common; +using Nikse.SubtitleEdit.Logic; +using System.Collections.Generic; + +namespace Test.Logic +{ + [TestClass] + public class DisplayableParagraphHelperTest + { + + /// + /// Tests that the longest paragraph is selected when neither has any overlap. + /// + [TestMethod] + public void GetLongestParagraphTest() + { + var paragraphs = new List() + { + new Paragraph("Longer", TimeCode.ParseToMilliseconds("00:00:10,500"), TimeCode.ParseToMilliseconds("00:00:15,000")), + new Paragraph("Shorter", TimeCode.ParseToMilliseconds("00:00:20,000"), TimeCode.ParseToMilliseconds("00:00:21,000")) + }; + DisplayableParagraphHelper helper = new DisplayableParagraphHelper(TimeCode.ParseToMilliseconds("00:00:00,000"), TimeCode.ParseToMilliseconds("00:00:30,000"), 1000); + AddAllParagraphs(helper, paragraphs); + + List selectedParagraphs = helper.GetParagraphs(1); + Assert.AreEqual(1, selectedParagraphs.Count); + + Assert.AreEqual("Longer", selectedParagraphs[0].Text); + } + + /// + /// Tests that the paragraph without overlap is chosen when the alternative is completely overlapped by a longer paragraph. 
+ /// + [TestMethod] + public void GetLeastOverlappingParagraphTest() + { + var paragraphs = new List() + { + new Paragraph("Outer", TimeCode.ParseToMilliseconds("00:00:5,000"), TimeCode.ParseToMilliseconds("00:00:15,000")), + new Paragraph("Inner", TimeCode.ParseToMilliseconds("00:00:10,000"), TimeCode.ParseToMilliseconds("00:00:11,000")), + new Paragraph("Second", TimeCode.ParseToMilliseconds("00:00:20,000"), TimeCode.ParseToMilliseconds("00:00:21,000")), + }; + DisplayableParagraphHelper helper = new DisplayableParagraphHelper(TimeCode.ParseToMilliseconds("00:00:00,000"), TimeCode.ParseToMilliseconds("00:00:30,000"), 1000); + AddAllParagraphs(helper, paragraphs); + + List selectedParagraphs = helper.GetParagraphs(2); + Assert.AreEqual(2, selectedParagraphs.Count); + + Assert.AreEqual("Outer", selectedParagraphs[0].Text); + Assert.AreEqual("Second", selectedParagraphs[1].Text); + } + + /// + /// Tests that a paragraph that partially overlaps another paragraph is chosen when the alternative completely overlaps another paragraph. 
+ /// + [TestMethod] + public void GetPartiallyOverlappingTest() + { + var paragraphs = new List() + { + new Paragraph("Outer", TimeCode.ParseToMilliseconds("00:00:5,000"), TimeCode.ParseToMilliseconds("00:00:15,000")), + new Paragraph("Inner", TimeCode.ParseToMilliseconds("00:00:07,000"), TimeCode.ParseToMilliseconds("00:00:10,000")), + new Paragraph("Partial", TimeCode.ParseToMilliseconds("00:00:14,000"), TimeCode.ParseToMilliseconds("00:00:16,000")), + }; + DisplayableParagraphHelper helper = new DisplayableParagraphHelper(TimeCode.ParseToMilliseconds("00:00:00,000"), TimeCode.ParseToMilliseconds("00:00:30,000"), 1000); + AddAllParagraphs(helper, paragraphs); + + List selectedParagraphs = helper.GetParagraphs(2); + Assert.AreEqual(2, selectedParagraphs.Count); + + Assert.AreEqual("Outer", selectedParagraphs[0].Text); + Assert.AreEqual("Partial", selectedParagraphs[1].Text); + } + + /// + /// Tests that consecutive paragraphs can be chosen (starting and ending at the same time). + /// + [TestMethod] + public void GetConsecutiveParagraphsTest() + { + List paragraphs = CreateConsecutiveParagraphs(1); + DisplayableParagraphHelper helper = new DisplayableParagraphHelper(TimeCode.ParseToMilliseconds("00:00:00,000"), TimeCode.ParseToMilliseconds("00:00:30,000"), 1000); + AddAllParagraphs(helper, paragraphs); + + List selectedParagraphs = helper.GetParagraphs(3); + + Assert.AreEqual(3, selectedParagraphs.Count); + Assert.AreEqual("P1 L1", selectedParagraphs[0].Text); + Assert.AreEqual("P2 L1", selectedParagraphs[1].Text); + Assert.AreEqual("P3 L1", selectedParagraphs[2].Text); + } + + /// + /// Tests that only a single layer of paragraphs will be chosen when all paragraphs overlap in a layer 3 deep. 
+ /// + [TestMethod] + public void GetSingleOverlapLayerTest() + { + var paragraphs = new List(); + paragraphs.AddRange(CreateConsecutiveParagraphs(1)); + paragraphs.AddRange(CreateConsecutiveParagraphs(2)); + paragraphs.AddRange(CreateConsecutiveParagraphs(3)); + DisplayableParagraphHelper helper = new DisplayableParagraphHelper(TimeCode.ParseToMilliseconds("00:00:00,000"), TimeCode.ParseToMilliseconds("00:00:30,000"), 1000); + AddAllParagraphs(helper, paragraphs); + + List selectedParagraphs = helper.GetParagraphs(4); + + Assert.AreEqual(4, selectedParagraphs.Count); + Assert.IsTrue(selectedParagraphs[0].Text.StartsWith("P1")); + Assert.IsTrue(selectedParagraphs[1].Text.StartsWith("P2")); + Assert.IsTrue(selectedParagraphs[2].Text.StartsWith("P3")); + Assert.IsTrue(selectedParagraphs[3].Text.StartsWith("P4")); + } + + private List CreateConsecutiveParagraphs(int layerNumber) + { + var paragraphs = new List() + { + new Paragraph($"P1 L{layerNumber}", TimeCode.ParseToMilliseconds("00:00:2,500"), TimeCode.ParseToMilliseconds("00:00:3,000")), + new Paragraph($"P2 L{layerNumber}", TimeCode.ParseToMilliseconds("00:00:3,000"), TimeCode.ParseToMilliseconds("00:00:3,500")), + new Paragraph($"P3 L{layerNumber}", TimeCode.ParseToMilliseconds("00:00:3,500"), TimeCode.ParseToMilliseconds("00:00:4,000")), + new Paragraph($"P4 L{layerNumber}", TimeCode.ParseToMilliseconds("00:00:4,000"), TimeCode.ParseToMilliseconds("00:00:4,500")), + }; + return paragraphs; + } + + private void AddAllParagraphs(DisplayableParagraphHelper helper, List paragraphs) + { + foreach (var paragraph in paragraphs) + { + helper.Add(paragraph); + } + } + + } +} diff --git a/src/Test/Test.csproj b/src/Test/Test.csproj index bb5bfae6af..8b10737d6d 100644 --- a/src/Test/Test.csproj +++ b/src/Test/Test.csproj @@ -72,6 +72,7 @@ + diff --git a/src/ui/Logic/DisplayableParagraphHelper.cs b/src/ui/Logic/DisplayableParagraphHelper.cs index 9c0c24d2f5..c224c4e0a7 100644 --- 
a/src/ui/Logic/DisplayableParagraphHelper.cs +++ b/src/ui/Logic/DisplayableParagraphHelper.cs @@ -33,7 +33,7 @@ namespace Nikse.SubtitleEdit.Logic * * */ - internal class DisplayableParagraphHelper + public class DisplayableParagraphHelper { /// /// The percentage of the visible timeline that must be covered by paragraphs before a paragraph outside the visible area may be chosen.