-
-
Notifications
You must be signed in to change notification settings - Fork 242
/
TrackGroupingService.cs
253 lines (215 loc) · 9.77 KB
/
TrackGroupingService.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Disk;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Common.Instrumentation.Extensions;
using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
{
/// <summary>
/// Groups local track files into candidate album releases.
/// </summary>
public interface ITrackGroupingService
{
/// <summary>
/// Partitions <paramref name="localTracks"/> into <see cref="LocalAlbumRelease"/> instances.
/// </summary>
/// <param name="localTracks">The tracks to group.</param>
/// <returns>The list of releases the tracks were grouped into.</returns>
List<LocalAlbumRelease> GroupTracks(List<LocalTrack> localTracks);
}
public class TrackGroupingService : ITrackGroupingService
{
// Regex template for a folder path containing a disc marker, optional non-word/underscore separators
// and then a digit. "%s" is substituted with one of MultiDiscMarkers; the "root" group captures
// everything up to (but not including) the disc number.
private const string MultiDiscPatternFormat = @"^(?<root>.*%s[\W_]*)\d";
private static readonly Logger _logger = NzbDroneLogger.GetLogger(typeof(TrackGroupingService));
// Regex fragments substituted into MultiDiscPatternFormat: "disc"/"disk" and "cd".
private static readonly List<string> MultiDiscMarkers = new () { @"dis[ck]", @"cd" };
// Artist tag values treated as "various artists"; IsVariousArtists compares against these ignoring case.
private static readonly List<string> VariousArtistTitles = new () { "", "various artists", "various", "va", "unknown" };
/// <summary>
/// Groups tracks into releases using three progressively looser passes: by folder, then by album tag,
/// then by artist+album tag pair.
/// </summary>
/// <param name="localTracks">The tracks to group.</param>
/// <returns>One <see cref="LocalAlbumRelease"/> per group that was formed.</returns>
public List<LocalAlbumRelease> GroupTracks(List<LocalTrack> localTracks)
{
    _logger.ProgressInfo($"Grouping {localTracks.Count} tracks");

    var result = new List<LocalAlbumRelease>();

    // Accept the candidate as a release when it looks coherent; otherwise hand its tracks to the next pass.
    void AcceptOrDefer(List<LocalTrack> candidate, List<LocalTrack> deferred)
    {
        if (!LooksLikeSingleRelease(candidate))
        {
            deferred.AddRange(candidate);
            return;
        }

        result.Add(new LocalAlbumRelease(candidate));
    }

    // Pass 1: assume each folder (or multi-disc folder set) is one release
    var leftoverAfterFolders = new List<LocalTrack>();
    foreach (var folderGroup in GroupTracksByDirectory(localTracks))
    {
        AcceptOrDefer(folderGroup.ToList(), leftoverAfterFolders);
    }

    // Pass 2: regroup whatever failed by album tag alone (picks up various-artist albums)
    var leftoverAfterAlbums = new List<LocalTrack>();
    var byAlbum = leftoverAfterFolders.GroupBy(x => x.FileTrackInfo.AlbumTitle);
    foreach (var albumGroup in byAlbum)
    {
        _logger.Debug("Falling back to grouping by album tag");
        AcceptOrDefer(albumGroup.ToList(), leftoverAfterAlbums);
    }

    // Pass 3: last resort, every artist/album pair becomes a release without further checks
    var byArtistAndAlbum = leftoverAfterAlbums.GroupBy(x => new { x.FileTrackInfo.ArtistTitle, x.FileTrackInfo.AlbumTitle });
    foreach (var pairGroup in byArtistAndAlbum)
    {
        _logger.Debug("Falling back to grouping by album+artist tag");
        result.Add(new LocalAlbumRelease(pairGroup.ToList()));
    }

    return result;
}
/// <summary>
/// Decides whether a single value dominates <paramref name="values"/>.
/// </summary>
/// <param name="values">The tag values to inspect; null entries are grouped like any other value.</param>
/// <param name="threshold">
/// Tolerance for disagreement. With more than one distinct value, the result is false when the ratio of
/// distinct values to total values exceeds this, or when the most common value accounts for less than
/// <c>1 - threshold</c> of all values.
/// </param>
/// <param name="fuzz">
/// Values whose <c>LevenshteinCoefficient</c> against the most common value exceeds this are counted as
/// the most common value.
/// </param>
/// <returns>
/// True when there is at most one distinct value (including an empty input) or the ratios above are
/// within tolerance; otherwise false.
/// </returns>
private static bool HasCommonEntry(IEnumerable<string> values, double threshold, double fuzz)
{
    // Materialise the grouping once. Callers pass deferred LINQ queries, and leaving this as a lazy
    // query would re-run the GroupBy/OrderBy for every Count()/First()/Skip() below.
    var groups = values.GroupBy(x => x)
        .Select(g => new { g.Key, Count = g.Count() })
        .OrderByDescending(g => g.Count)
        .ToList();

    if (groups.Count == 0)
    {
        // No values means nothing can disagree; First() would otherwise throw here.
        return true;
    }

    var distinctCount = groups.Count;
    var mostCommonCount = groups[0].Count;
    var mostCommonEntry = groups[0].Key;
    var totalCount = groups.Sum(g => g.Count);

    // merge groups that are close to the most common value
    foreach (var group in groups.Skip(1))
    {
        if (mostCommonEntry.IsNotNullOrWhiteSpace() &&
            group.Key.IsNotNullOrWhiteSpace() &&
            mostCommonEntry.LevenshteinCoefficient(group.Key) > fuzz)
        {
            distinctCount--;
            mostCommonCount += group.Count;
        }
    }

    _logger.Trace($"DistinctCount {distinctCount} MostCommonCount {mostCommonCount} TotalCount {totalCount}");

    if (distinctCount > 1 &&
        (distinctCount / (double)totalCount > threshold ||
         mostCommonCount / (double)totalCount < 1 - threshold))
    {
        return false;
    }

    return true;
}
/// <summary>
/// Heuristically decides whether all <paramref name="tracks"/> belong to one release.
/// </summary>
/// <remarks>
/// The tracks are rejected when they carry more than one non-blank album MBID or release MBID, when the
/// album tags have no common entry, or (unless the set looks like a various-artists release) when the
/// artist tags have no common entry. Both tag checks use a threshold of 0.25 and a fuzz of 0.9.
/// </remarks>
/// <param name="tracks">The candidate group of tracks.</param>
/// <returns>True when the tracks look like a single release.</returns>
public static bool LooksLikeSingleRelease(List<LocalTrack> tracks)
{
    const double albumTagThreshold = 0.25;
    const double artistTagThreshold = 0.25;
    const double tagFuzz = 0.9;

    // Two different non-blank album (or release) MBIDs can never be the same release
    var conflictingIds =
        tracks.Select(t => t.FileTrackInfo.AlbumMBId).Where(id => id.IsNotNullOrWhiteSpace()).Distinct().Count() > 1 ||
        tracks.Select(t => t.FileTrackInfo.ReleaseMBId).Where(id => id.IsNotNullOrWhiteSpace()).Distinct().Count() > 1;

    if (conflictingIds)
    {
        _logger.Trace("LooksLikeSingleRelease: MBIDs are not unique");
        return false;
    }

    // The album tag has to agree across (most of) the tracks
    var albumTitles = tracks.Select(t => t.FileTrackInfo.AlbumTitle);
    if (!HasCommonEntry(albumTitles, albumTagThreshold, tagFuzz))
    {
        _logger.Trace("LooksLikeSingleRelease: No common album tag");
        return false;
    }

    // Various-artists sets are exempt from the artist agreement check
    if (IsVariousArtists(tracks))
    {
        return true;
    }

    var artistNames = tracks.Select(t => t.FileTrackInfo.ArtistTitle);
    if (HasCommonEntry(artistNames, artistTagThreshold, tagFuzz))
    {
        return true;
    }

    _logger.Trace("LooksLikeSingleRelease: No common artist tag");
    return false;
}
/// <summary>
/// Decides whether <paramref name="tracks"/> look like a various-artists release.
/// </summary>
/// <remarks>
/// Returns true when the artist tags have no common entry at a threshold of 0.75 (that is, the number of
/// distinct artists exceeds 75% of the track count, or the most common artist covers fewer than 25% of
/// the tracks), or when the most common artist tag is one of the known various-artists titles
/// (compared ignoring case).
/// </remarks>
/// <param name="tracks">The candidate group of tracks.</param>
/// <returns>True for a various-artists set; false otherwise, including for an empty list.</returns>
public static bool IsVariousArtists(List<LocalTrack> tracks)
{
    const double artistTagThreshold = 0.75;
    const double tagFuzz = 0.9;

    var artistTags = tracks.Select(x => x.FileTrackInfo.ArtistTitle).ToList();

    if (artistTags.Count == 0)
    {
        // No tracks, so no artists to inspect; First() below would throw on an empty sequence.
        return false;
    }

    if (!HasCommonEntry(artistTags, artistTagThreshold, tagFuzz))
    {
        return true;
    }

    var mostCommonArtist = artistTags.GroupBy(x => x)
        .OrderByDescending(x => x.Count())
        .First()
        .Key;

    return VariousArtistTitles.Contains(mostCommonArtist, StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Groups tracks by their immediate parent directory, merging consecutive multi-disc folders.
/// </summary>
/// <remarks>
/// Layouts such as
/// <c>xx/CD1/1.mp3</c>, <c>xx/CD2/1.mp3</c> or <c>yy Disc 1/1.mp3</c>, <c>yy Disc 2/1.mp3</c>
/// are yielded as one group. Only the immediate parent directory is considered, and folders are
/// visited in sorted order so that the discs of one set are adjacent.
/// </remarks>
/// <param name="tracks">The tracks to group.</param>
/// <returns>A lazily produced sequence of track groups; each track appears in exactly one group.</returns>
private IEnumerable<List<LocalTrack>> GroupTracksByDirectory(List<LocalTrack> tracks)
{
    var trackFolders = tracks.Select(x => Tuple.Create(x, Path.GetDirectoryName(x.Path))).ToList();

    var distinctFolders = trackFolders.Select(x => x.Item2).Distinct().ToList();
    distinctFolders.Sort();

    _logger.Trace("Folders:\n{0}", string.Join("\n", distinctFolders));

    // The "first disc" patterns depend only on the markers, so build them once rather than per folder
    var multiStartRegexes = MultiDiscMarkers
        .Select(marker => new Regex(MultiDiscPatternFormat.Replace("%s", marker), RegexOptions.IgnoreCase))
        .ToList();

    Regex subdirRegex = null;
    var output = new List<LocalTrack>();

    foreach (var folder in distinctFolders)
    {
        // Select tracks by exact parent folder. A prefix test on the track path would also capture
        // tracks from sibling folders such as "CD20" when visiting "CD2", or from nested subfolders,
        // and those tracks would then be yielded again when their own folder is visited.
        var currentTracks = trackFolders.Where(x => x.Item2.Equals(folder, DiskProviderBase.PathStringComparison))
            .Select(x => x.Item1);

        if (subdirRegex != null && subdirRegex.IsMatch(folder))
        {
            // current folder continues match, so append output
            output.AddRange(currentTracks);
            continue;
        }

        // we have finished a multi disc match. yield the previous output
        // and check current folder
        if (output.Count > 0)
        {
            _logger.Trace("Yielding from 1:\n{0}", string.Join("\n", output));
            yield return output;

            output = new List<LocalTrack>();
        }

        // reset and put current folder into output
        subdirRegex = null;
        output.AddRange(currentTracks);

        // check if this is the first of a multi-disc set of folders
        foreach (var multiStartRegex in multiStartRegexes)
        {
            var match = multiStartRegex.Match(folder);
            if (match.Success)
            {
                // following folders belong to the same set when they share the root and end in a disc number
                var subdirPattern = $"^{Regex.Escape(match.Groups["root"].ToString())}\\d+$";
                subdirRegex = new Regex(subdirPattern, RegexOptions.IgnoreCase);
                break;
            }
        }

        if (subdirRegex == null)
        {
            // not the start of a multi-disc match, yield
            _logger.Trace("Yielding from 2:\n{0}", string.Join("\n", output));
            yield return output;

            // reset output
            output = new List<LocalTrack>();
        }
    }

    // return the final stored output
    if (output.Count > 0)
    {
        _logger.Trace("Yielding final:\n{0}", string.Join("\n", output));
        yield return output;
    }
}
}
}