-
Notifications
You must be signed in to change notification settings - Fork 21
Change parsing algorithm from download URL to package id+version #343
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Web; | ||
using NuGet.Versioning; | ||
|
||
namespace Stats.ImportAzureCdnStatistics | ||
{ | ||
/// <summary>
/// A package ID / version pair extracted from the URL of a <c>.nupkg</c> download request.
/// </summary>
public class PackageDefinition
{
    private const string _nupkgExtension = ".nupkg";
    private const char _dotSeparator = '.';

    public string PackageId { get; set; }
    public string PackageVersion { get; set; }

    public PackageDefinition()
    {
    }

    private PackageDefinition(string packageId, string packageVersion)
    {
        PackageId = packageId.Trim();
        PackageVersion = packageVersion.Trim();
    }

    /// <summary>
    /// Resolves the possible package ID / version pairs for a package download URL.
    /// </summary>
    /// <param name="requestUrl">The requested URL; it may be URL-encoded.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="requestUrl"/> is null, blank, or does not end with
    /// <c>.nupkg</c>. Otherwise a list of candidates, which may be empty. When the URL has the
    /// flat container shape (<c>.../{id}/{version}/{id}.{version}.nupkg</c>) the list holds exactly
    /// that pair; otherwise it holds every split of the file name whose version part is already a
    /// normalized version, ordered from the shortest package ID to the longest.
    /// </returns>
    public static IList<PackageDefinition> FromRequestUrl(string requestUrl)
    {
        // Ordinal comparison on purpose: a culture-sensitive EndsWith ignores zero-width characters,
        // so it can report a match for a URL that does not literally end with the extension, and
        // the fixed-length trim below would then cut the wrong characters.
        if (string.IsNullOrWhiteSpace(requestUrl)
            || !requestUrl.EndsWith(_nupkgExtension, StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        requestUrl = HttpUtility.UrlDecode(requestUrl);

        var urlSegments = requestUrl.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries);

        // The last segment is the file name; strip the extension to get "{id}.{version}".
        var fileName = urlSegments.Last();
        fileName = fileName.Substring(0, fileName.Length - _nupkgExtension.Length);

        var resolutionOptions = new List<PackageDefinition>();

        // Special handling for flat container: the path segments are unambiguous, so prefer them.
        var flatContainerDefinition = FromFlatContainerSegments(urlSegments, fileName);

        if (flatContainerDefinition != null)
        {
            resolutionOptions.Add(flatContainerDefinition);
        }
        else
        {
            AddFileNameCandidates(fileName, resolutionOptions);
        }

        return resolutionOptions;
    }

    /// <summary>
    /// Returns the definition given by the two segments preceding the file name when the file name
    /// equals "{id}.{version}" built from them; otherwise returns <c>null</c>.
    /// </summary>
    private static PackageDefinition FromFlatContainerSegments(string[] urlSegments, string fileName)
    {
        if (urlSegments.Length <= 3)
        {
            return null;
        }

        var packageIdContainer = urlSegments[urlSegments.Length - 3];
        var packageVersionContainer = urlSegments[urlSegments.Length - 2];
        var expectedFileName = $"{packageIdContainer}{_dotSeparator}{packageVersionContainer}";

        return string.Equals(fileName, expectedFileName, StringComparison.OrdinalIgnoreCase)
            ? new PackageDefinition(packageIdContainer, packageVersionContainer)
            : null;
    }

    /// <summary>
    /// Splits <paramref name="fileName"/> at each dot, left to right, and adds every split whose
    /// right-hand side parses as a version and is already in normalized form.
    /// </summary>
    private static void AddFileNameCandidates(string fileName, List<PackageDefinition> resolutionOptions)
    {
        var nextDotIndex = fileName.IndexOf(_dotSeparator);

        while (nextDotIndex != -1)
        {
            string packagePart = fileName.Substring(0, nextDotIndex);
            string versionPart = fileName.Substring(nextDotIndex + 1);

            // Requiring the normalized form rejects partial tails such as "0.0" (normalizes to "0.0.0"),
            // which would otherwise produce spurious candidates for every trailing numeric segment.
            if (NuGetVersion.TryParse(versionPart, out var parsedVersion)
                && string.Equals(parsedVersion.ToNormalizedString(), versionPart, StringComparison.OrdinalIgnoreCase))
            {
                resolutionOptions.Add(new PackageDefinition(packagePart, versionPart));
            }

            nextDotIndex = fileName.IndexOf(_dotSeparator, nextDotIndex + 1);
        }
    }

    /// <summary>Formats the definition as "[PackageId, PackageVersion]".</summary>
    public override string ToString()
    {
        return $"[{PackageId}, {PackageVersion}]";
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using NuGet; | ||
using System; | ||
using System.Linq; | ||
using Microsoft.Extensions.Logging; | ||
using NuGet.Versioning; | ||
using Stats.AzureCdnLogs.Common; | ||
|
||
|
@@ -10,25 +12,50 @@ namespace Stats.ImportAzureCdnStatistics | |
public class PackageStatisticsParser | ||
: StatisticsParser, IPackageStatisticsParser | ||
{ | ||
private readonly ILogger<PackageStatisticsParser> _logger; | ||
private readonly PackageTranslator _packageTranslator; | ||
|
||
public PackageStatisticsParser(PackageTranslator packageTranslator) | ||
public PackageStatisticsParser(PackageTranslator packageTranslator, ILoggerFactory loggerFactory)
{
    // A logger factory is mandatory; the translator is stored as given, without a null check.
    var factory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));

    _logger = factory.CreateLogger<PackageStatisticsParser>();
    _packageTranslator = packageTranslator;
}
|
||
public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry) | ||
{ | ||
var packageDefinition = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl); | ||
var packageDefinitions = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl); | ||
|
||
if (packageDefinition == null) | ||
if (packageDefinitions == null || !packageDefinitions.Any()) | ||
{ | ||
return null; | ||
} | ||
|
||
if (packageDefinitions.Count > 1) | ||
{ | ||
_logger.LogWarning(LogEvents.MultiplePackageIDVersionParseOptions, | ||
"Found multiple parse options for URL {RequestUrl}: {PackageDefinitions}", | ||
cdnLogEntry.RequestUrl, | ||
string.Join(", ", packageDefinitions)); | ||
} | ||
|
||
var packageDefinition = packageDefinitions.First(); | ||
Review comment: Why do we assume the first one here? Could all of them be run against the translator?
Review comment: The translator could potentially de-dupe some of them.
Reply: If you look at the cases the translator is fixing, they are the cases where a package ID ends with [dot][number]. For the translation to work, the incorrect package ID must be the one without the number. That is exactly what you get as the first result of the new algorithm, meaning any other check would be redundant and would hurt performance. |
||
|
||
if (_packageTranslator != null) | ||
{ | ||
packageDefinition = _packageTranslator.TranslatePackageDefinition(packageDefinition); | ||
bool translateOccured = _packageTranslator.TryTranslatePackageDefinition(packageDefinition); | ||
|
||
if (translateOccured) | ||
{ | ||
_logger.LogInformation(LogEvents.TranslatedPackageIdVersion, | ||
"Translated package. Url: {RequestUrl}, New definition: {PackageDefinition}", | ||
cdnLogEntry.RequestUrl, | ||
packageDefinition); | ||
} | ||
} | ||
|
||
var statistic = new PackageStatistics(); | ||
|
@@ -42,7 +69,7 @@ public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry) | |
statistic.UserAgent = GetUserAgentValue(cdnLogEntry); | ||
statistic.EdgeServerIpAddress = cdnLogEntry.EdgeServerIpAddress; | ||
|
||
// ignore blacklisted user agents | ||
// Ignore blacklisted user agents | ||
if (!IsBlackListed(statistic.UserAgent)) | ||
{ | ||
return statistic; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,6 @@ | |
using System.IO; | ||
using System.Text.RegularExpressions; | ||
using Newtonsoft.Json.Linq; | ||
using Stats.AzureCdnLogs.Common; | ||
|
||
namespace Stats.ImportAzureCdnStatistics | ||
{ | ||
|
@@ -45,8 +44,10 @@ public PackageTranslator(string packageTranslationsJsonPath) | |
} | ||
} | ||
|
||
public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDefinition) | ||
public bool TryTranslatePackageDefinition(PackageDefinition packageDefinition) | ||
{ | ||
bool translateOccurred = false; | ||
Review comment: No need for this variable. Just return true where you set `translateOccurred = true`.
Reply: I prefer a method to have a single return statement; it makes the code more readable. |
||
|
||
if (packageDefinition != null | ||
&& !string.IsNullOrEmpty(packageDefinition.PackageId) | ||
&& !string.IsNullOrEmpty(packageDefinition.PackageVersion) | ||
|
@@ -63,12 +64,13 @@ public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDef | |
{ | ||
packageDefinition.PackageId = potentialTranslation.CorrectedPackageId; | ||
packageDefinition.PackageVersion = correctedPackageVersion; | ||
translateOccurred = true; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
return packageDefinition; | ||
return translateOccurred; | ||
} | ||
} | ||
} |
Review comment: If this is a flat container URL, you can use the n-3 and n-2 segments as well to avoid any ambiguity.
Reply: Done. I was surprised that this wasn't done up until now.