Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Change parsing algorithm from download URL to package id+version #343

Merged
merged 5 commits into from
Feb 23, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Stats.AzureCdnLogs.Common/LogEvents.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public class LogEvents
public static EventId FailedToGetFtpResponse = new EventId(512, "Failed to get FTP response");
public static EventId FailedToCheckAlreadyProcessedLogFilePackageStatistics = new EventId(513, "Failed to check already processed package statistics for log file");
public static EventId FailedToCheckAlreadyProcessedLogFileToolStatistics = new EventId(514, "Failed to check already processed tool statistics for log file");
public static EventId MultiplePackageIDVersionParseOptions = new EventId(515, "Multiple package id/version parse options");
public static EventId TranslatedPackageIdVersion = new EventId(516, "Translated package id and version");
public static EventId JobRunFailed = new EventId(550, "Job run failed");
public static EventId JobInitFailed = new EventId(551, "Job initialization failed");
}
Expand Down
85 changes: 0 additions & 85 deletions src/Stats.AzureCdnLogs.Common/PackageDefinition.cs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@
<Compile Include="CdnLogCustomFieldParser.cs" />
<Compile Include="CdnLogEntryParser.cs" />
<Compile Include="NuGetCustomHeaders.cs" />
<Compile Include="PackageDefinition.cs" />
<Compile Include="PackageStatistics.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="ToolStatistics.cs" />
Expand Down
2 changes: 1 addition & 1 deletion src/Stats.ImportAzureCdnStatistics/Job.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public override async Task Run()
foreach (var leasedLogFile in leasedLogFiles)
{
var packageTranslator = new PackageTranslator("packagetranslations.json");
var packageStatisticsParser = new PackageStatisticsParser(packageTranslator);
var packageStatisticsParser = new PackageStatisticsParser(packageTranslator, LoggerFactory);
await logProcessor.ProcessLogFileAsync(leasedLogFile, packageStatisticsParser, _aggregatesOnly);

if (_aggregatesOnly)
Expand Down
90 changes: 90 additions & 0 deletions src/Stats.ImportAzureCdnStatistics/PackageDefinition.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using NuGet.Versioning;

namespace Stats.ImportAzureCdnStatistics
{
public class PackageDefinition
{
private const string _nupkgExtension = ".nupkg";
private const string _dotSeparator = ".";

public string PackageId { get; set; }
public string PackageVersion { get; set; }

public PackageDefinition()
{
}

private PackageDefinition(string packageId, string packageVersion)
{
PackageId = packageId.Trim();
PackageVersion = packageVersion.Trim();
}

public static IList<PackageDefinition> FromRequestUrl(string requestUrl)
{
if (string.IsNullOrWhiteSpace(requestUrl) || !requestUrl.EndsWith(_nupkgExtension, StringComparison.InvariantCultureIgnoreCase))
{
return null;
}

List<PackageDefinition> resolutionOptions = new List<PackageDefinition>();

requestUrl = HttpUtility.UrlDecode(requestUrl);

var urlSegments = requestUrl.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a flat container URL, you can use the n-3 and n-2 segments as well to avoid any ambiguity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Was surprised that this wasn't done up until now.


var fileName = urlSegments.Last();

fileName = fileName.Remove(fileName.Length - _nupkgExtension.Length, _nupkgExtension.Length);

// Special handling for flat container
if (urlSegments.Length > 3)
{
var packageIdContainer = urlSegments[urlSegments.Length - 3];
var packageVersionContainer = urlSegments[urlSegments.Length - 2];

if (string.Equals(fileName, $"{packageIdContainer}.{packageVersionContainer}", StringComparison.InvariantCultureIgnoreCase))
{
resolutionOptions.Add(new PackageDefinition(packageIdContainer, packageVersionContainer));
}
}

if (!resolutionOptions.Any())
{
var nextDotIndex = fileName.IndexOf('.');

while (nextDotIndex != -1)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure you can write this as a do while and remove the redundancy between

var nextDotIndex = fileName.IndexOf('.');

and

nextDotIndex = fileName.IndexOf('.', nextDotIndex + 1);

e.g.

var nextDotIndex = -1;
do
{
    nextDotIndex = fileName.IndexOf('.', nextDotIndex + 1);
    ... rest of the code ...
} while (nextDotIndex != -1)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think it makes a big difference. Will keep as is.

{
string packagePart = fileName.Substring(0, nextDotIndex);
string versionPart = fileName.Substring(nextDotIndex + 1);

if (NuGetVersion.TryParse(versionPart, out var parsedVersion))
{
var normalizedVersion = parsedVersion.ToNormalizedString();

if (string.Equals(normalizedVersion, versionPart, StringComparison.InvariantCultureIgnoreCase))
{
resolutionOptions.Add(new PackageDefinition(packagePart, versionPart));
}
}

nextDotIndex = fileName.IndexOf('.', nextDotIndex + 1);
}
}

return resolutionOptions;
}

public override string ToString()
{
return $"[{PackageId}, {PackageVersion}]";
}
}
}
39 changes: 33 additions & 6 deletions src/Stats.ImportAzureCdnStatistics/PackageStatisticsParser.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using NuGet;
using System;
using System.Linq;
using Microsoft.Extensions.Logging;
using NuGet.Versioning;
using Stats.AzureCdnLogs.Common;

Expand All @@ -10,25 +12,50 @@ namespace Stats.ImportAzureCdnStatistics
public class PackageStatisticsParser
: StatisticsParser, IPackageStatisticsParser
{
private readonly ILogger<PackageStatisticsParser> _logger;
private readonly PackageTranslator _packageTranslator;

public PackageStatisticsParser(PackageTranslator packageTranslator)
public PackageStatisticsParser(PackageTranslator packageTranslator, ILoggerFactory loggerFactory)
{
if (loggerFactory == null)
{
throw new ArgumentNullException(nameof(loggerFactory));
}

_packageTranslator = packageTranslator;
_logger = loggerFactory.CreateLogger<PackageStatisticsParser>();
}

public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry)
{
var packageDefinition = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl);
var packageDefinitions = PackageDefinition.FromRequestUrl(cdnLogEntry.RequestUrl);

if (packageDefinition == null)
if (packageDefinitions == null || !packageDefinitions.Any())
{
return null;
}

if (packageDefinitions.Count > 1)
{
_logger.LogWarning(LogEvents.MultiplePackageIDVersionParseOptions,
"Found multiple parse options for URL {RequestUrl}: {PackageDefinitions}",
cdnLogEntry.RequestUrl,
string.Join(", ", packageDefinitions));
}

var packageDefinition = packageDefinitions.First();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we assume the first here? Could all of them be run against the translator?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Translator could potentially de-dupe some PackageDefinitions and then get rid of the need to warn.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look at the cases the translator is fixing, those are the cases when a package id ends with [dot][Number].
For example:
"incorrectpackageid": "xrmlibrary.extensionmethods",
"incorrectpackageversionpattern": "2016.(.*)",
"correctedpackageid": "xrmlibrary.extensionmethods.2016",
"correctedpackageversionpattern": "$1"

in order for this to work, the incorrect package id should be without a number. This is exactly what you get in the first result of the new algorithm, meaning, any other check will be redundant and impact perf.


if (_packageTranslator != null)
{
packageDefinition = _packageTranslator.TranslatePackageDefinition(packageDefinition);
bool translateOccured = _packageTranslator.TryTranslatePackageDefinition(packageDefinition);

if (translateOccured)
{
_logger.LogInformation(LogEvents.TranslatedPackageIdVersion,
"Translated package. Url: {RequestUrl}, New definition: {PackageDefinition}",
cdnLogEntry.RequestUrl,
packageDefinition);
}
}

var statistic = new PackageStatistics();
Expand All @@ -42,7 +69,7 @@ public PackageStatistics FromCdnLogEntry(CdnLogEntry cdnLogEntry)
statistic.UserAgent = GetUserAgentValue(cdnLogEntry);
statistic.EdgeServerIpAddress = cdnLogEntry.EdgeServerIpAddress;

// ignore blacklisted user agents
// Ignore blacklisted user agents
if (!IsBlackListed(statistic.UserAgent))
{
return statistic;
Expand Down
8 changes: 5 additions & 3 deletions src/Stats.ImportAzureCdnStatistics/PackageTranslator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
using System.IO;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Linq;
using Stats.AzureCdnLogs.Common;

namespace Stats.ImportAzureCdnStatistics
{
Expand Down Expand Up @@ -45,8 +44,10 @@ public PackageTranslator(string packageTranslationsJsonPath)
}
}

public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDefinition)
public bool TryTranslatePackageDefinition(PackageDefinition packageDefinition)
{
bool translateOccurred = false;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for this variable. Just return true where you set translateOccurred to true and return false at the end of the function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer a method to have a single return statement. Makes the code more readable.


if (packageDefinition != null
&& !string.IsNullOrEmpty(packageDefinition.PackageId)
&& !string.IsNullOrEmpty(packageDefinition.PackageVersion)
Expand All @@ -63,12 +64,13 @@ public PackageDefinition TranslatePackageDefinition(PackageDefinition packageDef
{
packageDefinition.PackageId = potentialTranslation.CorrectedPackageId;
packageDefinition.PackageVersion = correctedPackageVersion;
translateOccurred = true;
break;
}
}
}

return packageDefinition;
return translateOccurred;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
<Reference Include="System.Spatial, Version=5.7.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35, processorArchitecture=MSIL">
<HintPath>..\..\packages\System.Spatial.5.7.0\lib\net40\System.Spatial.dll</HintPath>
</Reference>
<Reference Include="System.Web" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
Expand All @@ -138,6 +139,7 @@
<Compile Include="ApplicationInsightsHelper.cs" />
<Compile Include="IPackageStatisticsParser.cs" />
<Compile Include="IStatisticsBlobContainerUtility.cs" />
<Compile Include="PackageDefinition.cs" />
<Compile Include="StatisticsBlobContainerUtility.cs" />
<Compile Include="Dimensions\IpAddressFact.cs" />
<Compile Include="IStatisticsWarehouse.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class LogFileProcessorFacts
new PackageTranslator("packageTranslations.json");

private static readonly IPackageStatisticsParser _packageStatisticsParser =
new PackageStatisticsParser(_packageTranslator);
new PackageStatisticsParser(_packageTranslator, new LoggerFactory());

public class WhenOnlyPackageStatisticsInLogFile
{
Expand Down
Loading