diff --git a/.github/workflows/parity-test.yml b/.github/workflows/parity-test.yml index 5d43789..24d7ef3 100644 --- a/.github/workflows/parity-test.yml +++ b/.github/workflows/parity-test.yml @@ -60,3 +60,26 @@ jobs: $dotnetPinget = Resolve-Path 'dotnet/src/Devolutions.Pinget.Cli/bin/Release/net10.0/pinget.exe' $pingetModule = Resolve-Path 'dist/powershell-module/Devolutions.Pinget.Client/Devolutions.Pinget.Client.psd1' pwsh -NoLogo -NoProfile -File (Resolve-Path 'scripts/Parity-Compare-WingetParity.ps1') -RustWinget $rustPinget -DotnetWinget $dotnetPinget -PowerShellModulePath $pingetModule + + - name: Run upgrade-parity diff + shell: pwsh + # CI runners have very few user-installed apps so the row counts are + # small; the value here is regression-catching: a winget table-format + # change or a break in pinget's `upgrade --output json` schema will + # surface as a parse failure before users notice. The fixture is + # uploaded as an artifact so cross-machine corpora can be assembled + # from real machines later. 
+ run: | + $rustPinget = Resolve-Path 'rust/target/debug/pinget.exe' + pwsh -NoLogo -NoProfile -File (Resolve-Path 'scripts/Test-UpgradeParity.ps1') ` + -Pinget $rustPinget ` + -FixturePath 'parity-upgrade-fixture.json' ` + -FailOnDiff + + - name: Upload parity fixture + if: always() + uses: actions/upload-artifact@v4 + with: + name: parity-upgrade-fixture + path: parity-upgrade-fixture.json + if-no-files-found: ignore diff --git a/dotnet/src/Devolutions.Pinget.Core.Tests/CoreTests.cs b/dotnet/src/Devolutions.Pinget.Core.Tests/CoreTests.cs index 9f858a8..05cc958 100644 --- a/dotnet/src/Devolutions.Pinget.Core.Tests/CoreTests.cs +++ b/dotnet/src/Devolutions.Pinget.Core.Tests/CoreTests.cs @@ -1360,11 +1360,14 @@ public void CorrelateInstalledPackage_PrefersAnchoredCandidateOverWordFragment() } [Fact] - public void CorrelateInstalledPackage_MsixCorrelatesByName() - { - // Previously hard-skipped via `LocalId.StartsWith("MSIX\\")` → null, which - // prevented obvious MSIX updates (Microsoft.Teams etc.) from ever surfacing. - // Name-based correlation now runs uniformly. + public void CorrelateInstalledPackage_MsixPackagesDoNotCorrelateViaName() + { + // MSIX correlation must go through the v2 index's `pfns2` table — + // name fallback is wrong because an installed MSIX package and a + // catalog package can share a display name without sharing identity + // (Microsoft Edge Stable MSIX vs the catalog Microsoft.Edge MSI; + // Notepad++ Store stub MSIX vs the catalog Inno installer). The PFN + // lookup happens earlier in CorrelateInstalledViaIndex.
var installed = new InstalledPackage { Name = "Microsoft Teams", @@ -1388,9 +1391,539 @@ public void CorrelateInstalledPackage_MsixCorrelatesByName() }, }; - var correlated = Repository.CorrelateInstalledPackage(installed, candidates, loose: true); - Assert.NotNull(correlated); - Assert.Equal("Microsoft.Teams", correlated!.Id); + Assert.Null(Repository.CorrelateInstalledPackage(installed, candidates, loose: true)); + } + + [Fact] + public void CorrelateInstalledPackage_RefusesAmbiguousWinners() + { + // Two catalog packages both expose name "Git" (Git.Git and + // Microsoft.Git). Without publisher disambiguation they score + // identically; winget refuses to correlate (the install lists with + // empty Source). pinget must do the same to avoid manufacturing an + // upgrade against the wrong catalog package. + var installed = new InstalledPackage + { + Name = "Git", + LocalId = @"ARP\Machine\X64\Git_is1", + InstalledVersion = "2.53.0", + Publisher = "The Git Development Community", + Scope = "Machine", + InstallerCategory = "exe", + PackageFamilyNames = [], + ProductCodes = [], + UpgradeCodes = [], + }; + var candidates = new List + { + new() { SourceName = "winget", SourceKind = SourceKind.PreIndexed, Id = "Git.Git", Name = "Git", Version = "2.54.0" }, + new() { SourceName = "winget", SourceKind = SourceKind.PreIndexed, Id = "Microsoft.Git", Name = "Git", Version = "2.53.0.0.7" }, + }; + + Assert.Null(Repository.CorrelateInstalledPackage(installed, candidates, loose: true)); + } + + [Fact] + public void MapArpVersionToCatalog_ReturnsCatalogVersionInsideRange() + { + // .NET SDK 10.0.108 declares its ARP DisplayVersion is + // `10.1.826.23019`. Without this mapping the upgrade was silently + // dropped because compare_version says `10.1.x > 10.0.108`. 
+ var entries = new List + { + new() { Version = "10.0.300", ArpMinVersion = "10.3.26.23102", ArpMaxVersion = "10.3.26.23102" }, + new() { Version = "10.0.108", ArpMinVersion = "10.1.826.23019", ArpMaxVersion = "10.1.826.23019" }, + new() { Version = "10.0.107", ArpMinVersion = "10.1.726.21808", ArpMaxVersion = "10.1.726.21808" }, + }; + + Assert.Equal("10.0.108", Repository.MapArpVersionToCatalog(entries, "10.1.826.23019")); + } + + [Fact] + public void MapArpVersionToCatalog_ReturnsNullWhenNoRangeMatches() + { + var entries = new List + { + new() { Version = "10.0.300", ArpMinVersion = "10.3.26.23102", ArpMaxVersion = "10.3.26.23102" }, + }; + Assert.Null(Repository.MapArpVersionToCatalog(entries, "40.10.18029")); + Assert.Null(Repository.MapArpVersionToCatalog(entries, "Unknown")); + Assert.Null(Repository.MapArpVersionToCatalog(entries, "")); + } + + [Fact] + public void LatestArpAnchoredVersion_SkipsInternalRows() + { + // Microsoft.WindowsAppRuntime.1.8 publishes both an internal build + // version (`8000.836.2153.0`, no ARP bounds) and user-facing + // versions (`1.8.6`, `1.8.5`, …). The internal row shouldn't win. + var entries = new List + { + new() { Version = "8000.836.2153.0" }, + new() { Version = "1.8.6", ArpMinVersion = "8000.806.2252.0", ArpMaxVersion = "8000.806.2252.0" }, + new() { Version = "1.8.5", ArpMinVersion = "8000.770.947.0", ArpMaxVersion = "8000.770.947.0" }, + }; + Assert.Equal("1.8.6", Repository.LatestArpAnchoredVersion(entries)); + } + + [Fact] + public void LatestArpAnchoredVersion_ReturnsNullWhenNoBounds() + { + var entries = new List + { + new() { Version = "1.28.240.0" }, + new() { Version = "1.27.470.0" }, + }; + Assert.Null(Repository.LatestArpAnchoredVersion(entries)); + } + + [Fact] + public void NormalizePublisher_Microsoft_Corporation_Strips_To_Microsoft() + { + // Fixture observed in the live catalog's norm_publishers2. 
+ Assert.Equal("microsoft", NameNormalization.NormalizePublisher("Microsoft Corporation")); + } + + [Fact] + public void NormalizePublisher_JetBrains_Sro_Strips_To_JetBrains() + { + Assert.Equal("jetbrains", NameNormalization.NormalizePublisher("JetBrains s.r.o.")); + } + + [Fact] + public void NormalizePublisher_Without_LegalSuffix_Keeps_All_Tokens() + { + // "The Git Development Community" has no recognized legal-entity + // suffix, so all tokens stay — matches the live catalog row. + Assert.Equal( + "thegitdevelopmentcommunity", + NameNormalization.NormalizePublisher("The Git Development Community")); + } + + [Fact] + public void NormalizePublisher_Strips_Common_Suffixes() + { + Assert.Equal("foo", NameNormalization.NormalizePublisher("Foo Inc")); + Assert.Equal("foobar", NameNormalization.NormalizePublisher("Foo Bar LLC")); + Assert.Equal("foo", NameNormalization.NormalizePublisher("Foo GmbH")); + } + + [Fact] + public void NormalizeName_Strips_VersionDelimited_Token() + { + // `2025.3.0.1` matches VersionDelimited. Bare `2026` doesn't. 
+ Assert.Equal("jetbrainsrider", NameNormalization.NormalizeName("JetBrains Rider 2025.3.0.1").Name); + Assert.Equal( + "visualstudioprofessional2026", + NameNormalization.NormalizeName("Visual Studio Professional 2026").Name); + } + + [Fact] + public void NormalizeName_Strips_Architecture_Suffix() + { + var r = NameNormalization.NormalizeName("PowerToys (Preview) x64"); + Assert.Equal("powertoys", r.Name); + Assert.Equal(NameNormalization.Architecture.X64, r.Architecture); + } + + [Fact] + public void NormalizeName_Strips_Known_Locale() + { + var r = NameNormalization.NormalizeName("Foo en-US Edition"); + Assert.Equal("en-us", r.Locale); + } + + [Fact] + public void NormalizeName_Keeps_Unknown_Locale_Shaped_Tokens() + { + var r = NameNormalization.NormalizeName("Foo XY-AB"); + Assert.Equal(string.Empty, r.Locale); + } + + [Fact] + public void NormalizeName_Strips_Parens_Content() + { + Assert.Equal("foo", NameNormalization.NormalizeName("Foo (beta)").Name); + } + + [Fact] + public void NormalizeName_Microsoft_Edge_Matches_Catalog() + { + Assert.Equal("microsoftedge", NameNormalization.NormalizeName("Microsoft Edge").Name); + } + + [Fact] + public void NormalizeName_Keeps_Year_Only_Suffix() + { + Assert.Equal("foo2026", NameNormalization.NormalizeName("Foo 2026").Name); + } + + [Fact] + public void Manifest_Parses_RequireExplicitUpgrade_AtTopLevel() + { + // Top-level RequireExplicitUpgrade flag propagates to + // Manifest.RequireExplicitUpgrade. winget catalogs put the flag + // here for browser packages and self-updating apps that opt out + // of bulk `upgrade`. 
+ var yaml = @" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: singleton +ManifestVersion: 1.10.0 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT +ShortDescription: explicit-upgrade fixture +RequireExplicitUpgrade: true +Installers: + - Architecture: x64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.exe + InstallerSha256: ABC123 +"; + var manifest = Repository.ParseYamlManifest(System.Text.Encoding.UTF8.GetBytes(yaml)); + Assert.True(manifest.RequireExplicitUpgrade); + } + + [Fact] + public void Manifest_Parses_RequireExplicitUpgrade_OnInstaller() + { + // Per-installer flag — only one of several installers declares + // it, but the Manifest aggregate is still true because the user + // could pick that installer when upgrading. + var yaml = @" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: singleton +ManifestVersion: 1.10.0 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT +ShortDescription: explicit-upgrade fixture +Installers: + - Architecture: x64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.x64.exe + InstallerSha256: ABC123 + - Architecture: arm64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.arm64.exe + InstallerSha256: DEF456 + RequireExplicitUpgrade: true +"; + var manifest = Repository.ParseYamlManifest(System.Text.Encoding.UTF8.GetBytes(yaml)); + Assert.True(manifest.RequireExplicitUpgrade); + Assert.False(manifest.Installers[0].RequireExplicitUpgrade); + Assert.True(manifest.Installers[1].RequireExplicitUpgrade); + } + + [Fact] + public void Manifest_WithoutRequireExplicitUpgrade_DefaultsToFalse() + { + var yaml = @" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: singleton +ManifestVersion: 1.10.0 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT 
+ShortDescription: baseline fixture +Installers: + - Architecture: x64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.exe + InstallerSha256: ABC123 +"; + var manifest = Repository.ParseYamlManifest(System.Text.Encoding.UTF8.GetBytes(yaml)); + Assert.False(manifest.RequireExplicitUpgrade); + } + + [Fact] + public void LookupUniqueNormalizedIdentity_ReturnsUniqueMatch() + { + using var connection = new Microsoft.Data.Sqlite.SqliteConnection("Data Source=:memory:"); + connection.Open(); + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = @" + CREATE TABLE norm_names2 (norm_name TEXT, package INT64); + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64); + INSERT INTO norm_names2 VALUES ('microsoftedge', 100); + INSERT INTO norm_publishers2 VALUES ('microsoft', 100);"; + cmd.ExecuteNonQuery(); + } + + var rowid = Repository.LookupUniqueNormalizedIdentityForTesting(connection, "microsoftedge", "microsoft"); + Assert.Equal(100L, rowid); + } + + [Fact] + public void LookupUniqueNormalizedIdentity_RejectsAmbiguousMatch() + { + // Two distinct packages share the same (norm_name, norm_publisher) + // — winget refuses to correlate when it can't disambiguate. 
+ using var connection = new Microsoft.Data.Sqlite.SqliteConnection("Data Source=:memory:"); + connection.Open(); + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = @" + CREATE TABLE norm_names2 (norm_name TEXT, package INT64); + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64); + INSERT INTO norm_names2 VALUES ('git', 100), ('git', 200); + INSERT INTO norm_publishers2 VALUES ('thegitdevelopmentcommunity', 100), ('thegitdevelopmentcommunity', 200);"; + cmd.ExecuteNonQuery(); + } + + var rowid = Repository.LookupUniqueNormalizedIdentityForTesting(connection, "git", "thegitdevelopmentcommunity"); + Assert.Null(rowid); + } + + [Fact] + public void LookupUniqueNormalizedIdentity_RequiresPublisherIntersect() + { + // norm_name has multiple matches; only one shares the publisher + // with the installed package. Intersect picks the right one. + using var connection = new Microsoft.Data.Sqlite.SqliteConnection("Data Source=:memory:"); + connection.Open(); + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = @" + CREATE TABLE norm_names2 (norm_name TEXT, package INT64); + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64); + INSERT INTO norm_names2 VALUES ('git', 100), ('git', 200); + INSERT INTO norm_publishers2 VALUES ('thegitdevelopmentcommunity', 100), ('microsoft', 200);"; + cmd.ExecuteNonQuery(); + } + + var rowid = Repository.LookupUniqueNormalizedIdentityForTesting(connection, "git", "thegitdevelopmentcommunity"); + Assert.Equal(100L, rowid); + } + + [Fact] + public void LookupUniqueNormalizedIdentity_MissesWhenPublisherDoesNotMatch() + { + using var connection = new Microsoft.Data.Sqlite.SqliteConnection("Data Source=:memory:"); + connection.Open(); + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = @" + CREATE TABLE norm_names2 (norm_name TEXT, package INT64); + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64); + INSERT INTO norm_names2 VALUES ('foo', 
100); + INSERT INTO norm_publishers2 VALUES ('bar', 200);"; + cmd.ExecuteNonQuery(); + } + + var rowid = Repository.LookupUniqueNormalizedIdentityForTesting(connection, "foo", "bar"); + Assert.Null(rowid); + } + + [Fact] + public void UpgradeFilter_HidesRequireExplicitUpgrade_ByDefault() + { + // winget hides RequireExplicitUpgrade rows from bulk `upgrade` + // (Edge, Steam, Discord). pinget must do the same. + var pkg = new InstalledPackage + { + Name = "Edge", + LocalId = @"ARP\Machine\X64\Edge", + InstalledVersion = "100.0", + Scope = "Machine", + InstallerCategory = "exe", + Correlated = new SearchMatch + { + SourceName = "winget", + SourceKind = SourceKind.PreIndexed, + Id = "Microsoft.Edge", + Name = "Microsoft Edge", + Version = "110.0", + }, + CorrelatedRequiresExplicitUpgrade = true, + }; + + var bulkQuery = new ListQuery { UpgradeOnly = true }; + Assert.False( + Repository.InstalledPackageMatchesUpgradeFilterForTesting(pkg, bulkQuery), + "RequireExplicitUpgrade row must be hidden from bulk upgrade"); + + // When the user explicitly targets it by id, winget shows it. + var filteredQuery = new ListQuery { UpgradeOnly = true, Id = "Microsoft.Edge" }; + Assert.True( + Repository.InstalledPackageMatchesUpgradeFilterForTesting(pkg, filteredQuery), + "RequireExplicitUpgrade row must surface when the user filters for it"); + + // Without the flag, the row appears in bulk upgrade. + pkg.CorrelatedRequiresExplicitUpgrade = false; + Assert.True(Repository.InstalledPackageMatchesUpgradeFilterForTesting(pkg, bulkQuery)); + } + + [Fact] + public void ApplyMsixResourceStringNameFix_ResolvesPlaceholderToCatalogName() + { + // App Installer's MSIX manifest stores DisplayName as + // `ms-resource:appDisplayName`. Once we correlate it via PFN, we + // know the catalog calls it "App Installer" — show that instead of + // the unresolved placeholder, matching winget's output. 
+ var package = new InstalledPackage + { + Name = "ms-resource:appDisplayName", + LocalId = @"MSIX\Microsoft.DesktopAppInstaller_1.28.239.0_arm64__8wekyb3d8bbwe", + InstalledVersion = "1.28.239.0", + Scope = "User", + InstallerCategory = "msix", + PackageFamilyNames = ["Microsoft.DesktopAppInstaller_8wekyb3d8bbwe"], + Correlated = new SearchMatch + { + SourceName = "winget", + SourceKind = SourceKind.PreIndexed, + Id = "Microsoft.AppInstaller", + Name = "App Installer", + Version = "1.28.240.0", + MatchCriteria = "PackageFamilyName", + }, + }; + Repository.ApplyMsixResourceStringNameFix(package); + Assert.Equal("App Installer", package.Name); + } + + [Fact] + public void ApplyMsixResourceStringNameFix_NoopForNonMsix() + { + // The fix is gated on LocalId starting with "MSIX\\" so an unusual + // ARP DisplayName that happens to contain "ms-resource:" doesn't + // get silently rewritten. + var package = new InstalledPackage + { + Name = "ms-resource:appDisplayName", + LocalId = @"ARP\Machine\X64\{deadbeef}", + InstalledVersion = "1.0", + Scope = "Machine", + InstallerCategory = "msi", + Correlated = new SearchMatch + { + SourceName = "winget", + SourceKind = SourceKind.PreIndexed, + Id = "Some.Package", + Name = "Should Not Apply", + Version = "1.0", + }, + }; + Repository.ApplyMsixResourceStringNameFix(package); + Assert.Equal("ms-resource:appDisplayName", package.Name); + } + + [Fact] + public void ApplyMsixResourceStringNameFix_SkipsResolvedNames() + { + // MSIX entries with already-resolved names must not be touched — + // installed Name and catalog Name may legitimately differ. 
+ var package = new InstalledPackage + { + Name = "Microsoft Teams", + LocalId = @"MSIX\MSTeams_25290.205.4069.4894_arm64__8wekyb3d8bbwe", + InstalledVersion = "25290.205.4069.4894", + Scope = "User", + InstallerCategory = "msix", + PackageFamilyNames = ["MSTeams_8wekyb3d8bbwe"], + Correlated = new SearchMatch + { + SourceName = "winget", + SourceKind = SourceKind.PreIndexed, + Id = "Microsoft.Teams", + Name = "Microsoft Teams Catalog Name", + Version = "26106.1906.4665.7308", + MatchCriteria = "PackageFamilyName", + }, + }; + Repository.ApplyMsixResourceStringNameFix(package); + Assert.Equal("Microsoft Teams", package.Name); + } + + [Fact] + public void UnflipPackedGuid_ReversesMsiInstallerPacking() + { + // Verified against the live Installer hive: the user's installed + // Node.js ProductCode `{9292CBD9-...}` packs to + // `9DBC2929593B4D2488740C8E00C4F652`. + Assert.Equal( + "{9292cbd9-b395-42d4-8847-c0e8004c6f25}", + InstalledPackages.UnflipPackedGuid("9DBC2929593B4D2488740C8E00C4F652")); + Assert.Equal( + "{47c07a3a-42ef-4213-a85d-8f5a59077c28}", + InstalledPackages.UnflipPackedGuid("A3A70C74FE2431248AD5F8A59570C782")); + Assert.Null(InstalledPackages.UnflipPackedGuid("nothex")); + Assert.Null(InstalledPackages.UnflipPackedGuid("9DBC2929593B4D2488740C8E00C4F65")); + Assert.Null(InstalledPackages.UnflipPackedGuid("ZZZZZZZZ593B4D2488740C8E00C4F652")); + } + + [Fact] + public void DedupeCorrelatedForUpgrade_PrefersCanonicalRowOverRawArp() + { + // VS-installed .NET SDK has ARP `40.10.18029` (no canonical mapping) + // while the proper install has `10.0.108` (canonical). Without the + // canonical preference, compare_version picks the wrong row and the + // upgrade disappears. 
+ var raw = InstalledWithCorrelation("Microsoft.DotNet.SDK.10", "40.10.18029", canonical: false); + var canonical = InstalledWithCorrelation("Microsoft.DotNet.SDK.10", "10.0.108", canonical: true); + + var result = Repository.DedupeCorrelatedForUpgrade([raw, canonical]); + Assert.Single(result); + Assert.Equal("10.0.108", result[0].InstalledVersion); + Assert.True(result[0].InstalledVersionCanonical); + } + + [Fact] + public void DedupeCorrelatedForUpgrade_KeepsHighestAmongCanonical() + { + var lower = InstalledWithCorrelation("Microsoft.WindowsAppRuntime.1.7", "1.7.7", canonical: true); + var higher = InstalledWithCorrelation("Microsoft.WindowsAppRuntime.1.7", "1.7.9", canonical: true); + + var result = Repository.DedupeCorrelatedForUpgrade([lower, higher]); + Assert.Single(result); + Assert.Equal("1.7.9", result[0].InstalledVersion); + } + + [Fact] + public void DedupeCorrelatedForUpgrade_LeavesUncorrelatedAlone() + { + var uncorrelated = new InstalledPackage + { + Name = "Foo", + LocalId = @"ARP\Machine\X64\Foo", + InstalledVersion = "1.0", + Scope = "Machine", + InstallerCategory = "exe", + }; + var result = Repository.DedupeCorrelatedForUpgrade([uncorrelated]); + Assert.Single(result); + } + + private static InstalledPackage InstalledWithCorrelation(string id, string installedVersion, bool canonical) + { + return new InstalledPackage + { + Name = $"{id} install", + LocalId = $@"ARP\Machine\X64\{id}", + InstalledVersion = installedVersion, + Scope = "Machine", + InstallerCategory = "msi", + Correlated = new SearchMatch + { + SourceName = "winget", + SourceKind = SourceKind.PreIndexed, + Id = id, + Name = id, + Version = "99.0.0", + }, + InstalledVersionCanonical = canonical, + }; } [Fact] diff --git a/dotnet/src/Devolutions.Pinget.Core/InstalledPackages.cs b/dotnet/src/Devolutions.Pinget.Core/InstalledPackages.cs index 9e11e77..8b59cef 100644 --- a/dotnet/src/Devolutions.Pinget.Core/InstalledPackages.cs +++ 
b/dotnet/src/Devolutions.Pinget.Core/InstalledPackages.cs @@ -16,11 +16,21 @@ public static List Collect(string? scope) bool machine = !string.Equals(scope, "user", StringComparison.OrdinalIgnoreCase); bool user = !string.Equals(scope, "machine", StringComparison.OrdinalIgnoreCase); + // MSI registers UpgradeCode → ProductCode mappings under + // HKLM\SOFTWARE\Classes\Installer\UpgradeCodes (per-machine) and + // HKCU\Software\Microsoft\Installer\UpgradeCodes (per-user). Most + // ARP entries don't expose UpgradeCode directly, so winget reads it + // from here. Without it, packages like OpenJS.NodeJS.22 (correlated + // only via UpgradeCode in the v2 index) fall back to a sibling + // correlation (e.g. OpenJS.NodeJS.LTS) that manufactures a spurious + // upgrade. + var upgradeCodes = CollectMsiUpgradeCodes(machine, user); + if (machine) { - CollectArpPackages(packages, seen, Microsoft.Win32.RegistryHive.LocalMachine, "Machine", "X64", + CollectArpPackages(packages, seen, upgradeCodes, Microsoft.Win32.RegistryHive.LocalMachine, "Machine", "X64", Microsoft.Win32.RegistryView.Registry64); - CollectArpPackages(packages, seen, Microsoft.Win32.RegistryHive.LocalMachine, "Machine", "X86", + CollectArpPackages(packages, seen, upgradeCodes, Microsoft.Win32.RegistryHive.LocalMachine, "Machine", "X86", Microsoft.Win32.RegistryView.Registry32); CollectAppModelPackages(packages, seen, Microsoft.Win32.RegistryHive.LocalMachine, "Machine", Microsoft.Win32.RegistryView.Registry64); @@ -28,7 +38,7 @@ public static List Collect(string? scope) if (user) { - CollectArpPackages(packages, seen, Microsoft.Win32.RegistryHive.CurrentUser, "User", "X64", + CollectArpPackages(packages, seen, upgradeCodes, Microsoft.Win32.RegistryHive.CurrentUser, "User", "X64", Microsoft.Win32.RegistryView.Registry64); CollectAppModelPackages(packages, seen, Microsoft.Win32.RegistryHive.CurrentUser, "User", Microsoft.Win32.RegistryView.Registry64); @@ -37,9 +47,102 @@ public static List Collect(string? 
scope) return packages; } + /// <summary> + /// Builds a ProductCode → UpgradeCode map (both in standard + /// {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} lowercase format) by walking + /// the MSI Installer registry. Subkeys under Installer\UpgradeCodes are + /// named by the flipped UpgradeCode GUID, and each subkey's value names + /// are the flipped ProductCodes registered under that UpgradeCode. + /// </summary> + [SupportedOSPlatform("windows")] + private static Dictionary CollectMsiUpgradeCodes(bool includeMachine, bool includeUser) + { + var map = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (includeMachine) + { + CollectMsiUpgradeCodesFrom( + Microsoft.Win32.RegistryHive.LocalMachine, + @"SOFTWARE\Classes\Installer\UpgradeCodes", + Microsoft.Win32.RegistryView.Registry64, + map); + } + if (includeUser) + { + CollectMsiUpgradeCodesFrom( + Microsoft.Win32.RegistryHive.CurrentUser, + @"Software\Microsoft\Installer\UpgradeCodes", + Microsoft.Win32.RegistryView.Registry64, + map); + } + return map; + } + + [SupportedOSPlatform("windows")] + private static void CollectMsiUpgradeCodesFrom( + Microsoft.Win32.RegistryHive hive, string path, Microsoft.Win32.RegistryView view, + Dictionary map) + { + try + { + using var baseKey = Microsoft.Win32.RegistryKey.OpenBaseKey(hive, view); + using var root = baseKey.OpenSubKey(path); + if (root is null) return; + foreach (var keyName in root.GetSubKeyNames()) + { + var upgradeCode = UnflipPackedGuid(keyName); + if (upgradeCode is null) continue; + using var subkey = root.OpenSubKey(keyName); + if (subkey is null) continue; + foreach (var valueName in subkey.GetValueNames()) + { + var productCode = UnflipPackedGuid(valueName); + if (productCode is null) continue; + // First mapping wins — keeps the map deterministic if + // the same ProductCode is registered under multiple + // UpgradeCodes (rare, but possible for repackaged MSIs).
+ map.TryAdd(productCode, upgradeCode); + } + } + } + catch + { + // Installer hive missing or unreadable; identity correlation + // falls back to ProductCode-only and the harness will flag a + // mismatch if it matters. + } + } + + /// <summary> + /// Converts the 32-character "packed GUID" used inside the MSI Installer + /// registry hive back to the standard + /// {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} lowercase form. The packing + /// reverses the characters within each of 11 chunks (sized 8/4/4, then + /// eight 2-char byte pairs); returns null unless given 32 hex digits. + /// </summary> + internal static string? UnflipPackedGuid(string packed) + { + if (packed.Length != 32) return null; + foreach (var c in packed) + { + if (!Uri.IsHexDigit(c)) return null; + } + int[] chunks = [8, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2]; + var sb = new System.Text.StringBuilder(32); + int offset = 0; + foreach (var size in chunks) + { + for (int i = size - 1; i >= 0; i--) + sb.Append(packed[offset + i]); + offset += size; + } + var r = sb.ToString().ToLowerInvariant(); + return $"{{{r[..8]}-{r[8..12]}-{r[12..16]}-{r[16..20]}-{r[20..32]}}}"; + } + [SupportedOSPlatform("windows")] private static void CollectArpPackages( List packages, HashSet seen, + Dictionary upgradeCodeMap, Microsoft.Win32.RegistryHive hive, string scopeLabel, string archLabel, Microsoft.Win32.RegistryView view) { @@ -108,6 +211,23 @@ private static void CollectArpPackages( var upgradeCodes = new List(); if (!string.IsNullOrWhiteSpace(upgradeCode)) upgradeCodes.Add(upgradeCode); + if (upgradeCodes.Count == 0) + { + // ARP rarely exposes UpgradeCode directly. Recover + // it from the MSI Installer registry hive so + // identity correlation can fall back to + // upgradecodes2 when productcodes2 picks the wrong + // sibling (Node.js 24.x being attributed to + // OpenJS.NodeJS.LTS rather than OpenJS.NodeJS.22).
+ foreach (var code in productCodes) + { + if (upgradeCodeMap.TryGetValue(code, out var uc)) + { + upgradeCodes.Add(uc); + break; + } + } + } packages.Add(new InstalledPackage { diff --git a/dotnet/src/Devolutions.Pinget.Core/Models.cs b/dotnet/src/Devolutions.Pinget.Core/Models.cs index fc8dfb5..d730efd 100644 --- a/dotnet/src/Devolutions.Pinget.Core/Models.cs +++ b/dotnet/src/Devolutions.Pinget.Core/Models.cs @@ -202,6 +202,7 @@ public record Installer public InstallerSwitches Switches { get; init; } = new(); public List Commands { get; init; } = []; public List PackageDependencies { get; init; } = []; + public bool RequireExplicitUpgrade { get; init; } } public record InstallerSwitches @@ -265,6 +266,11 @@ public record Manifest public List PackageDependencies { get; init; } = []; public List Documentation { get; init; } = []; public List Installers { get; init; } = []; + // `RequireExplicitUpgrade: true` opts a package out of bulk + // `pinget upgrade` output (winget parity). Users can still upgrade by + // explicit id. Set at top-level or per-installer; treated as true + // when any installer asserts it. + public bool RequireExplicitUpgrade { get; init; } } public record InstallRequest @@ -495,9 +501,16 @@ public record InstallResult // Internal type for installed package tracking internal record InstalledPackage { - public required string Name { get; init; } + // Mutable so the post-correlation enrichment pass can resolve MSIX + // `ms-resource:` placeholder names to the catalog's display name — + // matching winget's behavior of pulling these through + // Windows.Management.Deployment. + public required string Name { get; set; } public required string LocalId { get; init; } - public required string InstalledVersion { get; init; } + // Mutable so identity correlation can rewrite ARP DisplayVersion to the + // catalog Version it maps to via versionData.mszyml's aMiV/aMaV ranges. + // Keeps comparisons against catalog versions on a common scale. 
+ public required string InstalledVersion { get; set; } public string? Publisher { get; init; } public string? Scope { get; init; } public string? InstallerCategory { get; init; } @@ -506,6 +519,15 @@ internal record InstalledPackage public List ProductCodes { get; init; } = []; public List UpgradeCodes { get; init; } = []; public SearchMatch? Correlated { get; set; } + // True when `InstalledVersion` was remapped to a catalog Version. Used by + // dedupe so canonical-versioned rows beat raw-ARP rows for the same id + // (e.g. `Microsoft.DotNet.SDK.10` 10.0.108 wins over a VS-installed + // 40.10.18029 that doesn't fit any aMiV bucket). + public bool InstalledVersionCanonical { get; set; } + // True when the correlated catalog package's latest version sets + // RequireExplicitUpgrade: true. winget hides those rows from bulk + // `upgrade`; we mirror that. Users can still upgrade by explicit id. + public bool CorrelatedRequiresExplicitUpgrade { get; set; } } internal enum SearchSemantics diff --git a/dotnet/src/Devolutions.Pinget.Core/NameNormalization.cs b/dotnet/src/Devolutions.Pinget.Core/NameNormalization.cs new file mode 100644 index 0000000..6065778 --- /dev/null +++ b/dotnet/src/Devolutions.Pinget.Core/NameNormalization.cs @@ -0,0 +1,322 @@ +using System.Text.RegularExpressions; + +namespace Devolutions.Pinget.Core; + +/// +/// Pinget port of winget-cli's NameNormalization.cpp (the "Initial" +/// version). Produces the same normalized name + publisher strings that +/// winget stores in the catalog's norm_names2 / norm_publishers2 +/// tables for an ARP entry's DisplayName and Publisher. +/// +/// Without this, identity correlation only succeeds when the installed +/// display name happens to match the catalog's PackageName after our naive +/// alphanumeric normalization — winget can match many more entries because +/// it strips version-like tokens, locales, architectures, and legal-entity +/// suffixes before comparing. 
This class reproduces those transformations +/// so we can correlate the same set of ARP rows winget does. +/// +internal static class NameNormalization +{ + internal enum Architecture + { + Unknown, + X86, + X64, + } + + internal readonly record struct NormalizedName(string Name, Architecture Architecture, string Locale); + + /// + /// Normalizes an ARP DisplayName the same way winget's NameNormalizer + /// (Initial version, preserveWhiteSpace = false) does — producing the + /// string that ends up in the catalog's norm_names2 table. + /// + public static NormalizedName NormalizeName(string value) + { + var name = PrepareForValidation(value); + while (Unwrap(ref name)) { } + + // SAP Business Object program names follow a specific pattern that + // breaks under the regular flow; winget short-circuits them. + if (SapPackage.IsMatch(name)) + { + return new NormalizedName(name, Architecture.Unknown, string.Empty); + } + + var architecture = RemoveArchitecture(ref name); + var locale = RemoveLocale(ref name); + + // Preserve KB numbers from within parens before the bracket strippers + // would eat them — winget keeps `KB1234567` as part of the normalized + // name because it's the only meaningful identifier on some patches. + name = KbNumbers.Replace(name, "$1"); + + while (RemoveAll(ProgramNameRegexes, ref name)) { } + + var tokens = SplitWithLegalSuffixExclusion(ProgramNameSplit, name, stopOnExclusion: false); + name = string.Concat(tokens); + name = NonLettersAndDigits.Replace(name, string.Empty); + + return new NormalizedName(name.ToLowerInvariant(), architecture, locale.ToLowerInvariant()); + } + + /// + /// Normalizes a publisher string. Strips the same set of patterns as + /// the name path plus splits on word boundaries with the + /// legal-entity-suffix list — so "Microsoft Corporation" → "microsoft", + /// "JetBrains s.r.o." → "jetbrains", but "The Git Development Community" + /// stays intact because no token in it is a recognized suffix. 
+ /// + public static string NormalizePublisher(string value) + { + var publisher = PrepareForValidation(value); + while (Unwrap(ref publisher)) { } + + while (RemoveAll(PublisherNameRegexes, ref publisher)) { } + + // Publisher split stops at the FIRST legal-entity suffix it sees + // (after the first token), so "Foo Inc Internal Sub Bar" keeps just + // "Foo" — "Inc" cuts off everything beyond. + var tokens = SplitWithLegalSuffixExclusion(PublisherNameSplit, publisher, stopOnExclusion: true); + publisher = string.Concat(tokens); + publisher = NonLettersAndDigits.Replace(publisher, string.Empty); + return publisher.ToLowerInvariant(); + } + + // ── Internal helpers ────────────────────────────────────────────────── + + private static string PrepareForValidation(string value) + { + var s = value.Trim(); + // winget supports an `@@`-delimited suffix on internal display names + // that should be stripped before normalization — keep parity even + // though it's unusual in the wild. + var idx = s.IndexOf("@@", StringComparison.Ordinal); + if (idx >= 3) + { + s = s[..idx]; + } + return s; + } + + private static bool Unwrap(ref string value) + { + if (value.Length < 2) return false; + var first = value[0]; + var last = value[^1]; + var wrapped = first switch + { + '"' => last == '"', + '(' => last == ')', + _ => false, + }; + if (!wrapped) return false; + value = value[1..^1]; + return true; + } + + private static bool Remove(Regex re, ref string value) + { + var replaced = re.Replace(value, string.Empty); + if (replaced == value) return false; + value = replaced; + return true; + } + + private static bool RemoveAll(IReadOnlyList regexes, ref string value) + { + var changed = false; + foreach (var re in regexes) + { + if (Remove(re, ref value)) changed = true; + } + return changed; + } + + private static Architecture RemoveArchitecture(ref string value) + { + // Order matters: "32/64-bit" is a superstring of "64-bit"; "X64"/ + // "AMD64" must beat "X32"/"X86" because of 
"x86-64". + if (Remove(Architecture32Or64Bit, ref value)) + return Architecture.Unknown; + if (Remove(ArchitectureX64, ref value) || Remove(Architecture64Bit, ref value)) + return Architecture.X64; + if (Remove(ArchitectureX32, ref value) || Remove(Architecture32Bit, ref value)) + return Architecture.X86; + return Architecture.Unknown; + } + + private static string RemoveLocale(ref string value) + { + var matches = Locale.Matches(value); + if (matches.Count == 0) return string.Empty; + + var newValue = new System.Text.StringBuilder(value.Length); + string? localeFound = null; + var lastEnd = 0; + + foreach (Match m in matches) + { + var folded = m.Value.ToUpperInvariant(); + var isKnown = Array.BinarySearch(Locales, folded) >= 0; + newValue.Append(value, lastEnd, m.Index - lastEnd); + if (!isKnown) + { + newValue.Append(m.Value); + } + else if (localeFound is null) + { + localeFound = folded; + } + else if (!string.Equals(localeFound, folded, StringComparison.Ordinal)) + { + // Multiple distinct locales: keep only if the language matches. + var existingLang = localeFound.Split('-')[0]; + var newLang = folded.Split('-')[0]; + if (!string.Equals(existingLang, newLang, StringComparison.Ordinal)) + { + localeFound = string.Empty; + } + } + lastEnd = m.Index + m.Length; + } + newValue.Append(value, lastEnd, value.Length - lastEnd); + value = newValue.ToString(); + return localeFound ?? 
string.Empty; + } + + private static List SplitWithLegalSuffixExclusion(Regex re, string value, bool stopOnExclusion) + { + var result = new List(); + var lastEnd = 0; + + bool PushSegment(string segment) + { + var trimmed = segment.Trim(); + if (trimmed.Length == 0) return true; + var folded = trimmed.ToUpperInvariant(); + if (result.Count > 0 && Array.BinarySearch(LegalEntitySuffixes, folded) >= 0) + { + return !stopOnExclusion; + } + result.Add(trimmed); + return true; + } + + foreach (Match m in re.Matches(value)) + { + var segment = value.Substring(lastEnd, m.Index - lastEnd); + if (!PushSegment(segment)) return result; + lastEnd = m.Index + m.Length; + } + PushSegment(value[lastEnd..]); + return result; + } + + // ── Regex patterns. .NET regex supports variable-length lookbehind + // directly, so the C++ patterns port over unchanged. ───────────────── + + private const RegexOptions Opts = RegexOptions.IgnoreCase | RegexOptions.Compiled; + + private static readonly Regex ArchitectureX32 = + new(@"(?<=^|[^\p{L}\p{Nd}])(X32|X86)(?=\P{Nd}|$)(?:\sEDITION)?", Opts); + private static readonly Regex ArchitectureX64 = + new(@"(?<=^|[^\p{L}\p{Nd}])(X64|AMD64|X86([\p{Pd}\p{Pc}]64))(?=\P{Nd}|$)(?:\sEDITION)?", Opts); + private static readonly Regex Architecture32Bit = + new(@"(?<=^|[^\p{L}\p{Nd}])(32[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?", Opts); + private static readonly Regex Architecture64Bit = + new(@"(?<=^|[^\p{L}\p{Nd}])(64[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?", Opts); + private static readonly Regex Architecture32Or64Bit = + new(@"(?<=^|[^\p{L}\p{Nd}])((64[\\/]32|32[\\/]64)[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?", Opts); + + private static readonly Regex Locale = + new(@"(? 
m.Display).ToList(); foreach (var pkg in installed) + { + if (pkg.Correlated is not null) continue; pkg.Correlated = CorrelateInstalledPackage(pkg, candidates, AllowLooseListCorrelation(query)); + } } else if (needsAvailable) { warnings.AddRange(CorrelateAllInstalled(installed)); } + if (needsAvailable) + { + // Enrich rows that correlated via the name-based fallback. Those + // rows skipped the v2-index aMiV/aMaV pass that runs inside + // CorrelateInstalledViaIndex, so they still carry the raw ARP + // DisplayVersion (e.g. `JetBrains Rider 2025.3.0.1` showing + // build `253.28294.112` instead of the marketing `2025.3.0.1`). + // Also resolves MSIX placeholder names like + // `ms-resource:appDisplayName` to the catalog's display name — + // winget pulls these through Windows.Management.Deployment; the + // catalog name is the parity-faithful substitute pinget has + // available without taking a WinRT dependency. + warnings.AddRange(EnrichCorrelatedViaIndex(installed, query.Source)); + } + + // For `upgrade`, collapse multiple installed entries that map to the + // same catalog package id (side-by-side .NET SDKs, several runtime + // versions of WindowsAppRuntime, the MSIX shim alongside an ARP + // install of Edge, etc.). Keep the entry with the highest + // installed_version so the upgrade comparison runs against the + // user's newest install — matches winget's one-row-per-id output. 
+ if (query.UpgradeOnly) + installed = DedupeCorrelatedForUpgrade(installed); + var filtered = installed .Where(p => ListPackageMatches(p, query) && (!query.UpgradeOnly || InstalledPackageMatchesUpgradeFilter(p, query))) @@ -1536,6 +1582,7 @@ List InstArr(string key) Switches = switches, Commands = InstArr("Commands"), PackageDependencies = InstArr("PackageDependencies"), + RequireExplicitUpgrade = ReadBool(instDict, "RequireExplicitUpgrade"), }); } } @@ -1555,6 +1602,9 @@ List InstArr(string key) } } + var topLevelRequireExplicit = ReadBool(dict, "RequireExplicitUpgrade"); + var anyInstallerRequireExplicit = installers.Any(i => i.RequireExplicitUpgrade); + return new Manifest { Id = GetStr("PackageIdentifier"), @@ -1580,6 +1630,29 @@ List InstArr(string key) Documentation = docs, Installers = installers, PackageDependencies = dependencies, + RequireExplicitUpgrade = topLevelRequireExplicit || anyInstallerRequireExplicit, + }; + } + + private static bool ReadBool(IDictionary dict, string key) + { + if (!dict.TryGetValue(key, out var raw) || raw is null) return false; + return raw switch + { + bool b => b, + string s => s.Equals("true", StringComparison.OrdinalIgnoreCase), + _ => false, + }; + } + + private static bool ReadBool(IDictionary dict, string key) + { + if (!dict.TryGetValue(key, out var raw) || raw is null) return false; + return raw switch + { + bool b => b, + string s => s.Equals("true", StringComparison.OrdinalIgnoreCase), + _ => false, }; } @@ -1770,28 +1843,37 @@ private static InstallerSwitches ReadInstallerSwitchesObject(object? switchesObj private List CorrelateAllInstalled(List installed) { + // Skip work entirely when every entry was resolved by identity. 
+ if (installed.All(p => p.Correlated is not null)) + return []; var allQuery = new PackageQuery { Count = 100_000 }; var (matches, warnings, _, _) = SearchLocated(allQuery, SearchSemantics.Many); var candidates = matches.Select(m => m.Display).ToList(); foreach (var pkg in installed) + { + if (pkg.Correlated is not null) continue; pkg.Correlated = CorrelateInstalledPackage(pkg, candidates, true); + } return warnings; } internal static SearchMatch? CorrelateInstalledPackage(InstalledPackage pkg, List candidates, bool loose) { - // Note: MSIX packages used to be hard-skipped here, but that prevented obvious - // correlations like `Microsoft Teams` (MSIX) → `Microsoft.Teams` (catalog). - // Name-based correlation now applies uniformly; MSIX entries whose installed - // name is an unresolved resource string (e.g. `ms-resource:appDisplayName`) - // simply fail to match and return null, same as before. + // MSIX entries can only be correlated through their PackageFamilyName. + // That lookup happens earlier in CorrelateInstalledViaIndex; if it + // didn't find a match, name-based fallback is wrong — the catalog + // doesn't carry this MSIX's identity, so any name collision (e.g. the + // self-updating `Microsoft Edge Stable` MSIX or the Store stub + // `Notepad++` MSIX) would manufacture a phantom correlation that + // winget doesn't make. + if (pkg.LocalId.StartsWith(@"MSIX\", StringComparison.OrdinalIgnoreCase)) + return null; var installedName = NormalizeCorrelationName(pkg.Name); var installedNameLower = pkg.Name.ToLowerInvariant(); var candidateNames = CorrelationNameCandidates(pkg.Name); - SearchMatch? 
best = null; - int bestScore = 0; + var scored = new List<(int Score, SearchMatch Match)>(); foreach (var candidate in candidates) { var candidateNorm = NormalizeCorrelationName(candidate.Name); @@ -1828,10 +1910,545 @@ private List CorrelateAllInstalled(List installed) else prefixBonus = 0; - var score = baseScore + prefixBonus; - if (score > bestScore) { bestScore = score; best = candidate; } + scored.Add((baseScore + prefixBonus, candidate)); + } + + if (scored.Count == 0) return null; + + scored.Sort((a, b) => b.Score.CompareTo(a.Score)); + var (topScore, topMatch) = scored[0]; + // Reject ambiguous wins. When the user has e.g. `Git` (publisher + // "The Git Development Community") installed, both `Git.Git` and + // `Microsoft.Git` normalize their names to `git` and tie at the same + // score — the only signal that disambiguates them, publisher, isn't + // part of this scoring function. winget refuses the correlation in + // this case (the install lists with empty Source); we do the same to + // avoid manufacturing a fake upgrade against the wrong package. + if (scored.Count > 1 && scored[1].Score == topScore) + return null; + return topMatch; + } + + // ── Identity correlation against the v2 index ───────────────────────── + + /// + /// Looks up a v2 catalog package whose identity (PackageFamilyName, + /// ProductCode, or UpgradeCode) matches the installed package. Returns + /// the packages.rowid and the field that produced the match. UpgradeCode + /// is tried *before* ProductCode because it's the MSI family identity — + /// stable across versions — while ProductCode changes every release. For + /// installs whose ProductCode happens to also be carried under a sibling + /// catalog package (e.g. `OpenJS.NodeJS.LTS` retains old LTS-line + /// ProductCodes while the live install actually belongs to + /// `OpenJS.NodeJS.22` via its UpgradeCode), the upgrade-side match + /// represents the user's package family — the one winget uses. 
+ /// + private static (long Rowid, string MatchedBy)? LookupIdentityMatchV2( + Microsoft.Data.Sqlite.SqliteConnection conn, InstalledPackage pkg) + { + foreach (var pfn in pkg.PackageFamilyNames) + { + var rowid = QueryOptionalLong(conn, + "SELECT package FROM pfns2 WHERE pfn = @v LIMIT 1", + pfn.ToLowerInvariant()); + if (rowid is long r) return (r, "PackageFamilyName"); + } + foreach (var code in pkg.UpgradeCodes) + { + var rowid = QueryOptionalLong(conn, + "SELECT package FROM upgradecodes2 WHERE upgradecode = @v LIMIT 1", + code.ToLowerInvariant()); + if (rowid is long r) return (r, "UpgradeCode"); + } + foreach (var code in pkg.ProductCodes) + { + var rowid = QueryOptionalLong(conn, + "SELECT package FROM productcodes2 WHERE productcode = @v LIMIT 1", + code.ToLowerInvariant()); + if (rowid is long r) return (r, "ProductCode"); + } + return null; + } + + private static long? QueryOptionalLong(Microsoft.Data.Sqlite.SqliteConnection conn, string sql, string value) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = sql; + cmd.Parameters.AddWithValue("@v", value); + var result = cmd.ExecuteScalar(); + if (result is null || result is DBNull) return null; + return Convert.ToInt64(result); + } + + private static bool V2IdentityTablesPresent(Microsoft.Data.Sqlite.SqliteConnection conn) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'productcodes2' LIMIT 1"; + return cmd.ExecuteScalar() is not null; + } + + private record V2PackageMetadata(long Rowid, string Id, string Name, string? Moniker, string LatestVersion, string PackageHash); + + private static V2PackageMetadata? 
FetchV2PackageMetadata(Microsoft.Data.Sqlite.SqliteConnection conn, long packageRowid) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = "SELECT id, name, moniker, latest_version, hash FROM packages WHERE rowid = @rowid LIMIT 1"; + cmd.Parameters.AddWithValue("@rowid", packageRowid); + using var reader = cmd.ExecuteReader(); + if (!reader.Read()) return null; + var hashValue = reader.GetValue(4); + var hash = hashValue is byte[] blob + ? Convert.ToHexString(blob).ToLowerInvariant() + : (hashValue?.ToString() ?? string.Empty).ToLowerInvariant(); + return new V2PackageMetadata( + packageRowid, + reader.GetString(0), + reader.GetString(1), + reader.IsDBNull(2) ? null : reader.GetString(2), + reader.IsDBNull(3) ? string.Empty : reader.GetString(3), + hash); + } + + /// + /// Translates an ARP DisplayVersion to the catalog Version it maps to. + /// For packages like the .NET SDK or Python the ARP version is an MSI + /// build number (e.g. `10.1.826.23019`) unrelated to the public version + /// (`10.0.108`). winget records the ARP range per catalog version in + /// versionData.mszyml as `aMiV` / `aMaV`; we look for the entry whose + /// range contains the installed version. + /// + internal static string? MapArpVersionToCatalog(IReadOnlyList entries, string arpVersion) + { + if (string.IsNullOrEmpty(arpVersion) || arpVersion.Equals("Unknown", StringComparison.OrdinalIgnoreCase)) + return null; + foreach (var entry in entries) + { + if (entry.ArpMinVersion is null || entry.ArpMaxVersion is null) continue; + if (RestSource.CompareVersionStrings(arpVersion, entry.ArpMinVersion) >= 0 && + RestSource.CompareVersionStrings(arpVersion, entry.ArpMaxVersion) <= 0) + { + return entry.Version; + } + } + return null; + } + + /// + /// Returns the latest catalog Version that carries ARP-range metadata. 
+ /// Packages like `Microsoft.WindowsAppRuntime.1.8` also publish "internal" + /// version rows whose `v` is an MSI build number (`8000.836.2153.0`) + /// without ARP bounds — those aren't user-facing upgrade targets. When + /// the installed side was matched through an ARP range, the available + /// side must be a peer that also exposes ARP bounds so the two versions + /// compare on the same scale. Returns null when no entry has bounds. + /// + internal static string? LatestArpAnchoredVersion(IReadOnlyList entries) + { + PreIndexedSource.V2VersionDataEntry? best = null; + foreach (var entry in entries) + { + if (entry.ArpMinVersion is null || entry.ArpMaxVersion is null) continue; + if (best is null || RestSource.CompareVersionStrings(entry.Version, best.Version) > 0) + best = entry; + } + return best?.Version; + } + + /// + /// Correlates installed packages against the v2 pre-indexed catalog + /// using PackageFamilyName / ProductCode / UpgradeCode lookups — winget's + /// authoritative correlation path. Also rewrites `InstalledVersion` to + /// the catalog Version whose ARP range covers the ARP DisplayVersion, so + /// packages whose ARP version is an MSI build number compare against a + /// meaningful catalog version. Packages without any identifier or whose + /// identifiers don't match a catalog entry are left untouched for the + /// name-based fallback. + /// + private List CorrelateInstalledViaIndex(List installed, string? 
requestedSource) + { + var warnings = new List(); + for (int sourceIndex = 0; sourceIndex < _store.Sources.Count; sourceIndex++) + { + var source = _store.Sources[sourceIndex]; + if (source.Kind != SourceKind.PreIndexed) continue; + if (requestedSource is not null && !source.Name.Equals(requestedSource, StringComparison.OrdinalIgnoreCase)) + continue; + + Microsoft.Data.Sqlite.SqliteConnection conn; + try { conn = OpenPreindexedConnection(sourceIndex); } + catch { continue; } + + var resolved = new List<(int InstalledIndex, V2PackageMetadata Meta, string MatchedBy)>(); + using (conn) + { + if (!V2IdentityTablesPresent(conn)) continue; + for (int i = 0; i < installed.Count; i++) + { + if (installed[i].Correlated is not null) continue; + var hit = LookupIdentityMatchV2(conn, installed[i]); + if (hit is null) continue; + var meta = FetchV2PackageMetadata(conn, hit.Value.Rowid); + if (meta is null) continue; + resolved.Add((i, meta, hit.Value.MatchedBy)); + } + } + + foreach (var (idx, meta, matchedBy) in resolved) + { + string? canonicalInstalled = null; + string? anchoredLatest = null; + try + { + using var conn2 = OpenPreindexedConnection(sourceIndex); + var (entries, _) = PreIndexedSource.LoadV2VersionData(_client, conn2, source, meta.Rowid, meta.PackageHash, _appRoot); + canonicalInstalled = MapArpVersionToCatalog(entries, installed[idx].InstalledVersion); + anchoredLatest = LatestArpAnchoredVersion(entries); + } + catch { /* version data fetch is best-effort; fall back below */ } + + // When the installed side was rebased to a catalog Version + // via aMiV/aMaV, the available side must compare on the same + // scale: prefer the latest ARP-anchored Version over + // `latest_version`, which can be an internal/MSI build + // number that would manufacture a phantom upgrade against + // an installed canonical. + var availableVersion = (canonicalInstalled is not null && anchoredLatest is not null) + ? 
anchoredLatest + : meta.LatestVersion; + + installed[idx].Correlated = new SearchMatch + { + SourceName = source.Name, + SourceKind = source.Kind, + Id = meta.Id, + Name = meta.Name, + Moniker = meta.Moniker, + Version = availableVersion, + MatchCriteria = matchedBy, + }; + if (canonicalInstalled is not null) + { + installed[idx].InstalledVersion = canonicalInstalled; + installed[idx].InstalledVersionCanonical = true; + } + } + } + return warnings; + } + + /// + /// Mid-tier correlation that mirrors winget's ARP normalization path. + /// For each installed package without a PFN/PC/UC identity match, the + /// (DisplayName, Publisher) is run through NameNormalization and + /// intersected against the v2 index's norm_names2 and norm_publishers2 + /// tables. A single resulting package_rowid produces a correlation; + /// ambiguous matches are skipped, matching winget's empty-Source + /// behavior when normalization can't pick a winner. + /// + private List CorrelateInstalledByNormalizedIdentity(List installed, string? requestedSource) + { + var warnings = new List(); + for (int sourceIndex = 0; sourceIndex < _store.Sources.Count; sourceIndex++) + { + var source = _store.Sources[sourceIndex]; + if (source.Kind != SourceKind.PreIndexed) continue; + if (requestedSource is not null && !source.Name.Equals(requestedSource, StringComparison.OrdinalIgnoreCase)) + continue; + + Microsoft.Data.Sqlite.SqliteConnection conn; + try { conn = OpenPreindexedConnection(sourceIndex); } + catch { continue; } + + var resolved = new List<(int Index, V2PackageMetadata Meta)>(); + using (conn) + { + if (!V2NormalizedIdentityTablesPresent(conn)) continue; + for (int i = 0; i < installed.Count; i++) + { + var pkg = installed[i]; + if (pkg.Correlated is not null) continue; + // MSIX entries belong to the PFN-only path; matches + // `CorrelateInstalledPackage`'s MSIX hard-block. 
+ if (pkg.LocalId.StartsWith(@"MSIX\", StringComparison.OrdinalIgnoreCase)) continue; + if (string.IsNullOrWhiteSpace(pkg.Publisher)) continue; + + var normName = NameNormalization.NormalizeName(pkg.Name).Name; + if (string.IsNullOrEmpty(normName)) continue; + var normPub = NameNormalization.NormalizePublisher(pkg.Publisher); + if (string.IsNullOrEmpty(normPub)) continue; + + var rowid = LookupUniqueNormalizedIdentity(conn, normName, normPub); + if (rowid is null) continue; + var meta = FetchV2PackageMetadata(conn, rowid.Value); + if (meta is null) continue; + resolved.Add((i, meta)); + } + } + + foreach (var (idx, meta) in resolved) + { + string? canonicalInstalled = null; + string? anchoredLatest = null; + try + { + using var conn2 = OpenPreindexedConnection(sourceIndex); + var (entries, _) = PreIndexedSource.LoadV2VersionData(_client, conn2, source, meta.Rowid, meta.PackageHash, _appRoot); + canonicalInstalled = MapArpVersionToCatalog(entries, installed[idx].InstalledVersion); + anchoredLatest = LatestArpAnchoredVersion(entries); + } + catch { /* best-effort */ } + + var availableVersion = (canonicalInstalled is not null && anchoredLatest is not null) + ? 
anchoredLatest + : meta.LatestVersion; + + installed[idx].Correlated = new SearchMatch + { + SourceName = source.Name, + SourceKind = source.Kind, + Id = meta.Id, + Name = meta.Name, + Moniker = meta.Moniker, + Version = availableVersion, + MatchCriteria = "NormalizedNameAndPublisher", + }; + if (canonicalInstalled is not null) + { + installed[idx].InstalledVersion = canonicalInstalled; + installed[idx].InstalledVersionCanonical = true; + } + } } - return best; + return warnings; + } + + private static bool V2NormalizedIdentityTablesPresent(Microsoft.Data.Sqlite.SqliteConnection conn) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'norm_names2' LIMIT 1"; + return cmd.ExecuteScalar() is not null; + } + + /// + /// Returns the catalog packages.rowid whose (norm_name, norm_publisher) + /// pair matches the installed package — if and only if the match is + /// unique. Multiple matches mean normalization couldn't disambiguate + /// (an ARP DisplayName that normalizes the same as two distinct catalog + /// packages); winget refuses to correlate and we do the same. + /// + // Test-only entry points. Keep these internal so the assembly's + // InternalsVisibleTo grant exposes them just to Core.Tests. + internal static long? LookupUniqueNormalizedIdentityForTesting(Microsoft.Data.Sqlite.SqliteConnection conn, string normName, string normPublisher) + => LookupUniqueNormalizedIdentity(conn, normName, normPublisher); + + internal static bool InstalledPackageMatchesUpgradeFilterForTesting(InstalledPackage pkg, ListQuery query) + => InstalledPackageMatchesUpgradeFilter(pkg, query); + + private static long? 
LookupUniqueNormalizedIdentity(Microsoft.Data.Sqlite.SqliteConnection conn, string normName, string normPublisher) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = @" + SELECT DISTINCT n.package FROM norm_names2 n + WHERE n.norm_name = @name + AND EXISTS (SELECT 1 FROM norm_publishers2 p WHERE p.package = n.package AND p.norm_publisher = @pub) + LIMIT 2"; + cmd.Parameters.AddWithValue("@name", normName); + cmd.Parameters.AddWithValue("@pub", normPublisher); + using var reader = cmd.ExecuteReader(); + if (!reader.Read()) return null; + var first = reader.GetInt64(0); + if (reader.Read()) return null; // ambiguous + return first; + } + + /// + /// Resolves a v2 package by its catalog id back to its rowid + hash so + /// the post-correlation enrichment pass can load versionData for rows + /// that correlated through the name-based fallback (which throws the + /// rowid away). + /// + private static V2PackageMetadata? LookupV2MetadataById(Microsoft.Data.Sqlite.SqliteConnection conn, string packageId) + { + using var cmd = conn.CreateCommand(); + cmd.CommandText = "SELECT rowid, id, name, moniker, latest_version, hash FROM packages WHERE id = @id COLLATE NOCASE LIMIT 1"; + cmd.Parameters.AddWithValue("@id", packageId); + using var reader = cmd.ExecuteReader(); + if (!reader.Read()) return null; + var hashValue = reader.GetValue(5); + var hash = hashValue is byte[] blob + ? Convert.ToHexString(blob).ToLowerInvariant() + : (hashValue?.ToString() ?? string.Empty).ToLowerInvariant(); + return new V2PackageMetadata( + reader.GetInt64(0), + reader.GetString(1), + reader.GetString(2), + reader.IsDBNull(3) ? null : reader.GetString(3), + reader.IsDBNull(4) ? string.Empty : reader.GetString(4), + hash); + } + + /// + /// MSIX entries whose Windows resource-string display name (e.g. + /// ms-resource:appDisplayName) hasn't been resolved by the AppX + /// runtime would otherwise leak that placeholder into pinget's output. 
+ /// winget resolves it via Windows.Management.Deployment and shows the + /// catalog's PackageName; we mirror that by substituting the correlated + /// catalog name once the package is correlated, which is the + /// parity-faithful substitute we can produce without taking a WinRT + /// dependency. + /// + internal static void ApplyMsixResourceStringNameFix(InstalledPackage package) + { + if (!package.LocalId.StartsWith(@"MSIX\", StringComparison.OrdinalIgnoreCase)) return; + if (!package.Name.StartsWith("ms-resource:", StringComparison.OrdinalIgnoreCase)) return; + var candidate = package.Correlated; + if (candidate is null) return; + if (!string.IsNullOrEmpty(candidate.Name)) package.Name = candidate.Name; + } + + /// + /// Runs after both correlation paths to enrich already-correlated rows + /// with information that the identity path applies inline but that + /// name-based correlation skips: the MSIX-placeholder name fix and the + /// aMiV/aMaV version remap. Without this, packages without a + /// ProductCode/PFN/UpgradeCode (e.g. JetBrains.Rider) keep showing the + /// raw ARP DisplayVersion (`253.28294.112`) instead of the catalog + /// Version (`2025.3.0.1`). + /// + private List EnrichCorrelatedViaIndex(List installed, string? requestedSource) + { + // Cheap pass first — no SQL required. + foreach (var pkg in installed) + ApplyMsixResourceStringNameFix(pkg); + + // Expensive pass — load versionData for rows whose installed_version + // wasn't remapped earlier. 
+ var warnings = new List(); + for (int sourceIndex = 0; sourceIndex < _store.Sources.Count; sourceIndex++) + { + var source = _store.Sources[sourceIndex]; + if (source.Kind != SourceKind.PreIndexed) continue; + if (requestedSource is not null && !source.Name.Equals(requestedSource, StringComparison.OrdinalIgnoreCase)) + continue; + + Microsoft.Data.Sqlite.SqliteConnection conn; + try { conn = OpenPreindexedConnection(sourceIndex); } + catch { continue; } + + var needs = new List<(int Index, V2PackageMetadata Meta)>(); + using (conn) + { + if (!V2IdentityTablesPresent(conn)) continue; + for (int i = 0; i < installed.Count; i++) + { + var pkg = installed[i]; + if (pkg.Correlated is null) continue; + // Stay within the source the row was correlated to — + // looking up `JetBrains.Rider` in a different catalog + // would either miss or pull the wrong package's range. + if (!pkg.Correlated.SourceName.Equals(source.Name, StringComparison.OrdinalIgnoreCase)) + continue; + var meta = LookupV2MetadataById(conn, pkg.Correlated.Id); + if (meta is null) continue; + needs.Add((i, meta)); + } + } + + foreach (var (idx, meta) in needs) + { + // Load versionData once: needed for both the aMiV/aMaV + // remap (if installed_version isn't canonical yet) and to + // locate the latest manifest for RequireExplicitUpgrade. + List? 
entries = null; + try + { + using var conn2 = OpenPreindexedConnection(sourceIndex); + var (loaded, _) = PreIndexedSource.LoadV2VersionData(_client, conn2, source, meta.Rowid, meta.PackageHash, _appRoot); + entries = loaded; + } + catch { /* best-effort */ } + + if (entries is null) continue; + + if (!installed[idx].InstalledVersionCanonical) + { + var canonicalInstalled = MapArpVersionToCatalog(entries, installed[idx].InstalledVersion); + var anchoredLatest = LatestArpAnchoredVersion(entries); + if (canonicalInstalled is not null) + { + installed[idx].InstalledVersion = canonicalInstalled; + installed[idx].InstalledVersionCanonical = true; + if (installed[idx].Correlated is SearchMatch correlated && anchoredLatest is not null) + installed[idx].Correlated = correlated with { Version = anchoredLatest }; + } + } + + // RequireExplicitUpgrade is on the latest version's + // installer manifest. winget hides those from bulk + // `upgrade` (Edge, Steam, Discord). Peek at the first + // versionData entry (latest by sort order) and read its + // manifest to mirror that filtering. + var latest = entries.Count > 0 ? entries[0] : null; + if (latest is not null) + { + try + { + var bytes = PreIndexedSource.GetCachedSourceFile( + _client, "V2_M", source, latest.ManifestRelativePath, latest.ManifestHash); + var manifest = ParseYamlManifest(bytes); + installed[idx].CorrelatedRequiresExplicitUpgrade = manifest.RequireExplicitUpgrade; + } + catch { /* best-effort */ } + } + } + } + return warnings; + } + + /// + /// Collapses installed entries that correlate to the same catalog package + /// id (case-insensitive within a source) down to a single representative + /// — the one with the highest installed version, preferring rows whose + /// version was successfully remapped via aMiV/aMaV. Uncorrelated entries + /// pass through untouched (they can't appear in upgrade output anyway). 
+ /// + internal static List DedupeCorrelatedForUpgrade(List packages) + { + var byId = new Dictionary<(string Id, string Source), InstalledPackage>(); + var uncorrelated = new List(); + foreach (var pkg in packages) + { + if (pkg.Correlated is null) + { + uncorrelated.Add(pkg); + continue; + } + var key = ( + pkg.Correlated.Id.ToLowerInvariant(), + pkg.Correlated.SourceName.ToLowerInvariant() + ); + if (!byId.TryGetValue(key, out var existing)) + { + byId[key] = pkg; + continue; + } + // Prefer rows whose ARP version was mapped to a catalog Version. + // Comparing a raw ARP DisplayVersion like `40.10.18029` against + // the catalog's `10.0.300` is meaningless and would also hide + // the row that legitimately upgrades to it. + bool keepNew; + if (pkg.InstalledVersionCanonical && !existing.InstalledVersionCanonical) + keepNew = true; + else if (!pkg.InstalledVersionCanonical && existing.InstalledVersionCanonical) + keepNew = false; + else + keepNew = RestSource.CompareVersionStrings(pkg.InstalledVersion, existing.InstalledVersion) > 0; + if (keepNew) byId[key] = pkg; + } + + var result = new List(uncorrelated); + result.AddRange(byId.Values); + return result; } private static List CorrelationNameCandidates(string name) @@ -1903,9 +2520,18 @@ private static bool ListPackageMatches(InstalledPackage pkg, ListQuery query) return true; } - private static bool InstalledPackageMatchesUpgradeFilter(InstalledPackage pkg, ListQuery query) => - InstalledPackageHasUpgrade(pkg) || - (query.IncludeUnknown && InstalledPackageHasUnknownVersion(pkg) && pkg.Correlated is not null); + private static bool InstalledPackageMatchesUpgradeFilter(InstalledPackage pkg, ListQuery query) + { + // Hide RequireExplicitUpgrade packages from bulk `upgrade` output to + // match winget (Edge, Steam, Discord, several MSIX packages). When + // the user explicitly filtered by id/name/etc., they're targeting + // a specific package and want to see it regardless — same allowance + // winget makes. 
+ if (pkg.CorrelatedRequiresExplicitUpgrade && !ListQueryNeedsAvailableLookup(query)) + return false; + return InstalledPackageHasUpgrade(pkg) + || (query.IncludeUnknown && InstalledPackageHasUnknownVersion(pkg) && pkg.Correlated is not null); + } internal static PinRecord? FindApplicablePin(ListMatch match, IReadOnlyList pins) { diff --git a/rust/crates/pinget-core/Cargo.toml b/rust/crates/pinget-core/Cargo.toml index d711c51..adb23fa 100644 --- a/rust/crates/pinget-core/Cargo.toml +++ b/rust/crates/pinget-core/Cargo.toml @@ -15,6 +15,11 @@ anyhow = "1.0.102" chrono = { version = "0.4.44", features = ["serde"] } dirs = "6.0.0" flate2 = "1.1.9" +# fancy-regex (not the `regex` crate) because winget's NameNormalization +# patterns rely on lookbehind / lookahead — RE2's algorithm doesn't support +# those, but fancy-regex's backtracking engine does. Performance is fine for +# the one-shot normalize call per installed package. +fancy-regex = "0.16" reqwest = { version = "0.13.2", default-features = false, features = ["blocking", "json", "query", "rustls"] } serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.149" diff --git a/rust/crates/pinget-core/src/lib.rs b/rust/crates/pinget-core/src/lib.rs index 37fc207..34fca7a 100644 --- a/rust/crates/pinget-core/src/lib.rs +++ b/rust/crates/pinget-core/src/lib.rs @@ -1,3 +1,5 @@ +mod name_normalization; + use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::fmt::{Display, Formatter}; @@ -278,6 +280,11 @@ pub struct Manifest { pub package_dependencies: Vec, pub documentation: Vec, pub installers: Vec, + // `RequireExplicitUpgrade: true` opts a package out of bulk + // `pinget upgrade` output (winget parity). Users can still upgrade by + // explicit `id`. Set at top-level or per-installer; treated as true + // when any installer asserts it. 
+ pub require_explicit_upgrade: bool, } #[derive(Debug, Clone, serde::Serialize)] @@ -310,6 +317,7 @@ pub struct Installer { pub switches: InstallerSwitches, pub commands: Vec, pub package_dependencies: Vec, + pub require_explicit_upgrade: bool, } #[derive(Debug, Clone, Default, serde::Serialize)] @@ -562,6 +570,17 @@ struct InstalledPackage { product_codes: Vec, upgrade_codes: Vec, correlated: Option, + // True when `installed_version` was rewritten to a catalog Version via + // versionData.mszyml's aMiV/aMaV range. Such versions compare meaningfully + // against the catalog's Version; raw ARP DisplayVersions usually don't. + // Used by dedupe so that, when the same id has both a canonical and a raw + // install row, we prefer the canonical (e.g. `Microsoft.DotNet.SDK.10` + // 10.0.108 over the VS-installed 40.10.18029 that doesn't map). + installed_version_canonical: bool, + // True when the correlated catalog package's latest version sets + // `RequireExplicitUpgrade: true`. winget hides those rows from bulk + // `upgrade`; we mirror that. Users can still upgrade by explicit id. + correlated_requires_explicit_upgrade: bool, } #[derive(Debug, Clone)] @@ -634,6 +653,14 @@ struct PackageVersionDataEntry { manifest_relative_path: String, #[serde(rename = "s256H")] manifest_hash: String, + // ARP version range covered by this catalog version. WinGet writes these + // into versionData.mszyml so an installed ARP DisplayVersion (which may be + // an MSI build number with no relation to the catalog Version) can be + // mapped back to the catalog Version it corresponds to. 
+ #[serde(rename = "aMiV", default)] + arp_min_version: Option, + #[serde(rename = "aMaV", default)] + arp_max_version: Option, } pub struct Repository { @@ -980,13 +1007,36 @@ impl Repository { } let mut installed = collect_installed_packages(query.install_scope.as_deref())?; + if needs_available { + // Authoritative correlation via the v2 index's identity tables + // (PackageFamilyName / ProductCode / UpgradeCode). This is winget's + // primary path and resolves cases where display-name matching is + // ambiguous (Microsoft.Teams vs Microsoft.Teams.Free) or impossible + // (MSIX with `ms-resource:` placeholder names). + warnings.extend(self.correlate_installed_via_index(&mut installed, query.source.as_deref())?); + + // ARP entries that lack identity keys (no PFN/PC/UC) still match + // winget's ARP correlation when their (DisplayName, Publisher), + // run through the NameNormalizer, lands on a single package in + // norm_names2 ∩ norm_publishers2. Covers Inno Setup-style + // installers, MSIs whose ARP keys don't include ProductCode, and + // any vendor that publishes a DisplayName that differs from the + // catalog's PackageName but matches an AppsAndFeaturesEntries + // DisplayName. + warnings.extend(self.correlate_installed_by_normalized_identity(&mut installed, query.source.as_deref())?); + } + if needs_available && has_filter { - // Filtered lookup: search sources with the user's query + // Filtered lookup: search sources with the user's query for any + // installed package not already resolved by identity. 
let available_query = package_query_from_list_query(query); let (matches, source_warnings, _) = self.search_located(&available_query, SearchSemantics::Many)?; warnings.extend(source_warnings); let candidates: Vec = matches.into_iter().map(|c| c.display).collect(); for package in &mut installed { + if package.correlated.is_some() { + continue; + } package.correlated = correlate_installed_package(package, &candidates, allow_loose_list_correlation(query)); } @@ -995,6 +1045,30 @@ impl Repository { warnings.extend(self.correlate_all_installed(&mut installed)?); } + if needs_available { + // Enrich rows that correlated via the name-based fallback. Those + // rows skipped the v2-index aMiV/aMaV pass that runs inside + // `correlate_installed_via_index`, so they still carry the raw + // ARP DisplayVersion (e.g. `JetBrains Rider 2025.3.0.1` showing + // build `253.28294.112` instead of the marketing `2025.3.0.1`). + // Also resolves MSIX placeholder names like + // `ms-resource:appDisplayName` to the catalog's display name — + // winget pulls these through Windows.Management.Deployment; the + // catalog name is the parity-faithful substitute pinget has + // available without taking a WinRT dependency. + warnings.extend(self.enrich_correlated_via_index(&mut installed, query.source.as_deref())?); + } + + // For `upgrade`, collapse multiple installed entries that map to the + // same catalog package id (different ARP rows for side-by-side .NET + // SDKs, several runtime versions of WindowsAppRuntime, the MSIX shim + // alongside an ARP install of Edge, etc.). Keep the entry with the + // highest installed_version so the upgrade comparison runs against + // the user's newest install — matches winget's one-row-per-id output. 
+ if query.upgrade_only { + installed = dedupe_correlated_for_upgrade(installed); + } + let mut matches = installed .into_iter() .filter(|package| { @@ -1036,8 +1110,12 @@ impl Repository { } /// For unfiltered upgrade/list, search the entire available index and correlate - /// against all installed packages. + /// against all installed packages. Skips packages already correlated via the + /// v2 identity tables. fn correlate_all_installed(&mut self, installed: &mut [InstalledPackage]) -> Result> { + if installed.iter().all(|p| p.correlated.is_some()) { + return Ok(Vec::new()); + } let all_query = PackageQuery { query: None, id: None, @@ -1061,12 +1139,335 @@ impl Repository { let candidates: Vec = matches.into_iter().map(|c| c.display).collect(); for package in installed.iter_mut() { + if package.correlated.is_some() { + continue; + } package.correlated = correlate_installed_package(package, &candidates, true); } Ok(warnings) } + /// Correlates installed packages against the v2 pre-indexed catalog using + /// PackageFamilyName / ProductCode / UpgradeCode lookups — winget's + /// authoritative correlation path. Also rewrites `installed_version` to + /// the catalog Version whose ARP range covers the ARP DisplayVersion, so + /// packages whose ARP version is an MSI build number (e.g. .NET SDK, + /// Python) compare against a meaningful catalog version. Packages without + /// any of the three identifiers, or whose identifiers don't match a + /// catalog entry, are left untouched for the name-based fallback. 
+ fn correlate_installed_via_index( + &mut self, + installed: &mut [InstalledPackage], + requested_source: Option<&str>, + ) -> Result> { + let source_indices: Vec = self + .store + .sources + .iter() + .enumerate() + .filter(|(_, source)| { + source.kind == SourceKind::PreIndexed + && match requested_source { + Some(name) => source.name.eq_ignore_ascii_case(name), + None => true, + } + }) + .map(|(index, _)| index) + .collect(); + + for source_index in source_indices { + let resolved: Vec<(usize, V2PackageMetadata, &'static str)> = { + let connection = match self.open_preindexed_connection(source_index) { + Ok(c) => c, + Err(_) => continue, + }; + if !v2_identity_tables_present(&connection) { + continue; + } + let mut acc = Vec::new(); + for (idx, package) in installed.iter().enumerate() { + if package.correlated.is_some() { + continue; + } + let Some((rowid, by)) = lookup_identity_match_v2(&connection, package)? else { + continue; + }; + let Some(meta) = fetch_v2_package_metadata(&connection, rowid)? else { + continue; + }; + acc.push((idx, meta, by)); + } + acc + }; + + let source = self.source_clone(source_index); + for (idx, meta, by) in resolved { + let (canonical_installed, anchored_latest) = + match self.load_v2_version_data(&source, meta.rowid, &meta.package_hash) { + Ok((entries, _)) => ( + map_arp_version_to_catalog(&entries, &installed[idx].installed_version), + latest_arp_anchored_version(&entries), + ), + Err(_) => (None, None), + }; + // When the installed side was rebased to a catalog Version via + // aMiV/aMaV, the available side must compare on the same scale: + // prefer the latest ARP-anchored Version over `latest_version`, + // which can be an internal/MSI build number (e.g. WinAppRuntime + // 1.8's `8000.836.2153.0`) that would manufacture a phantom + // upgrade against an installed canonical like `1.8.6`. 
+ let available_version = match (&canonical_installed, anchored_latest) { + (Some(_), Some(anchored)) => anchored, + _ => meta.latest_version, + }; + let installed_pkg = &mut installed[idx]; + installed_pkg.correlated = Some(SearchMatch { + source_name: source.name.clone(), + source_kind: source.kind, + id: meta.id, + name: meta.name, + moniker: meta.moniker, + version: Some(available_version), + channel: None, + match_criteria: Some(by.to_owned()), + }); + if let Some(version) = canonical_installed { + installed_pkg.installed_version = version; + installed_pkg.installed_version_canonical = true; + } + } + } + + Ok(Vec::new()) + } + + /// Mid-tier correlation that mirrors winget's ARP normalization path. + /// For each installed package without a PFN/PC/UC identity match, the + /// (DisplayName, Publisher) is run through the NameNormalizer and + /// intersected against the v2 index's `norm_names2` and + /// `norm_publishers2` tables. A single resulting `package_rowid` + /// produces a correlation; ambiguous matches are skipped, matching + /// winget's empty-Source behavior when normalization can't pick a + /// winner. 
+ fn correlate_installed_by_normalized_identity( + &mut self, + installed: &mut [InstalledPackage], + requested_source: Option<&str>, + ) -> Result> { + let source_indices: Vec = self + .store + .sources + .iter() + .enumerate() + .filter(|(_, source)| { + source.kind == SourceKind::PreIndexed + && match requested_source { + Some(name) => source.name.eq_ignore_ascii_case(name), + None => true, + } + }) + .map(|(index, _)| index) + .collect(); + + for source_index in source_indices { + let resolved: Vec<(usize, V2PackageMetadata)> = { + let connection = match self.open_preindexed_connection(source_index) { + Ok(c) => c, + Err(_) => continue, + }; + if !v2_normalized_identity_tables_present(&connection) { + continue; + } + let mut acc = Vec::new(); + for (idx, pkg) in installed.iter().enumerate() { + if pkg.correlated.is_some() { + continue; + } + // MSIX entries belong to the PFN-only path; skipping + // here mirrors `correlate_installed_package`'s + // hard-block on MSIX (their `ms-resource:` placeholder + // names normalize to garbage anyway). + if pkg.local_id.starts_with("MSIX\\") { + continue; + } + let Some(publisher) = pkg.publisher.as_deref() else { + continue; + }; + let normalized_name = name_normalization::normalize_name(&pkg.name).name; + if normalized_name.is_empty() { + continue; + } + let normalized_publisher = name_normalization::normalize_publisher(publisher); + if normalized_publisher.is_empty() { + continue; + } + let Some(rowid) = + lookup_unique_normalized_identity(&connection, &normalized_name, &normalized_publisher)? + else { + continue; + }; + let Some(meta) = fetch_v2_package_metadata(&connection, rowid)? 
else { + continue; + }; + acc.push((idx, meta)); + } + acc + }; + + let source = self.source_clone(source_index); + for (idx, meta) in resolved { + let (canonical_installed, anchored_latest) = + match self.load_v2_version_data(&source, meta.rowid, &meta.package_hash) { + Ok((entries, _)) => ( + map_arp_version_to_catalog(&entries, &installed[idx].installed_version), + latest_arp_anchored_version(&entries), + ), + Err(_) => (None, None), + }; + let available_version = match (&canonical_installed, anchored_latest) { + (Some(_), Some(anchored)) => anchored, + _ => meta.latest_version, + }; + let installed_pkg = &mut installed[idx]; + installed_pkg.correlated = Some(SearchMatch { + source_name: source.name.clone(), + source_kind: source.kind, + id: meta.id, + name: meta.name, + moniker: meta.moniker, + version: Some(available_version), + channel: None, + match_criteria: Some("NormalizedNameAndPublisher".to_owned()), + }); + if let Some(version) = canonical_installed { + installed_pkg.installed_version = version; + installed_pkg.installed_version_canonical = true; + } + } + } + + Ok(Vec::new()) + } + + /// Runs after both correlation paths to enrich already-correlated rows + /// with information that the identity path applies inline but that + /// name-based correlation skips: + /// • The MSIX-placeholder name fix — when an MSIX entry's installed + /// name is an unresolved Windows resource string (`ms-resource:...`) + /// winget renders the catalog's `PackageName` instead. We do the + /// same so display-name comparisons match. + /// • The aMiV/aMaV version remap — `JetBrains.Rider` has no + /// ProductCode/PFN/UpgradeCode so it correlates by name; without + /// this pass its installed version stays as the ARP build number + /// `253.28294.112` instead of the catalog Version `2025.3.0.1`. + fn enrich_correlated_via_index( + &mut self, + installed: &mut [InstalledPackage], + requested_source: Option<&str>, + ) -> Result> { + // Cheap pass first — no SQL required. 
+ for package in installed.iter_mut() { + apply_msix_resource_string_name_fix(package); + } + + // Expensive pass — load versionData for rows whose installed_version + // wasn't remapped earlier. + let source_indices: Vec = self + .store + .sources + .iter() + .enumerate() + .filter(|(_, source)| { + source.kind == SourceKind::PreIndexed + && match requested_source { + Some(name) => source.name.eq_ignore_ascii_case(name), + None => true, + } + }) + .map(|(index, _)| index) + .collect(); + + for source_index in source_indices { + let needs: Vec<(usize, V2PackageMetadata)> = { + let connection = match self.open_preindexed_connection(source_index) { + Ok(c) => c, + Err(_) => continue, + }; + if !v2_identity_tables_present(&connection) { + continue; + } + let mut acc = Vec::new(); + for (idx, pkg) in installed.iter().enumerate() { + let Some(candidate) = pkg.correlated.as_ref() else { + continue; + }; + // Stay within the source the row was correlated to — + // looking up `JetBrains.Rider` in a different catalog + // would either miss or pull the wrong package's range. + if !candidate + .source_name + .eq_ignore_ascii_case(&self.store.sources[source_index].name) + { + continue; + } + let Some(meta) = lookup_v2_metadata_by_id(&connection, &candidate.id)? else { + continue; + }; + acc.push((idx, meta)); + } + acc + }; + + let source = self.source_clone(source_index); + for (idx, meta) in needs { + // Load versionData once: we may need it for both the + // aMiV/aMaV remap (only if installed_version isn't already + // canonical) and to locate the latest manifest for the + // RequireExplicitUpgrade flag. 
+ let entries = match self.load_v2_version_data(&source, meta.rowid, &meta.package_hash) { + Ok((entries, _)) => entries, + Err(_) => continue, + }; + + if !installed[idx].installed_version_canonical { + let canonical_installed = map_arp_version_to_catalog(&entries, &installed[idx].installed_version); + let anchored_latest = latest_arp_anchored_version(&entries); + if let Some(canonical) = canonical_installed { + installed[idx].installed_version = canonical; + installed[idx].installed_version_canonical = true; + if let (Some(correlated), Some(anchored)) = + (installed[idx].correlated.as_mut(), anchored_latest) + { + correlated.version = Some(anchored); + } + } + } + + // RequireExplicitUpgrade is set on the installer manifest + // for the latest catalog version. winget hides these from + // bulk `upgrade` (Edge, Steam, Discord and others). We + // peek at the first versionData entry (latest by sort + // order) and read its manifest to mirror that filtering. + if let Some(latest) = entries.first() { + let bytes_result = self.get_cached_source_file( + "V2_M", + &source, + &latest.manifest_relative_path, + Some(latest.manifest_hash.as_str()), + ); + if let Ok(bytes) = bytes_result + && let Ok(manifest) = parse_yaml_manifest(&bytes.bytes) + { + installed[idx].correlated_requires_explicit_upgrade = manifest.require_explicit_upgrade; + } + } + } + } + + Ok(Vec::new()) + } + pub fn search_versions(&mut self, query: &PackageQuery) -> Result { let (located, warnings) = self.find_single_match_with_semantics(query, SearchSemantics::Many)?; let versions = self.versions_for_match(&located, query)?; @@ -2464,10 +2865,69 @@ fn installed_package_has_unknown_version(package: &InstalledPackage) -> bool { } fn installed_package_matches_upgrade_filter(package: &InstalledPackage, query: &ListQuery) -> bool { + // Hide RequireExplicitUpgrade packages from bulk `upgrade` output to + // match winget (Edge, Steam, Discord, several MSIX packages). 
When + // the user explicitly filtered by id/name/etc., they're targeting a + // specific package and want to see it regardless — same allowance + // winget makes. + if package.correlated_requires_explicit_upgrade && !list_query_needs_available_lookup(query) { + return false; + } installed_package_has_upgrade(package) || (query.include_unknown && installed_package_has_unknown_version(package) && package.correlated.is_some()) } +/// Collapses installed entries that correlate to the same catalog package id +/// (case-insensitive within a source) down to a single representative — the +/// one with the highest installed version. Uncorrelated entries pass through +/// untouched; they can't appear in `upgrade` output anyway (the upgrade +/// filter requires a correlation), so they're effectively a no-op here. +fn dedupe_correlated_for_upgrade(packages: Vec) -> Vec { + use std::collections::HashMap; + let mut by_id: HashMap<(String, String), InstalledPackage> = HashMap::new(); + let mut uncorrelated: Vec = Vec::new(); + + for package in packages { + match package.correlated.as_ref() { + Some(candidate) => { + let key = ( + candidate.id.to_ascii_lowercase(), + candidate.source_name.to_ascii_lowercase(), + ); + let keep_new = match by_id.get(&key) { + Some(existing) => { + // Prefer rows whose `installed_version` was remapped to + // a catalog Version (via aMiV/aMaV). Comparing a raw + // ARP DisplayVersion like `40.10.18029` against the + // catalog's `10.0.300` is meaningless and would also + // hide the row that legitimately upgrades to it. 
+ match ( + package.installed_version_canonical, + existing.installed_version_canonical, + ) { + (true, false) => true, + (false, true) => false, + _ => { + compare_version(&package.installed_version, &existing.installed_version) + == Ordering::Greater + } + } + } + None => true, + }; + if keep_new { + by_id.insert(key, package); + } + } + None => uncorrelated.push(package), + } + } + + let mut result = uncorrelated; + result.extend(by_id.into_values()); + result +} + fn find_applicable_pin<'a>(item: &ListMatch, pins: &'a [PinRecord]) -> Option<&'a PinRecord> { let mut source_specific = None; let mut source_agnostic = None; @@ -2642,17 +3102,22 @@ fn correlate_installed_package( candidates: &[SearchMatch], allow_loose_name_match: bool, ) -> Option { - // Note: MSIX packages used to be hard-skipped here, but that prevented obvious - // correlations like `Microsoft Teams` (MSIX) → `Microsoft.Teams` (catalog). - // Name-based correlation now applies uniformly; MSIX entries whose installed - // name is an unresolved resource string (e.g. `ms-resource:appDisplayName`) - // simply fail to match and return None, same as before. + // MSIX entries can only be correlated through their PackageFamilyName. + // That lookup happens earlier in `correlate_installed_via_index`; if it + // didn't find a match, name-based fallback is wrong — the catalog + // doesn't carry this MSIX's identity, so any name collision (e.g. the + // self-updating `Microsoft Edge Stable` MSIX or the Store stub + // `Notepad++` MSIX) would manufacture a phantom correlation that winget + // doesn't make. 
+ if package.local_id.starts_with("MSIX\\") { + return None; + } let installed_name = normalize_correlation_name(&package.name); let installed_name_lower = package.name.to_ascii_lowercase(); let candidate_names = correlation_name_candidates(&package.name); - candidates + let mut scored: Vec<(i32, SearchMatch)> = candidates .iter() .filter_map(|candidate| { let candidate_name = normalize_correlation_name(&candidate.name); @@ -2696,8 +3161,25 @@ fn correlate_installed_package( Some((base_score + prefix_bonus, candidate.clone())) }) - .max_by_key(|(score, _)| *score) - .map(|(_, candidate)| candidate) + .collect(); + + scored.sort_by_key(|entry| std::cmp::Reverse(entry.0)); + let mut iter = scored.into_iter(); + let (top_score, top_match) = iter.next()?; + + // Reject ambiguous wins. When the user has e.g. `Git` (publisher "The Git + // Development Community") installed, both `Git.Git` and `Microsoft.Git` + // normalize their names to `git` and tie at the same score — the only + // signal that disambiguates them, publisher, isn't part of this scoring + // function. winget refuses the correlation in this case (the install + // shows up with an empty Source); we do the same to avoid manufacturing + // a fake upgrade against the wrong package. + if let Some((next_score, _)) = iter.next() + && next_score == top_score + { + return None; + } + Some(top_match) } fn correlation_name_candidates(name: &str) -> Vec { @@ -2739,6 +3221,214 @@ fn normalize_correlation_name(value: &str) -> String { .collect() } +struct V2PackageMetadata { + rowid: i64, + id: String, + name: String, + moniker: Option, + latest_version: String, + package_hash: String, +} + +/// Looks up a v2 catalog package whose identity (PackageFamilyName, ProductCode, +/// or UpgradeCode) matches the installed package. Returns the `packages.rowid` +/// and the field that produced the match. 
This is winget's primary correlation +/// path; falling back to display-name matching is only correct for ARP entries +/// that lack all three identifiers. +fn lookup_identity_match_v2( + connection: &Connection, + package: &InstalledPackage, +) -> Result> { + for pfn in &package.package_family_names { + if let Some(rowid) = query_optional_value( + connection, + "SELECT package FROM pfns2 WHERE pfn = ?1 LIMIT 1", + vec![SqlValue::Text(pfn.to_ascii_lowercase())], + |row| row_i64(row, 0), + )? { + return Ok(Some((rowid, "PackageFamilyName"))); + } + } + // UpgradeCode wins over ProductCode. The UpgradeCode is the MSI *family* + // identity — it stays stable across versions, while ProductCode changes + // every release. When a single ProductCode happens to also be listed + // under a sibling catalog package (e.g. `OpenJS.NodeJS.LTS` carries + // installer rows from earlier LTS-line ProductCodes, while the v24 + // install actually belongs to `OpenJS.NodeJS.22` via its UpgradeCode), + // the upgrade-side match represents the user's "package family" — the + // one winget uses. + for code in &package.upgrade_codes { + if let Some(rowid) = query_optional_value( + connection, + "SELECT package FROM upgradecodes2 WHERE upgradecode = ?1 LIMIT 1", + vec![SqlValue::Text(code.to_ascii_lowercase())], + |row| row_i64(row, 0), + )? { + return Ok(Some((rowid, "UpgradeCode"))); + } + } + for code in &package.product_codes { + if let Some(rowid) = query_optional_value( + connection, + "SELECT package FROM productcodes2 WHERE productcode = ?1 LIMIT 1", + vec![SqlValue::Text(code.to_ascii_lowercase())], + |row| row_i64(row, 0), + )? { + return Ok(Some((rowid, "ProductCode"))); + } + } + Ok(None) +} + +/// Resolves a v2 package by its catalog id back to its rowid + hash so the +/// post-correlation enrichment pass can load versionData for rows that +/// correlated through the name-based fallback (which throws the rowid away). 
+fn lookup_v2_metadata_by_id(connection: &Connection, package_id: &str) -> Result> { + query_optional_value( + connection, + "SELECT rowid, id, name, moniker, latest_version, hash FROM packages WHERE id = ?1 COLLATE NOCASE LIMIT 1", + vec![SqlValue::Text(package_id.to_owned())], + |row| { + Ok(V2PackageMetadata { + rowid: row_i64(row, 0)?, + id: row_string(row, 1)?, + name: row_string(row, 2)?, + moniker: row_opt_string(row, 3)?, + latest_version: row_string(row, 4)?, + package_hash: row_hex_string(row, 5)?, + }) + }, + ) +} + +/// MSIX entries whose Windows resource-string display name (e.g. +/// `ms-resource:appDisplayName`) hasn't been resolved by the AppX runtime +/// would otherwise leak that placeholder into pinget's output. winget +/// resolves it via `Windows.Management.Deployment` and shows the catalog's +/// `PackageName`; we mirror that by substituting the correlated catalog +/// name once the package is correlated, which is the parity-faithful +/// substitute we can produce without taking a WinRT dependency. 
+fn apply_msix_resource_string_name_fix(package: &mut InstalledPackage) { + if !package.local_id.starts_with("MSIX\\") { + return; + } + if !package.name.starts_with("ms-resource:") { + return; + } + let Some(candidate) = package.correlated.as_ref() else { + return; + }; + if !candidate.name.is_empty() { + package.name = candidate.name.clone(); + } +} + +fn fetch_v2_package_metadata(connection: &Connection, package_rowid: i64) -> Result> { + query_optional_value( + connection, + "SELECT id, name, moniker, latest_version, hash FROM packages WHERE rowid = ?1", + vec![SqlValue::Integer(package_rowid)], + |row| { + Ok(V2PackageMetadata { + rowid: package_rowid, + id: row_string(row, 0)?, + name: row_string(row, 1)?, + moniker: row_opt_string(row, 2)?, + latest_version: row_string(row, 3)?, + package_hash: row_hex_string(row, 4)?, + }) + }, + ) +} + +fn v2_normalized_identity_tables_present(connection: &Connection) -> bool { + query_optional_value( + connection, + "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'norm_names2' LIMIT 1", + Vec::new(), + |row| row_i64(row, 0), + ) + .ok() + .flatten() + .is_some() +} + +/// Returns the catalog `packages.rowid` whose `(norm_name, norm_publisher)` +/// pair matches the installed package — if and only if the match is +/// unique. Multiple matches mean the normalization wasn't strong enough to +/// disambiguate (e.g. an ARP DisplayName that normalizes the same as two +/// distinct catalog packages); winget refuses to correlate in that case +/// and we do the same. 
+fn lookup_unique_normalized_identity( + connection: &Connection, + norm_name: &str, + norm_publisher: &str, +) -> Result> { + let rows: Vec = query_rows( + connection, + "SELECT DISTINCT n.package FROM norm_names2 n \ + WHERE n.norm_name = ?1 \ + AND EXISTS (SELECT 1 FROM norm_publishers2 p WHERE p.package = n.package AND p.norm_publisher = ?2) \ + LIMIT 2", + vec![ + SqlValue::Text(norm_name.to_owned()), + SqlValue::Text(norm_publisher.to_owned()), + ], + |row| row_i64(row, 0), + )?; + if rows.len() == 1 { Ok(Some(rows[0])) } else { Ok(None) } +} + +fn v2_identity_tables_present(connection: &Connection) -> bool { + query_optional_value( + connection, + "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'productcodes2' LIMIT 1", + Vec::new(), + |row| row_i64(row, 0), + ) + .ok() + .flatten() + .is_some() +} + +/// Translates an ARP DisplayVersion to the catalog Version it maps to. +/// For packages like the .NET SDK or Python the ARP version is an MSI build +/// number (e.g. `10.1.826.23019`) unrelated to the public version +/// (`10.0.108`). winget records the ARP range per catalog version in +/// versionData.mszyml as `aMiV` / `aMaV`; we look for the entry whose range +/// contains the installed version. +fn map_arp_version_to_catalog(entries: &[PackageVersionDataEntry], arp_version: &str) -> Option { + if arp_version.is_empty() || arp_version.eq_ignore_ascii_case("Unknown") { + return None; + } + for entry in entries { + let (Some(min), Some(max)) = (entry.arp_min_version.as_deref(), entry.arp_max_version.as_deref()) else { + continue; + }; + if compare_version(arp_version, min) != Ordering::Less && compare_version(arp_version, max) != Ordering::Greater + { + return Some(entry.version.clone()); + } + } + None +} + +/// Returns the latest catalog Version that carries ARP-range metadata +/// (`aMiV` / `aMaV`). 
Packages like `Microsoft.WindowsAppRuntime.1.8` also +/// publish "internal" version rows whose `v` is an MSI build number +/// (`8000.836.2153.0`) without ARP bounds. Those rows are not user-facing +/// upgrade targets — when the installed side was matched through an ARP +/// range, the available side must be a peer that also exposes ARP bounds so +/// the two versions compare on the same scale. Returns `None` when no entry +/// has bounds; the caller should fall back to `packages.latest_version`. +fn latest_arp_anchored_version(entries: &[PackageVersionDataEntry]) -> Option { + entries + .iter() + .filter(|e| e.arp_min_version.is_some() && e.arp_max_version.is_some()) + .max_by(|a, b| compare_version(&a.version, &b.version)) + .map(|entry| entry.version.clone()) +} + #[cfg(windows)] fn collect_installed_packages(scope: Option<&str>) -> Result> { let mut packages = Vec::new(); @@ -2747,10 +3437,20 @@ fn collect_installed_packages(scope: Option<&str>) -> Result) -> Result) -> Result) -> Result std::collections::HashMap { + use std::collections::HashMap; + let mut map = HashMap::new(); + if include_machine { + collect_msi_upgrade_codes_from( + RegKey::predef(HKEY_LOCAL_MACHINE), + r"SOFTWARE\Classes\Installer\UpgradeCodes", + KEY_READ | KEY_WOW64_64KEY, + &mut map, + ); + } + if include_user { + collect_msi_upgrade_codes_from( + RegKey::predef(HKEY_CURRENT_USER), + r"Software\Microsoft\Installer\UpgradeCodes", + KEY_READ, + &mut map, + ); + } + map +} + +#[cfg(windows)] +fn collect_msi_upgrade_codes_from( + root: RegKey, + path: &str, + flags: u32, + map: &mut std::collections::HashMap, +) { + let Ok(upgrade_codes) = root.open_subkey_with_flags(path, flags) else { + return; + }; + for key_name in upgrade_codes.enum_keys().flatten() { + let Some(upgrade_code) = unflip_packed_guid(&key_name) else { + continue; + }; + let Ok(subkey) = upgrade_codes.open_subkey_with_flags(&key_name, flags) else { + continue; + }; + for (value_name, _) in subkey.enum_values().flatten() { 
+ if let Some(product_code) = unflip_packed_guid(&value_name) { + // ARP-side keys are likely to win the latest insert; first one + // is fine — we just need *any* mapping. Use `entry` so the + // first writer per ProductCode wins to stay deterministic. + map.entry(product_code).or_insert_with(|| upgrade_code.clone()); + } + } + } +} + +/// Converts the 32-character "packed GUID" used inside the MSI Installer +/// registry hive back to the standard `{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}` +/// lowercase form. The packing reverses each of the 11 chunks (sized 8/4/4 +/// then eight 2-char byte pairs) of the GUID's hex representation. +fn unflip_packed_guid(packed: &str) -> Option { + if packed.len() != 32 || !packed.chars().all(|c| c.is_ascii_hexdigit()) { + return None; + } + const CHUNKS: [usize; 11] = [8, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2]; + let bytes = packed.as_bytes(); + let mut reversed = String::with_capacity(32); + let mut offset = 0; + for size in CHUNKS { + let chunk = &bytes[offset..offset + size]; + for &c in chunk.iter().rev() { + reversed.push(c as char); + } + offset += size; + } + let r = reversed.to_ascii_lowercase(); + Some(format!( + "{{{}-{}-{}-{}-{}}}", + &r[0..8], + &r[8..12], + &r[12..16], + &r[16..20], + &r[20..32] + )) +} + #[cfg(not(windows))] fn collect_installed_packages(_scope: Option<&str>) -> Result> { Ok(Vec::new()) @@ -2803,6 +3591,7 @@ fn collect_installed_packages(_scope: Option<&str>) -> Result, seen: &mut BTreeSet, + upgrade_code_map: &std::collections::HashMap, root: RegKey, scope: &str, arch: &str, @@ -2840,7 +3629,18 @@ fn collect_uninstall_view( if product_codes.is_empty() && looks_like_product_code(&key_name) { product_codes.push(key_name.to_ascii_lowercase()); } - let upgrade_codes = read_reg_string(&subkey, "UpgradeCode").into_iter().collect::>(); + let mut upgrade_codes = read_reg_string(&subkey, "UpgradeCode").into_iter().collect::>(); + if upgrade_codes.is_empty() { + // ARP rarely exposes UpgradeCode directly. 
Recover it from the MSI + // Installer registry hive so identity correlation can fall back to + // upgradecodes2 when productcodes2 picks the wrong sibling. + for code in &product_codes { + if let Some(uc) = upgrade_code_map.get(&code.to_ascii_lowercase()) { + upgrade_codes.push(uc.clone()); + break; + } + } + } let installer_category = if local_id.starts_with("ARP\\") && read_reg_dword(&subkey, "WindowsInstaller") == Some(1) { Some("msi".to_owned()) @@ -2873,6 +3673,8 @@ fn collect_uninstall_view( product_codes, upgrade_codes, correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }); } @@ -2936,6 +3738,8 @@ fn collect_appmodel_packages( product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }); } @@ -4879,6 +5683,8 @@ fn parse_yaml_manifest_bundle(bytes: &[u8]) -> Result<(Manifest, JsonValue)> { let name = yaml_localized_string(&merged, "PackageName").ok_or_else(|| anyhow!("manifest missing PackageName"))?; let installers = parse_yaml_installers(&merged); + let top_level_require_explicit = yaml_scalar_bool(&merged, "RequireExplicitUpgrade"); + let any_installer_require_explicit = installers.iter().any(|i| i.require_explicit_upgrade); Ok(( Manifest { @@ -4906,6 +5712,7 @@ fn parse_yaml_manifest_bundle(bytes: &[u8]) -> Result<(Manifest, JsonValue)> { package_dependencies: yaml_package_dependencies(&merged), documentation: yaml_documentation_list(&merged), installers, + require_explicit_upgrade: top_level_require_explicit || any_installer_require_explicit, }, collapse_structured_document(&JsonValue::Array(documents)), )) @@ -4972,10 +5779,13 @@ fn parse_rest_manifest(bytes: &[u8], package_id: &str, version: &str, channel: & switches: json_installer_switches(item).with_fallback(&installer_switch_defaults), commands: json_string_list(item, "Commands"), package_dependencies: json_package_dependencies(item), + 
require_explicit_upgrade: json_bool(item, "RequireExplicitUpgrade"), }) .collect::>() }) .unwrap_or_default(); + let top_level_require_explicit = json_bool(selected, "RequireExplicitUpgrade"); + let any_installer_require_explicit = installers.iter().any(|i| i.require_explicit_upgrade); let manifest = Manifest { id: package_id.to_owned(), @@ -5002,6 +5812,7 @@ fn parse_rest_manifest(bytes: &[u8], package_id: &str, version: &str, channel: & package_dependencies: json_package_dependencies(selected), documentation: json_documentation_list(default_locale), installers, + require_explicit_upgrade: top_level_require_explicit || any_installer_require_explicit, }; Ok(( @@ -5376,6 +6187,28 @@ fn installer_from_yaml(root: &YamlMapping, switches: InstallerSwitches) -> Insta switches, commands: yaml_string_list(root, "Commands"), package_dependencies: yaml_package_dependencies(root), + require_explicit_upgrade: yaml_scalar_bool(root, "RequireExplicitUpgrade"), + } +} + +/// Reads a YAML scalar boolean. Accepts `true`/`false` (any casing) plus +/// the legacy `True`/`False` variants the winget catalog mixes in. 
+fn yaml_scalar_bool(root: &YamlMapping, key: &str) -> bool { + let Some(value) = root.get(YamlValue::from(key)) else { + return false; + }; + match value { + YamlValue::Bool(b) => *b, + YamlValue::String(s) => s.eq_ignore_ascii_case("true"), + _ => false, + } +} + +fn json_bool(value: &JsonValue, key: &str) -> bool { + match value.get(key) { + Some(JsonValue::Bool(b)) => *b, + Some(JsonValue::String(s)) => s.eq_ignore_ascii_case("true"), + _ => false, } } @@ -7185,7 +8018,9 @@ mod tests { }, commands: vec!["testpkg".to_owned()], package_dependencies: vec!["Microsoft.UI.Xaml.2.8".to_owned()], + require_explicit_upgrade: false, }], + require_explicit_upgrade: false, }, selected_installer: Some(Installer { architecture: Some("x64".to_owned()), @@ -7206,6 +8041,7 @@ mod tests { }, commands: vec!["testpkg".to_owned()], package_dependencies: vec!["Microsoft.UI.Xaml.2.8".to_owned()], + require_explicit_upgrade: false, }), cached_files: vec![PathBuf::from(r"C:\temp\cache\Test.Package.yaml")], warnings: vec!["cache warmed".to_owned()], @@ -7269,54 +8105,269 @@ mod tests { } #[test] - fn parse_yaml_manifest_bundle_returns_singleton_document() { + fn manifest_parses_require_explicit_upgrade_at_top_level() { + // Top-level RequireExplicitUpgrade flag should propagate to + // Manifest.require_explicit_upgrade. winget catalogs typically + // place the flag here for browser packages and similar + // self-updating apps that opt out of bulk `upgrade`. 
let yaml = r#" PackageIdentifier: Test.Package PackageVersion: 1.2.3 DefaultLocale: en-US -ManifestType: version +ManifestType: singleton ManifestVersion: 1.10.0 ---- -PackageIdentifier: Test.Package -PackageVersion: 1.2.3 PackageLocale: en-US PackageName: Test Package Publisher: Example License: MIT -ShortDescription: Structured output -ManifestType: defaultLocale -ManifestVersion: 1.10.0 ---- -PackageIdentifier: Test.Package -PackageVersion: 1.2.3 -ManifestType: installer -ManifestVersion: 1.10.0 +ShortDescription: explicit-upgrade fixture +RequireExplicitUpgrade: true Installers: - Architecture: x64 InstallerType: exe InstallerUrl: https://example.test/Test.Package.exe InstallerSha256: ABC123 "#; - - let (_manifest, documents) = parse_yaml_manifest_bundle(yaml.as_bytes()).expect("bundle"); - - assert_eq!(documents["ManifestType"].as_str(), Some("singleton")); - assert_eq!(documents["PackageIdentifier"].as_str(), Some("Test.Package")); - assert_eq!(documents["PackageName"].as_str(), Some("Test Package")); + let manifest = parse_yaml_manifest(yaml.as_bytes()).expect("parse"); + assert!(manifest.require_explicit_upgrade); } #[test] - fn collapse_structured_documents_returns_plural_show_documents() { - let documents = collapse_structured_documents(&[ - JsonValue::Array(vec![ - serde_json::json!({ - "PackageIdentifier": "Test.Package.One", - "PackageVersion": "1.0.0", - "DefaultLocale": "en-US", - "ManifestType": "version", - "ManifestVersion": "1.10.0" - }), - serde_json::json!({ + fn manifest_parses_require_explicit_upgrade_on_installer() { + // Per-installer flag — only one of several installers declares it, + // but the Manifest aggregate must still be `true` because the user + // could pick that installer when upgrading. 
+ let yaml = r#" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: singleton +ManifestVersion: 1.10.0 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT +ShortDescription: explicit-upgrade fixture +Installers: + - Architecture: x64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.x64.exe + InstallerSha256: ABC123 + - Architecture: arm64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.arm64.exe + InstallerSha256: DEF456 + RequireExplicitUpgrade: true +"#; + let manifest = parse_yaml_manifest(yaml.as_bytes()).expect("parse"); + assert!(manifest.require_explicit_upgrade); + assert!(!manifest.installers[0].require_explicit_upgrade); + assert!(manifest.installers[1].require_explicit_upgrade); + } + + #[test] + fn manifest_without_require_explicit_upgrade_defaults_to_false() { + let yaml = r#" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: singleton +ManifestVersion: 1.10.0 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT +ShortDescription: baseline fixture +Installers: + - Architecture: x64 + InstallerType: exe + InstallerUrl: https://example.test/Test.Package.exe + InstallerSha256: ABC123 +"#; + let manifest = parse_yaml_manifest(yaml.as_bytes()).expect("parse"); + assert!(!manifest.require_explicit_upgrade); + } + + #[test] + fn upgrade_filter_hides_require_explicit_upgrade_by_default() { + // winget hides `RequireExplicitUpgrade` rows from bulk `upgrade` + // (Edge, Steam, Discord). pinget must do the same — the filter is + // the only gate that enforces it. 
+ let mut pkg = InstalledPackage { + name: "Edge".to_owned(), + local_id: r"ARP\Machine\X64\Edge".to_owned(), + installed_version: "100.0".to_owned(), + publisher: None, + scope: Some("Machine".to_owned()), + installer_category: Some("exe".to_owned()), + install_location: None, + package_family_names: Vec::new(), + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: Some(SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Microsoft.Edge".to_owned(), + name: "Microsoft Edge".to_owned(), + moniker: None, + version: Some("110.0".to_owned()), + channel: None, + match_criteria: None, + }), + installed_version_canonical: false, + correlated_requires_explicit_upgrade: true, + }; + let bulk_query = ListQuery { + upgrade_only: true, + ..ListQuery::default() + }; + assert!( + !installed_package_matches_upgrade_filter(&pkg, &bulk_query), + "RequireExplicitUpgrade row must be hidden from bulk upgrade" + ); + + // When the user explicitly targets it by id, winget shows it — + // pinget must do the same so `pinget upgrade Microsoft.Edge` works. + let filtered_query = ListQuery { + upgrade_only: true, + id: Some("Microsoft.Edge".to_owned()), + ..ListQuery::default() + }; + assert!( + installed_package_matches_upgrade_filter(&pkg, &filtered_query), + "RequireExplicitUpgrade row must surface when the user filters for it explicitly" + ); + + // Without the flag, the row appears in bulk upgrade as usual. + pkg.correlated_requires_explicit_upgrade = false; + assert!(installed_package_matches_upgrade_filter(&pkg, &bulk_query)); + } + + #[test] + fn lookup_unique_normalized_identity_returns_unique_match() { + // Single (norm_name, norm_publisher) intersection — happy path. 
+ let connection = Connection::open_in_memory().expect("open in-memory db"); + connection + .execute_batch( + "CREATE TABLE norm_names2 (norm_name TEXT, package INT64);\n\ + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64);\n\ + INSERT INTO norm_names2 VALUES ('microsoftedge', 100);\n\ + INSERT INTO norm_publishers2 VALUES ('microsoft', 100);", + ) + .expect("seed schema"); + + let rowid = lookup_unique_normalized_identity(&connection, "microsoftedge", "microsoft") + .expect("query") + .expect("unique match"); + assert_eq!(rowid, 100); + } + + #[test] + fn lookup_unique_normalized_identity_rejects_ambiguous_match() { + // Two distinct packages share the same (norm_name, norm_publisher) + // — the Git case where both `Git.Git` and a hypothetical sibling + // normalize identically. winget refuses to correlate when it can't + // disambiguate; pinget must do the same. + let connection = Connection::open_in_memory().expect("open in-memory db"); + connection + .execute_batch( + "CREATE TABLE norm_names2 (norm_name TEXT, package INT64);\n\ + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64);\n\ + INSERT INTO norm_names2 VALUES ('git', 100), ('git', 200);\n\ + INSERT INTO norm_publishers2 VALUES ('thegitdevelopmentcommunity', 100), ('thegitdevelopmentcommunity', 200);", + ) + .expect("seed schema"); + + let rowid = lookup_unique_normalized_identity(&connection, "git", "thegitdevelopmentcommunity").expect("query"); + assert!(rowid.is_none(), "ambiguous match must not correlate"); + } + + #[test] + fn lookup_unique_normalized_identity_requires_publisher_intersect() { + // norm_name has multiple matches but only one shares its + // norm_publisher with the installed package. Winget's intersect + // logic still picks the right one — verify. 
+ let connection = Connection::open_in_memory().expect("open in-memory db"); + connection + .execute_batch( + "CREATE TABLE norm_names2 (norm_name TEXT, package INT64);\n\ + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64);\n\ + INSERT INTO norm_names2 VALUES ('git', 100), ('git', 200);\n\ + INSERT INTO norm_publishers2 VALUES ('thegitdevelopmentcommunity', 100), ('microsoft', 200);", + ) + .expect("seed schema"); + + let rowid = lookup_unique_normalized_identity(&connection, "git", "thegitdevelopmentcommunity").expect("query"); + assert_eq!(rowid, Some(100)); + } + + #[test] + fn lookup_unique_normalized_identity_misses_when_publisher_does_not_match() { + // Name matches but no publisher row for that package id — + // intersection is empty so we don't correlate. + let connection = Connection::open_in_memory().expect("open in-memory db"); + connection + .execute_batch( + "CREATE TABLE norm_names2 (norm_name TEXT, package INT64);\n\ + CREATE TABLE norm_publishers2 (norm_publisher TEXT, package INT64);\n\ + INSERT INTO norm_names2 VALUES ('foo', 100);\n\ + INSERT INTO norm_publishers2 VALUES ('bar', 200);", + ) + .expect("seed schema"); + + let rowid = lookup_unique_normalized_identity(&connection, "foo", "bar").expect("query"); + assert!(rowid.is_none(), "name matches package 100 but publisher matches 200 — no intersection"); + } + + #[test] + fn parse_yaml_manifest_bundle_returns_singleton_document() { + let yaml = r#" +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +DefaultLocale: en-US +ManifestType: version +ManifestVersion: 1.10.0 +--- +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +PackageLocale: en-US +PackageName: Test Package +Publisher: Example +License: MIT +ShortDescription: Structured output +ManifestType: defaultLocale +ManifestVersion: 1.10.0 +--- +PackageIdentifier: Test.Package +PackageVersion: 1.2.3 +ManifestType: installer +ManifestVersion: 1.10.0 +Installers: + - Architecture: x64 + InstallerType: exe + 
InstallerUrl: https://example.test/Test.Package.exe + InstallerSha256: ABC123 +"#; + + let (_manifest, documents) = parse_yaml_manifest_bundle(yaml.as_bytes()).expect("bundle"); + + assert_eq!(documents["ManifestType"].as_str(), Some("singleton")); + assert_eq!(documents["PackageIdentifier"].as_str(), Some("Test.Package")); + assert_eq!(documents["PackageName"].as_str(), Some("Test Package")); + } + + #[test] + fn collapse_structured_documents_returns_plural_show_documents() { + let documents = collapse_structured_documents(&[ + JsonValue::Array(vec![ + serde_json::json!({ + "PackageIdentifier": "Test.Package.One", + "PackageVersion": "1.0.0", + "DefaultLocale": "en-US", + "ManifestType": "version", + "ManifestVersion": "1.10.0" + }), + serde_json::json!({ "PackageIdentifier": "Test.Package.One", "PackageVersion": "1.0.0", "PackageLocale": "en-US", @@ -7399,6 +8450,7 @@ Installers: package_dependencies: Vec::new(), documentation: Vec::new(), installers: Vec::new(), + require_explicit_upgrade: false, }; let existing = ListMatch { name: "Contoso App".to_owned(), @@ -7459,6 +8511,7 @@ Installers: package_dependencies: Vec::new(), documentation: Vec::new(), installers: Vec::new(), + require_explicit_upgrade: false, }; let existing = ListMatch { name: "Contoso App".to_owned(), @@ -7615,6 +8668,7 @@ Installers: }, commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }; let manifest = Manifest { id: "Test.Package".to_owned(), @@ -7640,6 +8694,7 @@ Installers: package_dependencies: Vec::new(), documentation: Vec::new(), installers: Vec::new(), + require_explicit_upgrade: false, }; let mut silent_request = InstallRequest::new(PackageQuery::default()); @@ -7698,6 +8753,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }; let manifest = Manifest { id: "Test.Package".to_owned(), @@ -7723,6 +8779,7 @@ Installers: package_dependencies: 
Vec::new(), documentation: Vec::new(), installers: Vec::new(), + require_explicit_upgrade: false, }; let mut progress_request = InstallRequest::new(PackageQuery::default()); progress_request.mode = InstallerMode::SilentWithProgress; @@ -7784,6 +8841,7 @@ Installers: }, commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }; let manifest = Manifest { id: "ShareX.ShareX".to_owned(), @@ -7809,6 +8867,7 @@ Installers: package_dependencies: Vec::new(), documentation: Vec::new(), installers: Vec::new(), + require_explicit_upgrade: false, }; let mut request = InstallRequest::new(PackageQuery::default()); request.mode = InstallerMode::Silent; @@ -7968,6 +9027,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, Installer { architecture: Some("x64".to_owned()), @@ -7985,6 +9045,7 @@ Installers: switches: InstallerSwitches::default(), commands: vec!["demo".to_owned()], package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, ]; let query = PackageQuery { @@ -8019,6 +9080,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, Installer { architecture: Some("x64".to_owned()), @@ -8036,6 +9098,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, ]; @@ -8062,6 +9125,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, Installer { architecture: Some("x64".to_owned()), @@ -8079,6 +9143,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, ]; let query = PackageQuery { @@ -8110,6 +9175,7 @@ Installers: switches: InstallerSwitches::default(), commands: 
Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, Installer { architecture: Some("x64".to_owned()), @@ -8127,6 +9193,7 @@ Installers: switches: InstallerSwitches::default(), commands: Vec::new(), package_dependencies: Vec::new(), + require_explicit_upgrade: false, }, ]; let query = PackageQuery { @@ -8160,6 +9227,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![SearchMatch { source_name: "winget".to_owned(), @@ -8200,6 +9269,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let query = ListQuery { tag: Some("powertoys".to_owned()), @@ -8239,6 +9310,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![ SearchMatch { @@ -8285,6 +9358,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![ SearchMatch { @@ -8324,10 +9399,14 @@ Installers: } #[test] - fn msix_packages_correlate_by_name() { - // Previously hard-skipped via `package.local_id.starts_with("MSIX\\")` → - // None, which prevented obvious MSIX updates (Microsoft.Teams etc.) from - // ever surfacing. Name-based correlation now runs uniformly. + fn msix_packages_do_not_correlate_via_name() { + // MSIX correlation must go through the v2 index's `pfns2` table — name + // fallback is wrong because two MSIX packages can legitimately share a + // display name without sharing identity (`Microsoft Edge Stable` MSIX + // vs the catalog `Microsoft.Edge` MSI, `Notepad++` Store stub MSIX vs + // the catalog Inno installer). 
Even when the names match exactly the + // catalog `Microsoft.Teams` doesn't represent the same MSIX without a + // PFN hit; that lookup happens in `correlate_installed_via_index`. let installed = InstalledPackage { name: "Microsoft Teams".to_owned(), local_id: r"MSIX\MSTeams_25290.205.4069.4894_arm64__8wekyb3d8bbwe".to_owned(), @@ -8340,6 +9419,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![SearchMatch { source_name: "winget".to_owned(), @@ -8352,8 +9433,57 @@ Installers: match_criteria: None, }]; - let correlated = correlate_installed_package(&installed, &candidates, true).expect("correlated"); - assert_eq!(correlated.id, "Microsoft.Teams"); + assert!(correlate_installed_package(&installed, &candidates, true).is_none()); + } + + #[test] + fn name_fallback_refuses_ambiguous_winners() { + // The user has Git installed (publisher "The Git Development Community" + // — but publisher isn't in the scoring function). Two catalog packages + // both expose name "Git": `Git.Git` and `Microsoft.Git`. Without + // publisher disambiguation they score identically; winget refuses to + // correlate (the install lists with empty Source). pinget must do the + // same to avoid manufacturing an upgrade against the wrong catalog + // package. 
+ let installed = InstalledPackage { + name: "Git".to_owned(), + local_id: r"ARP\Machine\X64\Git_is1".to_owned(), + installed_version: "2.53.0".to_owned(), + publisher: Some("The Git Development Community".to_owned()), + scope: Some("Machine".to_owned()), + installer_category: Some("exe".to_owned()), + install_location: None, + package_family_names: Vec::new(), + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, + }; + let candidates = vec![ + SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Git.Git".to_owned(), + name: "Git".to_owned(), + moniker: None, + version: Some("2.54.0".to_owned()), + channel: None, + match_criteria: None, + }, + SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Microsoft.Git".to_owned(), + name: "Git".to_owned(), + moniker: None, + version: Some("2.53.0.0.7".to_owned()), + channel: None, + match_criteria: None, + }, + ]; + + assert!(correlate_installed_package(&installed, &candidates, true).is_none()); } #[test] @@ -8373,6 +9503,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![SearchMatch { source_name: "winget".to_owned(), @@ -8402,6 +9534,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let candidates = vec![SearchMatch { source_name: "winget".to_owned(), @@ -8440,6 +9574,8 @@ Installers: channel: None, match_criteria: None, }), + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; assert!(installed_package_has_upgrade(&package)); @@ -8468,6 +9604,8 @@ Installers: channel: None, match_criteria: None, }), + 
installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let query = ListQuery { upgrade_only: true, @@ -8889,6 +10027,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let sparse = InstalledPackage { name: "PowerToys.SparseApp".to_owned(), @@ -8902,6 +10042,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; let extension = InstalledPackage { name: "PowerToys FileLocksmith Context Menu".to_owned(), @@ -8915,6 +10057,8 @@ Installers: product_codes: Vec::new(), upgrade_codes: Vec::new(), correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, }; assert!(list_sort_weight(&main) < list_sort_weight(&sparse)); @@ -9040,4 +10184,343 @@ Installers: build_winget_uninstall_arguments_with_scope(&installed, &request, false) ); } + + fn version_entry(version: &str, arp_min: Option<&str>, arp_max: Option<&str>) -> PackageVersionDataEntry { + PackageVersionDataEntry { + version: version.to_owned(), + manifest_relative_path: format!("manifests/x/{version}"), + manifest_hash: "00".repeat(32), + arp_min_version: arp_min.map(str::to_owned), + arp_max_version: arp_max.map(str::to_owned), + } + } + + #[test] + fn map_arp_version_returns_catalog_version_inside_range() { + // Real winget data: .NET SDK 10.0.108 declares its ARP DisplayVersion is + // `10.1.826.23019`. compare_version on that against the catalog Version + // `10.0.108` claims installed > available, so without this mapping the + // upgrade was silently dropped. 
+ let entries = vec![ + version_entry("10.0.300", Some("10.3.26.23102"), Some("10.3.26.23102")), + version_entry("10.0.108", Some("10.1.826.23019"), Some("10.1.826.23019")), + version_entry("10.0.107", Some("10.1.726.21808"), Some("10.1.726.21808")), + ]; + + assert_eq!( + map_arp_version_to_catalog(&entries, "10.1.826.23019").as_deref(), + Some("10.0.108") + ); + } + + #[test] + fn map_arp_version_returns_none_when_no_range_matches() { + // VS-installed .NET SDK reports ARP DisplayVersion `40.10.18029` which + // doesn't sit inside any aMiV..aMaV bucket. Caller must keep the + // original installed version (and accept that no upgrade can be + // computed) — same as winget's behavior. + let entries = vec![ + version_entry("10.0.300", Some("10.3.26.23102"), Some("10.3.26.23102")), + version_entry("10.0.108", Some("10.1.826.23019"), Some("10.1.826.23019")), + ]; + + assert!(map_arp_version_to_catalog(&entries, "40.10.18029").is_none()); + } + + #[test] + fn map_arp_version_skips_entries_missing_arp_bounds() { + // Older catalog packages predate AppsAndFeaturesEntries and have no + // aMiV/aMaV. Those entries must be ignored — not silently treated as + // an unbounded range. + let entries = vec![ + version_entry("9.0.300", None, None), + version_entry("9.0.117", Some("9.1.1726.23010"), Some("9.1.1726.23010")), + ]; + + assert_eq!( + map_arp_version_to_catalog(&entries, "9.1.1726.23010").as_deref(), + Some("9.0.117") + ); + assert!(map_arp_version_to_catalog(&entries, "1.0.0").is_none()); + } + + #[test] + fn map_arp_version_handles_inclusive_range_endpoints() { + // aMiV/aMaV are inclusive bounds — a version equal to either endpoint + // must still map. 
+ let entries = vec![version_entry("3.14.5", Some("3.14.5.0"), Some("3.14.5.999"))]; + + assert_eq!( + map_arp_version_to_catalog(&entries, "3.14.5.0").as_deref(), + Some("3.14.5") + ); + assert_eq!( + map_arp_version_to_catalog(&entries, "3.14.5.999").as_deref(), + Some("3.14.5") + ); + assert_eq!( + map_arp_version_to_catalog(&entries, "3.14.5.500").as_deref(), + Some("3.14.5") + ); + } + + #[test] + fn map_arp_version_ignores_unknown_marker() { + // collect_installed_packages writes the literal "Unknown" when ARP has + // no DisplayVersion. Trying to map that would falsely match the first + // entry whose aMiV..aMaV string-compares to include "Unknown". + let entries = vec![version_entry("1.0.0", Some("1.0.0"), Some("1.0.0"))]; + + assert!(map_arp_version_to_catalog(&entries, "Unknown").is_none()); + assert!(map_arp_version_to_catalog(&entries, "").is_none()); + } + + #[test] + fn latest_arp_anchored_skips_internal_versions() { + // Microsoft.WindowsAppRuntime.1.8 publishes both an internal build + // version (`8000.836.2153.0`, no ARP bounds) and user-facing versions + // (`1.8.6`, `1.8.5`, …). The internal row shouldn't win — comparing + // it against an installed canonical of `1.8.6` would falsely report + // an upgrade. Returns the highest anchored Version instead. + let entries = vec![ + version_entry("8000.836.2153.0", None, None), + version_entry("1.8.6", Some("8000.806.2252.0"), Some("8000.806.2252.0")), + version_entry("1.8.5", Some("8000.770.947.0"), Some("8000.770.947.0")), + version_entry("1.8.0", Some("8000.616.304.0"), Some("8000.616.304.0")), + ]; + + assert_eq!(latest_arp_anchored_version(&entries).as_deref(), Some("1.8.6")); + } + + #[test] + fn latest_arp_anchored_returns_none_without_bounds() { + // App Installer's versionData has no aMiV/aMaV — every version is a + // user-facing MSIX Version. Caller must fall back to + // `packages.latest_version` in this case. 
+ let entries = vec![ + version_entry("1.28.240.0", None, None), + version_entry("1.27.470.0", None, None), + ]; + + assert!(latest_arp_anchored_version(&entries).is_none()); + } + + #[test] + fn version_data_parses_arp_bounds_from_winget_payload() { + // Sanity check that aMiV/aMaV deserialize from a real-world + // versionData.mszyml payload shape. Without `default`, packages + // predating AppsAndFeaturesEntries would fail to parse. + let payload = "sV: 1.0\nvD:\n- v: 10.0.300\n aMiV: 10.3.26.23102\n aMaV: 10.3.26.23102\n rP: manifests/m/Microsoft/DotNet/SDK/10/10.0.300/4f87\n s256H: 0e633f7fa41d0322ff185d25783951f2ed343f27965b165066d9f75d5689a48b\n- v: 9.0.0\n rP: manifests/m/Microsoft/DotNet/SDK/10/9.0.0/0000\n s256H: 0e633f7fa41d0322ff185d25783951f2ed343f27965b165066d9f75d5689a48b\n"; + + let document = serde_yaml::from_str::(payload).expect("parse versionData.mszyml"); + assert_eq!(document.versions.len(), 2); + assert_eq!(document.versions[0].arp_min_version.as_deref(), Some("10.3.26.23102")); + assert_eq!(document.versions[0].arp_max_version.as_deref(), Some("10.3.26.23102")); + assert!(document.versions[1].arp_min_version.is_none()); + assert!(document.versions[1].arp_max_version.is_none()); + } + + #[test] + fn msix_resource_string_name_resolves_to_catalog_name() { + // App Installer's MSIX manifest stores DisplayName as + // `ms-resource:appDisplayName`. Once we correlate it via PFN, we + // know the catalog calls it `App Installer` — show that instead of + // the unresolved placeholder, matching winget's output. 
+ let mut package = InstalledPackage { + name: "ms-resource:appDisplayName".to_owned(), + local_id: r"MSIX\Microsoft.DesktopAppInstaller_1.28.239.0_arm64__8wekyb3d8bbwe".to_owned(), + installed_version: "1.28.239.0".to_owned(), + publisher: None, + scope: Some("User".to_owned()), + installer_category: Some("msix".to_owned()), + install_location: None, + package_family_names: vec!["Microsoft.DesktopAppInstaller_8wekyb3d8bbwe".to_owned()], + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: Some(SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Microsoft.AppInstaller".to_owned(), + name: "App Installer".to_owned(), + moniker: None, + version: Some("1.28.240.0".to_owned()), + channel: None, + match_criteria: Some("PackageFamilyName".to_owned()), + }), + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, + }; + apply_msix_resource_string_name_fix(&mut package); + assert_eq!(package.name, "App Installer"); + } + + #[test] + fn msix_resource_string_fix_is_a_noop_when_not_msix() { + // ARP rows can also carry unusual display names; the fix is gated + // on the local_id prefix so a `ms-resource:` literal that somehow + // appears in an ARP DisplayName (extremely unlikely, but possible) + // doesn't get silently rewritten. 
+ let mut package = InstalledPackage { + name: "ms-resource:appDisplayName".to_owned(), + local_id: r"ARP\Machine\X64\{deadbeef}".to_owned(), + installed_version: "1.0".to_owned(), + publisher: None, + scope: Some("Machine".to_owned()), + installer_category: Some("msi".to_owned()), + install_location: None, + package_family_names: Vec::new(), + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: Some(SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Some.Package".to_owned(), + name: "Should Not Apply".to_owned(), + moniker: None, + version: Some("1.0".to_owned()), + channel: None, + match_criteria: None, + }), + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, + }; + apply_msix_resource_string_name_fix(&mut package); + assert_eq!(package.name, "ms-resource:appDisplayName"); + } + + #[test] + fn msix_resource_string_fix_skips_resolved_names() { + // The vast majority of MSIX entries have already-resolved names + // (`Microsoft Teams`, `Notepad++`, etc.). The fix must not touch + // those — `meta.name` and the installed name may legitimately + // differ (`Microsoft Teams` vs `Microsoft Teams (work or school)`). 
+ let mut package = InstalledPackage { + name: "Microsoft Teams".to_owned(), + local_id: r"MSIX\MSTeams_25290.205.4069.4894_arm64__8wekyb3d8bbwe".to_owned(), + installed_version: "25290.205.4069.4894".to_owned(), + publisher: None, + scope: Some("User".to_owned()), + installer_category: Some("msix".to_owned()), + install_location: None, + package_family_names: vec!["MSTeams_8wekyb3d8bbwe".to_owned()], + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: Some(SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: "Microsoft.Teams".to_owned(), + name: "Microsoft Teams Catalog Name".to_owned(), + moniker: None, + version: Some("26106.1906.4665.7308".to_owned()), + channel: None, + match_criteria: Some("PackageFamilyName".to_owned()), + }), + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, + }; + apply_msix_resource_string_name_fix(&mut package); + assert_eq!(package.name, "Microsoft Teams"); + } + + #[test] + fn unflip_packed_guid_reverses_msi_installer_packing() { + // The MSI Installer hive packs GUIDs by char-reversing each of the 11 + // chunks (8/4/4/2*8). This is the encoding used inside + // `HKLM\SOFTWARE\Classes\Installer\UpgradeCodes\` subkey names + // and their value names. Verified against the user's installed + // Node.js ProductCode `{9292CBD9-...}` whose packed form (from the + // live registry) is `9DBC2929593B4D2488740C8E00C4F652`. + assert_eq!( + unflip_packed_guid("9DBC2929593B4D2488740C8E00C4F652").as_deref(), + Some("{9292cbd9-b395-42d4-8847-c0e8004c6f25}") + ); + // And the reverse case — Node.js UpgradeCode `{47c07a3a-...}` packed + // form `A3A70C74FE2431248AD5F8A59570C782`. + assert_eq!( + unflip_packed_guid("A3A70C74FE2431248AD5F8A59570C782").as_deref(), + Some("{47c07a3a-42ef-4213-a85d-8f5a59077c28}") + ); + // Reject non-32-char or non-hex inputs. 
+ assert!(unflip_packed_guid("nothex").is_none()); + assert!(unflip_packed_guid("9DBC2929593B4D2488740C8E00C4F65").is_none()); // 31 chars + assert!(unflip_packed_guid("ZZZZZZZZ593B4D2488740C8E00C4F652").is_none()); + } + + fn installed_with_correlated(id: &str, installed_version: &str, canonical: bool) -> InstalledPackage { + InstalledPackage { + name: format!("{id} install"), + local_id: format!(r"ARP\Machine\X64\{id}"), + installed_version: installed_version.to_owned(), + publisher: None, + scope: Some("Machine".to_owned()), + installer_category: Some("msi".to_owned()), + install_location: None, + package_family_names: Vec::new(), + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: Some(SearchMatch { + source_name: "winget".to_owned(), + source_kind: SourceKind::PreIndexed, + id: id.to_owned(), + name: id.to_owned(), + moniker: None, + version: Some("99.0.0".to_owned()), + channel: None, + match_criteria: None, + }), + installed_version_canonical: canonical, + correlated_requires_explicit_upgrade: false, + } + } + + #[test] + fn dedupe_keeps_canonical_row_over_raw_arp_row() { + // Two installed entries correlate to the same catalog id. One had its + // ARP version remapped to a catalog Version via aMiV/aMaV (canonical); + // the other did not (e.g. VS-installed .NET SDK whose ARP + // `40.10.18029` doesn't fit any bucket). Without the canonical + // preference, `compare_version` says `40.x.x.x > 10.0.108` so the + // wrong row wins and the upgrade silently disappears. 
+ let raw = installed_with_correlated("Microsoft.DotNet.SDK.10", "40.10.18029", false); + let canonical = installed_with_correlated("Microsoft.DotNet.SDK.10", "10.0.108", true); + + let result = dedupe_correlated_for_upgrade(vec![raw, canonical]); + assert_eq!(result.len(), 1); + assert_eq!(result[0].installed_version, "10.0.108"); + assert!(result[0].installed_version_canonical); + } + + #[test] + fn dedupe_keeps_highest_among_canonical_rows() { + let lower = installed_with_correlated("Microsoft.WindowsAppRuntime.1.7", "1.7.7", true); + let higher = installed_with_correlated("Microsoft.WindowsAppRuntime.1.7", "1.7.9", true); + + let result = dedupe_correlated_for_upgrade(vec![lower, higher]); + assert_eq!(result.len(), 1); + assert_eq!(result[0].installed_version, "1.7.9"); + } + + #[test] + fn dedupe_leaves_uncorrelated_rows_alone() { + // Uncorrelated entries don't reach upgrade output anyway (the + // upgrade filter requires correlation), but the dedupe pass must not + // drop them — `pinget list` could share this code path in the future. + let uncorrelated = InstalledPackage { + name: "Foo".to_owned(), + local_id: r"ARP\Machine\X64\Foo".to_owned(), + installed_version: "1.0".to_owned(), + publisher: None, + scope: Some("Machine".to_owned()), + installer_category: Some("exe".to_owned()), + install_location: None, + package_family_names: Vec::new(), + product_codes: Vec::new(), + upgrade_codes: Vec::new(), + correlated: None, + installed_version_canonical: false, + correlated_requires_explicit_upgrade: false, + }; + let result = dedupe_correlated_for_upgrade(vec![uncorrelated]); + assert_eq!(result.len(), 1); + } } diff --git a/rust/crates/pinget-core/src/name_normalization.rs b/rust/crates/pinget-core/src/name_normalization.rs new file mode 100644 index 0000000..1f22aff --- /dev/null +++ b/rust/crates/pinget-core/src/name_normalization.rs @@ -0,0 +1,810 @@ +//! Pinget port of winget-cli's `NameNormalization.cpp` (the "Initial" +//! version). 
/// Result of normalizing a display name. The architecture is extracted as
/// a side-channel because winget can later append it to the normalized
/// name to disambiguate multi-arch installs.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct NormalizedName {
    /// The normalized name. `normalize_name` lowercases it on the regular
    /// path; on the SAP short-circuit it is returned as prepared (trimmed
    /// and unwrapped) without lowercasing or stripping.
    pub(crate) name: String,
    /// Architecture marker that was removed from the name, or
    /// `Architecture::Unknown` when none was found or when a combined
    /// 32/64-bit marker matched.
    pub(crate) architecture: Architecture,
    /// Lowercased known locale that was removed from the name. Empty when
    /// no known locale was found, or when known locales with different
    /// language parts were seen in the same name.
    pub(crate) locale: String,
}

/// Architecture hint extracted from a display name by `remove_architecture`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum Architecture {
    /// No architecture marker, or an ambiguous 32/64-bit marker.
    #[default]
    Unknown,
    /// An `X32`/`X86` or `32-bit` style marker was removed.
    X86,
    /// An `X64`/`AMD64`/`x86-64` or `64-bit` style marker was removed.
    X64,
}
+struct Stripper { + re: &'static Regex, + replacement: &'static str, +} + +/// Normalizes an ARP DisplayName the same way winget's NameNormalizer +/// (Initial version, `preserveWhiteSpace = false`) does — producing the +/// string that ends up in the catalog's `norm_names2` table. +pub(crate) fn normalize_name(value: &str) -> NormalizedName { + let mut name = prepare_for_validation(value); + while unwrap(&mut name) {} + + // SAP Business Object program names follow a specific pattern that + // breaks under the regular flow; winget short-circuits them. + if sap_package().is_match(&name).unwrap_or(false) { + return NormalizedName { + name, + architecture: Architecture::Unknown, + locale: String::new(), + }; + } + + let architecture = remove_architecture(&mut name); + let locale = remove_locale(&mut name); + + // Preserve KB numbers from within parens before the bracket strippers + // would eat them — winget keeps `KB1234567` as part of the normalized + // name because it's the only meaningful identifier on some patches. + let kb_replaced = kb_numbers().replace_all(&name, "$1").into_owned(); + name = kb_replaced; + + while apply_strippers(program_name_strippers(), &mut name) {} + + let tokens = split_with_legal_suffix_exclusion(program_name_split(), &name, false); + name = tokens.join(""); + let nonletters_replaced = non_letters_and_digits().replace_all(&name, "").into_owned(); + name = nonletters_replaced; + + NormalizedName { + name: name.to_lowercase(), + architecture, + locale: locale.to_lowercase(), + } +} + +/// Normalizes a publisher string. Strips the same set of patterns as the +/// name path plus splits on word boundaries with the legal-entity-suffix +/// list — so `Microsoft Corporation` → `microsoft`, `JetBrains s.r.o.` → +/// `jetbrains`, but `The Git Development Community` stays intact because +/// no token in it is a recognized suffix. 
/// Trims surrounding whitespace and drops an `@@`-delimited suffix.
///
/// The suffix is only cut when the first `@@` sits at byte offset 3 or
/// later; a marker nearer the start is left in place. The text kept in
/// front of the marker is not trimmed again.
fn prepare_for_validation(value: &str) -> String {
    let mut prepared = value.trim().to_owned();
    // winget supports an `@@`-delimited suffix on internal display names;
    // keep parity even though it is unusual in the wild.
    if let Some(cut) = prepared.find("@@").filter(|&at| at >= 3) {
        prepared.truncate(cut);
    }
    prepared
}

/// Removes one layer of matching `"…"` or `(…)` wrapping in place.
///
/// Returns `true` when a layer was removed so callers can loop until the
/// value is fully unwrapped. Values shorter than two bytes, and values
/// whose first and last bytes are not a matching pair, are left untouched.
fn unwrap(value: &mut String) -> bool {
    // The slice patterns need at least two bytes, which also covers the
    // length guard.
    let wrapped = matches!(
        value.as_bytes(),
        [b'"', .., b'"'] | [b'(', .., b')']
    );
    if wrapped {
        // Both delimiters are single-byte ASCII, so removing one char from
        // each end is exactly the `[1..len - 1]` slice.
        value.pop();
        value.remove(0);
    }
    wrapped
}
"X64"/ + // "AMD64" must beat "X32"/"X86" because of "x86-64". + if apply(architecture_32_or_64_bit(), value) { + return Architecture::Unknown; + } + if apply(architecture_x64(), value) || apply(architecture_64_bit(), value) { + return Architecture::X64; + } + if apply(architecture_x32(), value) || apply(architecture_32_bit(), value) { + return Architecture::X86; + } + Architecture::Unknown +} + +fn remove_locale(value: &mut String) -> String { + // Walk locale matches; only treat them as locales if they're in + // winget's known list. Unknown locale-shaped tokens (e.g. `XY-AB` for + // a non-real locale) get preserved instead of stripped. + let re = locale(); + let mut new_value = String::with_capacity(value.len()); + let mut locale_found: Option = None; + let mut last_end = 0usize; + + for capture in re.captures_iter(value) { + let Ok(capture) = capture else { continue }; + let Some(m) = capture.get(0) else { continue }; + let folded = m.as_str().to_uppercase(); + let is_known = LOCALES.binary_search(&folded.as_str()).is_ok(); + + new_value.push_str(&value[last_end..m.start()]); + if !is_known { + new_value.push_str(m.as_str()); + } else { + match &locale_found { + None => locale_found = Some(folded), + Some(existing) if existing == &folded => {} + Some(existing) => { + let existing_lang = existing.split('-').next().unwrap_or(""); + let new_lang = folded.split('-').next().unwrap_or(""); + if existing_lang != new_lang { + locale_found = Some(String::new()); + } + } + } + } + last_end = m.end(); + } + new_value.push_str(&value[last_end..]); + *value = new_value; + locale_found.unwrap_or_default() +} + +fn split_with_legal_suffix_exclusion(re: &Regex, value: &str, stop_on_exclusion: bool) -> Vec { + let mut result = Vec::new(); + let mut last_end = 0usize; + let push_segment = |segment: &str, out: &mut Vec| -> bool { + let trimmed = segment.trim(); + if trimmed.is_empty() { + return true; + } + let folded = trimmed.to_uppercase(); + if !out.is_empty() && 
LEGAL_ENTITY_SUFFIXES.binary_search(&folded.as_str()).is_ok() { + return !stop_on_exclusion; + } + out.push(trimmed.to_owned()); + true + }; + + for m in re.find_iter(value) { + let Ok(m) = m else { continue }; + let segment = &value[last_end..m.start()]; + if !push_segment(segment, &mut result) { + return result; + } + last_end = m.end(); + } + let segment = &value[last_end..]; + push_segment(segment, &mut result); + result +} + +// ── Regex cells. Compiled lazily so the cost is paid once per process. ── + +macro_rules! regex_cell { + ($name:ident, $pattern:expr) => { + fn $name() -> &'static Regex { + static CELL: OnceLock = OnceLock::new(); + CELL.get_or_init(|| { + let inner: &str = $pattern; + // (?i) matches the C++ patterns' CaseInsensitive option. + Regex::new(&format!("(?i){inner}")).expect("invalid normalizer regex") + }) + } + }; +} + +macro_rules! stripper_cell { + ($name:ident, $re_fn:ident, $replacement:expr) => { + fn $name() -> &'static Stripper { + static CELL: OnceLock = OnceLock::new(); + CELL.get_or_init(|| Stripper { + re: $re_fn(), + replacement: $replacement, + }) + } + }; +} + +// Architectures: rewritten boundary `(^|[^...])` captured in group 1 and +// re-emitted via `$1` because fancy-regex won't accept the original +// variable-length `(?<=^|[^...])` lookbehind. +regex_cell!( + architecture_x32_re, + r"(^|[^\p{L}\p{Nd}])(?:X32|X86)(?=\P{Nd}|$)(?:\sEDITION)?" +); +regex_cell!( + architecture_x64_re, + r"(^|[^\p{L}\p{Nd}])(?:X64|AMD64|X86[\p{Pd}\p{Pc}]64)(?=\P{Nd}|$)(?:\sEDITION)?" +); +regex_cell!( + architecture_32_bit_re, + r"(^|[^\p{L}\p{Nd}])(?:32[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?" +); +regex_cell!( + architecture_64_bit_re, + r"(^|[^\p{L}\p{Nd}])(?:64[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?" +); +regex_cell!( + architecture_32_or_64_bit_re, + r"(^|[^\p{L}\p{Nd}])(?:(?:64[\\/]32|32[\\/]64)[\p{Pd}\p{Pc}\p{Z}]?BIT)S?(?:\sEDITION)?" 
+); +stripper_cell!(architecture_x32, architecture_x32_re, "$1"); +stripper_cell!(architecture_x64, architecture_x64_re, "$1"); +stripper_cell!(architecture_32_bit, architecture_32_bit_re, "$1"); +stripper_cell!(architecture_64_bit, architecture_64_bit_re, "$1"); +stripper_cell!(architecture_32_or_64_bit, architecture_32_or_64_bit_re, "$1"); + +// Locale: `(? &'static [Stripper] { + static CELL: OnceLock> = OnceLock::new(); + CELL.get_or_init(|| { + vec![ + // Order mirrors winget's PROGRAM_NAME_REGEXES. + clone(roblox()), + clone(bomgar()), + clone(prefix_parens()), + clone(empty_parens()), + clone(file_path_ghs()), + clone(file_path_parens()), + clone(file_path_quotes()), + clone(file_path()), + clone(version_letter()), + clone(version_delimited()), + clone(version()), + clone(en_suffix()), + clone(non_nested_bracket()), + clone(bracket_enclosed()), + clone(uri_protocol()), + clone(leading_symbols()), + clone(trailing_symbols()), + ] + }) +} + +fn publisher_name_strippers() -> &'static [Stripper] { + static CELL: OnceLock> = OnceLock::new(); + CELL.get_or_init(|| { + vec![ + clone(version_delimited()), + clone(version()), + clone(non_nested_bracket()), + clone(bracket_enclosed()), + clone(uri_protocol()), + clone(non_letters()), + clone(trailing_non_letters()), + clone(acronym_separators()), + ] + }) +} + +fn clone(s: &Stripper) -> Stripper { + Stripper { + re: s.re, + replacement: s.replacement, + } +} + +// ── Locale + legal-entity-suffix lists ──────────────────────────────────── + +// Pre-uppercased and pre-sorted so binary_search works against the regex's +// uppercase output (`(?i)` matches case-insensitively but capture text +// preserves the original casing; we uppercase before lookup). 
+const LOCALES: &[&str] = &[ + "AF-ZA", + "AM-ET", + "AR-AE", + "AR-BH", + "AR-DZ", + "AR-EG", + "AR-IQ", + "AR-JO", + "AR-KW", + "AR-LB", + "AR-LY", + "AR-MA", + "AR-OM", + "AR-QA", + "AR-SA", + "AR-SY", + "AR-TN", + "AR-YE", + "ARN-CL", + "AS-IN", + "AZ-CYRL-AZ", + "AZ-LATN-AZ", + "BA-RU", + "BE-BY", + "BG-BG", + "BN-BD", + "BN-IN", + "BO-CN", + "BR-FR", + "BS-CYRL-BA", + "BS-LATN-BA", + "CA-ES", + "CA-ES-VALENCIA", + "CO-FR", + "CS-CZ", + "CY-GB", + "DA-DK", + "DE-AT", + "DE-CH", + "DE-DE", + "DE-LI", + "DE-LU", + "DSB-DE", + "DV-MV", + "EL-GR", + "EN-AU", + "EN-BZ", + "EN-CA", + "EN-GB", + "EN-IE", + "EN-IN", + "EN-JM", + "EN-MY", + "EN-NZ", + "EN-PH", + "EN-SG", + "EN-TT", + "EN-US", + "EN-ZA", + "EN-ZW", + "ES-AR", + "ES-BO", + "ES-CL", + "ES-CO", + "ES-CR", + "ES-DO", + "ES-EC", + "ES-ES", + "ES-GT", + "ES-HN", + "ES-MX", + "ES-NI", + "ES-PA", + "ES-PE", + "ES-PR", + "ES-PY", + "ES-SV", + "ES-US", + "ES-UY", + "ES-VE", + "ET-EE", + "EU-ES", + "FA-IR", + "FI-FI", + "FIL-PH", + "FO-FO", + "FR-BE", + "FR-CA", + "FR-CH", + "FR-FR", + "FR-LU", + "FR-MC", + "FY-NL", + "GA-IE", + "GD-DB", + "GL-ES", + "GSW-FR", + "GU-IN", + "HA-LATN-NG", + "HE-IL", + "HI-IN", + "HR-BA", + "HR-HR", + "HSB-DE", + "HU-HU", + "HY-AM", + "ID-ID", + "IG-NG", + "II-CN", + "IS-IS", + "IT-CH", + "IT-IT", + "IU-CANS-CA", + "IU-LATN-CA", + "JA-JP", + "KA-GE", + "KK-KZ", + "KL-GL", + "KM-KH", + "KN-IN", + "KO-KR", + "KOK-IN", + "KY-KG", + "LB-LU", + "LO-LA", + "LT-LT", + "LV-LV", + "MI-NZ", + "MK-MK", + "ML-IN", + "MN-MN", + "MN-MONG-CN", + "MOH-CA", + "MR-IN", + "MS-BN", + "MS-MY", + "MT-MT", + "NB-NO", + "NE-NP", + "NL-BE", + "NL-NL", + "NN-NO", + "NSO-ZA", + "OC-FR", + "OR-IN", + "PA-IN", + "PL-PL", + "PRS-AF", + "PS-AF", + "PT-BR", + "PT-PT", + "QUT-GT", + "QUZ-BO", + "QUZ-EC", + "QUZ-PE", + "RM-CH", + "RO-RO", + "RU-RU", + "RW-RW", + "SA-IN", + "SAH-RU", + "SE-FI", + "SE-NO", + "SE-SE", + "SI-LK", + "SK-SK", + "SL-SI", + "SMA-NO", + "SMA-SE", + "SMJ-NO", + "SMJ-SE", + "SMN-FI", + 
"SMS-FI", + "SQ-AL", + "SR-CYRL-BA", + "SR-CYRL-CS", + "SR-CYRL-ME", + "SR-CYRL-RS", + "SR-LATN-BA", + "SR-LATN-CS", + "SR-LATN-ME", + "SR-LATN-RS", + "SV-FI", + "SV-SE", + "SW-KE", + "SYR-SY", + "TA-IN", + "TE-IN", + "TG-CYRL-TJ", + "TH-TH", + "TK-TM", + "TN-ZA", + "TR-TR", + "TT-RU", + "TZM-LATN-DZ", + "UG-CN", + "UK-UA", + "UR-PK", + "UZ-CYRL-UZ", + "UZ-LATN-UZ", + "VI-VN", + "WO-SN", + "XH-ZA", + "YO-NG", + "ZH-CN", + "ZH-HK", + "ZH-MO", + "ZH-SG", + "ZH-TW", + "ZU-ZA", +]; + +const LEGAL_ENTITY_SUFFIXES: &[&str] = &[ + "AB", + "AD", + "AG", + "APS", + "AS", + "ASA", + "BV", + "CO", + "COMPANY", + "CORP", + "CORPORATION", + "CV", + "DOO", + "EV", + "GES", + "GESMBH", + "GMBH", + "HOLDING", + "HOLDINGS", + "INC", + "INCORPORATED", + "KG", + "KS", + "LIMITED", + "LLC", + "LP", + "LTD", + "LTDA", + "MBH", + "NV", + "PLC", + "PS", + "PTY", + "PVT", + "SA", + "SARL", + "SC", + "SCA", + "SL", + "SP", + "SPA", + "SRL", + "SRO", + "SUBSIDIARY", +]; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lists_are_sorted_for_binary_search() { + let mut sorted = LOCALES.to_vec(); + sorted.sort(); + assert_eq!(sorted, LOCALES.to_vec(), "LOCALES must be pre-sorted"); + + let mut sorted_suffixes = LEGAL_ENTITY_SUFFIXES.to_vec(); + sorted_suffixes.sort(); + assert_eq!( + sorted_suffixes, + LEGAL_ENTITY_SUFFIXES.to_vec(), + "LEGAL_ENTITY_SUFFIXES must be pre-sorted" + ); + } + + // Fixture strings observed in the live catalog's norm_publishers2 / + // norm_names2 tables on the development machine. These are the actual + // outputs winget produced for those packages, so matching them is the + // criterion of success for the port. 
+ + #[test] + fn publisher_microsoft_corporation_normalizes_to_microsoft() { + assert_eq!(normalize_publisher("Microsoft Corporation"), "microsoft"); + } + + #[test] + fn publisher_jetbrains_sro_normalizes_to_jetbrains() { + assert_eq!(normalize_publisher("JetBrains s.r.o."), "jetbrains"); + } + + #[test] + fn publisher_without_legal_suffix_is_kept_verbatim() { + // "The Git Development Community" has no recognized legal-entity + // suffix, so all tokens stay — matches the live catalog row + // (`thegitdevelopmentcommunity`). + assert_eq!( + normalize_publisher("The Git Development Community"), + "thegitdevelopmentcommunity" + ); + } + + #[test] + fn publisher_strips_inc_and_llc_and_gmbh() { + assert_eq!(normalize_publisher("Foo Inc"), "foo"); + assert_eq!(normalize_publisher("Foo Bar LLC"), "foobar"); + assert_eq!(normalize_publisher("Foo GmbH"), "foo"); + } + + #[test] + fn name_strips_version_delimited_token() { + // `2025.3.0.1` matches VersionDelimited (digits + punctuation + + // digits). Bare `2026` does NOT match and stays as part of the + // normalized name — same as the live catalog row + // (`visualstudioprofessional2026`). 
+ assert_eq!(normalize_name("JetBrains Rider 2025.3.0.1").name, "jetbrainsrider"); + assert_eq!( + normalize_name("Visual Studio Professional 2026").name, + "visualstudioprofessional2026" + ); + } + + #[test] + fn name_strips_architecture_suffix() { + let r = normalize_name("PowerToys (Preview) x64"); + assert_eq!(r.name, "powertoys"); + assert_eq!(r.architecture, Architecture::X64); + } + + #[test] + fn name_strips_known_locale_suffix() { + let r = normalize_name("Foo en-US Edition"); + assert_eq!(r.locale, "en-us"); + } + + #[test] + fn name_keeps_unknown_locale_shaped_tokens() { + let r = normalize_name("Foo XY-AB"); + assert!(r.locale.is_empty()); + } + + #[test] + fn name_strips_parens_content() { + assert_eq!(normalize_name("Foo (beta)").name, "foo"); + } + + #[test] + fn name_normalizes_microsoft_edge() { + assert_eq!(normalize_name("Microsoft Edge").name, "microsoftedge"); + } + + #[test] + fn name_keeps_year_only_suffix() { + assert_eq!(normalize_name("Foo 2026").name, "foo2026"); + } +} diff --git a/scripts/Test-UpgradeParity.ps1 b/scripts/Test-UpgradeParity.ps1 new file mode 100644 index 0000000..c531bc8 --- /dev/null +++ b/scripts/Test-UpgradeParity.ps1 @@ -0,0 +1,522 @@ +#requires -Version 7.0 +<# +.SYNOPSIS + Asserts that `pinget upgrade` reports the same set of upgrades as + `winget upgrade` on the current machine. + +.DESCRIPTION + Runs both tools against the live system state, parses their output into + a normalized row-set keyed by (PackageId, Source), and reports a + structured diff: rows that winget surfaces but pinget doesn't (missing), + rows that pinget surfaces but winget doesn't (extra), and rows that + appear in both but disagree on installed/available version + (version mismatch). 
+ + Rows that share the same (id, source, installed_version, available_version) + are considered semantically equal even if the Name column renders + differently (winget resolves MSIX `ms-resource:` placeholders and reads + marketing versions from manifest data; pinget shows the raw ARP values). + Such display-only differences are reported as advisories, not failures. + + The fixture written to disk is the input to a future cross-machine corpus + — re-run this on different machines (different installs, different + catalog states) and compare the JSON outputs to find correlation classes + pinget still gets wrong. The script is intentionally read-only: it never + mutates installs, sources, or pins. + +.PARAMETER Pinget + Path to the pinget executable. Defaults to the release build in the + rust target tree. + +.PARAMETER Winget + Path to the winget executable. Defaults to `winget` on PATH. + +.PARAMETER FixturePath + If set, writes a JSON fixture describing the machine, both raw outputs, + and the computed diff. Intended for sharing across machines. + +.PARAMETER IncludeUnknown + Pass `--include-unknown` to both tools. On by default; use + `-IncludeUnknown:$false` to test the stricter default behavior. + +.PARAMETER FailOnDiff + Exit non-zero when any non-cosmetic difference is found. Intended for + CI use; omit for interactive diagnosis. + +.PARAMETER UpdateSources + Run `source update` against both tools before diffing. Off by default + to keep the harness side-effect-free. + +.EXAMPLE + .\Test-UpgradeParity.ps1 + # Quick interactive check. + +.EXAMPLE + .\Test-UpgradeParity.ps1 -FixturePath ./parity.json -FailOnDiff + # Save a fixture for sharing and fail the run on any real diff. 
function Invoke-CaptureLines {
    <#
    .SYNOPSIS
        Runs a native executable and returns its exit code plus its output.
    .DESCRIPTION
        stdout and stderr are merged (2>&1) and every record is converted to
        a string, so the returned lines cannot be attributed to a stream
        afterwards. Callers are expected to ignore lines that do not look
        like data.
    .OUTPUTS
        pscustomobject with ExitCode (int) and Lines (string[]).
    #>
    param(
        [Parameter(Mandatory = $true)] [string]$Executable,
        [Parameter(Mandatory = $true)] [string[]]$Arguments
    )

    # The script runs with $ErrorActionPreference = "Stop". On PowerShell
    # 7.0/7.1 (allowed by `#requires -Version 7.0`) a stderr line from a
    # native command that is redirected with 2>&1 is treated as an error
    # record subject to that preference, so the first spinner line winget
    # writes to stderr would abort the harness. Relax the preference for
    # this call only; assigning it here creates a function-local variable
    # and leaves the caller's setting untouched.
    $ErrorActionPreference = 'Continue'

    # winget emits progress spinners on stderr that we don't want polluting
    # the parsed table. Capture both streams (so failures can be diagnosed
    # from the raw lines); the parsers ignore anything that doesn't look
    # like a data row.
    $mergedLines = & $Executable @Arguments 2>&1 | ForEach-Object { $_.ToString() }
    $exit = $LASTEXITCODE
    [pscustomobject]@{
        ExitCode = $exit
        Lines    = @($mergedLines)
    }
}
Find the header by + # locating the dash separator, then derive the offsets from the line + # immediately above it. + + $separatorIdx = -1 + for ($i = 0; $i -lt $captured.Lines.Count; $i++) { + $line = $captured.Lines[$i] + if ($line -match '^-{10,}$') { + $separatorIdx = $i + break + } + } + if ($separatorIdx -lt 1) { + # No table at all. winget prints "No installed package found matching + # input criteria." or "No applicable upgrade found." when nothing is + # upgradable; that's a legitimate empty state, not a parser failure. + $emptyState = ($captured.Lines | Where-Object { + $_ -match 'No installed package|No applicable upgrade|No newer package versions' + }).Count -gt 0 + return @{ + Rows = @() + ExitCode = $captured.ExitCode + Raw = $captured.Lines + Diagnostic = if ($emptyState) { $null } else { "no header/separator found — winget output did not include an upgrade table" } + } + } + + $header = $captured.Lines[$separatorIdx - 1] + $columnDefs = @( + @{ Key = 'Name'; Token = 'Name' }, + @{ Key = 'Id'; Token = 'Id' }, + @{ Key = 'Version'; Token = 'Version' }, + @{ Key = 'Available'; Token = 'Available' }, + @{ Key = 'Source'; Token = 'Source' } + ) + $offsets = [ordered]@{} + foreach ($column in $columnDefs) { + $pos = $header.IndexOf($column.Token) + if ($pos -lt 0) { + return @{ + Rows = @() + ExitCode = $captured.ExitCode + Raw = $captured.Lines + Diagnostic = "header missing column '$($column.Token)'" + } + } + $offsets[$column.Key] = $pos + } + + function Get-Slice { + param([string]$Line, [int]$Start, [int]$End) + if ($Start -ge $Line.Length) { return "" } + $effectiveEnd = [Math]::Min($End, $Line.Length) + return $Line.Substring($Start, $effectiveEnd - $Start).TrimEnd().TrimStart() + } + + $rows = New-Object System.Collections.Generic.List[object] + $orderedKeys = @($offsets.Keys) + for ($i = $separatorIdx + 1; $i -lt $captured.Lines.Count; $i++) { + $line = $captured.Lines[$i] + if ([string]::IsNullOrWhiteSpace($line)) { continue } + if ($line -match 
'^\d+ (upgrade|package|available)') { break } + # Skip "The following packages cannot be upgraded" footer banners that + # winget prints when --include-unknown surfaces packages without a + # known installed version. They precede a separate table we don't + # parse here. + if ($line -match '^The following packages') { break } + # Defensive: ignore lines that are just spinner/dash noise. + if ($line -match '^[\s\\/\-|]+$') { continue } + + $row = [ordered]@{} + for ($c = 0; $c -lt $orderedKeys.Count; $c++) { + $key = $orderedKeys[$c] + $start = $offsets[$key] + $end = if ($c + 1 -lt $orderedKeys.Count) { $offsets[$orderedKeys[$c + 1]] } else { [int]::MaxValue } + $row[$key] = Get-Slice -Line $line -Start $start -End $end + } + + # A real upgrade row always has an Id and a Source. Rows from + # `winget list` could omit Source for uncorrelated installs, but + # `winget upgrade` filters those out — so a missing Id or Source + # means we mis-parsed and should skip rather than emit a phantom row. + if (-not $row.Id -or -not $row.Source) { continue } + + $rows.Add([pscustomobject]$row) | Out-Null + } + + # Sanity check: if the separator was followed by many non-empty, + # non-footer lines but we parsed zero rows, the column-offset logic + # almost certainly drifted (e.g. winget renamed a header). Surface that + # loudly instead of silently reporting PASS with 0 rows. 
function Get-PingetUpgradeRows {
    <#
    .SYNOPSIS
        Runs `pinget upgrade --output json` and flattens the result into rows
        shaped like the winget table rows (Name, Id, Version, Available, Source).
    .OUTPUTS
        Hashtable with Rows, ExitCode, Raw (captured lines) and Diagnostic.
        Diagnostic is $null on success and a message when the output could
        not be interpreted.
    #>
    param(
        [Parameter(Mandatory = $true)] [string]$Executable,
        [bool]$IncludeUnknown
    )

    # Deliberately not named $args: that is a PowerShell automatic variable
    # and assigning to it is flagged by PSScriptAnalyzer.
    $pingetArgs = @("upgrade", "--output", "json")
    if ($IncludeUnknown) { $pingetArgs += "--include-unknown" }
    $captured = Invoke-CaptureLines -Executable $Executable -Arguments $pingetArgs
    $joined = $captured.Lines -join "`n"

    try {
        $parsed = $joined | ConvertFrom-Json -Depth 32 -ErrorAction Stop
    } catch {
        return @{
            Rows       = @()
            ExitCode   = $captured.ExitCode
            Raw        = $captured.Lines
            Diagnostic = "pinget JSON parse failed: $($_.Exception.Message)"
        }
    }

    # Under Set-StrictMode -Version Latest, reading `.matches` from $null
    # (empty output) or from an object without that property throws. Report
    # a schema break as a Diagnostic instead of crashing the harness.
    $matchesProperty = if ($null -ne $parsed) { $parsed.PSObject.Properties['matches'] } else { $null }
    if ($null -eq $matchesProperty) {
        return @{
            Rows       = @()
            ExitCode   = $captured.ExitCode
            Raw        = $captured.Lines
            Diagnostic = "pinget JSON did not contain a 'matches' property"
        }
    }

    # Returns the value of the first property in $Names that exists on
    # $InputObject, or $null when none does.
    function Get-MatchField {
        param([Parameter(Mandatory = $true)] $InputObject, [string[]]$Names)
        foreach ($n in $Names) {
            $prop = $InputObject.PSObject.Properties[$n]
            if ($null -ne $prop) { return $prop.Value }
        }
        return $null
    }

    $rows = foreach ($match in @($matchesProperty.Value)) {
        # A JSON `null` entry (or `"matches": null`) carries no row.
        if ($null -eq $match) { continue }

        # The Rust CLI emits snake_case keys (installed_version, ...) while
        # the C# CLI emits camelCase (installedVersion, ...). The harness has
        # to talk to either tool, so accept both casings on every field.
        $installed = Get-MatchField $match @('installed_version', 'installedVersion')
        $available = Get-MatchField $match @('available_version', 'availableVersion')
        $sourceName = Get-MatchField $match @('source_name', 'sourceName')
        [pscustomobject]@{
            Name      = Get-MatchField $match @('name')
            Id        = Get-MatchField $match @('id')
            Version   = if ($null -ne $installed) { $installed } else { "" }
            Available = if ($null -ne $available) { $available } else { "" }
            Source    = if ($null -ne $sourceName) { $sourceName } else { "" }
        }
    }

    return @{
        Rows       = @($rows)
        ExitCode   = $captured.ExitCode
        Raw        = $captured.Lines
        Diagnostic = $null
    }
}

function Get-RowKey {
    # Builds the case-insensitive "id|source" key used to pair winget and
    # pinget rows. Id/Source are cast to [string] first so a row whose field
    # is $null (pinget JSON without `id`) yields an empty key part instead of
    # a null-method-call error.
    param([pscustomobject]$Row)
    $idPart = ([string]$Row.Id).ToLowerInvariant()
    $sourcePart = ([string]$Row.Source).ToLowerInvariant()
    return "{0}|{1}" -f $idPart, $sourcePart
}

function Test-NamesEquivalent {
    # Returns $true when the two rendered names are the same, or differ only
    # by winget's column truncation.
    #
    # [AllowEmptyString()] is required: a Mandatory [string] parameter
    # otherwise rejects "" (and $null, which binds as ""), and pinget rows
    # can carry a missing name.
    param(
        [Parameter(Mandatory = $true)] [AllowEmptyString()] [string]$WingetName,
        [Parameter(Mandatory = $true)] [AllowEmptyString()] [string]$PingetName
    )

    if ($WingetName -eq $PingetName) { return $true }

    # winget truncates the Name column at its fixed display width and marks
    # the cut with a U+2026 horizontal ellipsis. That's a rendering artifact
    # of the table layout, not a real difference between the two tools — if
    # the un-truncated pinget name starts with the visible prefix, treat as
    # identical so the cosmetic-diff bucket only flags substantive
    # rendering disagreements (resource-string placeholders, marketing
    # version vs raw ARP, etc.).
    $ellipsis = [char]0x2026
    if ($WingetName.EndsWith($ellipsis)) {
        $prefix = $WingetName.Substring(0, $WingetName.Length - 1)
        # Ordinal: the prefix check must not depend on the current culture.
        if ($PingetName.StartsWith($prefix, [System.StringComparison]::Ordinal)) { return $true }
    }
    return $false
}
+ $cosmeticOnly.Add([pscustomobject]@{ + Id = $w.Id + Source = $w.Source + WingetName = $w.Name + PingetName = $p.Name + }) | Out-Null + } else { + $matching.Add($w) | Out-Null + } + } + + foreach ($key in $pingetByKey.Keys) { + if (-not $wingetByKey.ContainsKey($key)) { + $extra.Add($pingetByKey[$key]) | Out-Null + } + } + + return [pscustomobject]@{ + Missing = $missing.ToArray() + Extra = $extra.ToArray() + VersionDiff = $versionDiff.ToArray() + CosmeticOnly = $cosmeticOnly.ToArray() + Matching = $matching.ToArray() + } +} + +function Get-MachineSnapshot { + param( + [string]$Pinget, + [string]$Winget + ) + + $wingetVersion = try { (& $Winget --version 2>&1 | Select-Object -First 1) } catch { "unavailable" } + $pingetVersion = try { (& $Pinget --version 2>&1 | Select-Object -First 1) } catch { "unavailable" } + + [pscustomobject]@{ + CapturedAt = (Get-Date).ToUniversalTime().ToString("o") + OSVersion = [System.Environment]::OSVersion.VersionString + OSArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString() + ProcessArchitecture = [System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture.ToString() + WingetVersion = "$wingetVersion".Trim() + PingetVersion = "$pingetVersion".Trim() + } +} + +function Write-Section { + param([string]$Title) + Write-Host "" + Write-Host ("=" * 80) + Write-Host $Title + Write-Host ("=" * 80) +} + +function Format-Row { + param([pscustomobject]$Row) + return "{0,-40} {1,-22} -> {2,-22} [{3}]" -f $Row.Id, $Row.Version, $Row.Available, $Row.Source +} + +# --- main ----------------------------------------------------------------- + +Assert-Path -Path $Pinget -Description "pinget binary" + +if ($UpdateSources) { + Write-Host "Updating sources on both tools..." 
+ & $Winget source update --accept-source-agreements --disable-interactivity | Out-Null + & $Pinget source update | Out-Null +} + +$machine = Get-MachineSnapshot -Pinget $Pinget -Winget $Winget +Write-Section "Machine" +$machine | Format-List | Out-String | Write-Host + +Write-Section "Capturing winget upgrade" +$wingetResult = Get-WingetUpgradeRows -Executable $Winget -IncludeUnknown:$IncludeUnknown +Write-Host ("winget exit={0}, parsed {1} row(s)" -f $wingetResult.ExitCode, $wingetResult.Rows.Count) +if ($wingetResult.Diagnostic) { + Write-Warning $wingetResult.Diagnostic +} + +Write-Section "Capturing pinget upgrade" +$pingetResult = Get-PingetUpgradeRows -Executable $Pinget -IncludeUnknown:$IncludeUnknown +Write-Host ("pinget exit={0}, parsed {1} row(s)" -f $pingetResult.ExitCode, $pingetResult.Rows.Count) +if ($pingetResult.Diagnostic) { + Write-Warning $pingetResult.Diagnostic +} + +$diff = Compare-UpgradeRows -WingetRows $wingetResult.Rows -PingetRows $pingetResult.Rows + +Write-Section "Verdict" +Write-Host ("Matching rows : {0}" -f $diff.Matching.Count) +Write-Host ("Missing : {0} (winget reports, pinget does not)" -f $diff.Missing.Count) +Write-Host ("Extra : {0} (pinget reports, winget does not)" -f $diff.Extra.Count) +Write-Host ("Version diff : {0} (both report; versions disagree)" -f $diff.VersionDiff.Count) +Write-Host ("Cosmetic only : {0} (same id/versions, different display name)" -f $diff.CosmeticOnly.Count) + +if ($diff.Missing.Count -gt 0) { + Write-Section "MISSING — winget shows, pinget does not" + $diff.Missing | ForEach-Object { Write-Host (Format-Row -Row $_) } +} +if ($diff.Extra.Count -gt 0) { + Write-Section "EXTRA — pinget shows, winget does not" + $diff.Extra | ForEach-Object { Write-Host (Format-Row -Row $_) } +} +if ($diff.VersionDiff.Count -gt 0) { + Write-Section "VERSION MISMATCH — same id, different versions" + foreach ($entry in $diff.VersionDiff) { + Write-Host ("{0} [{1}]" -f $entry.Id, $entry.Source) + Write-Host (" winget: 
{0,-22} -> {1}" -f $entry.WingetVersion, $entry.WingetAvailable) + Write-Host (" pinget: {0,-22} -> {1}" -f $entry.PingetVersion, $entry.PingetAvailable) + } +} +if ($diff.CosmeticOnly.Count -gt 0) { + Write-Section "COSMETIC — same id/versions, different display name" + foreach ($entry in $diff.CosmeticOnly) { + Write-Host ("{0} [{1}]" -f $entry.Id, $entry.Source) + Write-Host (" winget: {0}" -f $entry.WingetName) + Write-Host (" pinget: {0}" -f $entry.PingetName) + } +} + +$failCount = $diff.Missing.Count + $diff.Extra.Count + $diff.VersionDiff.Count +# Parser diagnostics (broken winget table layout, malformed pinget JSON) +# must be treated as failures — otherwise the harness reports a spurious +# PASS the day winget's column headers change. +$parserBroken = [bool]$wingetResult.Diagnostic -or [bool]$pingetResult.Diagnostic +$verdict = if ($failCount -eq 0 -and -not $parserBroken) { "PASS" } else { "FAIL" } + +Write-Section "Result: $verdict" + +if ($FixturePath) { + $fixture = [ordered]@{ + schema = "pinget-parity/upgrade/v1" + machine = $machine + invocation = [ordered]@{ + pinget = (Resolve-Path -LiteralPath $Pinget -ErrorAction SilentlyContinue)?.Path + winget = $Winget + includeUnknown = [bool]$IncludeUnknown + updateSources = [bool]$UpdateSources + } + winget = [ordered]@{ + exitCode = $wingetResult.ExitCode + rows = $wingetResult.Rows + raw = $wingetResult.Raw + } + pinget = [ordered]@{ + exitCode = $pingetResult.ExitCode + rows = $pingetResult.Rows + raw = $pingetResult.Raw + } + diff = [ordered]@{ + matchingCount = $diff.Matching.Count + missing = $diff.Missing + extra = $diff.Extra + versionDiff = $diff.VersionDiff + cosmeticOnly = $diff.CosmeticOnly + } + verdict = $verdict + } + $fixture | ConvertTo-Json -Depth 32 | Set-Content -Path $FixturePath -Encoding utf8 + Write-Host "Fixture written to: $FixturePath" +} + +if ($FailOnDiff -and $verdict -ne "PASS") { + exit 1 +}