# PkgEvalAnalysis

Latest pkgeval: https://s3.amazonaws.com/julialang-reports/nanosoldier/pkgeval/by_hash/9a67956_vs_17cfb8e/report.html

In [1]:
using DataFrames, Feather
using JuliaRegistryAnalysis
using Graphs, MetaGraphs
using Downloads

include("add_back_logs.jl")

download_logs (generic function with 1 method)

In [2]:
baseline = "c9a32f4"
vs = "e4ee485"

# Anchor the data directory at the notebook's own directory so that the
# download, the extraction and the reads below all use the same location,
# even when the working directory differs from `@__DIR__`.
data_dir = joinpath(@__DIR__, "data_$(baseline)_vs_$(vs)")
if !isdir(data_dir)
    url = "https://github.com/JuliaCI/NanosoldierReports/blob/master/pkgeval/by_hash/$(baseline)_vs_$(vs)/data.tar.zst?raw=true"
    archive = "$(baseline)_vs_$(vs).tar.zst"
    mkdir(data_dir)
    try
        # Download straight to the destination instead of a temp file + `mv`
        Downloads.download(url, joinpath(data_dir, archive))
        cd(data_dir) do
            run(`tar -xf $archive`)
        end
    catch
        # A half-populated directory would make the `isdir` check above skip
        # the download on every later run, so remove it before re-raising.
        rm(data_dir; recursive=true, force=true)
        rethrow()
    end
end

primary_path = joinpath(data_dir, "primary_log.feather")
against_path = joinpath(data_dir, "against_log.feather")

# The data in data.tar.zst does not contain the actual logs, so they have to be
# added back separately. Both files are read below, so both are checked here.
# NOTE(review): assumes `add_back_logs` writes both feather files — confirm in add_back_logs.jl
if !(isfile(primary_path) && isfile(against_path))
    add_back_logs(data_dir)
end
primary  = Feather.read(primary_path);
against  = Feather.read(against_path);

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:27[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:25[39m


In [3]:
# Rank every package by its in-degree in the transitive closure of the registry
# dependency graph; packages that are not in the graph rank as 0.
# NOTE(review): whether in-degree means "dependencies" or "dependents" depends on
# the edge direction used by `dependency_graph` — confirm.
graph = JuliaRegistryAnalysis.dependency_graph(; include=(name, uuid) -> true)
name_to_vertex = Dict{String, Int}(get_prop(graph, v, :label) => v for v in 1:nv(graph));

const trans = transitiveclosure(graph.graph)

sortby = function (pkg)
    haskey(name_to_vertex, pkg) || return 0
    return Graphs.indegree(trans, name_to_vertex[pkg])
end

# Highest-ranked packages come first in both tables
primary = sort(primary, order(:package, by=sortby, rev=true))
against = sort(against, order(:package, by=sortby, rev=true))
nothing

In [106]:
# This is a list of different categories of failures.
# Some are common between different Julia version upgrades
# and some are "special" for this specific upgrade.

# Every category a failing package can be filed under; one value per row is
# stored in `fails.why`. Declaration order fixes each member's integer value,
# which is what orders equal counts when the per-category tally is sorted.
@enum FailureType begin
    # General ones
    Unknown  # initial value of `fails.why` until some rule matches
    InferredFailure
    PrintingChange
    TestAmbiguities
    NewAmbiguity
    ApproxError
    DocTest
    NewExceptionType
    DownloadError
    UnexpectedPass
    BadBoy  # NOTE(review): no rule in the cells shown here assigns this
    SyntaxError
    VisualRegression
    MissingDep  # NOTE(review): never assigned in the cells shown; see MissingDependency below
    BuildError
    UnsatReq
    BSONDataTypeField  # NOTE(review): no rule in the cells shown here assigns this
    Belapsed  # NOTE(review): no rule in the cells shown here assigns this
    StderrCheck
    
    # From PkgEval: set from the `reason` column instead of from log patterns
    MissingDependency
    Inactivity
    MissingBinary
    Untestable
    LogLimit
    TimeLimit
    Syntax  # NOTE(review): no `reason` value is mapped to this in the cells shown
    
    # Assertions: assigned by matching specific messages in the test logs
    PHINodesGrouped
    CTXSSAValueAssigned
    MethodAddedWhenDeleted
    CycleDepth
    IfElseInvalidIR
    MalformedIsdefined
    BadSignatureEnzyme
    GCPreserveEnd
    NewObjMethInst
    IJLTypesEqual
    InfStackOverflow
end


In [26]:
# Join the primary and against data. The `source` column records whether a
# package was found in both runs ("both") or only in the primary one ("left_only").
# (`source=` replaces the deprecated `indicator=` keyword.)
package_results = leftjoin(primary, against, on=:package, makeunique=true, source=:source);

# Only keep packages that were tested in both runs, whose status differs between
# the runs, and that now fail, get killed or crash.
# `status_1` is the `against` status (renamed by `makeunique=true`).
fails = filter(test->test.source == "both" &&
                 test.status != test.status_1 &&
                 test.status in (":fail", ":kill", ":crash"), package_results)


# `isequal` rather than `==`: a `missing` reason then gives `false` instead of
# `missing`, which could not be used as a logical index below.
missing_binary_idx     = isequal.(fails.reason, ":binary_dependency")
inactivity_idx         = isequal.(fails.reason, ":inactivity")
missing_dependency_idx = isequal.(fails.reason, ":missing_dependency")
untestable_idx         = isequal.(fails.reason, ":untestable")
log_limit_idx          = isequal.(fails.reason, ":log_limit")
time_limit_idx         = isequal.(fails.reason, ":time_limit")

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

# Reasons that PkgEval already reported can be filed directly
fails.why[missing_binary_idx]     .= MissingBinary
fails.why[inactivity_idx]         .= Inactivity
fails.why[missing_dependency_idx] .= MissingDependency
fails.why[untestable_idx]         .= Untestable
fails.why[log_limit_idx]          .= LogLimit
fails.why[time_limit_idx]         .= TimeLimit

nothing

In [38]:
# Some utility functions

"""
    query(fails, s)

Return the rows of `fails` whose `log` entry contains `s`, sorted.
"""
function query(fails, s)
    matching = filter(row -> occursin(s, row[:log]), fails)
    return sort(matching)
end

"""
    update_reason!(fails, needle, why)

Set `fails.why` to `why` for every entry of `fails.log` that contains `needle`
(anything `occursin` accepts as its first argument). Existing values are
overwritten. Returns `fails`.
"""
function update_reason!(fails, needle, why)
    contains_needle = Base.Fix1(occursin, needle)
    hit_positions = findall(contains_needle, fails.log)
    fails.why[hit_positions] .= why
    return fails
end

"""
    total_unknown(fails)

Count the rows of `fails` whose `why` is still `Unknown`.
"""
function total_unknown(fails)
    return count(==(Unknown), fails.why)
end

total_unknown (generic function with 1 method)

In [96]:
# List the packages whose log contains this assertion text
query(fails, "!ijl_types_equal(mi->specTypes").package

1-element Vector{String}:
 "FourierFlows"

In [107]:
# Here we pattern match certain test errors and categorize them based on that.
# `update_reason!` overwrites `why` unconditionally, so when a log matches
# several patterns the rule applied last wins — keep the order in mind when
# adding rules.

# Generic ones
update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "detect_ambiguities(", TestAmbiguities)

update_reason!(fails, "Expression: all_doctests()", DocTest)
update_reason!(fails, "Error: doctest failure in ", DocTest)

update_reason!(fails, "does not match inferred return type", InferredFailure)
update_reason!(fails, "Expression: isapprox", ApproxError)
update_reason!(fails, r"Expression: (\S*) ≈ (\S*)", ApproxError)
update_reason!(fails, "Expression: ≈(", ApproxError)

update_reason!(fails, "is ambiguous. Candidates:", NewAmbiguity)
update_reason!(fails, " ambiguities found", NewAmbiguity)

update_reason!(fails, "Unsatisfiable requirements detected for package", UnsatReq)

update_reason!(fails, "Log Test Failed at", PrintingChange)
update_reason!(fails, "Expression: occursin(r\"", PrintingChange)
update_reason!(fails, "Expression: startswith(", PrintingChange)
# Disabled rule, kept for reference:
# update_reason!(fails, r"Expression: (\N.*?) == ", PrintingChange)
update_reason!(fails, "Evaluated: occursin(", PrintingChange)
update_reason!(fails, "Evaluated: endswith(", PrintingChange)
update_reason!(fails, "- DIFF ------------------------", PrintingChange)
update_reason!(fails, "LoadError: syntax", SyntaxError)

update_reason!(fails, "Image did not match reference image", VisualRegression)

update_reason!(fails, "      Thrown: ", NewExceptionType)
update_reason!(fails, "The requested URL returned error", DownloadError)
update_reason!(fails, "gzip: stdin: not in gzip format", DownloadError)
update_reason!(fails, "Unexpected Pass", UnexpectedPass)

update_reason!(fails, "Error building ", BuildError)
# This rule used to be applied twice in a row; once is enough.
update_reason!(fails, "isempty(stderr_content)", StderrCheck)

# Specific ones for this release: the assertion-style categories of `FailureType`
update_reason!(fails, "!ctx.ssavalue_assigned.at", CTXSSAValueAssigned)
update_reason!(fails, "PHI nodes not grouped at top of basic block", PHINodesGrouped)
update_reason!(fails, "method cannot be added at the same time as method deleted", MethodAddedWhenDeleted)
update_reason!(fails, "cycle == depth", CycleDepth)
update_reason!(fails, "select i1 %ifelse", IfElseInvalidIR)
update_reason!(fails, "malformed isdefined expression", MalformedIsdefined)
update_reason!(fails, "Calling a function with bad signature", BadSignatureEnzyme)
update_reason!(fails, "llvm.julia.gc_preserve_end", GCPreserveEnd)
update_reason!(fails, "newobj == (jl_method_instance_t", NewObjMethInst)
update_reason!(fails, "!ijl_types_equal(mi->specType", IJLTypesEqual)
update_reason!(fails, "stack overflow in type inference", InfStackOverflow)

# Suppress the cell output
nothing

In [108]:
# Packages that have gotten some attention but haven't made a new version that would
# remove the package from the PkgEval list

# Entries must equal the value in `fails.package` exactly: the bare package
# name, without a ".jl" suffix and without surrounding whitespace.
issues_opened = [
    "Checkpointing", # https://github.com/JuliaGPU/GPUCompiler.jl/issues/361
    "CodeInfoTools", # https://github.com/JuliaCompilerPlugins/CodeInfoTools.jl
    "CompatHelperLocal", # https://gitlab.com/aplavin/compathelperlocal.jl/-/issues/1
    "DocumentFunction", # https://github.com/madsjulia/DocumentFunction.jl/issues/6
    "DataStructures", # https://github.com/JuliaCollections/DataStructures.jl/pull/832
    "TightBindingApproximation", # https://github.com/JuliaLang/julia/issues/47476
    "CameraModels", # https://github.com/JuliaLang/julia/issues/48076
    "NewsLookout", # nonsense
    "Libtask", # internals
    "MaxwellSALT", # old version <zzz
    "Agents", # https://github.com/JuliaDynamics/Agents.jl/issues/733
    "GFlops", # https://github.com/triscale-innov/GFlops.jl/issues/42
    "Symbolics", # https://github.com/JuliaSymbolics/Symbolics.jl/issues/827
    "CrystalNets", #
    "PDMats", # https://github.com/JuliaStats/PDMats.jl/pull/153/files#r1090770057
    "MethodAnalysis", # https://github.com/timholy/MethodAnalysis.jl/issues/36
]

likely_tol = [
];

fixed = [
    "DocStringExtensions", # https://github.com/JuliaDocs/DocStringExtensions.jl/pull/137
    "EndpointRanges", #
    "AstroImages", # https://github.com/JuliaAstro/AstroImages.jl/pull/35
    "Deductive", # https://github.com/ctrekker/Deductive.jl/pull/28
    "UsingMerge", # master
    "JWAS", # https://github.com/reworkhow/JWAS.jl/pull/133
    "InMemoryDatasets", # https://github.com/sl-solution/InMemoryDatasets.jl/commit/528be734543a87786c53bba7c7dffd287e7b8f74
    "GenericArpack", # https://github.com/dgleich/GenericArpack.jl/pull/4
    "DataStructures", # https://github.com/JuliaCollections/DataStructures.jl/pull/832
    "Compat", # https://github.com/JuliaLang/Compat.jl/pull/786
    "Profile", # https://github.com/JuliaLang/julia/pull/48458
    "ExprTools", # https://github.com/invenia/ExprTools.jl/pull/36
    "NamedDims", # https://github.com/invenia/NamedDims.jl/pull/214
    "Distributions", # https://github.com/JuliaStats/Distributions.jl/pull/1671
]


ignored_packages = [
]

Any[]

In [109]:
# Number of failing packages whose `why` is still `Unknown`, i.e. that we haven't categorized

total_unknown(fails)

251

In [110]:
# Packages that crashed and whose failure we have not managed to categorize yet.
# The exclusion lists are switched off; uncomment a line to hide those packages.
let keep = fails.why .== Unknown
    # keep .&= .!in.(fails.package, Ref(issues_opened))
    # keep .&= .!in.(fails.package, Ref(likely_tol))
    # keep .&= .!in.(fails.package, Ref(ignored_packages))
    # keep .&= .!in.(fails.package, Ref(fixed))
    keep .&= fails.status .== ":crash"
    fails.package[keep]
end

String[]

In [113]:
# All packages whose failure is still uncategorized.
# The exclusion lists are switched off; uncomment a line to hide those packages.
let keep = fails.why .== Unknown
    # keep .&= .!in.(fails.package, Ref(issues_opened))
    # keep .&= .!in.(fails.package, Ref(likely_tol))
    # keep .&= .!in.(fails.package, Ref(ignored_packages))
    # keep .&= .!in.(fails.package, Ref(fixed))
    fails.package[keep]
end

251-element Vector{String}:
 "MacroTools"
 "SnoopPrecompile"
 "FillArrays"
 "ColorTypes"
 "ExprTools"
 "InlineStrings"
 "StringManipulation"
 "SimpleTraits"
 "JLD2"
 "Transducers"
 "MuladdMacro"
 "AxisArrays"
 "ImageBase"
 ⋮
 "IRViz"
 "TORA"
 "IsDef"
 "PSSFSS"
 "TuringCallbacks"
 "ConstrainedDynamicsVis"
 "Stencils"
 "CharacteristicInvFourier"
 "NaiveGAflux"
 "vSmartMOM"
 "Scruff"
 "MuseInference"

In [111]:
# Tally the failures per category, drop categories without any hit,
# and list the biggest categories first.

z = Any[(count(==(kind), fails.why), kind) for kind in instances(FailureType)]
filter!(entry -> first(entry) > 0, z)
sort!(z; rev=true)

28-element Vector{Any}:
 (251, Unknown)
 (60, MissingBinary)
 (51, TimeLimit)
 (36, MethodAddedWhenDeleted)
 (14, ApproxError)
 (14, InferredFailure)
 (13, IfElseInvalidIR)
 (10, NewExceptionType)
 (10, DocTest)
 (7, UnsatReq)
 (6, BadSignatureEnzyme)
 (6, LogLimit)
 (5, CTXSSAValueAssigned)
 ⋮
 (3, PHINodesGrouped)
 (3, MissingDependency)
 (2, StderrCheck)
 (1, IJLTypesEqual)
 (1, NewObjMethInst)
 (1, MalformedIsdefined)
 (1, CycleDepth)
 (1, Inactivity)
 (1, BuildError)
 (1, SyntaxError)
 (1, UnexpectedPass)
 (1, NewAmbiguity)

16-element Vector{String}:
 "Dagger"
 "QuantEcon"
 "Try"
 "ParallelStencil"
 "AutomationLabsModelPredictiveControl"
 "ConcurrentUtils"
 "HarmonicBalance"
 "SyncBarriers"
 "LessUnitful"
 "ReportMetrics"
 "EQDSKReader"
 "Pigeons"
 "PreludeDicts"
 "TestFunctionRunner"
 "MPIMapReduce"
 "ConcurrentCollections"