# PkgEvalAnalysis

Latest pkgeval: https://s3.amazonaws.com/julialang-reports/nanosoldier/pkgeval/by_hash/0520b80_vs_997b49f/report.html

In [1]:
using DataFrames, Feather
using JuliaRegistryAnalysis
using Graphs, MetaGraphs
using Downloads

include("add_back_logs.jl")

[32m[1mPrecompiling[22m[39m DataFrames
[32m  ✓ [39m[90mInlineStrings[39m
[32m  ✓ [39m[90mStringManipulation[39m
[32m  ✓ [39m[90mPrettyTables[39m
[32m  ✓ [39mDataFrames
  4 dependencies successfully precompiled in 24 seconds. 23 already precompiled.
[32m[1mPrecompiling[22m[39m Feather
[32m  ✓ [39m[90mCategoricalArrays → CategoricalArraysJSONExt[39m
[32m  ✓ [39mFeather
  2 dependencies successfully precompiled in 2 seconds. 35 already precompiled.
[32m[1mPrecompiling[22m[39m JuliaRegistryAnalysis
[32m  ✓ [39m[90mStaticArrays[39m
[32m  ✓ [39m[90mStaticArrays → StaticArraysStatisticsExt[39m
[32m  ✓ [39m[90mArnoldiMethod[39m
[32m  ✓ [39m[90mJLD2[39m
[32m  ✓ [39mGraphs
[32m  ✓ [39mMetaGraphs
[32m  ✓ [39mJuliaRegistryAnalysis
  7 dependencies successfully precompiled in 9 seconds. 21 already precompiled.


download_logs (generic function with 1 method)

In [2]:
baseline = "0520b80"
vs       = "997b49f"

data_dir = joinpath("data_$(baseline)_vs_$(vs)")
if !isdir(data_dir)
    url = "https://github.com/JuliaCI/NanosoldierReports/blob/master/pkgeval/by_hash/$(baseline)_vs_$(vs)/data.tar.zst?raw=true"
    file = Downloads.download(url)
    mkdir(data_dir)
    mv(file, joinpath(data_dir, "$(baseline)_vs_$(vs).tar.zst"))
    cd(data_dir) do
        run(`tar -xf $(baseline)_vs_$(vs).tar.zst`)
    end
end

# The data in the data.tar.zst do not contain the actual logs so we have to download these
if !isfile(joinpath(data_dir, "primary_log.feather"))
    add_back_logs(data_dir)
end
primary  = Feather.read(joinpath(@__DIR__, data_dir, "primary_log.feather"));
against  = Feather.read(joinpath(@__DIR__, data_dir, "against_log.feather"));

In [3]:
# We sort packages according to how many transitive dependencies they have,
graph = JuliaRegistryAnalysis.dependency_graph(; include=(name, uuid) -> true)
name_to_vertex = Dict{String, Int}(get_prop(graph, i, :label) => i for i in 1:nv(graph));

const trans = transitiveclosure(graph.graph)

sortby = x -> begin
    v = get(name_to_vertex, x, 0)
    v == 0 && return 0
    return Graphs.indegree(trans, v)
end
    
primary = sort(primary, order(:package, by=sortby, rev=true))
against = sort(against, order(:package, by=sortby, rev=true))
nothing

dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
dep_uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
dep_uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
dep_uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
dep_uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
dep_uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"


In [25]:
# This is a list of different categories of failures.
# Some are common between different Julia version upgrades
# and some are "special" for this specific upgrade.

@enum FailureType begin
    # General ones
    Unknown
    InferredFailure
    PrintingChange
    TestAmbiguities
    NewAmbiguity
    ApproxError
    DocTest
    NewExceptionType
    DownloadError
    UnexpectedPass
    BadBoy
    SyntaxError
    VisualRegression
    MissingDep
    BuildError
    UnsatReq
    Belapsed
    StderrCheck
    TypeInferenceError
    
    # From PkgEval
    MissingDependency
    Inactivity
    MissingBinary
    Untestable
    LogLimit
    TimeLimit
    Syntax
    
    # Specific
    TokenizeConvert
    TriangularNTypeParams
    CSVReg
    DocMacro
    ConversionToPointer
    
    # Assertions
    PHINodesGrouped
    CTXSSAValueAssigned
    MethodAddedWhenDeleted
    CycleDepth
    IfElseInvalidIR
    MalformedIsdefined
    BadSignatureEnzyme
    GCPreserveEnd
    NewObjMethInst
    IJLTypesEqual
    InfStackOverflow
    CodeInstMaxWorld

    # Segfaults
    Vload
end


In [5]:
# Join the primary and against data
package_results = leftjoin(primary, against, on=:package, makeunique=true, indicator=:source);

# Only keep packages that started to fail in the new version
fails = filter(test->test.source == "both" &&
                 test.status != test.status_1 &&
                 test.status in (":fail", ":kill", ":crash"), package_results)


missing_binary_idx     = fails.reason .== ":binary_dependency"
inactivity_idx         = fails.reason .== ":inactivity"
missing_dependency_idx = fails.reason .== ":missing_dependency"
untestable_idx         = fails.reason .== ":untestable"
log_limit_idx          = fails.reason .== ":log_limit"
time_limit_idx         = fails.reason .== ":time_limit"

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

fails.why[missing_binary_idx]     .= MissingBinary
fails.why[inactivity_idx]         .= Inactivity
fails.why[missing_dependency_idx] .= MissingDependency
fails.why[untestable_idx]         .= Untestable
fails.why[log_limit_idx]          .= LogLimit
fails.why[time_limit_idx]         .= TimeLimit

nothing

In [6]:
# Some utility functions

query(fails, s) = sort(filter(row -> occursin(s, row[:log]), fails))

function update_reason!(fails, needle, why)
    idxs = findall(row -> occursin(needle, row), fails.log)
    fails.why[idxs] .= why
    return fails
end

total_unknown(fails) = count(x -> x.why == Unknown, eachrow(fails))

total_unknown (generic function with 1 method)

In [24]:
query(fails, "WORLD_AGE_REVALIDATION").package

2-element Vector{String}:
 "GeoParquet"
 "Parquet2"

In [26]:
# Here we pattern match certain test errors and categorize them based on that.

# Generic ones
update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "detect_ambiguities(", TestAmbiguities)

update_reason!(fails, "Expression: all_doctests()", DocTest)
update_reason!(fails, "Error: doctest failure in ", DocTest)

update_reason!(fails, "does not match inferred return type", InferredFailure);
update_reason!(fails, "Expression: isapprox", ApproxError)
update_reason!(fails, r"Expression: (\S*) ≈ (\S*)", ApproxError) 
update_reason!(fails, "Expression: ≈(", ApproxError)
    
update_reason!(fails, "is ambiguous. Candidates:", NewAmbiguity)
update_reason!(fails, " ambiguities found", NewAmbiguity)

update_reason!(fails, "Unsatisfiable requirements detected for package", UnsatReq)



update_reason!(fails, "Log Test Failed at", PrintingChange)
update_reason!(fails, "Expression: occursin(r\"", PrintingChange)
update_reason!(fails, "Expression: startswith(", PrintingChange)
# update_reason!(fails, r"Expression: (\N.*?) == ", PrintingChange)
update_reason!(fails, "Evaluated: occursin(", PrintingChange)
update_reason!(fails, "Evaluated: endswith(", PrintingChange)
update_reason!(fails, "- DIFF ------------------------", PrintingChange)
update_reason!(fails, "LoadError: syntax", SyntaxError)

update_reason!(fails, "Image did not match reference image", VisualRegression)

update_reason!(fails, "      Thrown: ", NewExceptionType)
update_reason!(fails, "The requested URL returned error", DownloadError)
update_reason!(fails, "gzip: stdin: not in gzip format", DownloadError)
update_reason!(fails, "Unexpected Pass", UnexpectedPass)

update_reason!(fails, "Error building ", BuildError)
update_reason!(fails, "isempty(stderr_content)", StderrCheck)

update_reason!(fails, "isempty(stderr_content)", StderrCheck)

update_reason!(fails, "fatal error in type inference", TypeInferenceError)


# Specific ones for this release

# Assertions
update_reason!(fails, "!ctx.ssavalue_assigned.at", CTXSSAValueAssigned)
update_reason!(fails, "PHI nodes not grouped at top of basic block", PHINodesGrouped)
update_reason!(fails, "method cannot be added at the same time as method deleted", MethodAddedWhenDeleted)
update_reason!(fails, "cycle == depth", CycleDepth)
update_reason!(fails, "select i1 %ifelse", IfElseInvalidIR)
update_reason!(fails, "malformed isdefined expression", MalformedIsdefined)
update_reason!(fails, "Calling a function with bad signature", BadSignatureEnzyme)
update_reason!(fails, "llvm.julia.gc_preserve_end", GCPreserveEnd)
update_reason!(fails, "newobj == (jl_method_instance_t", NewObjMethInst)
update_reason!(fails, "!ijl_types_equal(mi->specType", IJLTypesEqual)
update_reason!(fails, "stack overflow in type inference", InfStackOverflow)
update_reason!(fails, "This might be caused by recursion over very long", InfStackOverflow)
update_reason!(fails, "(&codeinst->max_world) == WORLD_AGE_REVALIDATION_SENTINEL", CodeInstMaxWorld)

# Segfaults
update_reason!(fails, "__vload at", Vload)

    


# Specific
update_reason!(fails, "Tokenize.Lexers.Lexer{Base.GenericIOBuffer{Memory{UInt8}}", TokenizeConvert)
update_reason!(fails, "too many parameters for type AbstractTriangular", TriangularNTypeParams)
update_reason!(fails, "TypeError: in typeassert, expected Tuple{Vector{UInt8}, Int64", CSVReg)
update_reason!(fails, "@doc(DocFlag1)) == ", DocMacro)
update_reason!(fails, "conversion to pointer not defined for", ConversionToPointer)

 



nothing

In [35]:
# Packages that have gotten some attention but haven't made a new version that would
# remove the package from the PkgEval list

issues_opened = [
    "LoopFieldCalc" # https://github.com/JuliaLang/julia/issues/53585
    "Elliptic" # https://github.com/JuliaLang/julia/issues/53585
    "ScanByte" # https://github.com/jakobnissen/ScanByte.jl/issues/11
    "StaticCompiler" # https://github.com/tshort/StaticCompiler.jl/issues/155
    "StaticTools" # https://github.com/tshort/StaticCompiler.jl/issues/155
    "DataStructures" # https://github.com/JuliaLang/julia/issues/53590
    "InvertedIndices" # https://github.com/JuliaLang/julia/issues/53591
]

likely_tol = [
]

fixed = [
    "OpenSSL" #https://github.com/JuliaWeb/OpenSSL.jl/pull/33
    "Unitful" # https://github.com/JuliaLang/julia/issues/53582
    "CloseOpenIntervals" # https://github.com/JuliaSIMD/CloseOpenIntervals.jl/issues/16
    "ArrayLayouts" # https://github.com/JuliaLinearAlgebra/ArrayLayouts.jl/issues/204
    "PDMats" # https://github.com/JuliaLang/julia/issues/53583
    "ProgressMeter" # fixed on master
]

ignored_packages = [
]

Any[]

In [36]:
# Total package failures that we haven't categorized

total_unknown(fails)

502

In [37]:
# Unknown failures, these are packages where we haven't yet managed to categorize their failure

filter(x -> x.why == Unknown && 
       !(x.package in issues_opened) && 
       !(x.package in likely_tol) && 
       !(x.package in ignored_packages) && 
       !(x.package in fixed) 
      # && x.status == ":crash", 
    , fails).package

494-element Vector{String}:
 "Preferences"
 "REPL"
 "ConstructionBase"
 "InvertedIndices"
 "StaticArrayInterface"
 "Accessors"
 "LLVM"
 "KernelAbstractions"
 "StructTypes"
 "Transducers"
 "ImageCore"
 "RandomNumbers"
 "PreallocationTools"
 ⋮
 "ParameterEstimocean"
 "Vahana"
 "StructuredLight"
 "MDToolbox"
 "MixedModelsMakie"
 "MCPhylo"
 "AlphaZero"
 "MRIsim"
 "PsychExpAPIs"
 "Turkie"
 "MCPTrajectoryGameSolver"
 "UnfoldCDL"

In [33]:
fail_type = filter(x -> x.why == TokenizeConvert,  
      fails).package

print(join(fail_type, '\n'))

BenchmarkTools
Graphs
UnitfulLatexify
StochasticDiffEq
Sundials
TableShowUtils
MicroMamba
CondaPkg
Vega
ChunkSplitters
VegaLite
DataPipes
MetaGraphsNext
Catalyst
GenieSession
GenieSessionFileSession
DynamicalSystemsBase
TestItemRunner
CellListMap
Stipple
Mimi
ConstraintCommons
PDBTools
PatternFolds
QuantumPropagators
WilliamsonTransforms
ConstraintDomains
Copulas
ImplicitDifferentiation
CompositionalNetworks
ParameterizedFunctions
XAIBase
JSONRPC
StaticLint
ExcelReaders
DirectionalStatistics
TimeStruct
KomaMRIBase
Constraints
StippleUI
TestItemDetection
PyFormattedStrings
RectiGrids
LocalSearchSolvers
StipplePlotly
OptimizationMOI
Skipper
DateFormats
SPGBox
InterferometricModels
LanguageServer
GeniePlugins
GeniePackageManager
GenieDevTools
ReactionNetworkImporters
SentinelViews
UnionCollections
Countries
WannierIO
SauterSchwabQuadrature
DataManipulation
VOTables
QUBOConstraints
TransitionMatrices
KomaMRIFiles
EasyFit
GenieCache
CBLS
GenieFramework
KomaMRIPlots
KomaMRICore
MriResearchTo

In [22]:
# Categorize

z = []
for i in instances(FailureType)
    n = count(row -> row.why == i, eachrow(fails))
    n > 0 && push!(z, (n, i))
end
sort!(z; rev=true)

24-element Vector{Any}:
 (505, Unknown)
 (349, CSVReg)
 (331, TimeLimit)
 (154, TokenizeConvert)
 (57, TriangularNTypeParams)
 (50, ConversionToPointer)
 (40, ApproxError)
 (36, Inactivity)
 (9, NewExceptionType)
 (9, PrintingChange)
 (8, NewAmbiguity)
 (5, LogLimit)
 (5, UnsatReq)
 (5, VisualRegression)
 (4, TestAmbiguities)
 (2, InfStackOverflow)
 (2, GCPreserveEnd)
 (2, UnexpectedPass)
 (2, DocTest)
 (2, InferredFailure)
 (1, DocMacro)
 (1, MissingBinary)
 (1, StderrCheck)
 (1, BuildError)