# PkgEvalAnalysis

Latest pkgeval: https://s3.amazonaws.com/julialang-reports/nanosoldier/pkgeval/by_hash/c356e60_vs_bd47eca/report.html

In [1]:
using DataFrames, Feather
using JuliaRegistryAnalysis
using Graphs, MetaGraphs
using Downloads

include("add_back_logs.jl")

download_logs (generic function with 1 method)

In [2]:
baseline = "c356e60"
vs       = "bd47eca"

data_dir = joinpath("data_$(baseline)_vs_$(vs)")
if !isdir(data_dir)
    url = "https://github.com/JuliaCI/NanosoldierReports/blob/master/pkgeval/by_hash/$(baseline)_vs_$(vs)/data.tar.zst?raw=true"
    file = Downloads.download(url)
    mkdir(data_dir)
    mv(file, joinpath(data_dir, "$(baseline)_vs_$(vs).tar.zst"))
    cd(data_dir) do
        run(`tar -xf $(baseline)_vs_$(vs).tar.zst`)
    end
end

# The data in the data.tar.zst do not contain the actual logs so we have to download these
if !isfile(joinpath(data_dir, "primary_log.feather"))
    add_back_logs(data_dir)
end
primary  = Feather.read(joinpath(@__DIR__, data_dir, "primary_log.feather"));
against  = Feather.read(joinpath(@__DIR__, data_dir, "against_log.feather"));

In [3]:
# We sort packages according to how many transitive dependencies they have,
graph = JuliaRegistryAnalysis.dependency_graph(; include=(name, uuid) -> true)
name_to_vertex = Dict{String, Int}(get_prop(graph, i, :label) => i for i in 1:nv(graph));

const trans = transitiveclosure(graph.graph)

sortby = x -> begin
    v = get(name_to_vertex, x, 0)
    v == 0 && return 0
    return Graphs.indegree(trans, v)
end
    
primary = sort(primary, order(:package, by=sortby, rev=true))
against = sort(against, order(:package, by=sortby, rev=true))
nothing

dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
dep_uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
dep_uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
dep_uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
dep_uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
dep_uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
dep_uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
dep_uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
dep_uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"


In [43]:
# This is a list of different categories of failures.
# Some are common between different Julia version upgrades
# and some are "special" for this specific upgrade.

@enum FailureType begin
    # General ones
    Unknown
    InferredFailure
    PrintingChange
    TestAmbiguities
    NewAmbiguity
    ApproxError
    DocTest
    NewExceptionType
    DownloadError
    UnexpectedPass
    BadBoy
    SyntaxError
    VisualRegression
    MissingDep
    BuildError
    UnsatReq
    Belapsed
    StderrCheck
    TypeInferenceError
    
    # From PkgEval
    MissingDependency
    Inactivity
    MissingBinary
    Untestable
    LogLimit
    TimeLimit
    Syntax
    
    # Specific
    TokenizeConvert
    TriangularNTypeParams
    CSVReg
    DocMacro
    ConversionToPointer
    DocNothing
    ResizeMemory
    CompleteRemotePkg
    InferenceWorld
    WithNotDefined
    WithNotDefinedSuppressor
    OldCompat
    MakeSeedRandom
    JetTestsFailed
    WrapDef
    
    # Assertions
    PHINodesGrouped
    CTXSSAValueAssigned
    MethodAddedWhenDeleted
    CycleDepth
    IfElseInvalidIR
    MalformedIsdefined
    BadSignatureEnzyme
    GCPreserveEnd
    NewObjMethInst
    IJLTypesEqual
    InfStackOverflow
    CodeInstMaxWorld
    SubArrayConvert

    # Segfaults
    Vload
end




In [5]:
# Join the primary and against data
package_results = leftjoin(primary, against, on=:package, makeunique=true, indicator=:source);

# Only keep packages that started to fail in the new version
fails = filter(test->test.source == "both" &&
                 test.status != test.status_1 &&
                 test.status in (":fail", ":kill", ":crash"), package_results)


missing_binary_idx     = fails.reason .== ":binary_dependency"
inactivity_idx         = fails.reason .== ":inactivity"
missing_dependency_idx = fails.reason .== ":missing_dependency"
untestable_idx         = fails.reason .== ":untestable"
log_limit_idx          = fails.reason .== ":log_limit"
time_limit_idx         = fails.reason .== ":time_limit"

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

fails.why[missing_binary_idx]     .= MissingBinary
fails.why[inactivity_idx]         .= Inactivity
fails.why[missing_dependency_idx] .= MissingDependency
fails.why[untestable_idx]         .= Untestable
fails.why[log_limit_idx]          .= LogLimit
fails.why[time_limit_idx]         .= TimeLimit

nothing

In [6]:
# Some utility functions

query(fails, s) = sort(filter(row -> occursin(s, row[:log]), fails))

function update_reason!(fails, needle, why)
    idxs = findall(row -> occursin(needle, row), fails.log)
    fails.why[idxs] .= why
    return fails
end

total_unknown(fails) = count(x -> x.why == Unknown, eachrow(fails))

total_unknown (generic function with 1 method)

In [7]:
query(fails, "WORLD_AGE_REVALIDATION").package

String[]

In [44]:
# Here we pattern match certain test errors and categorize them based on that.

# Generic ones
update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "detect_ambiguities(", TestAmbiguities)

update_reason!(fails, "Expression: all_doctests()", DocTest)
update_reason!(fails, "Error: doctest failure in ", DocTest)

update_reason!(fails, "does not match inferred return type", InferredFailure);
update_reason!(fails, "Expression: isapprox", ApproxError)
update_reason!(fails, r"Expression: (\S*) ≈ (\S*)", ApproxError) 
update_reason!(fails, "Expression: ≈(", ApproxError)
    
update_reason!(fails, "is ambiguous. Candidates:", NewAmbiguity)
update_reason!(fails, " ambiguities found", NewAmbiguity)

update_reason!(fails, "Unsatisfiable requirements detected for package", UnsatReq)



update_reason!(fails, "Log Test Failed at", PrintingChange)
update_reason!(fails, "Expression: occursin(r\"", PrintingChange)
update_reason!(fails, "Expression: startswith(", PrintingChange)
# update_reason!(fails, r"Expression: (\N.*?) == ", PrintingChange)
update_reason!(fails, "Evaluated: occursin(", PrintingChange)
update_reason!(fails, "Evaluated: endswith(", PrintingChange)
update_reason!(fails, "- DIFF ------------------------", PrintingChange)
update_reason!(fails, "LoadError: syntax", SyntaxError)

update_reason!(fails, "Image did not match reference image", VisualRegression)

update_reason!(fails, "      Thrown: ", NewExceptionType)
update_reason!(fails, "The requested URL returned error", DownloadError)
update_reason!(fails, "gzip: stdin: not in gzip format", DownloadError)
update_reason!(fails, "Unexpected Pass", UnexpectedPass)

update_reason!(fails, "Error building ", BuildError)
update_reason!(fails, "isempty(stderr_content)", StderrCheck)

update_reason!(fails, "isempty(stderr_content)", StderrCheck)

update_reason!(fails, "fatal error in type inference", TypeInferenceError)


# Specific ones for this release

# Assertions
update_reason!(fails, "!ctx.ssavalue_assigned.at", CTXSSAValueAssigned)
update_reason!(fails, "PHI nodes not grouped at top of basic block", PHINodesGrouped)
update_reason!(fails, "method cannot be added at the same time as method deleted", MethodAddedWhenDeleted)
update_reason!(fails, "cycle == depth", CycleDepth)
update_reason!(fails, "select i1 %ifelse", IfElseInvalidIR)
update_reason!(fails, "malformed isdefined expression", MalformedIsdefined)
update_reason!(fails, "Calling a function with bad signature", BadSignatureEnzyme)
update_reason!(fails, "llvm.julia.gc_preserve_end", GCPreserveEnd)
update_reason!(fails, "newobj == (jl_method_instance_t", NewObjMethInst)
update_reason!(fails, "!ijl_types_equal(mi->specType", IJLTypesEqual)
update_reason!(fails, "stack overflow in type inference", InfStackOverflow)
update_reason!(fails, "This might be caused by recursion over very long", InfStackOverflow)
update_reason!(fails, "(&codeinst->max_world) == WORLD_AGE_REVALIDATION_SENTINEL", CodeInstMaxWorld)

# Segfaults
update_reason!(fails, "__vload at", Vload)

    


# Specific
update_reason!(fails, "Tokenize.Lexers.Lexer{Base.GenericIOBuffer{Memory{UInt8}}", TokenizeConvert)
update_reason!(fails, "too many parameters for type AbstractTriangular", TriangularNTypeParams)
update_reason!(fails, "TypeError: in typeassert, expected Tuple{Vector{UInt8}, Int64", CSVReg)
update_reason!(fails, "@doc(DocFlag1)) == ", DocMacro)
update_reason!(fails, "conversion to pointer not defined for", ConversionToPointer)
update_reason!(fails, "=# @doc(", DocNothing)
update_reason!(fails, "no method matching doc(", DocNothing)
update_reason!(fails, "SubArray{UInt8,1,Memory{UInt8},Tuple{UnitRange{Int64}},true} to an object of type", SubArrayConvert)

update_reason!(fails, "no method matching resize!(::Memory{UInt8}, ::Int64)", ResizeMemory)
update_reason!(fails, "UndefVarError: `complete_remote_package` not defined in `Pkg.REPLMode`", CompleteRemotePkg)
update_reason!(fails, "get_inference_world(::Enzyme.Compiler.Interpr", InferenceWorld)
update_reason!(fails, "`@with` not defined in", WithNotDefined)
update_reason!(fails, "`@with` not defined in `Suppressor", WithNotDefinedSuppressor)
update_reason!(fails, "[34da2185] Compat v3.41.0", OldCompat)
update_reason!(fails, "`make_seed` not defined in `Random`", MakeSeedRandom)
update_reason!(fails, "JET-test failed at ", JetTestsFailed)
update_reason!(fails, "`wrap` not defined", JetTestsFailed)


nothing

In [48]:
# Packages that have gotten some attention but haven't made a new version that would
# remove the package from the PkgEval list

issues_opened = [
    "LoopFieldCalc" # https://github.com/JuliaLang/julia/issues/53585
    "Elliptic" # https://github.com/JuliaLang/julia/issues/53585
    "ScanByte" # https://github.com/jakobnissen/ScanByte.jl/issues/11
    "StaticCompiler" # https://github.com/tshort/StaticCompiler.jl/issues/155
    "StaticTools" # https://github.com/tshort/StaticCompiler.jl/issues/155
    "DataStructures" # https://github.com/JuliaLang/julia/issues/53590
    "InvertedIndices" # https://github.com/JuliaLang/julia/issues/53591
    "StructTypes" # https://github.com/JuliaData/StructTypes.jl/issues/102
    "HTTP" # https://github.com/JuliaWeb/HTTP.jl/commit/d21ae9448bcc6a0a4a3b2709bcae42edcc04b778
]

likely_tol = [
]

fixed = [
    "OpenSSL" #https://github.com/JuliaWeb/OpenSSL.jl/pull/33
    "Unitful" # https://github.com/JuliaLang/julia/issues/53582
    "CloseOpenIntervals" # https://github.com/JuliaSIMD/CloseOpenIntervals.jl/issues/16
    "ArrayLayouts" # https://github.com/JuliaLinearAlgebra/ArrayLayouts.jl/issues/204
    "PDMats" # https://github.com/JuliaLang/julia/issues/53583
    "ProgressMeter" # fixed on master
    "MaybeInPlace" # https://github.com/SciML/MaybeInplace.jl#fdsfds
    "Compose" # https://github.com/GiovineItalia/Compose.jl/pull/441
    "MaybeInplace" # https://github.com/SciML/MaybeInplace.jl/pull/6
    "Match" # https://github.com/JuliaServices/Match.jl/pull/100
]

ignored_packages = [
]

Any[]

In [49]:
# Total package failures that we haven't categorized

total_unknown(fails)

257

In [50]:
# Unknown failures, these are packages where we haven't yet managed to categorize their failure

fail_unknowns = filter(x -> x.why == Unknown && 
       !(x.package in issues_opened) && 
       !(x.package in likely_tol) && 
       !(x.package in ignored_packages) && 
       !(x.package in fixed) 
      # && x.status == ":crash", 
    , fails).package

print(join(fail_unknowns[1:50], '\n'))

REPL
ConstructionBase
StaticArrayInterface
LLVM
KernelAbstractions
RandomNumbers
RecursiveFactorization
FLoops
LLVMLoopInfo
MathTeXEngine
ImageMagick
LoweredCodeUtils
Revise
MultivariateStats
Sundials
NamedArrays
LazyGrids
Meshes
LazyBandedMatrices
ScopedValues
ContinuumArrays
IterationControl
FunctionProperties
Catlab
Metis
SemiseparableMatrices
MLJBalancing
PropertyDicts
GraphMakie
Aqua
VariantCallFormat
RegistryTools
CellListMap
LightOSM
QUBOTools
Legolas
TableTransforms
QuantumOpticsBase
CodecBGZF
ColPack
Parquet2
Variography
EasyJobsBase
Metaheuristics
ComplexityMeasures
GeoStatsFunctions
Gen
Umlaut
OpenTelemetryAPI
SimpleWorkflows

In [31]:
fail_type = filter(x -> x.why == MakeSeedRandom,  
      fails).package

print(join(fail_type, '\n'))

Knet
FixedEffectModels
JOLI
CUDAKernels
GeoStatsProcesses
ChowLiuTrees
MatrixEnsembles
GraphSignals
SimDNA
GeoStatsTransforms
GNSSDecoder
FluxExtra
QXContexts
FastaLoader
JetPack
ChemistryFeaturization
GeometricFlux
GreedyAlign
IPMeasures
GraphNetCore
SurrogatesFlux
PositionVelocityTime
QXTools
AtomGraphs
TopoChains
LegolasFlux
ChaChaCiphers
SymArrays
PlanktonIndividuals
SimSpread
StLifeIns
EvidentialFlux
FluxPrune
LiteQTL
SpinGlassPEPS
BVHFiles
TeneT
KnetNLPModels
TransformerBlocks
SMLMBoxer
GumbelSoftmax
AztecDiamonds
DeepUnfoldedCDLMotif
Swalbe
MetidaCu
Hopfields
OperatorLearning
KnetLayers
ImageQuilting
VisualizeMotifs
CDLmotif
MatrixMerge
Flux3D
SystemBenchmark
ConstraintLearning
RAPIDS
MOTIFs
Juice
HighDimPDE
YaoQX
SurrogatesMOE
GNSSReceiver
DiffRaster2D
CorrelationTrackers
GraphNets
BinomialSynapses
PPLM
MaterialReconstruction
TrillionDollarWords
MDToolbox
FastTabular
MRIsim
AlphaZero
NeuralGraphPDE
ParameterEstimocean
UnfoldCDL
TaijaPlotting

In [30]:
# Categorize

z = []
for i in instances(FailureType)
    n = count(row -> row.why == i, eachrow(fails))
    n > 0 && push!(z, (n, i))
end
sort!(z; rev=true)

31-element Vector{Any}:
 (265, Unknown)
 (90, TimeLimit)
 (77, MakeSeedRandom)
 (49, TriangularNTypeParams)
 (44, ApproxError)
 (26, WithNotDefinedSuppressor)
 (15, DocNothing)
 (14, SubArrayConvert)
 (11, CSVReg)
 (10, CompleteRemotePkg)
 (10, NewAmbiguity)
 (8, InferenceWorld)
 (7, NewExceptionType)
 ⋮
 (4, VisualRegression)
 (4, TestAmbiguities)
 (3, OldCompat)
 (3, BuildError)
 (2, InfStackOverflow)
 (2, GCPreserveEnd)
 (2, Inactivity)
 (1, LogLimit)
 (1, MissingDependency)
 (1, UnexpectedPass)
 (1, DocTest)
 (1, InferredFailure)