# PkgEvalAnalysis

Latest pkgeval: https://github.com/JuliaCI/NanosoldierReports/blob/master/pkgeval/by_hash/433b84b_vs_134c343/report.md

In [21]:
using DataFrames, Feather

In [22]:
@enum FailureType begin
    # General ones
    Unknown
    InferredFailure
    PrintingChange
    TestAmbiguities
    NewAmbiguity
    ApproxError
    DocTest
    NewExceptionType
    DownloadError
    UnexpectedPass
    BadBoy
    SyntaxError
    VisualRegression
    MissingDep
    BuildError
    UnsatReq
    BSONDataTypeField
    
    # Specific
    GeneratedASTImpure
    VarargType
    AtomicMacro
    InferenceStackOverFlow
    OldRevise
    DataTypeField
    IllegalSparseBuffers
    CxxWrap
    JlConcreteType
    CheckNonSingular
end


In [23]:
# Unpack the data unless it has already been unpacked

if !isdir("data")
    run(`tar -xvf data.tar.xz`)
end
primary  = Feather.read("data/primary.feather");
against = Feather.read("data/against.feather");

In [24]:
# Join the primary and against data

package_results = leftjoin(primary, against,
     on=:uuid, makeunique=true, indicator=:source);

In [25]:
# These are packages that started failing when the RNG stream changed.
# These might have other problems but oh well...

rng_fails = 
["ACTRModels"
,"ADCME"
,"ARCHModels"
,"AbstractMCMC"
,"AdaptiveResonance"
,"Agents"
,"AllanDeviations"
,"Alpine"
,"ApproxBayes"
,"Arpack"
,"BandedMatrices"
,"BasicBSpline"
,"BasicInterpolators"
,"BayesNets"
,"BayesianExperiments"
,"CIAOAlgorithms"
,"CalculatedABC"
,"CalibrationTests"
,"Cassette"
,"CausalInference"
,"ClinicalTrialUtilities"
,"ConsistencyResampling"
,"Convex"
,"Convex1d"
,"CoordinateDescent"
,"Cropbox"
,"CrossEntropyMethod"
,"CumulantsFeatures"
,"DIVAnd"
,"DatagenCopulaBased"
,"Deconvolution"
,"DensityRatioEstimation"
,"DifferentialEvolutionMCMC"
,"DiscreteEvents"
,"DisjointCliqueCover"
,"Distributions"
,"DynamicHMC"
,"EMpht"
,"Equations"
,"Evolutionary"
,"ExactWrightFisher"
,"Extremes"
,"FINUFFT"
,"Faker"
,"Ferrite"
,"FeynmanKacParticleFilters"
,"ForwardDiff"
,"Fredholm"
,"FunctionWrappers"
,"GEEBRA"
,"GLM"
,"GSL"
,"GaussianFilters"
,"GaussianProcesses"
,"GeoEstimation"
,"GeoSimulation"
,"GeoStatsBase"
,"GeoTables"
,"HomotopyContinuation"
,"HypothesisTests"
,"IRKGaussLegendre"
,"ImageFeatures"
,"Infinity"
,"InterpolatedPDFs"
,"InvariantMeasures"
,"IterativeSolvers"
,"KLDivergences"
,"Kalman"
,"KissABC"
,"Knet"
,"LSHFunctions"
,"LorentzVectors"
,"LowLevelParticleFilters"
,"LowRankApprox"
,"LsqFit"
,"Luxor"
,"MCHammer"
,"MLDataPattern"
,"MLJScientificTypes"
,"MagnitudeDistributions"
,"MathOptSetDistances"
,"MatrixPencils"
,"MaximumLikelihoodProblems"
,"Meshes"
,"MinimalRLCore"
,"ModelingToolkit"
,"MonteCarloMeasurements"
,"NMF"
,"NaiveBayes"
,"NeXLMatrixCorrection"
,"Neighborhood"
,"NiLang"
,"Noise"
,"NonUniformRandomVariateGeneration"
,"NumericalAlgorithms"
,"OnlineStats"
,"OpenQuantumBase"
,"OpenQuantumSystems"
,"Optim"
,"PDSampler"
,"Perceptrons"
,"PermutationGroups"
,"Petri"
,"Photometry"
,"Plots"
,"PointProcessInference"
,"Poltergeist"
,"QuantumOpticsBase"
,"QuartetNetworkGoodnessFit"
,"RandomExtensions"
,"RecursiveFactorization"
,"ResettableStacks"
,"RiemannTheta"
,"RobotDynamics"
,"RobustAdaptiveMetropolisSampler"
,"SDDP"
,"ScatteredInterpolation"
,"ScoreDrivenModels"
,"SequentialSamplingModels"
,"SigmaRidgeRegression"
,"Simulate"
,"SpatialEcology"
,"StatsBase"
,"StochasticOptimalTransport"
,"StratiGraphics"
,"StressTest"
,"SymmetricTensors"
,"TabularMakie"
,"TexTables"
,"UncertaintyQuantification"
,"ValueOrientedRiskManagementInsurance"
,"Variography"
,"VoronoiCells"
,"VoronoiDelaunay"
,"WoodburyMatrices"
,"ZigZagBoomerang"
,""
,"Bukdu"
,"CorrelationFunctions"
,"FWFTables"
,"H3"
,"Jive"
,"LOLTools"
,"Millboard"
,"XUnit"
,""
,"DSP"
,"DataInterpolations"
,"NMRTools"
,"NearestNeighborDescent"
,"OptimalTransport"
,"Pitaya"
,"SCIP "
,"Tar "
,""
,"JetPackWaveFD"
,""
,""
,"AbstractOperators"
,"BayesianLinearRegression"
,"Controlz"
,"FilesystemDatastructures"
,"GAlgebra"
,"GeostatInversion"
,"IncompleteLU"
,"JuMP"
,"MCMCDiagnostics"
,"MixedModelsSim"
,"Nabla"
,"PLCTag"
,"PProf"
,"PSDMatrices"
,"PencilArrays"
,"PowerModels"
,"QuadEig"
,"RandomMatrices"
,"Reactive"
,"RemoveLFS"
,"Shapley"
,"SpatialJackknife"
,"TextClassification"] |> Set;

In [26]:
# Filter out packages that started failing but didn't start failing when the RNG stream changed

fails = filter(test->test.source == "both" &&
                 test.status != test.status_1 &&
                 !(test.name in rng_fails) &&
                 test.status in (":fail", ":kill"), package_results)

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

In [27]:
# Some utility functions

query(fails, s) = filter(row -> occursin(s, row[:log]), fails)    

function update_reason!(fails, needle, why)
    idxs = findall(row -> occursin(needle, row), fails.log)
    fails.why[idxs] .= why
    return fails
end

total_unknown(fails) = count(x -> x.why == Unknown, eachrow(fails))

total_unknown (generic function with 1 method)

In [28]:
query(fails, "Evaluated: 16.137131899747356 ≈ 23.41850406193619 (atol=0.005, rtol=0.0)")

Unnamed: 0_level_0,julia,name,uuid,version,status,reason,duration,log,julia_1,name_1
Unnamed: 0_level_1,String,String,String,String,String,String,Float64,String?,String?,String?


In [29]:
# Here we pattern match certain test errors and categorize them based on that.

# Generic ones
update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "detect_ambiguities(", TestAmbiguities)

update_reason!(fails, "Expression: all_doctests()", DocTest)
update_reason!(fails, "Error: doctest failure in ", DocTest)

update_reason!(fails, "does not match inferred return type", InferredFailure);
update_reason!(fails, "Expression: isapprox", ApproxError)
update_reason!(fails, r"Expression: (\S*) ≈ (\S*)", ApproxError) 
update_reason!(fails, "Expression: ≈(", ApproxError)
    
update_reason!(fails, "is ambiguous. Candidates:", NewAmbiguity)
update_reason!(fails, " ambiguities found", NewAmbiguity)

update_reason!(fails, "Unsatisfiable requirements detected for package", UnsatReq)



update_reason!(fails, "Evaluated: \"", PrintingChange)
update_reason!(fails, "Log Test Failed at", PrintingChange)
update_reason!(fails, "Expression: occursin(r\"", PrintingChange)
update_reason!(fails, "Expression: startswith(", PrintingChange)
update_reason!(fails, r"Expression: (\N.*?) == ", PrintingChange)
update_reason!(fails, "Evaluated: occursin(", PrintingChange)
update_reason!(fails, "Evaluated: endswith(", PrintingChange)
update_reason!(fails, "- DIFF ------------------------", PrintingChange)
update_reason!(fails, "LoadError: syntax", SyntaxError)

update_reason!(fails, "Image did not match reference image", VisualRegression)

update_reason!(fails, "      Thrown: ", NewExceptionType)
update_reason!(fails, "The requested URL returned error", DownloadError)
update_reason!(fails, "gzip: stdin: not in gzip format", DownloadError)
update_reason!(fails, "Unexpected Pass", UnexpectedPass)

update_reason!(fails, "Error building ", BuildError)



# Specific ones for this release




update_reason!(fails, "Illegal buffers for SparseMatrixCSC construction", IllegalSparseBuffers)
update_reason!(fails, "The function body AST defined by ", GeneratedASTImpure)

update_reason!(fails, "DataType has no field", DataTypeField)
update_reason!(fails, "type DataType has no field ninitialized", DataTypeField)
update_reason!(fails, "@ BSON ~/.julia/packages/BSON/aEqHo/src/write.jl:46", BSONDataTypeField)

update_reason!(fails, "TypeError: in <:, expected Type, got Vararg", VarargType)


update_reason!(fails, "both CUDA and Base export \"@atomic\"", AtomicMacro)

update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "intersect_aside", InferenceStackOverFlow);

update_reason!(fails, r"Package (\S.*) not found in current path:", MissingDep);
update_reason!(fails, "register_julia_module at /home/pkgeval/.julia/packages/CxxWrap/OcN1Z/src/CxxWrap.jl:405 [inlined]", CxxWrap);
update_reason!(fails, "C++ exception while wrapping module StdLib: invalid subtyping in definition of StdString with supertype CppBasicString", CxxWrap);



update_reason!(fails, "Assertion `jl_is_concrete_type(jfty)' failed.", JlConcreteType)

update_reason!(fails, "MethodError: no method matching checknonsingular(::Int64, ::Val{true})", CheckNonSingular)
update_reason!(fails, "NMFk ~/.julia/packages/NMFk/ZRNWT/src/NMFkHelpers.jl:454", BadBoy)

update_reason!(fails, "ERROR: LoadError: UndefVarError: OrcJIT not defined", BadBoy)




update_reason!(fails, "MethodError: no method matching Base.TOMLCache()", OldRevise)


nothing

In [35]:
# Packages that have gotten  

issues_opened = [
 "UnsteadyFlowSolvers",  # https://github.com/KiranUofG/UnsteadyFlowSolvers.jl/pull/46
 "Arpack", # https://github.com/JuliaLinearAlgebra/Arpack.jl/issues/132
 "RecursiveFactorization", #https://github.com/YingboMa/RecursiveFactorization.jl/issues/26
 "Phylo", # https://github.com/JuliaLang/julia/issues/41425
 "EmojiSymbols", # https://github.com/wookay/EmojiSymbols.jl/commit/35f97e0614d7d42cd7113263d7c841d5a659e841#commitcomment-52862922
 "SimpleTraits", # https://github.com/mauro3/SimpleTraits.jl/pull/76
 "SortingAlgorithms", # https://github.com/JuliaCollections/SortingAlgorithms.jl/issues/45
 "MemPool", # https://github.com/JuliaData/MemPool.jl/pull/53
 "GenericSchur", # https://github.com/RalphAS/GenericSchur.jl/issues/5
 "ArnoldiMethod", # https://github.com/RalphAS/GenericSchur.jl/issues/5
 "MeasureTheory", # https://github.com/cscherrer/MeasureTheory.jl/pull/112
 "CustomUnitRanges", #https://github.com/JuliaLang/julia/pull/40632
    
 "MatrixFactorizations", # https://github.com/JuliaMatrices/MatrixFactorizations.jl/issues/26
 "Symbolics", # https://github.com/JuliaSymbolics/Symbolics.jl/issues/290
 "GeometricProblems", # https://github.com/JuliaSymbolics/Symbolics.jl/issues/290
 "NMFk", # https://github.com/TensorDecompositions/NMFk.jl/issues/18
 "ExactConversions", # https://github.com/FedericoStra/ExactConversions.jl/issues/2
 "KernelAbstractions",
]

likely_tol = [
    "Caesar",
    "HOODESolver",
    "QPDAS",
    "Bridge",
    "MultivariateStats",
    "NeuralArithmetic",
    "TimeseriesPrediction",
    "RRRMC",
];

fixed = [
    "MemPool",
    "FileTrees",
    "DiffEqJump", # via   RecursiveFactorization fixed 
    "NDTensors", # https://github.com/JuliaLang/julia/pull/41469
    "QXTns",  # https://github.com/JuliaLang/julia/pull/41469
    "DECAES", # https://github.com/jondeuce/DECAES.jl/commit/2a2492aab7a45c18e08a11476a8cbfc8505b70dd
    "StringAnalysis", # https://github.com/zgornel/StringAnalysis.jl/pull/14
]




ignored_packages = [
    # https://github.com/JuliaLang/julia/issues/41446
    "Adapode",
    "AdaptiveDistanceFields",
    "DobotMagician",
    "TensorValues",
    "Ghost",
    "Yota",
    "InteractiveCodeSearch",
    ########
    # Internals
    "Cthulhu",
    "MethodAnalysis",
    "Enzyme",
    ########
    # Deprecated
]

10-element Vector{String}:
 "Adapode"
 "AdaptiveDistanceFields"
 "DobotMagician"
 "TensorValues"
 "Ghost"
 "Yota"
 "InteractiveCodeSearch"
 "Cthulhu"
 "MethodAnalysis"
 "Enzyme"

In [31]:
# Total package failures that we haven't categorized

total_unknown(fails)

161

# Examples

Here are some examples of how one might do queries and categorize errors

In [32]:
# Unknown failures

filter(x -> x.why == Unknown && 
       !(x.name in issues_opened) && 
       !(x.name in likely_tol) && 
       !(x.name in ignored_packages) && 
       !(x.name in fixed), 
    fails)

Unnamed: 0_level_0,julia,name,uuid
Unnamed: 0_level_1,String,String,String
1,"v""1.7.0-beta2-77653d63646""",Faust,"UUID(""b0df9959-3f7b-4e37-9fbf-63cb195e8303"")"
2,"v""1.7.0-beta2-77653d63646""",XLSX,"UUID(""fdbf4ff8-1666-58a4-91e7-1b58723a45e0"")"
3,"v""1.7.0-beta2-77653d63646""",DataAugmentation,"UUID(""88a5189c-e7ff-4f85-ac6b-e6158070f02e"")"
4,"v""1.7.0-beta2-77653d63646""",FlashWeave,"UUID(""2be3f83a-7913-5748-9f20-7d448995b934"")"
5,"v""1.7.0-beta2-77653d63646""",Adversarial,"UUID(""8264478e-c961-11e9-041d-dda11a3b20ed"")"
6,"v""1.7.0-beta2-77653d63646""",YaoTargetQobj,"UUID(""be2ed200-ac97-4d62-828c-ed82e987dbf4"")"
7,"v""1.7.0-beta2-77653d63646""",AddLatest,"UUID(""404e4c7d-d40a-4c57-9c40-90ee175e5491"")"
8,"v""1.7.0-beta2-77653d63646""",DeferredFutures,"UUID(""d2772ebe-fee8-5277-ba6e-705bb5633645"")"
9,"v""1.7.0-beta2-77653d63646""",CoulombIntegrals,"UUID(""42e5845a-f9f1-11e8-0afe-795ea56ed153"")"
10,"v""1.7.0-beta2-77653d63646""",Remark,"UUID(""79b45036-8e38-5d04-8f49-b9fb23ff5a0d"")"


In [33]:
# Categorize

z = []
for i in instances(FailureType)
    n = count(row -> row.why == i, eachrow(fails))
    n > 0 && push!(z, (n, i))
end
sort!(z; rev=true)

22-element Vector{Any}:
 (161, Unknown)
 (57, DataTypeField)
 (43, PrintingChange)
 (20, ApproxError)
 (16, CxxWrap)
 (11, IllegalSparseBuffers)
 (11, BSONDataTypeField)
 (10, BadBoy)
 (9, NewAmbiguity)
 (8, VarargType)
 (8, DocTest)
 (7, NewExceptionType)
 (5, GeneratedASTImpure)
 (5, UnsatReq)
 (4, SyntaxError)
 (4, InferredFailure)
 (3, InferenceStackOverFlow)
 (2, CheckNonSingular)
 (2, BuildError)
 (1, AtomicMacro)
 (1, VisualRegression)
 (1, TestAmbiguities)

In [34]:
# Package names of some particular failure

d = filter(x -> x.why == VarargType, fails).name
#d = filter(!in(issues_opened), d)
#d = filter(!in(likely_tol), d)
#join(d, '\n') |> print

8-element Vector{String}:
 "InteractiveCodeSearch"
 "ExprTools"
 "Adapode"
 "TensorValues"
 "AdaptiveDistanceFields"
 "Yota"
 "DobotMagician"
 "Ghost"