# PkgEvalAnalysis

Latest pkgeval: https://s3.amazonaws.com/julialang-reports/nanosoldier/pkgeval/by_hash/fb69baf_vs_40279f9/report.html

Looking at RangeHelpers.

In [1]:
using DataFrames, Feather

┌ Info: Precompiling DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0]
└ @ Base loading.jl:1664
┌ Info: Precompiling Feather [becb17da-46f6-5d3c-ad1b-1c5fe96bc73c]
└ @ Base loading.jl:1664


In [60]:
@enum FailureType begin
    # General ones
    Unknown
    InferredFailure
    PrintingChange
    TestAmbiguities
    NewAmbiguity
    ApproxError
    DocTest
    NewExceptionType
    DownloadError
    UnexpectedPass
    BadBoy
    SyntaxError
    VisualRegression
    MissingDep
    BuildError
    UnsatReq
    BSONDataTypeField
    Belapsed
    StderrCheck
    
    # From PkgEval
    MissingDependency
    Inactivity
    MissingBinary
    Untestable
    LogLimit
    TimeLimit
    Syntax
    
    # Special
    AssertionObvious
    StackExport
    BLASVectorLike
    SortAmbig
    Uncolon
    JlTypeInf
    QuickSortAlg
end


In [8]:
# Unpack the data unless it has already been unpacked

if !isdir("data")
    run(`tar -xvf data.tar.xz`)
end
primary  = Feather.read("data/primary.feather");
against = Feather.read("data/against.feather");

In [10]:
# Join the primary and against data

package_results = leftjoin(primary, against,
     on=:package, makeunique=true, indicator=:source);

In [11]:
# Filter out packages that started failing but didn't start failing when the RNG stream changed

fails = filter(test->test.source == "both" &&
                 test.status != test.status_1 &&
                 test.status in (":fail", ":kill"), package_results)

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

missing_binary_idx     = fails.reason .== ":binary_dependency"
inactivity_idx         = fails.reason .== ":inactivity"
missing_dependency_idx = fails.reason .== ":missing_dependency"
untestable_idx         = fails.reason .== ":untestable"
log_limit_idx          = fails.reason .== ":log_limit"
time_limit_idx         = fails.reason .== ":time_limit"

# Initially we do not know why a package failed
fails.why = fill(Unknown, size(fails, 1));

fails.why[missing_binary_idx]     .= MissingBinary
fails.why[inactivity_idx]         .= Inactivity
fails.why[missing_dependency_idx] .= MissingDependency
fails.why[untestable_idx]         .= Untestable
fails.why[log_limit_idx]          .= LogLimit
fails.why[time_limit_idx]         .= TimeLimit

nothing

In [12]:
# Some utility functions

query(fails, s) = filter(row -> occursin(s, row[:log]), fails)    

function update_reason!(fails, needle, why)
    idxs = findall(row -> occursin(needle, row), fails.log)
    fails.why[idxs] .= why
    return fails
end

total_unknown(fails) = count(x -> x.why == Unknown, eachrow(fails))

total_unknown (generic function with 1 method)

In [68]:
query(fails, "`QuickSortAlg` not defined").package

6-element Vector{String}:
 "CartesianJoin"
 "ExtendableSparse"
 "DLMReader"
 "StatisticalGraphics"
 "IncompleteLU"
 "InMemoryDatasets"

In [65]:
# Here we pattern match certain test errors and categorize them based on that.

# Generic ones
update_reason!(fails, "Test.detect_ambiguities", TestAmbiguities)
update_reason!(fails, "detect_ambiguities(", TestAmbiguities)

update_reason!(fails, "Expression: all_doctests()", DocTest)
update_reason!(fails, "Error: doctest failure in ", DocTest)

update_reason!(fails, "does not match inferred return type", InferredFailure);
update_reason!(fails, "Expression: isapprox", ApproxError)
update_reason!(fails, r"Expression: (\S*) ≈ (\S*)", ApproxError) 
update_reason!(fails, "Expression: ≈(", ApproxError)
    
update_reason!(fails, "is ambiguous. Candidates:", NewAmbiguity)
update_reason!(fails, " ambiguities found", NewAmbiguity)

update_reason!(fails, "Unsatisfiable requirements detected for package", UnsatReq)



update_reason!(fails, "Log Test Failed at", PrintingChange)
update_reason!(fails, "Expression: occursin(r\"", PrintingChange)
update_reason!(fails, "Expression: startswith(", PrintingChange)
update_reason!(fails, r"Expression: (\N.*?) == ", PrintingChange)
update_reason!(fails, "Evaluated: occursin(", PrintingChange)
update_reason!(fails, "Evaluated: endswith(", PrintingChange)
update_reason!(fails, "- DIFF ------------------------", PrintingChange)
update_reason!(fails, "LoadError: syntax", SyntaxError)

update_reason!(fails, "Image did not match reference image", VisualRegression)

update_reason!(fails, "      Thrown: ", NewExceptionType)
update_reason!(fails, "The requested URL returned error", DownloadError)
update_reason!(fails, "gzip: stdin: not in gzip format", DownloadError)
update_reason!(fails, "Unexpected Pass", UnexpectedPass)

update_reason!(fails, "Error building ", BuildError)
update_reason!(fails, "isempty(stderr_content)", StderrCheck)


# Specific ones for this release
update_reason!(fails, "Assertion `obvious_subtype == 3", AssertionObvious)
update_reason!(fails, "Base export \"stack\"; uses of it in module", StackExport)
update_reason!(fails, "ArgumentError: only support vector like inputs", BLASVectorLike)
update_reason!(fails, "MethodError: sort!(::", SortAmbig)
update_reason!(fails, "no method matching uncolon", Uncolon)
update_reason!(fails, "undefined symbol: jl_typeinf_begin", JlTypeInf)
update_reason!(fails, "`QuickSortAlg` not defined", QuickSortAlg)



nothing

In [66]:
# Packages that have gotten  

issues_opened = [
    "Checkpointing" # https://github.com/JuliaGPU/GPUCompiler.jl/issues/361
    "CodeInfoTools.jl" # https://github.com/JuliaCompilerPlugins/CodeInfoTools.jl
    "CompatHelperLocal" # https://gitlab.com/aplavin/compathelperlocal.jl/-/issues/1
    "DataStructures "# https://github.com/JuliaCollections/DataStructures.jl/pull/832
]

likely_tol = [
];

fixed = [
    "DocStringExtensions" # https://github.com/JuliaDocs/DocStringExtensions.jl/pull/137
    "EndpointRanges.jl" # 
]


ignored_packages = [
]

Any[]

In [67]:
# Total package failures that we haven't categorized

total_unknown(fails)

188

# Examples

Here are some examples of how one might do queries and categorize errors

In [70]:
# Unknown failures

filter(x -> x.why == Unknown && 
       !(x.package in issues_opened) && 
       !(x.package in likely_tol) && 
       !(x.package in ignored_packages) && 
       !(x.package in fixed), 
    fails)

Unnamed: 0_level_0,configuration,package,version,status,reason,duration
Unnamed: 0_level_1,String,String,String,String,String,Float64
1,primary,UsingMerge,"v""0.0.5""",:fail,:unknown,22.058
2,primary,DocumentFunction,"v""1.1.0""",:fail,:unknown,20.154
3,primary,PrettyPrinting,"v""0.4.0""",:fail,:unknown,40.305
4,primary,GeneralizedSylvesterSolver,"v""0.1.2""",:fail,:unknown,48.291
5,primary,StructTypes,"v""1.10.0""",:fail,:abort,23.59
6,primary,DiscreteMarkovChains,"v""0.2.1""",:fail,:test_failures,142.094
7,primary,SyncBarriers,"v""0.1.1""",:fail,:unknown,22.571
8,primary,AeroMDAO,"v""0.3.10""",:fail,:unknown,48.501
9,primary,GeoDataFrames,"v""0.3.0""",:fail,:test_failures,57.987
10,primary,GraphMatFun,"v""0.3.1""",:fail,:test_failures,327.333


In [26]:
# Categorize

z = []
for i in instances(FailureType)
    n = count(row -> row.why == i, eachrow(fails))
    n > 0 && push!(z, (n, i))
end
sort!(z; rev=true)

12-element Vector{Any}:
 (215, Unknown)
 (48, PrintingChange)
 (10, AssertionObvious)
 (9, UnsatReq)
 (6, TimeLimit)
 (6, NewExceptionType)
 (5, ApproxError)
 (3, DocTest)
 (2, NewAmbiguity)
 (1, LogLimit)
 (1, MissingBinary)
 (1, Inactivity)

In [29]:
filter(x -> x.why == LogLimit,  fails).package

1-element Vector{String}:
 "ApproxFunFourier"