src/utils.jl

"helper function to calculate a run-length encoding of a tuple type"
Base.@pure function runlength(::Type{T}) where {T <: Tuple}
    rle = Tuple{Type, Int}[]
    fieldcount(T) == 0 && return rle
    curT = fieldtype(T, 1)
    prevT = curT
    len = 1
    for i = 2:fieldcount(T)
        @inbounds curT = fieldtype(T, i)
        if curT === prevT
            len += 1
        else
            push!(rle, (prevT, len))
            prevT = curT
            len = 1
        end
    end
    push!(rle, (curT, len))
    return rle
end

"""
    Tables.eachcolumn(f, sch::Tables.Schema{names, types}, x::Union{Tables.AbstractRow, Tables.AbstractColumns})
    Tables.eachcolumn(f, sch::Tables.Schema{names, nothing}, x::Union{Tables.AbstractRow, Tables.AbstractColumns})

Takes a function `f`, table schema `sch`, `x`, which is an object that satisfies the `AbstractRow` or `AbstractColumns` interfaces;
it generates calls to get the value for each column (`Tables.getcolumn(x, nm)`) and then calls `f(val, index, name)`, where `f` is the
user-provided function, `val` is the column value (`AbstractRow`) or entire column (`AbstractColumns`), `index` is the column index as an `Int`,
and `name` is the column name as a `Symbol`.

An example using `Tables.eachcolumn` is:
```julia
rows = Tables.rows(tbl)
sch = Tables.schema(rows)
if sch === nothing
    state = iterate(rows)
    state === nothing && return
    row, st = state
    sch = Tables.schema(Tables.columnnames(row), nothing)
    while state !== nothing
        Tables.eachcolumn(sch, row) do val, i, nm
            bind!(stmt, i, val)
        end
        state = iterate(rows, st)
        state === nothing && return
        row, st = state
    end
else
    for row in rows
        Tables.eachcolumn(sch, row) do val, i, nm
            bind!(stmt, i, val)
        end
    end
end
```
Note in this example we account for the input table potentially returning `nothing` from `Tables.schema(rows)`; in that case, we start
iterating the rows, and build a partial schema using the column names from the first row `sch = Tables.schema(Tables.columnnames(row), nothing)`,
which is valid to pass to `Tables.eachcolumn`.
"""
function eachcolumn end

quot(s::Symbol) = Meta.QuoteNode(s)
quot(x::Int) = x

@inline function eachcolumn(f::F, sch::Schema{names, types}, row::T) where {F, names, types, T}
    N = fieldcount(types)
    if N <= SPECIALIZATION_THRESHOLD
        Base.@nexprs 100 i -> begin
            if i <= N
                f(getcolumn(row, fieldtype(types, i), i, names[i]), i, names[i])
            end
        end
    else
        for (i, nm) in enumerate(names)
            f(getcolumn(row, fieldtype(types, i), i, nm), i, nm)
        end
    end
    return
end

@inline function eachcolumn(f::F, sch::Schema{names, nothing}, row::T) where {F, names, T}
    N = length(names)
    if N <= SPECIALIZATION_THRESHOLD
        Base.@nexprs 100 i -> begin
            if i <= N
                f(getcolumn(row, names[i]), i, names[i])
            end
        end
    else
        for (i, nm) in enumerate(names)
            f(getcolumn(row, nm), i, nm)
        end
    end
    return
end

@inline function eachcolumn(f::F, sch::Schema{nothing, nothing}, row::T) where {F, T}
    for (i, nm) in enumerate(sch.names)
        f(getcolumn(row, nm), i, nm)
    end
    return
end

# these are specialized `eachcolumn`s where we also want
# the indexing of `columns` to be constant propagated, so it needs to be returned from the generated function
@inline function eachcolumns(f::F, sch::Schema{names, types}, row::T, columns::S, args...) where {F, names, types, T, S}
    N = fieldcount(types)
    if N <= SPECIALIZATION_THRESHOLD
        Base.@nexprs 100 i -> begin
            if i <= N
                f(getcolumn(row, fieldtype(types, i), i, names[i]), i, names[i], columns[i], args...)
            end
        end
    else
        for (i, nm) in enumerate(names)
            f(getcolumn(row, fieldtype(types, i), i, nm), i, nm, columns[i], args...)
        end
    end
    return
end

@inline function eachcolumns(f::F, sch::Schema{names, nothing}, row::T, columns::S, args...) where {F, names, T, S}
    N = length(names)
    if N <= SPECIALIZATION_THRESHOLD
        Base.@nexprs 100 i -> begin
            if i <= N
                f(getcolumn(row, names[i]), i, names[i], columns[i], args...)
            end
        end
    else
        for (i, nm) in enumerate(names)
            f(getcolumn(row, nm), i, nm, columns[i], args...)
        end
    end
    return
end

@inline function eachcolumns(f::F, sch::Schema{nothing, nothing}, row::T, columns::S, args...) where {F, T, S}
    for (i, nm) in enumerate(sch.names)
        f(getcolumn(row, nm), i, nm, columns[i], args...)
    end
    return
end

"""
    rowmerge(row, other_rows...)
    rowmerge(row; fields_to_merge...)

Return a `NamedTuple` by merging `row` (an `AbstractRow`-compliant value) with `other_rows`
(one or more `AbstractRow`-compliant values) via `Base.merge`. This function is similar to
`Base.merge(::NamedTuple, ::NamedTuple...)`, but accepts `AbstractRow`-compliant values
instead of `NamedTuple`s.

A convenience method `rowmerge(row; fields_to_merge...) = rowmerge(row, fields_to_merge)`
is defined that enables the `fields_to_merge` to be specified as keyword arguments.
"""
rowmerge(row, other) = merge(_row_to_named_tuple(row), _row_to_named_tuple(other))
rowmerge(row, other, more...) = merge(_row_to_named_tuple(row), rowmerge(other, more...))
rowmerge(row; fields_to_merge...) = rowmerge(row, values(fields_to_merge))

_row_to_named_tuple(row::NamedTuple) = row
_row_to_named_tuple(row) = NamedTuple(Row(row))

"""
    ByRow <: Function

`ByRow(f)` returns a function which applies function `f` to each element in a vector.

`ByRow(f)` can be passed two types of arguments:
- One or more 1-based `AbstractVector`s of equal length: In this case the returned value
is a vector resulting from applying `f` to elements of passed vectors element-wise.
Function `f` is called exactly once for each element of passed vectors (as opposed to `map`
which assumes for some types of source vectors (e.g. `SparseVector`) that the
wrapped function is pure, and may call the function `f` only once for multiple
equal values.
- A `Tables.ColumnTable` holding 1-based columns of equal length: In this case the function
`f` is passed a `NamedTuple` created for each row of passed table.

The return value of `ByRow(f)` is always a vector.

`ByRow` expects that at least one argument is passed to it and in the case of
`Tables.ColumnTable` passed that the table has at least one column. In some
contexts of operations on tables (for example `DataFrame`) the user might want
to pass no arguments (or an empty `Tables.ColumnTable`) to `ByRow`. This case
must be separately handled by the code implementing the logic of processing the
`ByRow` operation on this specific parent table (the reason is that passing such
arguments to `ByRow` does not allow it to determine the number of rows of the
source table).

# Examples
```
julia> Tables.ByRow(x -> x^2)(1:3)
3-element Vector{Int64}:
 1
 4
 9

julia> Tables.ByRow((x, y) -> x*y)(1:3, 2:4)
3-element Vector{Int64}:
  2
  6
 12

julia> Tables.ByRow(x -> x.a)((a=1:2, b=3:4))
2-element Vector{Int64}:
 1
 2

 julia> Tables.ByRow(x -> (a=x.a*2, b=sin(x.b), c=x.c))((a=[1, 2, 3],
                                                         b=[1.2, 3.4, 5.6],
                                                         c=["a", "b", "c"]))
3-element Vector{NamedTuple{(:a, :b, :c), Tuple{Int64, Float64, String}}}:
 (a = 2, b = 0.9320390859672263, c = "a")
 (a = 4, b = -0.2555411020268312, c = "b")
 (a = 6, b = -0.6312666378723216, c = "c")

```
"""
struct ByRow{T} <: Function
    fun::T
end

# invoke the generic AbstractVector function to ensure function is called
# exactly once for each element
function (f::ByRow)(cols::AbstractVector...)
    if !(all(col -> ==(length(first(cols)))(length(col)) && firstindex(col) == 1, cols))
        throw(ArgumentError("All passed vectors must have the same length and use 1-based indexing"))
    end
    return invoke(map,
                  Tuple{typeof(f.fun), ntuple(i -> AbstractVector, length(cols))...},
                  f.fun, cols...)
end

function (f::ByRow)(table::ColumnTable)
    if !(all(col -> ==(length(first(table)))(length(col)) && firstindex(col) == 1, table))
        throw(ArgumentError("All passed vectors must have the same length and use 1-based indexing"))
    end
    return [f.fun(nt) for nt in Tables.namedtupleiterator(table)]
end

(f::ByRow)() = throw(ArgumentError("no arguments passed"))
(f::ByRow)(::NamedTuple{(), Tuple{}}) =
    throw(ArgumentError("no columns passed in Tables.ColumnTable"))