### Importing Modules and Model File

In [94]:
begin
    using Pkg

    # Use the project environment located in the current working directory.
    Pkg.activate(".")
    Pkg.resolve()

    # Add Flux only when the active project does not declare it yet. An unconditional
    # `Pkg.add` on every run rewrites Project.toml/Manifest.toml each time the notebook
    # is executed.
    haskey(Pkg.project().dependencies, "Flux") || Pkg.add("Flux")

    # Make sure everything recorded in the manifest is installed.
    Pkg.instantiate()
end

include("modules/DataIO.jl")
include("modules/Classification.jl")
include("modules/Model.jl")

using FLoops,
    FASTX,
    LinearAlgebra,
    Normalization,
    Statistics,
    Plots,Flux

using
    .DataIO,
    .Model,
    .Classification

In [None]:
# Input FASTA file and the on-disk cache holding the trained model.
fasta::String = "/home/salipe/Desktop/datasets/test_voc/test/Alpha.fasta"
modelCachedFile::String = "$(pwd())/.project_cache/trained_model.dat"

# (identifier, raw sequence bytes) pairs read from the FASTA file.
sequences = Vector{Tuple{String,Base.CodeUnits}}()

# The do-block form closes the reader even if parsing throws; iterating directly over
# `open(FASTAReader, fasta)` never closes the underlying file handle.
open(FASTAReader, fasta) do reader
    for record in reader
        bases = sequence(String, record)
        # Replace '/' and '|' in the identifier with '_'.
        name = replace(identifier(record), r"\/|\|" => "_")
        push!(sequences, (name, codeunits(bases)))
        break  # only the first record is used
    end
end

# `load_cache` is annotated as possibly returning `nothing`; `@show` makes that visible.
model::Union{Nothing,Dict{String,Tuple{BitArray,Vector{Vector{Float64}},Vector{String}}}} = DataIO.load_cache(modelCachedFile)
@show model

### Processing model data

In [None]:
finalHist::Union{Nothing,Vector{UInt16}} = nothing

# `model` is declared as possibly `nothing`; fail with a readable message instead of a
# MethodError when it is iterated below.
isnothing(model) && error("trained model cache could not be loaded (got `nothing`)")

# Start with an all-true mask as long as the longest per-class mask; every class then
# clears the positions it does not mark. Positions past a shorter class mask are left
# untouched by that class.
finalMask::BitArray = trues(maximum(length(marked) for (_, (marked, _, _)) in model))

plt = plot(title="Points")

for (key, (marked, _, _)) in model

    cache_path = "$(pwd())/.project_cache/$(key)_outmask.dat"
    cache::Union{Nothing,Tuple{String,Tuple{Vector{UInt16},BitArray},Vector{String}}} = DataIO.load_cache(cache_path)
    isnothing(cache) && error("no cached histogram for \"$(key)\" at $(cache_path)")
    # plot!(marked)

    # AND this class' mask into the shared mask in place (no temporary copies).
    marked_len = length(marked)
    @views finalMask[1:marked_len] .&= marked

    current_hist = cache[2][1]
    if isnothing(finalHist)
        finalHist = current_hist
    else
        hist_len = max(length(finalHist), length(current_hist))

        # Pad the shorter histogram with ones, the neutral element of the product.
        padded_final = [finalHist; ones(UInt16, hist_len - length(finalHist))]
        padded_current = [current_hist; ones(UInt16, hist_len - length(current_hist))]

        # Element-wise product computed in UInt32 and clamped to typemax(UInt16):
        # a plain UInt16 multiplication wraps around silently on overflow.
        wide = UInt32.(padded_final) .* UInt32.(padded_current)
        finalHist = UInt16.(min.(wide, typemax(UInt16)))
    end

end

# Global scanner state (`start`, `current`); kept because other cells may read or
# reset these names.
start = 0
current = false


# plot!(finalMask)
# plot(plt)

In [None]:
"""
    maskruns(mask) -> Vector{UnitRange{Int64}}

Return the index ranges of every maximal run of consecutive `true` values in `mask`,
in order of appearance. A run that reaches the last element is closed at
`length(mask)`. An all-`false` or empty mask yields an empty vector.

All scanner state is local, so the result does not depend on global variables left
behind by other cells.
"""
function maskruns(mask::AbstractArray{Bool})
    runs = Vector{UnitRange{Int64}}()
    runstart = 0  # 0 means "currently not inside a run"
    for (i, bit) in enumerate(mask)
        if bit && runstart == 0
            runstart = i
        elseif !bit && runstart != 0
            push!(runs, runstart:i-1)
            runstart = 0
        end
    end
    # Close a run that extends to the end of the mask.
    runstart != 0 && push!(runs, runstart:length(mask))
    return runs
end

teste = maskruns(finalMask)

@show teste

In [None]:
"""
    regioncounts(seq, marked, kmers) -> Vector{Int16}

Walk the runs of `true` bits in `marked` (limited to the first
`min(length(seq), length(marked))` positions) and, for each run, store the value of
`Classification.countPatterns` applied to the matching slice of `seq` and `kmers`.

A run that reaches the end of the limited mask is handled specially: when it is shorter
than `length(kmers[1])`, its start is moved back by that length (never before index 1)
before counting.

Throws `InexactError` if a count does not fit in `Int16`.
"""
function regioncounts(seq, marked, kmers)
    counts = Int16[]
    inputlen = min(length(seq), length(marked))
    runstart = 0  # 0 means "currently not inside a run"

    for (i, bit) in enumerate(@view marked[1:inputlen])
        if bit && runstart == 0
            runstart = i
        elseif !bit && runstart != 0
            hits = @views Classification.countPatterns(seq[runstart:i-1], kmers)
            push!(counts, convert(Int16, hits))
            runstart = 0
        end
    end

    if runstart != 0
        k = length(kmers[1])
        # Measure the range itself: `length([a:b])` is always 1 because it measures a
        # one-element vector that contains the range.
        if length(runstart:inputlen) < k
            # Clamp so the widened window never starts before the first position.
            runstart = max(1, runstart - k)
        end

        hits = @views Classification.countPatterns(seq[runstart:inputlen], kmers)
        push!(counts, convert(Int16, hits))
    end

    return counts
end

chart = Vector{Tuple{String,Vector{Int16}}}()

isempty(sequences) && error("no sequence was read from the FASTA file")
(id, inputSequence) = sequences[1]

for (key, (marked, _, kmers)) in model
    push!(chart, (key, regioncounts(inputSequence, marked, kmers)))
end


@show chart


In [None]:

max_len = maximum(entry -> length(entry[2]), chart)

# One 1×max_len row per chart entry, zero-padded on the right to a common length.
x = [
    reshape([counts; zeros(Int16, max_len - length(counts))], (1, max_len)) for
    (_, counts) in chart
]

# N = batch size, L = padded sequence length, C = number of classes. C follows the
# number of chart entries, so every slice of `logits` is written below: no
# uninitialised memory is read and no out-of-bounds write can happen.
N, L, C = 1, max_len, length(x)
logits = Array{Int16}(undef, N, L, C)

for (class, row) in enumerate(x)
    logits[:, :, class] = row
end


# Random class index per position (placeholder labels drawn with `rand`).
y_indices = rand(1:C, N, L)

# Convert to one-hot
y_true = Flux.onehotbatch(vec(y_indices), 1:C)  # Shape (C, N*L)
y_true = reshape(y_true, C, N, L)  # Shape (C, N, L)
y_true = permutedims(y_true, (2, 3, 1))  # Final shape (N, L, C)

# Compute loss. The integer counts are converted to floating point explicitly before
# the log-softmax over the class dimension.
log_probs = Flux.logsoftmax(Float64.(logits); dims=3)
# NOTE(review): `mean` runs over all N*L*C entries, so this value is 1/C of the usual
# per-position cross-entropy (sum over classes, mean over positions) — confirm intent.
loss = -mean(y_true .* log_probs)




2.081078681494729

In [116]:
@show y_true

1×35×5 Array{Bool, 3}:
[:, :, 1] =
 1  0  0  1  0  1  0  0  0  0  0  0  1  …  0  1  0  0  1  0  0  0  0  0  0  0

[:, :, 2] =
 0  0  0  0  1  0  0  0  0  0  0  0  0  …  1  0  1  1  0  1  0  0  0  0  0  0

[:, :, 3] =
 0  1  0  0  0  0  1  0  0  0  0  0  0  …  0  0  0  0  0  0  0  0  0  1  0  0

[:, :, 4] =
 0  0  1  0  0  0  0  0  1  1  1  0  0  …  0  0  0  0  0  0  0  0  1  0  1  1

[:, :, 5] =
 0  0  0  0  0  0  0  1  0  0  0  1  0  …  0  0  0  0  0  0  1  1  0  0  0  0