diff --git a/Project.toml b/Project.toml index fba705b..a500550 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "MAT" uuid = "23992714-dd62-5051-b70f-ba57cb901cac" -version = "0.10.7" +version = "0.11.0" [deps] BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" diff --git a/docs/Project.toml b/docs/Project.toml index 644fb3d..d03159b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,8 +1,7 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MAT = "23992714-dd62-5051-b70f-ba57cb901cac" [compat] Documenter = "1" -Literate = "2" diff --git a/docs/make.jl b/docs/make.jl index baf27b0..e48e71c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,47 +1,13 @@ execute = isempty(ARGS) || ARGS[1] == "run" -org, reps = :JuliaIO, :MAT -eval(:(using $reps)) +org, repo = :JuliaIO, :MAT +eval(:(using $repo)) using Documenter -using Literate # https://juliadocs.github.io/Documenter.jl/stable/man/syntax/#@example-block ENV["GKSwstype"] = "100" ENV["GKS_ENCODING"] = "utf-8" -# generate examples using Literate -lit = joinpath(@__DIR__, "lit") -src = joinpath(@__DIR__, "src") -gen = joinpath(@__DIR__, "src/generated") - -base = "$org/$reps.jl" -repo_root_url = - "https://github.com/$base/blob/main/docs/lit/examples" -nbviewer_root_url = - "https://nbviewer.org/github/$base/tree/gh-pages/dev/generated/examples" -binder_root_url = - "https://mybinder.org/v2/gh/$base/gh-pages?filepath=dev/generated/examples" - - -repo = eval(:($reps)) -DocMeta.setdocmeta!(repo, :DocTestSetup, :(using $reps); recursive=true) - -for (root, _, files) in walkdir(lit), file in files - splitext(file)[2] == ".jl" || continue # process .jl files only - ipath = joinpath(root, file) - opath = splitdir(replace(ipath, lit => gen))[1] - Literate.markdown(ipath, opath; documenter = execute, # run examples - repo_root_url, nbviewer_root_url, binder_root_url) - 
Literate.notebook(ipath, opath; execute = false, # no-run notebooks - repo_root_url, nbviewer_root_url, binder_root_url) -end - - -# Documentation structure -ismd(f) = splitext(f)[2] == ".md" -pages(folder) = - [joinpath("generated/", folder, f) for f in readdir(joinpath(gen, folder)) if ismd(f)] - isci = get(ENV, "CI", nothing) == "true" format = Documenter.HTML(; @@ -52,14 +18,14 @@ format = Documenter.HTML(; ) makedocs(; - modules = [repo], + modules = [MAT], authors = "Contributors", sitename = "$repo.jl", format, pages = [ "Home" => "index.md", + "Object Arrays" => "object_arrays.md", "Methods" => "methods.md", -# "Examples" => pages("examples") ], warnonly = [:missing_docs,], ) diff --git a/docs/src/index.md b/docs/src/index.md index 57355d7..d3169ca 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,3 +10,65 @@ This Julia package [(MAT.jl)](https://github.com/JuliaIO/MAT.jl) provides tools for reading and writing MATLAB format data files in Julia. + +## Basic Usage + +To read a single variable from a MAT file (compressed files are detected and handled automatically): + +```julia +using MAT +file = matopen("matfile.mat") +read(file, "varname") # note that this does NOT introduce a variable ``varname`` into scope +close(file) +``` + +To write a variable to a MAT file: + +```julia +file = matopen("matfile.mat", "w") +write(file, "varname", variable) +close(file) +``` + +To read all variables from a MAT file as a Dict: + +```julia +vars = matread("matfile.mat") +``` + +To write a Dict to a MAT file, using its keys as variable names. 
+The `compress` argument is optional, and compression is off by default: + +```julia +matwrite("matfile.mat", Dict( + "myvar1" => 0, + "myvar2" => 1 +); compress = true) +``` + +To write in MATLAB v4 format: + +```julia +matwrite("matfile.mat", Dict( + "myvar1" => 0, + "myvar2" => 1 +);version="v4") +``` + +To get a list of variable names in a MAT file: + +```julia +file = matopen("matfile.mat") +varnames = keys(file) +close(file) +``` + +To check for the presence of a variable name in a MAT file: + +```julia +file = matopen("matfile.mat") +if haskey(file, "variable") + # something +end +close(file) +``` diff --git a/docs/src/methods.md b/docs/src/methods.md index 0f6c160..078b32d 100644 --- a/docs/src/methods.md +++ b/docs/src/methods.md @@ -6,5 +6,5 @@ ## Methods usage ```@autodocs -Modules = [MAT] +Modules = [MAT, MAT.MAT_types] ``` diff --git a/docs/src/object_arrays.md b/docs/src/object_arrays.md new file mode 100644 index 0000000..df70188 --- /dev/null +++ b/docs/src/object_arrays.md @@ -0,0 +1,117 @@ +# Objects and struct arrays + +To better handle special cases we have these types since MAT 0.11: +* [`MatlabStructArray`](@ref MatlabStructArray) +* [`MatlabClassObject`](@ref MatlabClassObject) + +## Struct arrays vs Cell arrays + +Cell arrays are written for `Array{Any}` or any other unsupported element type: + +```julia +sarr = Any[ + Dict("x"=>1.0, "y"=>2.0), + Dict("x"=>3.0, "y"=>4.0) +] +matwrite("matfile.mat", Dict("cell" => sarr)) + +``` + +Inside MATLAB you will find: + +```matlab +>> load('matfile.mat') +>> cell + +cell = + + 2×1 cell array + + {1×1 struct} + {1×1 struct} +``` + +Read and write behavior for struct arrays is different. For struct arrays we use the `MatlabStructArray` type. You can also write with MAT.jl using Dict arrays `AbstractArray{<:AbstractDict}` if all the Dicts have equal keys, which will automatically convert internally to `MatlabStructArray`. 
+ +```julia +sarr = Dict{String, Any}[ + Dict("x"=>1.0, "y"=>2.0), + Dict("x"=>3.0, "y"=>4.0) +] +matwrite("matfile.mat", Dict("s" => sarr)) +# which is the same as: +matwrite("matfile.mat", Dict("s" => MatlabStructArray(sarr))) +# which is the same as: +matwrite("matfile.mat", Dict("s" => MatlabStructArray(["x", "y"], [[1.0, 3.0], [2.0, 4.0]]))) +``` + +Now you'll find the following inside MATLAB: + +```matlab +>> load('matfile.mat') +>> s + +s = + +[2x1 struct, 576 bytes] +x: 1 +y: 2 +``` + +Note that when you read the file again, you'll find the `MatlabStructArray`, which you can convert back to the Dict array with `Array`: + +```julia +julia> sarr = matread("matfile.mat")["s"] +MatlabStructArray{1} with 2 columns: + "x": Any[1.0, 3.0] + "y": Any[2.0, 4.0] + +julia> sarr["x"] +2-element Vector{Any}: + 1.0 + 3.0 + +julia> Array(sarr) +2-element Vector{Dict{String, Any}}: + Dict("x" => 1.0, "y" => 2.0) + Dict("x" => 3.0, "y" => 4.0) + +``` + +Note that before v0.11, MAT.jl read struct arrays as a Dict with concatenated arrays in the fields/keys, which is equivalent to `Dict(sarr)`. + +## Object Arrays + +You can write an old class object with the `MatlabClassObject` and arrays of objects with `MatlabStructArray` by providing the class name. These are also the types you obtain when you read files. 
+ +Write a single class object: +```julia +d = Dict("foo" => 5.0) +obj = MatlabClassObject(d, "TestClassOld") +matwrite("matfile.mat", Dict("tc_old" => obj)) +``` + +A class object array +```julia +class_array = MatlabStructArray(["foo"], [[5.0, "bar"]], "TestClassOld") +matwrite("matfile.mat", Dict("class_array" => class_array)) +``` + +Also a class object array, but will be converted to `MatlabStructArray` internally: +```julia +class_array = MatlabClassObject[ + MatlabClassObject(Dict("foo" => 5.0), "TestClassOld"), + MatlabClassObject(Dict("foo" => "bar"), "TestClassOld") +] +matwrite("matfile.mat", Dict("class_array" => class_array)) +``` + +A cell array: +```julia +cell_array = Any[ + MatlabClassObject(Dict("foo" => 5.0), "TestClassOld"), + MatlabClassObject(Dict("a" => "bar"), "AnotherClass") +] +matwrite("matfile.mat", Dict("cell_array" => cell_array)) +``` + diff --git a/src/MAT.jl b/src/MAT.jl index a11efee..b38126c 100644 --- a/src/MAT.jl +++ b/src/MAT.jl @@ -26,6 +26,9 @@ module MAT using HDF5, SparseArrays +include("MAT_types.jl") +using .MAT_types + include("MAT_HDF5.jl") include("MAT_v5.jl") include("MAT_v4.jl") @@ -33,6 +36,7 @@ include("MAT_v4.jl") using .MAT_HDF5, .MAT_v5, .MAT_v4 export matopen, matread, matwrite, @read, @write +export MatlabStructArray, MatlabClassObject # Open a MATLAB file const HDF5_HEADER = UInt8[0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a] @@ -140,47 +144,37 @@ end # Write a dict to a MATLAB file """ - matwrite(filename, d::Dict; compress::Bool = false, version::String) + matwrite(filename, d::Dict; compress::Bool = false, version::String = "v7.3") Write a dictionary containing variable names as keys and values as values to a Matlab file, opening and closing it automatically. 
""" -function matwrite(filename::AbstractString, dict::AbstractDict{S, T}; compress::Bool = false, version::String ="") where {S, T} - +function matwrite(filename::AbstractString, dict::AbstractDict{S, T}; compress::Bool = false, version::String ="v7.3") where {S, T} if version == "v4" file = open(filename, "w") m = MAT_v4.Matlabv4File(file, false) - try - for (k, v) in dict - local kstring - try - kstring = ascii(convert(String, k)) - catch x - error("matwrite requires a Dict with ASCII keys") - end - write(m, kstring, v) - end - finally - close(file) - end - - else - + _write_dict(m, dict) + elseif version == "v7.3" file = matopen(filename, "w"; compress = compress) - try - for (k, v) in dict - local kstring - try - kstring = ascii(convert(String, k)) - catch x - error("matwrite requires a Dict with ASCII keys") - end - write(file, kstring, v) + _write_dict(file, dict) + else + error("writing for \"$(version)\" is not supported") + end +end + +function _write_dict(fileio, dict::AbstractDict) + try + for (k, v) in dict + local kstring + try + kstring = ascii(convert(String, k)) + catch x + error("matwrite requires a Dict with ASCII keys") end - finally - close(file) + write(fileio, kstring, v) end - + finally + close(fileio) end end diff --git a/src/MAT_HDF5.jl b/src/MAT_HDF5.jl index b0d36d6..023b7fe 100644 --- a/src/MAT_HDF5.jl +++ b/src/MAT_HDF5.jl @@ -32,6 +32,7 @@ using HDF5, SparseArrays import Base: names, read, write, close import HDF5: Reference +import ..MAT_types: MatlabStructArray, StructArrayField, convert_struct_array, MatlabClassObject const HDF5Parent = Union{HDF5.File, HDF5.Group} const HDF5BitsOrBool = Union{HDF5.BitsType,Bool} @@ -118,6 +119,7 @@ const name_type_attr_matlab = "MATLAB_class" const empty_attr_matlab = "MATLAB_empty" const sparse_attr_matlab = "MATLAB_sparse" const int_decode_attr_matlab = "MATLAB_int_decode" +const object_decode_attr_matlab = "MATLAB_object_decode" ### Reading function read_complex(dtype::HDF5.Datatype, 
dset::HDF5.Dataset, ::Type{T}) where T @@ -128,6 +130,21 @@ function read_complex(dtype::HDF5.Datatype, dset::HDF5.Dataset, ::Type{T}) where return read(dset, Complex{T}) end +function read_references(dset::HDF5.Dataset) + refs = read(dset, Reference) + out = Array{Any}(undef, size(refs)) + f = HDF5.file(dset) + for i = 1:length(refs) + dset = f[refs[i]] + try + out[i] = m_read(dset) + finally + close(dset) + end + end + return out +end + function m_read(dset::HDF5.Dataset) if haskey(dset, empty_attr_matlab) # Empty arrays encode the dimensions as the dataset @@ -139,7 +156,8 @@ function m_read(dset::HDF5.Dataset) # Not sure if this check is necessary but it is checked in # `m_read(g::HDF5.Group)` if haskey(dset, "MATLAB_fields") - return Dict{String,Any}(join(n)=>[] for n in read_attribute(dset, "MATLAB_fields")) + field_names = [join(n) for n in read_attribute(dset, "MATLAB_fields")] + return MatlabStructArray(field_names, tuple(dims...)) else return Dict{String,Any}() end @@ -149,22 +167,14 @@ function m_read(dset::HDF5.Dataset) end end - mattype = haskey(dset, name_type_attr_matlab) ? read_attribute(dset, name_type_attr_matlab) : "cell" + mattype = haskey(dset, name_type_attr_matlab) ? 
read_attribute(dset, name_type_attr_matlab) : "struct_array_field" if mattype == "cell" # Cell arrays, represented as an array of refs - refs = read(dset, Reference) - out = Array{Any}(undef, size(refs)) - f = HDF5.file(dset) - for i = 1:length(refs) - dset = f[refs[i]] - try - out[i] = m_read(dset) - finally - close(dset) - end - end - return out + return read_references(dset) + elseif mattype == "struct_array_field" + # This will be converted into MatlabStructArray in `m_read(g::HDF5.Group)` + return StructArrayField(read_references(dset)) elseif !haskey(str2type_matlab,mattype) @warn "MATLAB $mattype values are currently not supported" return missing @@ -192,46 +202,37 @@ function add!(A, x) A end -# reading a struct, struct array, or sparse matrix -function m_read(g::HDF5.Group) - mattype = read_attribute(g, name_type_attr_matlab) - if mattype != "struct" - # Check if this is a sparse matrix. - fn = keys(g) - if haskey(attributes(g), sparse_attr_matlab) - # This is a sparse matrix. - # ir is the row indices, jc is the column boundaries. - # We add one to account for the zero-based (MATLAB) to one-based (Julia) transition - jc = add!(convert(Vector{Int}, read(g, "jc")), 1) - if "data" in fn && "ir" in fn && "jc" in fn - # This matrix is not empty. - ir = add!(convert(Vector{Int}, read(g, "ir")), 1) - dset = g["data"] - T = str2type_matlab[mattype] - try - dtype = datatype(dset) - class_id = HDF5.API.h5t_get_class(dtype.id) - try - data = class_id == HDF5.API.H5T_COMPOUND ? read_complex(dtype, dset, T) : read(dset, T) - finally - close(dtype) - end - finally - close(dset) - end - else - # This matrix is empty. - ir = Int[] - data = str2type_matlab[mattype][] +function read_sparse_matrix(g::HDF5.Group, mattype::String) + local data + fn = keys(g) + # ir is the row indices, jc is the column boundaries. 
+ # We add one to account for the zero-based (MATLAB) to one-based (Julia) transition + jc = add!(convert(Vector{Int}, read(g, "jc")), 1) + if "data" in fn && "ir" in fn && "jc" in fn + # This matrix is not empty. + ir = add!(convert(Vector{Int}, read(g, "ir")), 1) + dset = g["data"] + T = str2type_matlab[mattype] + try + dtype = datatype(dset) + class_id = HDF5.API.h5t_get_class(dtype.id) + try + data = class_id == HDF5.API.H5T_COMPOUND ? read_complex(dtype, dset, T) : read(dset, T) + finally + close(dtype) end - return SparseMatrixCSC(convert(Int, read_attribute(g, sparse_attr_matlab)), length(jc)-1, jc, ir, data) - elseif mattype == "function_handle" - @warn "MATLAB $mattype values are currently not supported" - return missing - else - @warn "Unknown non-struct group of type $mattype detected; attempting to read as struct" + finally + close(dset) end + else + # This matrix is empty. + ir = Int[] + data = str2type_matlab[mattype][] end + return SparseMatrixCSC(convert(Int, read_attribute(g, sparse_attr_matlab)), length(jc)-1, jc, ir, data) +end + +function read_struct_as_dict(g::HDF5.Group) if haskey(g, "MATLAB_fields") fn = [join(f) for f in read_attribute(g, "MATLAB_fields")] else @@ -246,7 +247,38 @@ function m_read(g::HDF5.Group) close(dset) end end - s + return s +end + +# reading a struct, struct array, or sparse matrix +function m_read(g::HDF5.Group) + mattype = read_attribute(g, name_type_attr_matlab) + is_object = false + if mattype != "struct" + attr = attributes(g) + # Check if this is a sparse matrix. 
+ if haskey(attr, sparse_attr_matlab) + return read_sparse_matrix(g, mattype) + elseif mattype == "function_handle" + @warn "MATLAB $mattype values are currently not supported" + return missing + else + if haskey(attr, object_decode_attr_matlab) && read_attribute(g, object_decode_attr_matlab)==2 + # I think this means it's an old object class similar to mXOBJECT_CLASS in MAT_v5 + is_object = true + else + @warn "Unknown non-struct group of type $mattype detected; attempting to read as struct" + end + end + end + if is_object + class = mattype + else + class = "" + end + s = read_struct_as_dict(g) + out = convert_struct_array(s, class) + return out end """ @@ -453,9 +485,27 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, str::A end end +# Char +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, c::AbstractChar) + m_write(mfile, parent, name, string(c)) +end + # Write cell arrays function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, data::AbstractArray{T}) where T data = _normalize_arr(data) + refs = _write_references!(mfile, parent, data) + # Write the references as the chosen variable + cset, ctype = create_dataset(parent, name, refs) + try + write_dataset(cset, ctype, refs) + write_attribute(cset, name_type_attr_matlab, "cell") + finally + close(ctype) + close(cset) + end +end + +function _write_references!(mfile::MatlabHDF5File, parent::HDF5Parent, data::AbstractArray) pathrefs = "/#refs#" fid = HDF5.file(parent) local g @@ -481,6 +531,7 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, data:: else a = g["a"] if !haskey(attributes(a), "MATLAB_empty") + close(a) error("Must create the empty item, with name a, first") end close(a) @@ -499,14 +550,53 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, data:: finally close(g) end - # Write the references as the chosen variable - cset, ctype = create_dataset(parent, name, refs) - try - 
write_dataset(cset, ctype, refs) - write_attribute(cset, name_type_attr_matlab, "cell") - finally - close(ctype) - close(cset) + return refs +end + + +# Struct array: Array of Dict => MATLAB struct array +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, + arr::AbstractArray{<:AbstractDict}) + m_write(mfile, parent, name, MatlabStructArray(arr)) +end + +# MATLAB struct array +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::MatlabStructArray) + first_value = first(arr.values) + if isempty(first_value) + # write an empty struct array + adata = [size(first_value)...] + dset, dtype = create_dataset(parent, name, adata) + try + write_attribute(dset, empty_attr_matlab, 0x01) + write_attribute(dset, name_type_attr_matlab, "struct") + write_attribute(dset, "MATLAB_fields", HDF5.VLen(arr.names)) + write_dataset(dset, dtype, adata) + finally + close(dtype); close(dset) + end + else + g = create_group(parent, name) + try + if isempty(arr.class) + write_attribute(g, name_type_attr_matlab, "struct") + write_attribute(g, "MATLAB_fields", HDF5.VLen(arr.names)) + else + write_attribute(g, name_type_attr_matlab, arr.class) + write_attribute(g, object_decode_attr_matlab, UInt32(2)) + end + for (fieldname, field_values) in arr + refs = _write_references!(mfile, parent, field_values) + dset, dtype = create_dataset(g, fieldname, refs) + try + write_dataset(dset, dtype, refs) + finally + close(dtype); close(dset) + end + end + finally + close(g) + end end end @@ -524,14 +614,35 @@ function check_struct_keys(k::Vector) asckeys end +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::AbstractArray{MatlabClassObject}) + m_write(mfile, parent, name, MatlabStructArray(arr)) +end + +function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, obj::MatlabClassObject) + g = create_group(parent, name) + try + write_attribute(g, name_type_attr_matlab, obj.class) + write_attribute(g, 
object_decode_attr_matlab, UInt32(2)) + for (ki, vi) in zip(keys(obj), values(obj)) + m_write(mfile, g, ki, vi) + end + finally + close(g) + end +end + # Write a struct from arrays of keys and values function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, k::Vector{String}, v::Vector) g = create_group(parent, name) - write_attribute(g, name_type_attr_matlab, "struct") - for i = 1:length(k) - m_write(mfile, g, k[i], v[i]) + try + write_attribute(g, name_type_attr_matlab, "struct") + for i = 1:length(k) + m_write(mfile, g, k[i], v[i]) + end + write_attribute(g, "MATLAB_fields", HDF5.VLen(k)) + finally + close(g) end - write_attribute(g, "MATLAB_fields", HDF5.VLen(k)) end # Write Associative as a struct diff --git a/src/MAT_types.jl b/src/MAT_types.jl new file mode 100644 index 0000000..1ca8e08 --- /dev/null +++ b/src/MAT_types.jl @@ -0,0 +1,279 @@ +# MAT_types.jl +# Internal types used by MAT.jl +# +# Copyright (C) 2012 Matthijs Cox +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +########################################### +## Reading and writing MATLAB .mat files ## +########################################### + +module MAT_types + + export MatlabStructArray, StructArrayField, convert_struct_array + export MatlabClassObject + + # struct arrays are stored as columns per field name + """ + MatlabStructArray{N}( + names::Vector{String}, + values::Vector{Array{Any,N}}, + class::String = "", + ) + + Data structure to store matlab struct arrays, which stores the field names separately from the field values. + The field values are stored as columns of `Array{Any,N}` per Matlab field, which is how MAT files store these structures. + + These are distinct from cell arrays of structs, + which are handled in MAT.jl as `Array{Any,N}` with `Dict{String,Any}` inside, + for example `Any[Dict("x"=>1), Dict("x"=>2)]`. + + Old class object arrays can be handled by providing a non-empty class name. 
+ + # Example + + ```julia + using MAT + + s_arr = MatlabStructArray(["a", "b"], [[1, 2],["foo", 5]]) + + # write-read + matwrite("matfile.mat", Dict("struct_array" => s_arr)) + read_s_arr = matread("matfile.mat")["struct_array"] + + # convert to Dict Array + dict_array = Array{Dict{String,Any}}(s_arr) + + # convert to Dict (with arrays as fields) + dict = Dict{String,Any}(s_arr) + ``` + """ + struct MatlabStructArray{N} + names::Vector{String} + values::Vector{Array{Any,N}} + class::String + function MatlabStructArray(names::Vector{String}, values::Vector{Array{Any,N}}, class::String=""; check::Bool=true) where N + check && check_struct_array(names, values) + return new{N}(names, values, class) + end + function MatlabStructArray{N}(names::Vector{String}, values::Vector{Array{Any,N}}, class::String="") where N + return new{N}(names, values, class) + end + end + + function check_struct_array(names::Vector{String}, values::Vector{Array{Any,N}}) where N + if length(names) != length(values) + error("MatlabStructArray requires equal number of names and values") + end + first_value, rest_values = Iterators.peel(values) + first_len = length(first_value) + if !all(x->length(x)==first_len, rest_values) + error("MatlabStructArray requires all value columns to be of equal length") + end + end + + function MatlabStructArray(names::AbstractVector{<:AbstractString}, values::AbstractArray{A}, class=""; check::Bool=true) where {N, A<:AbstractArray{T, N} where {T}} + MatlabStructArray(string.(names), Vector{Array{Any,N}}(values), string(class); check=check) + end + function MatlabStructArray(names::Vector{String}, values::AbstractArray{A}, class=""; check::Bool=true) where {N, A<:AbstractArray{T, N} where {T}} + MatlabStructArray(names, Vector{Array{Any,N}}(values), string(class); check=check) + end + + # empty array + function MatlabStructArray(names::AbstractVector{<:AbstractString}, dims::Tuple) + N = length(dims) + return MatlabStructArray{N}(names, [Array{Any, N}(undef, 
dims...) for n in names]) + end + MatlabStructArray(names::AbstractVector{<:AbstractString}) = MatlabStructArray(names, (0,0)) + + Base.eltype(::Type{MatlabStructArray{N}}) where N = Pair{String, Array{Any,N}} + Base.length(arr::MatlabStructArray) = length(arr.names) + Base.keys(arr::MatlabStructArray) = arr.names + Base.values(arr::MatlabStructArray) = arr.values + Base.haskey(arr::MatlabStructArray, k::AbstractString) = k in keys(arr) + + function Base.iterate(arr::T, i=next_state(arr)) where T<:MatlabStructArray + if i == 0 + return nothing + else + return (eltype(T)(arr.names[i], arr.values[i]), next_state(arr,i)) + end + end + next_state(arr, i=0) = length(arr)==i ? 0 : i+1 + + function Base.show(io::IO, ::MIME"text/plain", arr::MatlabStructArray) + summary(io, arr) + ncol = length(arr.values) + print(io, " with $(ncol) ") + col_word = ncol==1 ? "column" : "columns" + print(io, col_word, ":") + for (k,v) in arr + print(io, "\n \"$k\": $v") + end + end + + function Base.:(==)(m1::MatlabStructArray{N},m2::MatlabStructArray{N}) where N + return isequal(m1.names, m2.names) && isequal(m1.values, m2.values) && isequal(m1.class, m2.class) + end + + function Base.isapprox(m1::MatlabStructArray,m2::MatlabStructArray; kwargs...) + return isequal(m1.names, m2.names) && isapprox(m1.values, m2.values; kwargs...) 
+ end + + function find_index(m::MatlabStructArray, s::AbstractString) + idx = findfirst(isequal(s), m.names) + if isnothing(idx) + error("field \"$s\" not found in MatlabStructArray") + end + return idx + end + + function Base.getindex(m::MatlabStructArray, s::AbstractString) + idx = find_index(m, s) + return getindex(m.values, idx) + end + + function Base.get(m::MatlabStructArray, s::AbstractString, default) + idx = findfirst(isequal(s), m.names) + if isnothing(idx) + return default + else + return getindex(m.values, idx) + end + end + + # convert Dict array to MatlabStructArray + function MatlabStructArray(arr::AbstractArray{<:AbstractDict, N}, class::String="") where N + first_dict, remaining_dicts = Iterators.peel(arr) + first_keys = keys(first_dict) + field_names = string.(first_keys) + # Ensure same field set for all elements + for d in remaining_dicts + if !issetequal(keys(d), first_keys) + error("Cannot convert Dict array to MatlabStructArray. All elements must share identical field names") + end + end + field_values = Vector{Array{Any,N}}(undef, length(field_names)) + for (idx,k) in enumerate(first_keys) + this_field_values = Array{Any, N}(undef, size(arr)) + for (idx, d) in enumerate(arr) + this_field_values[idx] = d[k] + end + field_values[idx] = this_field_values + end + return MatlabStructArray{N}(field_names, field_values, class) + end + + function Base.Dict(arr::MatlabStructArray) + return Base.Dict{String, Any}(arr) + end + function Base.Dict{String, Any}(arr::MatlabStructArray) + Base.Dict{String, Any}(arr.names .=> arr.values) + end + + Base.Array{D}(arr::MatlabStructArray{N}) where {D<:AbstractDict,N} = Array{D,N}(arr) + + function Base.Array{D, N}(arr::MatlabStructArray{N}) where {D<:AbstractDict,N} + first_field = first(arr.values) + sz = size(first_field) + result = Array{D, N}(undef, sz) + for idx in eachindex(first_field) + element_values = (v[idx] for v in arr.values) + result[idx] = create_struct(D, arr.names, element_values, arr.class) + 
end + return result + end + + function create_struct(::Type{D}, keys, values, class::String) where {T, D<:AbstractDict{T}} + return D(T.(keys) .=> values) + end + + struct StructArrayField{N} + values::Array{Any,N} + end + dimension(::StructArrayField{N}) where N = N + + """ + MatlabClassObject( + d::Dict{String, Any}, + class::String, + ) <: AbstractDict{String, Any} + + Type to store old class objects. Inside MATLAB a class named \"TestClassOld\" would be defined within `@TestClassOld` folders. + + If you want to write these objects you have to make sure the keys in the Dict match the class defined properties/fields. + """ + struct MatlabClassObject <: AbstractDict{String, Any} + d::Dict{String, Any} + class::String + end + + Base.eltype(::Type{MatlabClassObject}) = Pair{String, Any} + Base.length(m::MatlabClassObject) = length(m.d) + Base.keys(m::MatlabClassObject) = keys(m.d) + Base.values(m::MatlabClassObject) = values(m.d) + Base.getindex(m::MatlabClassObject, i) = getindex(m.d, i) + Base.setindex!(m::MatlabClassObject, v, k) = setindex!(m.d, v, k) + Base.iterate(m::MatlabClassObject, i) = iterate(m.d, i) + Base.iterate(m::MatlabClassObject) = iterate(m.d) + Base.haskey(m::MatlabClassObject, k) = haskey(m.d, k) + Base.get(m::MatlabClassObject, k, default) = get(m.d, k, default) + + function MatlabStructArray(arr::AbstractArray{MatlabClassObject}) + first_obj, remaining_obj = Iterators.peel(arr) + class = first_obj.class + if !all(x->isequal(class, x.class), remaining_obj) + error("to write a MatlabClassObject array all classes must be equal. 
Use `Array{Any}` to write a cell array") + end + return MatlabStructArray(arr, class) + end + + function convert_struct_array(d::Dict{String, Any}, class::String="") + # there is no possibility of having cell arrays mixed with struct arrays (afaik) + field_values = first(values(d)) + if field_values isa StructArrayField + return MatlabStructArray{dimension(field_values)}( + collect(keys(d)), + [arr.values for arr in values(d)], + class, + ) + else + if isempty(class) + return d + else + return MatlabClassObject(d, class) + end + end + end + + function Base.Array(arr::MatlabStructArray{N}) where N + if isempty(arr.class) + return Array{Dict{String,Any}, N}(arr) + else + return Array{MatlabClassObject, N}(arr) + end + end + + function create_struct(::Type{D}, keys, values, class::String) where D<:MatlabClassObject + d = Dict{String, Any}(string.(keys) .=> values) + return MatlabClassObject(d, class) + end +end \ No newline at end of file diff --git a/src/MAT_v4.jl b/src/MAT_v4.jl index 07fa547..c3bb69c 100644 --- a/src/MAT_v4.jl +++ b/src/MAT_v4.jl @@ -227,6 +227,10 @@ function colvals(A::AbstractSparseMatrix) cols end +function write(parent::Matlabv4File, name::String, s::AbstractChar) + write(parent, name, string(s)) +end + function write(parent::Matlabv4File, name::String, s) M = Int(parent.swap_bytes) O = 0 diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 7370dd4..0c12d01 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -28,6 +28,7 @@ module MAT_v5 using CodecZlib, BufferedStreams, HDF5, SparseArrays import Base: read, write, close +import ..MAT_types: MatlabStructArray, MatlabClassObject round_uint8(data) = round.(UInt8, data) complex_array(a, b) = complex.(a, b) @@ -170,6 +171,8 @@ end function read_struct(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, is_object::Bool) if is_object class = String(read_element(f, swap_bytes, UInt8)) + else + class = "" end field_length = read_element(f, swap_bytes, Int32)[1] field_names = read_element(f, swap_bytes, UInt8) 
@@ -184,27 +187,28 @@ function read_struct(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}, is_obje field_name_strings[i] = String(index == 0 ? sname : sname[1:index-1]) end - data = Dict{String, Any}() - sizehint!(data, n_fields+1) - if is_object - data["class"] = class - end - + local data if n_el == 1 # Read a single struct into a dict + data = Dict{String, Any}() + sizehint!(data, n_fields+1) for field_name in field_name_strings data[field_name] = read_matrix(f, swap_bytes)[2] end - else - # Read multiple structs into a dict of arrays - for field_name in field_name_strings - data[field_name] = Array{Any}(undef, dimensions...) + if is_object + data = MatlabClassObject(data, class) end + else + # Read empty or multiple structs + nfields = length(field_name_strings) + N = length(dimensions) + field_values = Array{Any, N}[Array{Any}(undef, dimensions...) for _ in 1:nfields] for i = 1:n_el - for field_name in field_name_strings - data[field_name][i] = read_matrix(f, swap_bytes)[2] + for field in 1:nfields + field_values[field][i] = read_matrix(f, swap_bytes)[2] end end + data = MatlabStructArray{N}(field_name_strings, field_values, class) end data diff --git a/test/read.jl b/test/read.jl index 4d8c9d4..410a10e 100644 --- a/test/read.jl +++ b/test/read.jl @@ -101,16 +101,28 @@ for _format in ["v6", "v7", "v7.3"] ) check("cell.mat", result) - result = Dict( + result = Dict{String,Any}( "s" => Dict{String,Any}( "a" => 1.0, "b" => [1.0 2.0], "c" => [1.0 2.0 3.0] ), - "s2" => Dict{String,Any}("a" => Any[1.0 2.0]) + "s2" => MAT.MatlabStructArray(["a"], [Any[1.0 2.0]]) ) check("struct.mat", result) + result = Dict( + "s00" => MAT.MatlabStructArray(["a", "b", "c"], (0,0)), + "s01" => MAT.MatlabStructArray(["a", "b", "c"], (0,1)), + "s10" => MAT.MatlabStructArray(["a", "b", "c"], (1,0)) + ) + check("empty_struct_arrays.mat", result) + + result = Dict{String,Any}( + "s" => Dict{String, Any}("c"=>Matrix{Any}(undef, 0, 0), "b"=>Matrix{Any}(undef, 0, 0), 
"a"=>Matrix{Any}(undef, 0, 0)), + ) + check("empty_cell_struct.mat", result) + result = Dict( "logical" => false, "logical_mat" => [ @@ -194,15 +206,7 @@ let objtestfile = "obj.mat" @test key in keys(vars) end # check if class name was read correctly - @test vars["A"]["class"] == "Assoc" -end - -# test reading of empty struct -let objtestfile = "empty_struct.mat" - vars = matread(joinpath(dirname(@__FILE__), objtestfile)) - @test "a" in keys(vars) - @test vars["a"]["size"] == [] - @test vars["a"]["params"] == [] + @test vars["A"].class == "Assoc" end # test reading of a Matlab figure @@ -236,4 +240,21 @@ let objtestfile = "old_class.mat" vars = matread(joinpath(dirname(@__FILE__), "v7.3", objtestfile)) @test "tc_old" in keys(vars) @test "foo" in keys(vars["tc_old"]) + @test vars["tc_old"].class == "TestClassOld" end + +let objtestfile = "old_class_array.mat" + vars = matread(joinpath(dirname(@__FILE__), "v7.3", objtestfile)) + c_arr = vars["class_arr"] + @test c_arr isa MatlabStructArray + @test c_arr.class == "TestClassOld" + @test c_arr["foo"] == Any[5.0 "test"] + + vars = matread(joinpath(dirname(@__FILE__), "v7", objtestfile)) + c_arr = vars["class_arr"] + @test c_arr isa MatlabStructArray + @test c_arr.class == "TestClassOld" + @test c_arr["foo"] == Any[5.0 "test"] +end + + diff --git a/test/runtests.jl b/test/runtests.jl index 159a125..6789b43 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,9 @@ using SparseArrays, LinearAlgebra +using Test, MAT -include("read.jl") -include("readwrite4.jl") -include("write.jl") +@testset "MAT" begin + include("types.jl") + include("read.jl") + include("readwrite4.jl") + include("write.jl") +end diff --git a/test/types.jl b/test/types.jl new file mode 100644 index 0000000..aaa87da --- /dev/null +++ b/test/types.jl @@ -0,0 +1,88 @@ +using MAT, Test + +@testset "MatlabStructArray" begin + d_arr = Dict{String, Any}[ + Dict("x"=>[1.0,2.0], SubString("y")=>3.0), + Dict("x"=>[5.0,6.0], "y"=>[]) + ] + s_arr = 
MatlabStructArray(d_arr) + @test s_arr["y"][2] == d_arr[2]["y"] + @test s_arr["x"][1] == d_arr[1]["x"] + + # constructor errors to protect the user + @test_throws ErrorException MatlabStructArray(["a", "b"], [[]]) + @test_throws ErrorException MatlabStructArray(["a", "b"], [[],[0.1, 0.2]]) + + # equality checks + @test isequal(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.1, 0.2]])) + @test !isequal(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.1, 0.2]], "TestClass")) + @test !isequal(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["b"], [[0.1, 0.2]])) + @test isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.1+eps(0.1), 0.2]])) + @test !isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["b"], [[0.1, 0.2]])) + @test !isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.11, 0.2]])) + + # empty struct array constructor + s_arr = MatlabStructArray(["x", "y"], (0,1)) + @test s_arr["x"] == Matrix{Any}(undef, 0, 1) + @test s_arr["y"] == Matrix{Any}(undef, 0, 1) + @test MatlabStructArray(["a"]) == MatlabStructArray(["a"], (0,0)) + + # convert to Dict to support easy conversion to legacy read behavior + s_arr = MatlabStructArray(d_arr) + d = Dict(s_arr) + @test d isa Dict{String, Any} + @test collect(keys(d)) == keys(s_arr) + @test collect(values(d)) == values(s_arr) + # Dict like interfaces + @test length(s_arr) == 2 + @test collect(Dict(s_arr)) == collect(s_arr) + @test haskey(s_arr, "x") + @test get(s_arr, "x", nothing) == s_arr["x"] + @test !haskey(s_arr, "wrong") + @test get(s_arr, "wrong", nothing) === nothing + + # possibility to convert back to dict array via `Array` + s_arr = MatlabStructArray(d_arr) + @test Array(s_arr) == d_arr + d_arr_reshape = reshape(d_arr, 1, 2) + @test Array(MatlabStructArray(d_arr_reshape)) == d_arr_reshape + d_symbol = Array{Dict{Symbol,Any}}(MatlabStructArray(d_arr)) + @test d_symbol[2][:x] == d_arr[2]["x"] + 
@test Array(MatlabStructArray(d_symbol)) == d_arr + + # class object array conversion + s_arr = MatlabStructArray(d_arr, "TestClass") + c_arr = Array(s_arr) + @test c_arr isa Array{MatlabClassObject} + @test all(c->c.class=="TestClass", c_arr) + @test MatlabStructArray(c_arr) == s_arr + + # test error of unequal structs + wrong_sarr = Dict{String, Any}[ + Dict("x"=>[1.0,2.0], "y"=>[3.0,4.0]), + Dict("x"=>[5.0,6.0]) + ] + msg = "Cannot convert Dict array to MatlabStructArray. All elements must share identical field names" + @test_throws ErrorException(msg) MatlabStructArray(wrong_sarr) +end + +@testset "MatlabClassObject" begin + d = Dict{String,Any}("a" => 5) + obj = MatlabClassObject(d, "TestClassOld") + @test keys(obj) == keys(d) + @test values(obj) == values(d) + @test collect(obj) == collect(d) + @test obj["a"] == d["a"] + @test haskey(obj, "a") + @test get(obj, "b", "default") == "default" + + obj["b"] = 7 + @test obj["b"] == 7 + + c_arr = [MatlabClassObject(d, "TestClassOld"), MatlabClassObject(d, "TestClassOld")] + s_arr = MatlabStructArray(c_arr) + @test s_arr.class == "TestClassOld" + + wrong_arr = [MatlabClassObject(d, "TestClassOld"), MatlabClassObject(d, "Bah")] + @test_throws ErrorException MatlabStructArray(wrong_arr) +end \ No newline at end of file diff --git a/test/v6/empty_cell_struct.mat b/test/v6/empty_cell_struct.mat new file mode 100644 index 0000000..1316c56 Binary files /dev/null and b/test/v6/empty_cell_struct.mat differ diff --git a/test/v6/empty_struct_arrays.mat b/test/v6/empty_struct_arrays.mat new file mode 100644 index 0000000..ed3ced2 Binary files /dev/null and b/test/v6/empty_struct_arrays.mat differ diff --git a/test/v7.3/empty_cell_struct.mat b/test/v7.3/empty_cell_struct.mat new file mode 100644 index 0000000..1316c56 Binary files /dev/null and b/test/v7.3/empty_cell_struct.mat differ diff --git a/test/empty_struct.mat b/test/v7.3/empty_struct_arrays.mat similarity index 74% rename from test/empty_struct.mat rename to 
test/v7.3/empty_struct_arrays.mat index b7d6f70..7c1a2bd 100644 Binary files a/test/empty_struct.mat and b/test/v7.3/empty_struct_arrays.mat differ diff --git a/test/v7.3/old_class_array.mat b/test/v7.3/old_class_array.mat new file mode 100644 index 0000000..fbbde58 Binary files /dev/null and b/test/v7.3/old_class_array.mat differ diff --git a/test/v7/empty_cell_struct.mat b/test/v7/empty_cell_struct.mat new file mode 100644 index 0000000..1316c56 Binary files /dev/null and b/test/v7/empty_cell_struct.mat differ diff --git a/test/v7/empty_struct_arrays.mat b/test/v7/empty_struct_arrays.mat new file mode 100644 index 0000000..1345b58 Binary files /dev/null and b/test/v7/empty_struct_arrays.mat differ diff --git a/test/v7/old_class_array.mat b/test/v7/old_class_array.mat new file mode 100644 index 0000000..734f6b3 Binary files /dev/null and b/test/v7/old_class_array.mat differ diff --git a/test/write.jl b/test/write.jl index ae4a2eb..02acc62 100644 --- a/test/write.jl +++ b/test/write.jl @@ -34,6 +34,15 @@ function test_compression_effective(data) end end +@testset "write error messages" begin + msg = "writing for \"v7\" is not supported" + @test_throws ErrorException(msg) matwrite(tmpfile, Dict("s" => 1); version="v7") + + msg = "matwrite requires a Dict with ASCII keys" + @test_throws ErrorException(msg) matwrite(tmpfile, Dict(:s => 1)) + @test_throws ErrorException(msg) matwrite(tmpfile, Dict(:s => 1); version="v4") +end + test_write(Dict( "int8" => Int8(1), "uint8" => UInt8(1), @@ -74,6 +83,12 @@ test_write(Dict( "string" => "string" )) +# cannot distinguish char from single element string +test_write(Dict("char" => 'a')) +# inconsistent behavior in v4 +matwrite(tmpfile, Dict("char" => 'a'), version="v4") +@test matread(tmpfile)["char"] == "a" + test_write(Dict( "cell" => Any[1 2.01 "string" Any["string1" "string2"]] )) @@ -135,4 +150,40 @@ test_write(Dict("adjoint_arr"=>[1 2 3;4 5 6;7 8 9]')) test_write(Dict("reshape_arr"=>reshape([1 2 3;4 5 6;7 8 9]',1,9))) 
test_write(Dict("adjoint_arr"=>Any[1 2 3;4 5 6;7 8 9]')) -test_write(Dict("reshape_arr"=>reshape(Any[1 2 3;4 5 6;7 8 9]',1,9))) \ No newline at end of file +test_write(Dict("reshape_arr"=>reshape(Any[1 2 3;4 5 6;7 8 9]',1,9))) + +# test nested struct array - interface via Dict array +@testset "MatlabStructArray writing" begin + sarr = Dict{String, Any}[ + Dict("x"=>[1.0,2.0], SubString("y")=>3.0), + Dict("x"=>[5.0,6.0], "y"=>[Dict("a"=>7), Dict("a"=>8)]) + ] + # we have to test Array size is maintained inside mat files + sarr = reshape(sarr, 1, 2) + matwrite(tmpfile, Dict("s_array" => sarr)) + read_sarr = matread(tmpfile)["s_array"] + @test read_sarr isa MatlabStructArray + @test read_sarr["y"][2] isa MatlabStructArray + + sarr = Dict{String, Any}[ + Dict("x"=>[1.0,2.0], SubString("y")=>3.0), + Dict("x"=>[5.0,6.0], "y"=>[]) + ] + test_write(Dict("s_array" => MatlabStructArray(sarr))) + + empty_sarr = MatlabStructArray(["a", "b", "c"]) + test_write(Dict("s_array" => empty_sarr)) + + # old matlab class object array + carr = MatlabStructArray(["foo"], [[5, "bar"]], "TestClassOld") + test_write(Dict("class_array" => carr)) + + d = Dict{String,Any}("foo" => 5) + obj = MatlabClassObject(d, "TestClassOld") + test_write(Dict("tc_old" => obj)) + + carr = [MatlabClassObject(d, "TestClassOld"), MatlabClassObject(d, "TestClassOld")] + matwrite(tmpfile, Dict("class_array" => carr)) + carr_read = matread(tmpfile)["class_array"] + @test carr_read == MatlabStructArray(carr) +end \ No newline at end of file