Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update TFRecord.jl to work with julia 1.7+ and ProtoBuf v1.0+ #26

Merged
merged 6 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 15 additions & 20 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
name: CI
on:
pull_request:
branches:
- master
push:
branches:
- master
tags: '*'
- main
tags: ['*']
pull_request:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
Expand All @@ -15,11 +18,12 @@ jobs:
fail-fast: false
matrix:
version:
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
- '1.6'
- '1.7'
- '1.8'
- '1'
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
steps:
Expand All @@ -28,19 +32,10 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v2
with:
file: lcov.info
files: lcov.info
13 changes: 3 additions & 10 deletions .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
name: CompatHelper

on:
schedule:
- cron: '00 00 * * *'
- cron: 0 0 * * *
workflow_dispatch:

jobs:
CompatHelper:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.2.0]
julia-arch: [x86]
os: [ubuntu-latest]
runs-on: ubuntu-latest
steps:
- name: Pkg.add("CompatHelper")
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV}}
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
run: julia -e 'using CompatHelper; CompatHelper.main()'
6 changes: 2 additions & 4 deletions .github/workflows/TagBot.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
name: TagBot
on:
issue_comment: # THIS BIT IS NEW
issue_comment:
types:
- created
workflow_dispatch:
jobs:
TagBot:
# THIS 'if' LINE IS NEW
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
# NOTHING BELOW HAS CHANGED
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
# ssh: ${{ secrets.DOCUMENTER_KEY }}
ssh: ${{ secrets.DOCUMENTER_KEY }}
16 changes: 16 additions & 0 deletions .github/workflows/register.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Manually triggered workflow that runs julia-actions/RegisterAction.
# NOTE(review): nesting/indentation is not visible in this view; confirm the
# structure against the file in the repository.
name: Register Package
on:
# Runs only when started by hand; there are no push or schedule triggers.
workflow_dispatch:
inputs:
version:
description: Version to register or component to bump
required: true
jobs:
register:
runs-on: ubuntu-latest
# Grants the job's token write access to repository contents.
permissions:
contents: write
steps:
# Tracks the moving `latest` ref of the action rather than a fixed version.
- uses: julia-actions/RegisterAction@latest
with:
token: ${{ secrets.GITHUB_TOKEN }}
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/Manifest.toml

example.tfrecord
example.tfrecord
.vscode/settings.json
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 Jun Tian <tianjun.cpp@gmail.com> and contributors
Copyright (c) 2022 Jun Tian <tianjun.cpp@gmail.com> and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
12 changes: 6 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name = "TFRecord"
uuid = "841416d8-1a6a-485a-b0fc-1328d0f53d5e"
authors = ["Jun Tian <tianjun.cpp@gmail.com> and contributors"]
version = "0.4.1"
version = "0.4.2"

[deps]
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ProtoBuf = "3349acd9-ac6a-5e09-bcdb-63829b23a429"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"

[compat]
BufferedStreams = "1.0"
BufferedStreams = "1.1"
CodecZlib = "0.7"
EnumX = "1"
MacroTools = "0.5"
ProtoBuf = "0.10, 0.11"
ProtoBuf = "1.0"
TranscodingStreams = "0.9"
julia = "1.5"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
1 change: 1 addition & 0 deletions src/TFRecord.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Package entry point: loads the protobuf message definitions first, then the
# reader/writer code that depends on them.
module TFRecord

# Defines the `example_pb` submodule (presumably generated protobuf code, given
# the `jlout` directory name -- confirm before editing by hand).
include("jlout/example_pb.jl")
# Bring the names exported by `example_pb` into this module's scope so that
# core.jl can refer to them unqualified.
using .example_pb
# Must come after the two lines above: core.jl uses the message types at
# definition time (e.g. in method signatures).
include("core.jl")

end
51 changes: 29 additions & 22 deletions src/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Base.Threads
using CodecZlib
using BufferedStreams
using MacroTools: @forward
using ProtoBuf: ProtoType
using ProtoBuf
using TranscodingStreams: NoopStream

# Ref: https://github.com/tensorflow/tensorflow/blob/295ad2781683835be974faba0a191528d8079768/tensorflow/core/lib/hash/crc32c.h#L50-L59
Expand All @@ -30,15 +30,16 @@ byte data[n]
uint32 masked_crc32_of_data
```
"""
# TODO check
function read_record(io::IO)
    # The length header is kept as raw bytes so that its checksum is computed
    # over exactly what was read from the stream.
    n = Base.read(io, sizeof(UInt64))
    masked_crc32_n = Base.read(io, UInt32)
    # Data validation must always run, so throw explicitly instead of using
    # `@assert` (assertions are for internal invariants and may be disabled).
    crc32c(n) == unmask(masked_crc32_n) ||
        error("record corrupted, did you set the correct compression?")

    # !!! watch https://github.com/JuliaIO/TranscodingStreams.jl/pull/104
    data = Base.read(io, Int(reinterpret(UInt64, n)[]))
    masked_crc32_data = Base.read(io, UInt32)
    crc32c(data) == unmask(masked_crc32_data) ||
        error("record corrupted, did you set the correct compression?")
    return data
end

"""
Expand Down Expand Up @@ -72,14 +73,19 @@ function read(
open(decompressor_stream(compression), file_name, "r") do io
buffered_io = BufferedInputStream(io, bufsize)
while !eof(buffered_io)
instance = readproto(IOBuffer(read_record(buffered_io)), record_type())
buff = IOBuffer(read_record(buffered_io))
d = ProtoDecoder(buff)
instance = decode(d, record_type)
put!(ch, instance)
# close(buffered_io)
end
end
end
end
end



#####
# TFRecordWriter
#####
Expand All @@ -102,8 +108,9 @@ for example `100M`.
"""
function write(s::AbstractString, x; compression=nothing, bufsize=1024*1024)
    open(compressor_stream(compression), s, "w") do io
        # Buffer on top of the stream opened above. Do not call `open(s, "w")`
        # again here: that second handle would bypass the compressor and would
        # never be closed.
        buffered_io = BufferedOutputStream(io, bufsize)
        write(buffered_io, x)
        # Close the buffered stream before leaving the `do` block so pending
        # bytes are handed to `io` while it is still open.
        close(buffered_io)
    end
end

Expand All @@ -113,9 +120,11 @@ function write(io::IO, xs)
end
end

function write(io::IO, x::ProtoType)

function write(io::IO, x::Example)
buff = IOBuffer()
writeproto(buff, x)
e = ProtoEncoder(buff)
encode(e, x)

data_crc = mask(crc32c(seekstart(buff)))
data = take!(seekstart(buff))
Expand All @@ -135,23 +144,21 @@ end
# convert
#####

# Conversions from plain Julia values to `Feature`. Each method is defined once,
# using positional constructors and a `OneOf` that names the populated list
# field; the earlier keyword-constructor variants were overwritten by these and
# have been dropped.

# Scalars are wrapped in a one-element list.
Base.convert(::Type{Feature}, x::Int) = Feature(OneOf(:int64_list, Int64List([x])))
Base.convert(::Type{Feature}, x::Bool) = Feature(OneOf(:int64_list, Int64List([Int(x)])))
Base.convert(::Type{Feature}, x::Float32) = Feature(OneOf(:float_list, FloatList([x])))
function Base.convert(::Type{Feature}, x::AbstractString)
    # `unsafe_wrap` views the string's bytes without copying them.
    return Feature(OneOf(:bytes_list, BytesList([unsafe_wrap(Vector{UInt8}, x)])))
end

# Vectors map directly onto the corresponding list message.
Base.convert(::Type{Feature}, x::Vector{Int}) = Feature(OneOf(:int64_list, Int64List(x)))
function Base.convert(::Type{Feature}, x::Vector{Bool})
    return Feature(OneOf(:int64_list, Int64List(convert(Vector{Int}, x))))
end
function Base.convert(::Type{Feature}, x::Vector{Float32})
    return Feature(OneOf(:float_list, FloatList(x)))
end
function Base.convert(::Type{Feature}, x::Vector{<:AbstractString})
    bytes = [unsafe_wrap(Vector{UInt8}, s) for s in x]
    return Feature(OneOf(:bytes_list, BytesList(bytes)))
end
function Base.convert(::Type{Feature}, x::Vector{Array{UInt8,1}})
    return Feature(OneOf(:bytes_list, BytesList(x)))
end

# A Dict becomes a `Features` message by converting every value to a `Feature`.
function Base.convert(::Type{Features}, x::Dict)
    return Features(Dict(k => convert(Feature, v) for (k, v) in x))
end

# Build an `Example` from a Dict by first converting the Dict to `Features`.
# The message is constructed in one step; the previous leftover code created an
# empty `Example()` and assigned its `features` field before this return, which
# was redundant and depends on a zero-argument constructor and a mutable struct.
function Base.convert(::Type{Example}, x::Dict)
    return Example(convert(Features, x))
end

# (De)compression
Expand All @@ -178,4 +185,4 @@ function decompressor_stream(compression)
else
throw(ArgumentError("Unsupported decompression method: $compression"))
end
end
end
Loading