Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions excelize.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
package excelize

import (
"archive/zip"
"bytes"
"encoding/xml"
"io"
Expand All @@ -24,6 +23,8 @@ import (
"strings"
"sync"

"github.com/klauspost/compress/flate"
"github.com/klauspost/compress/zip"
"golang.org/x/net/html/charset"
)

Expand Down Expand Up @@ -73,6 +74,28 @@ type ZipWriter interface {
Close() error
}

// Compression selects how aggressively the ZIP archive backing the
// spreadsheet is compressed when the workbook is saved.
type Compression int

const (
	// CompressionDefault applies the standard deflate level. Output files
	// are smallest, at the cost of the most CPU and memory during
	// Save/WriteTo.
	CompressionDefault Compression = 0
	// CompressionNone stores every spreadsheet part uncompressed. CPU time
	// and memory during Save/WriteTo drop significantly while output grows
	// (typically 5-10× larger). Useful in memory-constrained environments
	// (e.g. AWS Lambda) or when another layer compresses the output anyway
	// (e.g. gzip transport, S3 transfer acceleration).
	CompressionNone Compression = 1
	// CompressionBestSpeed selects the fastest deflate level — roughly 2×
	// faster than the default with only moderately larger output, making it
	// a good middle ground.
	CompressionBestSpeed Compression = 2
)

// Options define the options for opening and reading the spreadsheet.
//
// MaxCalcIterations specifies the maximum iterations for iterative
Expand Down Expand Up @@ -124,14 +147,23 @@ type Options struct {
// StreamingChunkSize is the number of bytes of XML data accumulated in
// memory before a streaming worksheet spills to a temp file. A smaller
// value reduces peak memory usage at the cost of more disk I/O. Zero
// means use the default (StreamChunkSize = 16 MiB).
// means use the default (StreamChunkSize = 16 MiB). Set to -1 to
// disable temp files entirely (all data stays in memory); this
// eliminates disk I/O overhead and can be significantly faster when
// sufficient memory is available.
StreamingChunkSize int
// StreamingBufSize is the size of the bufio.Writer used for all disk
// writes after the StreamingChunkSize threshold is crossed. Larger values
// reduce write syscall counts at the cost of slightly more memory. The
// measured inflection point on NVMe and HDD alike is 128 KiB. Zero means
// use the default (defaultBioSize = 128 KiB).
StreamingBufSize int
// Compression specifies the compression level for the output ZIP
// archive. The default (CompressionDefault) uses standard deflate. Use
// CompressionNone in memory-constrained environments like AWS Lambda to
// eliminate compressor overhead, or CompressionBestSpeed for a balance
// of speed and size.
Compression Compression
}

// OpenFile take the name of a spreadsheet file and returns a populated
Expand Down Expand Up @@ -257,6 +289,31 @@ func (f *File) CharsetTranscoder(fn func(charset string, input io.Reader) (rdr i
// SetZipWriter set user defined zip writer function for saving the workbook.
// The returned *File enables call chaining.
func (f *File) SetZipWriter(fn func(io.Writer) ZipWriter) *File {
	f.ZipWriter = fn
	return f
}

// configureZipCompression applies the Compression option to the zip writer.
// It is a no-op when no options are set, when the level is the default, when
// the level is unrecognized, or when zw is a custom ZipWriter implementation
// rather than a *zip.Writer.
func (f *File) configureZipCompression(zw ZipWriter) {
	if f.options == nil || f.options.Compression == CompressionDefault {
		return
	}
	// Map the option to a flate level; anything unrecognized keeps the
	// writer's built-in deflate configuration.
	var level int
	switch f.options.Compression {
	case CompressionNone:
		level = flate.NoCompression
	case CompressionBestSpeed:
		level = flate.BestSpeed
	default:
		return
	}
	if zipW, ok := zw.(*zip.Writer); ok {
		zipW.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) {
			return flate.NewWriter(out, level)
		})
	}
}

// Creates new XML decoder with charset reader.
func (f *File) xmlNewDecoder(rdr io.Reader) (ret *xml.Decoder) {
ret = xml.NewDecoder(rdr)
Expand Down
2 changes: 1 addition & 1 deletion excelize_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package excelize

import (
"archive/zip"
"bytes"
"compress/gzip"
"encoding/xml"
Expand All @@ -20,6 +19,7 @@ import (
"testing"
"time"

"github.com/klauspost/compress/zip"
"github.com/stretchr/testify/assert"
"golang.org/x/net/html/charset"
)
Expand Down
182 changes: 172 additions & 10 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,10 @@ func (f *File) Write(w io.Writer, opts ...Options) error {
return err
}

// WriteTo implements io.WriterTo to write the file.
// WriteTo implements io.WriterTo to write the file. When no password
// encryption is required, the ZIP archive is streamed directly to w without
// buffering the entire compressed output in memory. When password encryption
// is required, a temporary file is used to reduce memory usage.
func (f *File) WriteTo(w io.Writer, opts ...Options) (int64, error) {
for i := range opts {
f.options = &opts[i]
Expand All @@ -127,18 +130,100 @@ func (f *File) WriteTo(w io.Writer, opts ...Options) (int64, error) {
return 0, err
}
}
buf, err := f.WriteToBuffer()
// Password encryption requires post-processing the entire output.
// Use a temporary file to reduce peak memory usage.
if f.options != nil && f.options.Password != "" {
return f.writeToWithEncryption(w)
}
// Stream the ZIP directly to w. This avoids holding the full compressed
// archive in a bytes.Buffer, which can be 50-200 MB+ for large reports.
cw := &countWriter{w: w}
zw := f.ZipWriter(cw)
f.configureZipCompression(zw)
if err := f.writeToZip(zw); err != nil {
_ = zw.Close()
return cw.n, err
}
return cw.n, zw.Close()
}

// writeToWithEncryption writes an encrypted workbook using a temporary file
// so the unencrypted ZIP is not buffered in memory while it is produced.
// NOTE: the raw ZIP is still read back into memory once, because Encrypt
// operates on a byte slice; the saving is that the plain and encrypted
// archives are never both resident at the same time.
func (f *File) writeToWithEncryption(w io.Writer) (int64, error) {
	var tmpDir string
	if f.options != nil {
		tmpDir = f.options.TmpDir
	}
	// Spool the unencrypted ZIP to a temporary file, removed on return.
	tmpFile, err := os.CreateTemp(tmpDir, "excelize-encrypt-*.zip")
	if err != nil {
		return 0, err
	}
	tmpPath := tmpFile.Name()
	defer func() {
		_ = tmpFile.Close()
		_ = os.Remove(tmpPath)
	}()

	// Write the ZIP to the temp file.
	f.zip64Entries = nil // Reset before writing
	zw := f.ZipWriter(tmpFile)
	f.configureZipCompression(zw)
	if err := f.writeToZip(zw); err != nil {
		_ = zw.Close()
		return 0, err
	}
	if err := zw.Close(); err != nil {
		return 0, err
	}

	// If ZIP64 entries exist, fix up the local file headers in place.
	if len(f.zip64Entries) > 0 {
		if err := f.writeZip64LFHFile(tmpFile); err != nil {
			return 0, err
		}
	}

	// Read the finished ZIP back and encrypt it.
	if _, err := tmpFile.Seek(0, io.SeekStart); err != nil {
		return 0, err
	}
	rawZip, err := io.ReadAll(tmpFile)
	if err != nil {
		return 0, err
	}
	encrypted, err := Encrypt(rawZip, f.options)
	if err != nil {
		return 0, err
	}
	n, err := w.Write(encrypted)
	return int64(n), err
}

// countWriter wraps an io.Writer and counts bytes written.
type countWriter struct {
w io.Writer
n int64
}

func (cw *countWriter) Write(p []byte) (int, error) {
n, err := cw.w.Write(p)
cw.n += int64(n)
return n, err
}

// WriteToBuffer provides a function to get bytes.Buffer from the saved file,
// and it allocates space in memory. Be careful when the file size is large.
// Consider using WriteTo with a file for large password-protected files to
// reduce memory usage.
func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
buf := new(bytes.Buffer)
f.zip64Entries = nil // Reset before writing
zw := f.ZipWriter(buf)
f.configureZipCompression(zw)

if err := f.writeToZip(zw); err != nil {
_ = zw.Close()
Expand All @@ -147,7 +232,11 @@ func (f *File) WriteToBuffer() (*bytes.Buffer, error) {
if err := zw.Close(); err != nil {
return buf, err
}
err := f.writeZip64LFH(buf)
// Only perform ZIP64 fixup if we actually have ZIP64 entries
var err error
if len(f.zip64Entries) > 0 {
err = f.writeZip64LFH(buf)
}
if f.options != nil && f.options.Password != "" {
b, err := Encrypt(buf.Bytes(), f.options)
if err != nil {
Expand Down Expand Up @@ -180,13 +269,9 @@ func (f *File) writeToZip(zw ZipWriter) error {
if err != nil {
return err
}
var from io.Reader
if from, err = stream.rawData.Reader(); err != nil {
_ = stream.rawData.Close()
return err
}
written, err := io.Copy(fi, from)
written, err := stream.rawData.CopyTo(fi)
if err != nil {
_ = stream.rawData.Close()
return err
}
if written > math.MaxUint32 {
Expand Down Expand Up @@ -272,3 +357,80 @@ func (f *File) writeZip64LFH(buf *bytes.Buffer) error {
}
return nil
}

// writeZip64LFHFile performs ZIP64 local file header fixup on a file,
// rewriting the "version needed to extract" field to 45 (ZIP64) for every
// entry recorded in f.zip64Entries. The file is processed in fixed-size
// chunks so the whole archive never has to be loaded into memory; this is
// used on the encryption path where archives can be large.
//
// Bug fix versus the previous version: a header whose file name extended
// past a chunk boundary could be skipped entirely, because the next chunk
// restarted only a fixed 30 bytes back — potentially after the header's own
// start when the name was longer than 30 bytes. The next chunk now restarts
// exactly at the straddling header, so it is always re-read in full.
func (f *File) writeZip64LFHFile(file *os.File) error {
	if len(f.zip64Entries) == 0 {
		return nil
	}
	info, err := file.Stat()
	if err != nil {
		return err
	}
	fileSize := info.Size()

	// Local file header: signature "PK\x03\x04", a 30-byte fixed part (file
	// name length at offset 26, version-needed at offset 4), then the name.
	sig := []byte{0x50, 0x4b, 0x03, 0x04}
	const chunkSize = 1024 * 1024 // 1MB chunks
	buf := make([]byte, chunkSize)
	var offset int64

	for offset < fileSize {
		n, err := file.ReadAt(buf, offset)
		if err != nil && err != io.EOF {
			return err
		}
		if n == 0 {
			break
		}
		chunk := buf[:n]
		// Where the next chunk starts; pulled back to a header's own offset
		// if that header straddles the chunk boundary.
		next := offset + int64(n)

		searchOffset := 0
		for searchOffset < n {
			idx := bytes.Index(chunk[searchOffset:], sig)
			if idx == -1 {
				break
			}
			idx += searchOffset
			absoluteIdx := offset + int64(idx)

			// Fixed header part spans the boundary: reprocess from the
			// header start next iteration.
			if idx+30 > n {
				if absoluteIdx < next {
					next = absoluteIdx
				}
				break
			}
			filenameLen := int(binary.LittleEndian.Uint16(chunk[idx+26 : idx+28]))
			// File name spans the boundary: likewise reprocess from the
			// header start, however long the name is.
			if idx+30+filenameLen > n {
				if absoluteIdx < next {
					next = absoluteIdx
				}
				break
			}

			filename := string(chunk[idx+30 : idx+30+filenameLen])
			if inStrSlice(f.zip64Entries, filename, true) != -1 {
				// Patch the version-needed-to-extract field in place.
				versionBuf := make([]byte, 2)
				binary.LittleEndian.PutUint16(versionBuf, 45)
				if _, err := file.WriteAt(versionBuf, absoluteIdx+4); err != nil {
					return err
				}
			}
			searchOffset = idx + 1
		}

		if next <= offset {
			// A header at the very start of the chunk claims a name longer
			// than the remaining file: truncated trailing data. Stop rather
			// than loop forever.
			break
		}
		offset = next
	}

	return nil
}
Loading