Skip to content

Commit

Permalink
add option --iobuf (file r/w buffersize):
Browse files Browse the repository at this point in the history
the default (256 KiB) appears optimal in the most popular scenario
(linux host with storage on local physical disk, usually NVMe)

was previously a mix of 64 and 512 KiB;
now the same value is enforced everywhere

download-as-tar is now 20% faster with the default value
  • Loading branch information
9001 committed Mar 23, 2024
1 parent d30ae84 commit 2b24c50
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 23 deletions.
1 change: 1 addition & 0 deletions copyparty/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,7 @@ def add_fs(ap):
ap2 = ap.add_argument_group("filesystem options")
rm_re_def = "5/0.1" if ANYWIN else "0/0"
ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)")
ap2.add_argument("--iobuf", metavar="BYTES", type=int, default=256*1024, help="file I/O buffer-size; if your volumes are on a network drive, try increasing to \033[32m524288\033[0m or even \033[32m4194304\033[0m (and let me know if that improves your performance)")


def add_upload(ap):
Expand Down
2 changes: 1 addition & 1 deletion copyparty/ftpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def open(self, filename: str, mode: str) -> typing.IO[Any]:
raise FSE("Cannot open existing file for writing")

self.validpath(ap)
return open(fsenc(ap), mode)
return open(fsenc(ap), mode, self.args.iobuf)

def chdir(self, path: str) -> None:
nwd = join(self.cwd, path)
Expand Down
22 changes: 11 additions & 11 deletions copyparty/httpcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ def __init__(self, conn: "HttpConn") -> None:
self.parser: Optional[MultipartParser] = None
# end placeholders

self.bufsz = 1024 * 32
self.html_head = ""

def log(self, msg: str, c: Union[int, str] = 0) -> None:
Expand Down Expand Up @@ -1641,7 +1640,7 @@ def dump_to_file(self, is_put: bool) -> tuple[int, str, str, int, str, str]:
bos.makedirs(fdir)

open_ka: dict[str, Any] = {"fun": open}
open_a = ["wb", 512 * 1024]
open_a = ["wb", self.args.iobuf]

# user-request || config-force
if ("gz" in vfs.flags or "xz" in vfs.flags) and (
Expand Down Expand Up @@ -1900,7 +1899,7 @@ def bakflip(self, f: typing.BinaryIO, ofs: int, sz: int, sha: str) -> None:
f.seek(ofs)
with open(fp, "wb") as fo:
while nrem:
buf = f.read(min(nrem, 512 * 1024))
buf = f.read(min(nrem, self.args.iobuf))
if not buf:
break

Expand Down Expand Up @@ -2162,7 +2161,7 @@ def handle_post_binary(self) -> bool:
except:
pass

f = f or open(fsenc(path), "rb+", 512 * 1024)
f = f or open(fsenc(path), "rb+", self.args.iobuf)

try:
f.seek(cstart[0])
Expand All @@ -2185,7 +2184,8 @@ def handle_post_binary(self) -> bool:
)
ofs = 0
while ofs < chunksize:
bufsz = min(chunksize - ofs, 4 * 1024 * 1024)
bufsz = max(4 * 1024 * 1024, self.args.iobuf)
bufsz = min(chunksize - ofs, bufsz)
f.seek(cstart[0] + ofs)
buf = f.read(bufsz)
for wofs in cstart[1:]:
Expand Down Expand Up @@ -2482,7 +2482,7 @@ def handle_plain_upload(
v2 = lim.dfv - lim.dfl
max_sz = min(v1, v2) if v1 and v2 else v1 or v2

with ren_open(tnam, "wb", 512 * 1024, **open_args) as zfw:
with ren_open(tnam, "wb", self.args.iobuf, **open_args) as zfw:
f, tnam = zfw["orz"]
tabspath = os.path.join(fdir, tnam)
self.log("writing to {}".format(tabspath))
Expand Down Expand Up @@ -2778,7 +2778,7 @@ def handle_text_upload(self) -> bool:
if bos.path.exists(fp):
wunlink(self.log, fp, vfs.flags)

with open(fsenc(fp), "wb", 512 * 1024) as f:
with open(fsenc(fp), "wb", self.args.iobuf) as f:
sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp)

if lim:
Expand Down Expand Up @@ -3010,8 +3010,7 @@ def tx_file(self, req_path: str) -> bool:
upper = gzip_orig_sz(fs_path)
else:
open_func = open
# 512 kB is optimal for huge files, use 64k
open_args = [fsenc(fs_path), "rb", 64 * 1024]
open_args = [fsenc(fs_path), "rb", self.args.iobuf]
use_sendfile = (
# fmt: off
not self.tls
Expand Down Expand Up @@ -3146,6 +3145,7 @@ def tx_zip(

bgen = packer(
self.log,
self.args,
fgen,
utf8="utf" in uarg,
pre_crc="crc" in uarg,
Expand Down Expand Up @@ -3223,7 +3223,7 @@ def tx_md(self, vn: VFS, fs_path: str) -> bool:
sz_md = 0
lead = b""
fullfile = b""
for buf in yieldfile(fs_path):
for buf in yieldfile(fs_path, self.args.iobuf):
if sz_md < max_sz:
fullfile += buf
else:
Expand Down Expand Up @@ -3296,7 +3296,7 @@ def tx_md(self, vn: VFS, fs_path: str) -> bool:
if fullfile:
self.s.sendall(fullfile)
else:
for buf in yieldfile(fs_path):
for buf in yieldfile(fs_path, self.args.iobuf):
self.s.sendall(html_bescape(buf))

self.s.sendall(html[1])
Expand Down
6 changes: 4 additions & 2 deletions copyparty/star.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import print_function, unicode_literals

import argparse
import re
import stat
import tarfile
Expand Down Expand Up @@ -44,11 +45,12 @@ class StreamTar(StreamArc):
def __init__(
self,
log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None],
cmp: str = "",
**kwargs: Any
):
super(StreamTar, self).__init__(log, fgen)
super(StreamTar, self).__init__(log, args, fgen)

self.ci = 0
self.co = 0
Expand Down Expand Up @@ -126,7 +128,7 @@ def ser(self, f: dict[str, Any]) -> None:
inf.gid = 0

self.ci += inf.size
with open(fsenc(src), "rb", 512 * 1024) as fo:
with open(fsenc(src), "rb", self.args.iobuf) as fo:
self.tar.addfile(inf, fo)

def _gen(self) -> None:
Expand Down
3 changes: 3 additions & 0 deletions copyparty/sutil.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import print_function, unicode_literals

import argparse
import os
import tempfile
from datetime import datetime
Expand All @@ -20,10 +21,12 @@ class StreamArc(object):
def __init__(
self,
log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None],
**kwargs: Any
):
self.log = log
self.args = args
self.fgen = fgen
self.stopped = False

Expand Down
8 changes: 5 additions & 3 deletions copyparty/szip.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import print_function, unicode_literals

import argparse
import calendar
import stat
import time
Expand Down Expand Up @@ -218,12 +219,13 @@ class StreamZip(StreamArc):
def __init__(
self,
log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None],
utf8: bool = False,
pre_crc: bool = False,
**kwargs: Any
) -> None:
super(StreamZip, self).__init__(log, fgen)
super(StreamZip, self).__init__(log, args, fgen)

self.utf8 = utf8
self.pre_crc = pre_crc
Expand All @@ -248,7 +250,7 @@ def ser(self, f: dict[str, Any]) -> Generator[bytes, None, None]:

crc = 0
if self.pre_crc:
for buf in yieldfile(src):
for buf in yieldfile(src, self.args.iobuf):
crc = zlib.crc32(buf, crc)

crc &= 0xFFFFFFFF
Expand All @@ -257,7 +259,7 @@ def ser(self, f: dict[str, Any]) -> Generator[bytes, None, None]:
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf)

for buf in yieldfile(src):
for buf in yieldfile(src, self.args.iobuf):
if not self.pre_crc:
crc = zlib.crc32(buf, crc)

Expand Down
3 changes: 3 additions & 0 deletions copyparty/tftpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ def _open(self, vpath: str, mode: str, *a: Any, **ka: Any) -> Any:
if not self.args.tftp_nols and bos.path.isdir(ap):
return self._ls(vpath, "", 0, True)

if not a:
a = [self.args.iobuf]

return open(ap, mode, *a, **ka)

def _mkdir(self, vpath: str, *a) -> None:
Expand Down
2 changes: 1 addition & 1 deletion copyparty/up2k.py
Original file line number Diff line number Diff line change
Expand Up @@ -3920,7 +3920,7 @@ def _hashlist_from_file(self, path: str, prefix: str = "") -> list[str]:
csz = up2k_chunksize(fsz)
ret = []
suffix = " MB, {}".format(path)
with open(fsenc(path), "rb", 512 * 1024) as f:
with open(fsenc(path), "rb", self.args.iobuf) as f:
if self.mth and fsz >= 1024 * 512:
tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix)
ret = [x[0] for x in tlt]
Expand Down
7 changes: 4 additions & 3 deletions copyparty/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2361,10 +2361,11 @@ def build_netmap(csv: str):
return NetMap(ips, cidrs, True)


def yieldfile(fn: str) -> Generator[bytes, None, None]:
with open(fsenc(fn), "rb", 512 * 1024) as f:
def yieldfile(fn: str, bufsz: int) -> Generator[bytes, None, None]:
readsz = min(bufsz, 128 * 1024)
with open(fsenc(fn), "rb", bufsz) as f:
while True:
buf = f.read(128 * 1024)
buf = f.read(readsz)
if not buf:
break

Expand Down
5 changes: 3 additions & 2 deletions scripts/sfx.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,9 @@ def u8(gen):


def yieldfile(fn):
    """Yield the contents of file *fn* as a stream of 64 KiB chunks.

    The file is opened with a 256 KiB OS-level read buffer (4x the
    chunk size) so sequential reads stay ahead of the consumer; the
    iter(callable, sentinel) form stops cleanly at EOF (b"").
    """
    s = 64 * 1024  # chunk size handed to the consumer
    with open(fn, "rb", s * 4) as f:
        for block in iter(lambda: f.read(s), b""):
            yield block


Expand Down
1 change: 1 addition & 0 deletions tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def __init__(self, a=None, v=None, c=None, **ka0):
dbd="wal",
fk_salt="a" * 16,
idp_gsep=re.compile("[|:;+,]"),
iobuf=256 * 1024,
lang="eng",
log_badpwd=1,
logout=573,
Expand Down

0 comments on commit 2b24c50

Please sign in to comment.