From 05d2199ba27ac93eec8b9c052f41cda0df002272 Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sat, 14 Mar 2026 20:57:26 -0700 Subject: [PATCH 1/6] add new API for FsClient to support streamable writes --- src/clients/fs/FileSystemAccessApiFsClient.ts | 46 ++++++++++++- src/models/FileSystem.ts | 66 ++++++++++++++++++- src/models/FsClient.ts | 30 +++++++++ src/utils/git-list-pack.ts | 2 +- 4 files changed, 139 insertions(+), 5 deletions(-) diff --git a/src/clients/fs/FileSystemAccessApiFsClient.ts b/src/clients/fs/FileSystemAccessApiFsClient.ts index 8af885c..5ef1c2e 100644 --- a/src/clients/fs/FileSystemAccessApiFsClient.ts +++ b/src/clients/fs/FileSystemAccessApiFsClient.ts @@ -10,7 +10,8 @@ import { EncodingOptions, FsClient, RmOptions, - StatsLike + StatsLike, + WritableStreamHandle } from '../../' import { BasicStats } from './BasicStats' @@ -279,6 +280,44 @@ export class FileSystemAccessApiFsClient implements FsClient { throw new Error('Symlinks are not supported.') } + public async createWritableStream(path: string): Promise { + const { folderPath, leafSegment } = this.getFolderPathAndLeafSegment(path) + const targetDir = await this.getDirectoryByPath(folderPath) + + const fileHandle = await targetDir.getFileHandle(leafSegment, { create: true }) + const writable = await fileHandle.createWritable() + + return { + write: async (data: Uint8Array) => { + // FileSystemWritableFileStream.write() may write the entire underlying + // ArrayBuffer instead of just the TypedArray view when byteOffset > 0. + // This happens with Buffer.slice() which shares the backing memory. + // Create a clean copy when the view doesn't cover the full buffer. + if (data.byteOffset !== 0 || data.buffer.byteLength !== data.byteLength) { + data = new Uint8Array(data) + } + await writable.write(data) + }, + close: async () => { + await writable.close() + } + } + } + + public async readFileSlice(path: string, start: number, end: number): Promise { + const { folderPath, leafSegment } = this.getFolderPathAndLeafSegment(path) + const targetDir = await this.getDirectoryByPath(folderPath) + + const fileHandle = await this.getEntry<'file'>(targetDir, leafSegment, 'file') + if (!fileHandle) { + throw new ENOENT(path) + } + + const file = await fileHandle.getFile() + const blob = file.slice(start, end) + return new Uint8Array(await blob.arrayBuffer()) + } + /** * Return true if a entry exists, false if it doesn't exist. * Rethrows errors that aren't related to entry existance. @@ -388,13 +427,14 @@ export class FileSystemAccessApiFsClient implements FsClient { if (this.options.useSyncAccessHandle) { const accessHandle = await fileHandle.createSyncAccessHandle() - const dataArray = typeof data === 'string' ? this.textEncoder.encode(data) : data + const dataArray = typeof data === 'string' ? this.textEncoder.encode(data) : new Uint8Array(data) accessHandle.write(dataArray.buffer as ArrayBuffer, { at: 0 }) await accessHandle.flush() await accessHandle.close() } else { const writable = await fileHandle.createWritable() - await writable.write(typeof data === 'string' ? data : data.buffer as ArrayBuffer) + const writeData = typeof data === 'string' ? data : new Uint8Array(data) + await writable.write(writeData) await writable.close() } }, 'writeFile', name) diff --git a/src/models/FileSystem.ts b/src/models/FileSystem.ts index 397a7ec..fa282b8 100644 --- a/src/models/FileSystem.ts +++ b/src/models/FileSystem.ts @@ -2,7 +2,7 @@ import { Buffer } from 'buffer' import { compareStrings } from '../utils/compareStrings' import { dirname } from '../utils/dirname' -import { EncodingOptions, FsClient, RmOptions, WriteOptions } from './FsClient' +import { EncodingOptions, FsClient, RmOptions, WritableStreamHandle, WriteOptions } from './FsClient' /** * This is just a collection of helper functions really. At least that's how it started. @@ -167,4 +167,68 @@ export class FileSystem { async writelink(path: string, buffer: Buffer) { return this.fs.symlink(buffer.toString('utf8'), path) } + + /** + * Creates a writable stream for the given path, creating missing directories if needed. + * Returns null if the underlying FsClient does not support streaming writes. + */ + async createWritableStream(path: string): Promise { + if (!this.fs.createWritableStream) { + return null + } + + try { + return await this.fs.createWritableStream(path) + } catch (err) { + await this.mkdir(dirname(path)) + return await this.fs.createWritableStream(path) + } + } + + /** + * Rename a file or directory. + */ + async rename(oldPath: string, newPath: string) { + await this.fs.rename(oldPath, newPath) + } + + /** + * Read a slice of a file [start, end). Returns null if not supported by the underlying FS. + */ + async readFileSlice(path: string, start: number, end: number): Promise { + if (!this.fs.readFileSlice) { + return null + } + const data = await this.fs.readFileSlice(path, start, end) + return Buffer.from(data) + } + + /** + * Returns true if the underlying FS supports reading file slices. + */ + get supportsFileSlice(): boolean { + return !!this.fs.readFileSlice + } + + async stat(path: string) { + return this.fs.stat(path) + } + + /** + * Returns an async iterable that reads a file in fixed-size chunks. + * Only call when `supportsFileSlice` is true. + */ + async *readFileChunks(path: string, chunkSize: number): AsyncIterableIterator { + if (!this.fs.readFileSlice) { + return + } + const stat = await this.fs.stat(path) + let offset = 0 + while (offset < stat.size) { + const end = Math.min(offset + chunkSize, stat.size) + const data = await this.fs.readFileSlice(path, offset, end) + yield Buffer.from(data) + offset = end + } + } } diff --git a/src/models/FsClient.ts b/src/models/FsClient.ts index b90d5eb..8bdaa3f 100644 --- a/src/models/FsClient.ts +++ b/src/models/FsClient.ts @@ -86,6 +86,18 @@ export type StatsLike = Stats & { isSymbolicLink(): boolean } +/** + * A handle for streaming writes to a file. + * Returned by {@link FsClient.createWritableStream} when supported. + * @group FsClient + */ +export type WritableStreamHandle = { + /** Write a chunk of data to the stream. */ + write(data: Uint8Array): Promise + /** Close the stream, flushing all buffered data. */ + close(): Promise +} + /** * An interface that abstracts file system operations. * @@ -198,6 +210,24 @@ export interface FsClient { * @throws {@link API.ENOENT} */ symlink(target: string, path: string): Promise + + /** + * Creates a writable stream for the given file path. + * This enables streaming writes for large files without buffering the entire content in memory. + * + * This method is optional. If not implemented, callers should fall back to buffering + * and using {@link writeFile}. + */ + createWritableStream?(path: string): Promise + + /** + * Reads a slice of a file from the given byte range [start, end). + * This enables reading portions of large files without loading them entirely into memory. + * + * This method is optional. If not implemented, callers should fall back to + * reading the entire file with {@link readFile}. + */ + readFileSlice?(path: string, start: number, end: number): Promise } /** diff --git a/src/utils/git-list-pack.ts b/src/utils/git-list-pack.ts index 0c6b8ae..8dda4a9 100644 --- a/src/utils/git-list-pack.ts +++ b/src/utils/git-list-pack.ts @@ -19,7 +19,7 @@ type Data = { } /** @internal */ -export async function listpack(stream: Buffer[], onData: (data: Data) => Promise) { +export async function listpack(stream: Uint8Array[] | AsyncIterableIterator, onData: (data: Data) => Promise) { const reader = new StreamReader(stream) const PACKBuff = (await reader.read(4))! const PACK = PACKBuff.toString('utf8') From 474f28a45607de05737f3ddccb9b20f1cb75f4d1 Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sat, 14 Mar 2026 23:55:33 -0700 Subject: [PATCH 2/6] write packfile directly into file, without collecting data into memory first --- src/commands/fetch.ts | 127 ++++++++++++++++++++++---- src/models/GitPackIndex.ts | 147 ++++++++++++++++++++++++++++++- src/utils/writePackfileStream.ts | 59 +++++++++++++ 3 files changed, 315 insertions(+), 18 deletions(-) create mode 100644 src/utils/writePackfileStream.ts diff --git a/src/commands/fetch.ts b/src/commands/fetch.ts index 0000fae..fca7e2f 100644 --- a/src/commands/fetch.ts +++ b/src/commands/fetch.ts @@ -11,6 +11,7 @@ import { import { Buffer } from 'buffer' import { Cache } from '../models/Cache' import { FileSystem } from '../models/FileSystem' +import { WritableStreamHandle } from '../models/FsClient' import { GitCommit } from '../models/GitCommit' import { GitConfigManager } from '../managers/GitConfigManager' import { GitPackIndex } from '../models/GitPackIndex' @@ -23,6 +24,7 @@ import { _currentBranch } from '../commands/currentBranch' import { abbreviateRef } from '../utils/abbreviateRef' import { collect } from '../utils/collect' import { emptyPackfile } from '../utils/emptyPackfile' +import { writePackfileStream } from '../utils/writePackfileStream' import { filterCapabilities } from '../utils/filterCapabilities' import { forAwait } from '../utils/forAwait' import { getGitClientAgent } from '../utils/pkg' @@ -366,9 +368,6 @@ export async function _fetch({ }) } - const packfile = Buffer.from(await collect(response.packfile)) - const packfileSha = packfile.slice(-20).toString('hex') - const res: FetchResult = { defaultBranch: HEAD ?? null, fetchHead: FETCH_HEAD.oid, @@ -383,24 +382,120 @@ export async function _fetch({ res.pruned = pruned } - // This is a quick fix for the empty .git/objects/pack/pack-.pack file error, - // which due to the way `git-list-pack` works causes the program to hang when it tries to read it. - // TODO: Longer term, we should actually: - // a) NOT concatenate the entire packfile into memory (line 78), - // b) compute the SHA of the stream except for the last 20 bytes, using the same library used in push.ts, and - // c) compare the computed SHA with the last 20 bytes of the stream before saving to disk, and throwing a "packfile got corrupted during download" error if the SHA doesn't match. - if (packfileSha !== '' && !emptyPackfile(packfile)) { - res.packfile = `objects/pack/pack-${packfileSha}.pack` - const fullpath = join(gitdir, res.packfile) - await fs.write(fullpath, packfile) - const getExternalRefDelta = (oid: string) => readObject({ fs, cache, gitdir, oid }) + const tempPackPath = join(gitdir, 'objects/pack/pack-temp.pack') + const writableStream = await fs.createWritableStream(tempPackPath) + + if (writableStream) { + res.packfile = await processPackfileStreaming({ + fs, cache, gitdir, writableStream, tempPackPath, + packfile: response.packfile, onProgress, + }) + } else { + res.packfile = await processPackfileInMemory({ + fs, cache, gitdir, + packfile: response.packfile, onProgress, + }) + } + + return res +} + +/** + * Streaming path: write packfile chunks directly to FS without accumulating in memory. + * Returns the packfile path (e.g. `objects/pack/pack-.pack`) or undefined if empty. + */ +async function processPackfileStreaming({ + fs, cache, gitdir, writableStream, tempPackPath, packfile, onProgress, +}: { + fs: FileSystem + cache: Cache + gitdir: string + writableStream: WritableStreamHandle + tempPackPath: string + packfile: Uint8Array[] | AsyncIterableIterator + onProgress?: ProgressCallback +}): Promise { + const { packfileSha, isEmpty, totalSize } = await writePackfileStream(packfile, writableStream) + await writableStream.close() + + if (packfileSha === '' || isEmpty) { + await fs.rm(tempPackPath) + return undefined + } + + const packfilePath = `objects/pack/pack-${packfileSha}.pack` + const fullpath = join(gitdir, packfilePath) + const getExternalRefDelta = (oid: string) => readObject({ fs, cache, gitdir, oid }) + + if (fs.supportsFileSlice) { + // File-backed pack index: never loads entire packfile into memory. + const CHUNK_SIZE = 1024 * 1024 + const idx = await GitPackIndex.fromPackFile({ + readFileSlice: (start: number, end: number) => fs.readFileSlice(tempPackPath, start, end) as Promise, + readFileChunks: () => fs.readFileChunks(tempPackPath, CHUNK_SIZE), + packfileSha, + totalSize, + getExternalRefDelta, + onProgress, + }) + await fs.write(fullpath.replace(/\.pack$/, '.idx'), await idx.toBuffer()) + // Stream-copy pack to final path to avoid loading entire file (OPFS rename reads whole file). + const finalWritable = await fs.createWritableStream(fullpath) + if (finalWritable) { + for await (const chunk of fs.readFileChunks(tempPackPath, CHUNK_SIZE)) { + await finalWritable.write(chunk) + } + await finalWritable.close() + await fs.rm(tempPackPath) + } else { + await fs.rename(tempPackPath, fullpath) + } + } else { + // readFileSlice not available: read entire file back for index creation (1x memory). + const packfileData = (await fs.read(tempPackPath)) as Buffer const idx = await GitPackIndex.fromPack({ - pack: packfile, + pack: packfileData, getExternalRefDelta, onProgress, }) + await fs.write(fullpath, packfileData) await fs.write(fullpath.replace(/\.pack$/, '.idx'), await idx.toBuffer()) + await fs.rm(tempPackPath) } - return res + return packfilePath +} + +/** + * Fallback path: accumulate the entire packfile in memory (existing behavior). + * Returns the packfile path or undefined if empty. + */ +async function processPackfileInMemory({ + fs, cache, gitdir, packfile, onProgress, +}: { + fs: FileSystem + cache: Cache + gitdir: string + packfile: Uint8Array[] | AsyncIterableIterator + onProgress?: ProgressCallback +}): Promise { + const pack = Buffer.from(await collect(packfile)) + const packfileSha = pack.slice(-20).toString('hex') + + if (packfileSha === '' || emptyPackfile(pack)) { + return undefined + } + + const packfilePath = `objects/pack/pack-${packfileSha}.pack` + const fullpath = join(gitdir, packfilePath) + await fs.write(fullpath, pack) + const getExternalRefDelta = (oid: string) => readObject({ fs, cache, gitdir, oid }) + const idx = await GitPackIndex.fromPack({ + pack, + getExternalRefDelta, + onProgress, + }) + await fs.write(fullpath.replace(/\.pack$/, '.idx'), await idx.toBuffer()) + + return packfilePath } diff --git a/src/models/GitPackIndex.ts b/src/models/GitPackIndex.ts index 29727e3..7a89197 100644 --- a/src/models/GitPackIndex.ts +++ b/src/models/GitPackIndex.ts @@ -67,6 +67,8 @@ export class GitPackIndex { private readDepth: number = 0 private externalReadDepth: number = 0 private offsetCache: {[key: number]: { type: string, object: Buffer }} + private packSliceReader?: (start: number, end: number) => Promise + private objectEndOffsets?: Map constructor(stuff: GitPackIndexParams) { this.pack = stuff.pack @@ -79,6 +81,18 @@ export class GitPackIndex { this.offsetCache = {} } + /** + * Enable file-backed reading so that readSlice reads from disk + * instead of requiring the entire packfile in memory. + */ + enableFileBackedReads( + sliceReader: (start: number, end: number) => Promise, + endOffsets: Map + ) { + this.packSliceReader = sliceReader + this.objectEndOffsets = endOffsets + } + static async fromIdx({ idx, getExternalRefDelta }: { idx: Buffer, getExternalRefDelta?: GetExternalRefDelta }) { const reader = new BufferCursor(idx) const magic = reader.slice(4).toString('hex') @@ -246,6 +260,127 @@ export class GitPackIndex { return p } + /** + * Build a pack index from an on-disk packfile using streaming reads. + * Unlike `fromPack`, this never loads the entire packfile into memory. + * Peak memory is proportional to the largest individual object, not the file size. + */ + static async fromPackFile( + { readFileSlice, readFileChunks, packfileSha, totalSize, getExternalRefDelta, onProgress }: + { + readFileSlice: (start: number, end: number) => Promise, + readFileChunks: () => AsyncIterableIterator, + packfileSha: string, + totalSize: number, + getExternalRefDelta?: GetExternalRefDelta, + onProgress?: ProgressCallback + }) { + const listpackTypes: {[key: number]: string} = { + 1: 'commit', + 2: 'tree', + 3: 'blob', + 4: 'tag', + 6: 'ofs-delta', + 7: 'ref-delta', + } + const offsetToObject: { + [key: number]: { type: string, offset: number, oid?: string, end?: number, crc?: number } + } = {} + + const hashes: string[] = [] + const crcs: {[key: string]: number} = {} + const offsets = new Map() + let totalObjectCount: number | null = null + let lastPercent: number | null = null + + // Pass 1: stream file in chunks to discover object offsets + await listpack(readFileChunks(), async ({ data, type: typeNum, reference, offset, num }) => { + if (totalObjectCount === null) totalObjectCount = num + const percent = Math.floor( + ((totalObjectCount - num) * 100) / totalObjectCount + ) + if (percent !== lastPercent) { + if (onProgress) { + await onProgress({ + phase: 'Receiving objects', + loaded: totalObjectCount - num, + total: totalObjectCount, + }) + } + } + lastPercent = percent + const type = listpackTypes[typeNum] + + if (['commit', 'tree', 'blob', 'tag'].includes(type)) { + offsetToObject[offset] = { type, offset } + } else if (type === 'ofs-delta') { + offsetToObject[offset] = { type, offset } + } else if (type === 'ref-delta') { + offsetToObject[offset] = { type, offset } + } + }) + + // Compute CRCs by reading each object's slice from file + const offsetArray = Object.keys(offsetToObject).map(Number) + const objectEndOffsets = new Map() + for (const [i, start] of offsetArray.entries()) { + const end = i + 1 === offsetArray.length ? totalSize - 20 : offsetArray[i + 1] + const slice = await readFileSlice(start, end) + const crc = crc32.buf(slice) >>> 0 + offsetToObject[start].end = end + offsetToObject[start].crc = crc + objectEndOffsets.set(start, end) + } + + // Create index with file-backed slice reader for Pass 2 + const p = new GitPackIndex({ + pack: null, + packfileSha, + crcs, + hashes, + offsets, + getExternalRefDelta, + }) + p.enableFileBackedReads(readFileSlice, objectEndOffsets) + + // Pass 2: resolve deltas and compute OIDs using file-backed reads + lastPercent = null + let count = 0 + for (let offsetStr in offsetToObject) { + const offset = Number(offsetStr) + const percent = Math.floor((count * 100) / totalObjectCount!) + if (percent !== lastPercent) { + if (onProgress) { + await onProgress({ + phase: 'Resolving deltas', + loaded: count, + total: totalObjectCount!, + }) + } + } + count++ + lastPercent = percent + + const o = offsetToObject[offset] + if (o.oid) continue + try { + p.readDepth = 0 + p.externalReadDepth = 0 + const { type, object } = await p.readSlice({ start: offset }) + const oid = await shasum(GitObject.wrap({ type, object })) + o.oid = oid + hashes.push(oid) + offsets.set(oid, offset) + crcs[oid] = o.crc! + } catch (err) { + continue + } + } + + hashes.sort() + return p + } + async toBuffer() { const buffers: Buffer[] = [] const write = (str: string, encoding: BufferEncoding) => { @@ -325,12 +460,20 @@ export class GitPackIndex { 0b1100000: 'ofs_delta', 0b1110000: 'ref_delta', } - if (!this.pack) { + let raw: Buffer + if (this.packSliceReader && this.objectEndOffsets) { + const end = this.objectEndOffsets.get(start) + if (end === undefined) { + throw new InternalError(`Unknown pack object boundary for offset ${start}`) + } + raw = await this.packSliceReader(start, end) + } else if (this.pack) { + raw = (await this.pack).slice(start) + } else { throw new InternalError( 'Tried to read from a GitPackIndex with no packfile loaded into memory' ) } - const raw = (await this.pack).slice(start) const reader = new BufferCursor(raw) const byte = reader.readUInt8() // Object type is encoded in bits 654 diff --git a/src/utils/writePackfileStream.ts b/src/utils/writePackfileStream.ts new file mode 100644 index 0000000..20844a8 --- /dev/null +++ b/src/utils/writePackfileStream.ts @@ -0,0 +1,59 @@ +import { Buffer } from 'buffer' +import { WritableStreamHandle } from '../models/FsClient' + +type WritePackfileStreamResult = { + /** The packfile SHA extracted from the last 20 bytes of the stream. */ + packfileSha: string + /** Whether the packfile is empty (PACK v2 with 0 objects). */ + isEmpty: boolean + /** Total size of the packfile in bytes. */ + totalSize: number +} + +const EMPTY_PACKFILE_HEADER = '5041434b' + '00000002' + '00000000' + +/** + * Streams an async iterable to a writable stream while capturing + * the packfile SHA (last 20 bytes) and detecting empty packfiles + * (by inspecting the first 12 bytes). + * + * NOTE: The caller is responsible for closing the writable stream. + * + * @internal + */ +export async function writePackfileStream( + iterable: Uint8Array[] | AsyncIterableIterator, + writable: WritableStreamHandle +): Promise { + const headerBytes = Buffer.alloc(12) + let headerBytesCollected = 0 + let totalSize = 0 + + // Rolling buffer to capture the last 20 bytes (packfile SHA) + let tailBuffer = Buffer.alloc(0) + + for await (const chunk of iterable) { + // Capture first 12 bytes for empty packfile detection + if (headerBytesCollected < 12) { + const needed = 12 - headerBytesCollected + const toCopy = Math.min(needed, chunk.byteLength) + Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength).copy(headerBytes, headerBytesCollected, 0, toCopy) + headerBytesCollected += toCopy + } + + await writable.write(chunk) + totalSize += chunk.byteLength + + // Maintain a rolling buffer of the last 20 bytes + if (tailBuffer.byteLength + chunk.byteLength <= 20) { + tailBuffer = Buffer.concat([tailBuffer, Buffer.from(chunk)]) + } else { + tailBuffer = Buffer.concat([tailBuffer, Buffer.from(chunk)]).slice(-20) + } + } + + const packfileSha = tailBuffer.toString('hex') + const isEmpty = totalSize <= 32 || headerBytes.slice(0, 12).toString('hex') === EMPTY_PACKFILE_HEADER + + return { packfileSha, isEmpty, totalSize } +} From 41520929969e2dc244bf2c50c78ab9dc908ee59d Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sun, 15 Mar 2026 11:59:28 -0700 Subject: [PATCH 3/6] optimize code for disk reading for packfile instead of loading everything into memory --- src/commands/checkout.ts | 125 ++++++++++++++++---------------- src/storage/readObjectPacked.ts | 23 +++++- 2 files changed, 84 insertions(+), 64 deletions(-) diff --git a/src/commands/checkout.ts b/src/commands/checkout.ts index 3aa8cf5..3b781b5 100644 --- a/src/commands/checkout.ts +++ b/src/commands/checkout.ts @@ -219,71 +219,70 @@ export async function _checkout({ ) await GitIndexManager.acquire({ fs, gitdir, cache }, async function(index) { - await Promise.all( - ops - .filter( - ([method]) => - method === 'create' || - method === 'create-index' || - method === 'update' || - method === 'mkdir-index' - ) - .map(async function([method, fullpath, oid, mode, chmod]) { - const modeNum = Number(mode) - const filepath = `${dir}/${fullpath}` - try { - if (method !== 'create-index' && method !== 'mkdir-index') { - const { object } = await readObject({ fs, cache, gitdir, oid }) - if (chmod) { - // Note: the mode option of fs.write only works when creating files, - // not updating them. Since the `fs` plugin doesn't expose `chmod` this - // is our only option. - await fs.rm(filepath) - } - if (modeNum === 0o100644) { - // regular file - await fs.write(filepath, object) - } else if (modeNum === 0o100755) { - // executable file - await fs.write(filepath, object, { mode: 0o777 }) - } else if (modeNum === 0o120000) { - // symlink - await fs.writelink(filepath, object) - } else { - throw new InternalError( - `Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}` - ) - } - } - - const stats = (await fs.lstat(filepath))! - // We can't trust the executable bit returned by lstat on Windows, - // so we need to preserve this value from the TREE. - // TODO: Figure out how git handles this internally. - if (modeNum === 0o100755) { - stats.mode = 0o755 - } - // Submodules are present in the git index but use a unique mode different from trees - if (method === 'mkdir-index') { - stats.mode = 0o160000 - } - index.insert({ - filepath: fullpath, - stats, - oid, - }) - if (onProgress) { - await onProgress({ - phase: 'Updating workdir', - loaded: ++count, - total, - }) - } - } catch (e) { - console.log(e) + const writeOps = ops.filter( + ([method]) => + method === 'create' || + method === 'create-index' || + method === 'update' || + method === 'mkdir-index' + ) + // Process files sequentially to avoid holding all decompressed blobs in memory at once. + // With parallel Promise.all, a 1GB pack with large blobs would OOM on memory-constrained devices. + for (const [method, fullpath, oid, mode, chmod] of writeOps) { + const modeNum = Number(mode) + const filepath = `${dir}/${fullpath}` + try { + if (method !== 'create-index' && method !== 'mkdir-index') { + const { object } = await readObject({ fs, cache, gitdir, oid }) + if (chmod) { + // Note: the mode option of fs.write only works when creating files, + // not updating them. Since the `fs` plugin doesn't expose `chmod` this + // is our only option. + await fs.rm(filepath) } + if (modeNum === 0o100644) { + // regular file + await fs.write(filepath, object) + } else if (modeNum === 0o100755) { + // executable file + await fs.write(filepath, object, { mode: 0o777 }) + } else if (modeNum === 0o120000) { + // symlink + await fs.writelink(filepath, object) + } else { + throw new InternalError( + `Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}` + ) + } + } + + const stats = (await fs.lstat(filepath))! + // We can't trust the executable bit returned by lstat on Windows, + // so we need to preserve this value from the TREE. + // TODO: Figure out how git handles this internally. + if (modeNum === 0o100755) { + stats.mode = 0o755 + } + // Submodules are present in the git index but use a unique mode different from trees + if (method === 'mkdir-index') { + stats.mode = 0o160000 + } + index.insert({ + filepath: fullpath, + stats, + oid, }) - ) + if (onProgress) { + await onProgress({ + phase: 'Updating workdir', + loaded: ++count, + total, + }) + } + } catch (e) { + console.log(e) + } + } }) } diff --git a/src/storage/readObjectPacked.ts b/src/storage/readObjectPacked.ts index 8914588..b47b9e5 100644 --- a/src/storage/readObjectPacked.ts +++ b/src/storage/readObjectPacked.ts @@ -1,3 +1,5 @@ +import { Buffer } from 'buffer' + import { FileSystem } from '../models/FileSystem' import { Cache } from '../models/Cache' import { readPackIndex } from '../storage/readPackIndex' @@ -33,7 +35,26 @@ export async function readObjectPacked({ // Get the resolved git object from the packfile if (!p.pack) { const packFile = indexFile.replace(/idx$/, 'pack') - p.pack = fs.read(packFile) as Promise + + if (fs.supportsFileSlice) { + // File-backed reading: read object slices from disk instead of loading entire packfile. + // This is critical for large packfiles on memory-constrained devices (e.g. iOS). + const endOffsets = new Map() + const sortedOffsets = [...p.offsets.values()].sort((a, b) => a - b) + // We need to know the packfile size for the last object's end offset. + const stat = await fs.stat(packFile) + for (let i = 0; i < sortedOffsets.length; i++) { + const start = sortedOffsets[i] + const end = i + 1 < sortedOffsets.length ? sortedOffsets[i + 1] : stat.size - 20 + endOffsets.set(start, end) + } + p.enableFileBackedReads( + (start: number, end: number) => fs.readFileSlice(packFile, start, end) as Promise, + endOffsets + ) + } else { + p.pack = fs.read(packFile) as Promise + } } const result = await p.read({ oid }) From 33d4c128a427dc6f81706b5b6d808f31fc6426ed Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sun, 15 Mar 2026 12:16:06 -0700 Subject: [PATCH 4/6] check out files in parallel in batches --- src/commands/checkout.ts | 108 ++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/src/commands/checkout.ts b/src/commands/checkout.ts index 3b781b5..7b11306 100644 --- a/src/commands/checkout.ts +++ b/src/commands/checkout.ts @@ -226,62 +226,66 @@ export async function _checkout({ method === 'update' || method === 'mkdir-index' ) - // Process files sequentially to avoid holding all decompressed blobs in memory at once. - // With parallel Promise.all, a 1GB pack with large blobs would OOM on memory-constrained devices. - for (const [method, fullpath, oid, mode, chmod] of writeOps) { - const modeNum = Number(mode) - const filepath = `${dir}/${fullpath}` - try { - if (method !== 'create-index' && method !== 'mkdir-index') { - const { object } = await readObject({ fs, cache, gitdir, oid }) - if (chmod) { - // Note: the mode option of fs.write only works when creating files, - // not updating them. Since the `fs` plugin doesn't expose `chmod` this - // is our only option. - await fs.rm(filepath) - } - if (modeNum === 0o100644) { - // regular file - await fs.write(filepath, object) - } else if (modeNum === 0o100755) { - // executable file - await fs.write(filepath, object, { mode: 0o777 }) - } else if (modeNum === 0o120000) { - // symlink - await fs.writelink(filepath, object) - } else { - throw new InternalError( - `Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}` - ) + // Process files in small batches to balance I/O concurrency with memory usage. + // Full Promise.all would OOM on large packfiles; purely sequential loses I/O overlap. + const BATCH_SIZE = 5 + for (let i = 0; i < writeOps.length; i += BATCH_SIZE) { + const batch = writeOps.slice(i, i + BATCH_SIZE) + await Promise.all(batch.map(async ([method, fullpath, oid, mode, chmod]) => { + const modeNum = Number(mode) + const filepath = `${dir}/${fullpath}` + try { + if (method !== 'create-index' && method !== 'mkdir-index') { + const { object } = await readObject({ fs, cache, gitdir, oid }) + if (chmod) { + // Note: the mode option of fs.write only works when creating files, + // not updating them. Since the `fs` plugin doesn't expose `chmod` this + // is our only option. + await fs.rm(filepath) + } + if (modeNum === 0o100644) { + // regular file + await fs.write(filepath, object) + } else if (modeNum === 0o100755) { + // executable file + await fs.write(filepath, object, { mode: 0o777 }) + } else if (modeNum === 0o120000) { + // symlink + await fs.writelink(filepath, object) + } else { + throw new InternalError( + `Invalid mode 0o${modeNum.toString(8)} detected in blob ${oid}` + ) + } } - } - const stats = (await fs.lstat(filepath))! - // We can't trust the executable bit returned by lstat on Windows, - // so we need to preserve this value from the TREE. - // TODO: Figure out how git handles this internally. - if (modeNum === 0o100755) { - stats.mode = 0o755 - } - // Submodules are present in the git index but use a unique mode different from trees - if (method === 'mkdir-index') { - stats.mode = 0o160000 - } - index.insert({ - filepath: fullpath, - stats, - oid, - }) - if (onProgress) { - await onProgress({ - phase: 'Updating workdir', - loaded: ++count, - total, + const stats = (await fs.lstat(filepath))! + // We can't trust the executable bit returned by lstat on Windows, + // so we need to preserve this value from the TREE. + // TODO: Figure out how git handles this internally. + if (modeNum === 0o100755) { + stats.mode = 0o755 + } + // Submodules are present in the git index but use a unique mode different from trees + if (method === 'mkdir-index') { + stats.mode = 0o160000 + } + index.insert({ + filepath: fullpath, + stats, + oid, }) + if (onProgress) { + await onProgress({ + phase: 'Updating workdir', + loaded: ++count, + total, + }) + } + } catch (e) { + console.log(e) } - } catch (e) { - console.log(e) - } + })) } }) } From 9b45b16f032d7aeac8d0c5a76090515b66f2bb30 Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sun, 15 Mar 2026 12:32:28 -0700 Subject: [PATCH 5/6] increase batch size on checkout to 10 --- src/commands/checkout.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/checkout.ts b/src/commands/checkout.ts index 7b11306..68bc9d2 100644 --- a/src/commands/checkout.ts +++ b/src/commands/checkout.ts @@ -228,7 +228,7 @@ export async function _checkout({ ) // Process files in small batches to balance I/O concurrency with memory usage. // Full Promise.all would OOM on large packfiles; purely sequential loses I/O overlap. - const BATCH_SIZE = 5 + const BATCH_SIZE = 10 for (let i = 0; i < writeOps.length; i += BATCH_SIZE) { const batch = writeOps.slice(i, i + BATCH_SIZE) await Promise.all(batch.map(async ([method, fullpath, oid, mode, chmod]) => { From 8d500fc18e90da1f5e6e08d76d2613b4d08b1af3 Mon Sep 17 00:00:00 2001 From: Alex Titarenko Date: Sun, 15 Mar 2026 13:20:41 -0700 Subject: [PATCH 6/6] improve FileSystemAccessApiFsClient.rename function --- src/clients/fs/FileSystemAccessApiFsClient.ts | 38 +++++++++++++++++-- src/commands/fetch.ts | 12 +----- src/types/FileSystemAccessApi.d.ts | 2 + 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/clients/fs/FileSystemAccessApiFsClient.ts b/src/clients/fs/FileSystemAccessApiFsClient.ts index 5ef1c2e..9baa942 100644 --- a/src/clients/fs/FileSystemAccessApiFsClient.ts +++ b/src/clients/fs/FileSystemAccessApiFsClient.ts @@ -250,9 +250,7 @@ export class FileSystemAccessApiFsClient implements FsClient { const oldFilepathStat = await this.stat(oldPath) if (oldFilepathStat.isFile()) { - const data = await this.readFile(oldPath) - await this.writeFile(newPath, data) - await this.rm(oldPath) + await this.renameFile(oldPath, newPath) } else if (oldFilepathStat.isDirectory()) { await this.mkdir(newPath) const sourceFolder = await this.getDirectoryByPath(oldPath) @@ -264,6 +262,40 @@ export class FileSystemAccessApiFsClient implements FsClient { } } + private async renameFile(oldPath: string, newPath: string): Promise { + const { folderPath: oldFolder, leafSegment: oldName } = this.getFolderPathAndLeafSegment(oldPath) + const { folderPath: newFolder, leafSegment: newName } = this.getFolderPathAndLeafSegment(newPath) + + const oldDir = await this.getDirectoryByPath(oldFolder) + const fileHandle = await this.getEntry<'file'>(oldDir, oldName, 'file') + if (!fileHandle) { + throw new ENOENT(oldPath) + } + + // Strategy 1: Native move() — zero-copy rename, supported in Chrome and Safari OPFS. + // Always pass (directory, newName) form — Safari doesn't support the move(newName) shorthand. + if (typeof fileHandle.move === 'function') { + const newDir = oldFolder === newFolder ? oldDir : await this.getDirectoryByPath(newFolder) + await fileHandle.move(newDir, newName) + return + } + + // Strategy 2: Streaming copy — read in chunks, write via stream. Never loads entire file. + const CHUNK_SIZE = 1024 * 1024 + const file = await fileHandle.getFile() + const writable = await this.createWritableStream(newPath) + let offset = 0 + while (offset < file.size) { + const end = Math.min(offset + CHUNK_SIZE, file.size) + const blob = file.slice(offset, end) + const chunk = new Uint8Array(await blob.arrayBuffer()) + await writable.write(chunk) + offset = end + } + await writable.close() + await this.rm(oldPath) + } + /** * Symlinks are not supported in the current implementation. * @throws Error: symlinks are not supported. diff --git a/src/commands/fetch.ts b/src/commands/fetch.ts index fca7e2f..d2befa2 100644 --- a/src/commands/fetch.ts +++ b/src/commands/fetch.ts @@ -439,17 +439,7 @@ async function processPackfileStreaming({ onProgress, }) await fs.write(fullpath.replace(/\.pack$/, '.idx'), await idx.toBuffer()) - // Stream-copy pack to final path to avoid loading entire file (OPFS rename reads whole file). - const finalWritable = await fs.createWritableStream(fullpath) - if (finalWritable) { - for await (const chunk of fs.readFileChunks(tempPackPath, CHUNK_SIZE)) { - await finalWritable.write(chunk) - } - await finalWritable.close() - await fs.rm(tempPackPath) - } else { - await fs.rename(tempPackPath, fullpath) - } + await fs.rename(tempPackPath, fullpath) } else { // readFileSlice not available: read entire file back for index creation (1x memory). const packfileData = (await fs.read(tempPackPath)) as Buffer diff --git a/src/types/FileSystemAccessApi.d.ts b/src/types/FileSystemAccessApi.d.ts index 9f9ae18..6390fc6 100644 --- a/src/types/FileSystemAccessApi.d.ts +++ b/src/types/FileSystemAccessApi.d.ts @@ -35,4 +35,6 @@ declare class FileSystemWritableFileStream extends WritableStream { declare interface FileSystemFileHandle extends FileSystemHandle { createWritable(options?: FileSystemCreateWritableOptions): Promise createSyncAccessHandle(): Promise + /** Rename or move a file. Not available in all browsers — feature-detect before calling. */ + move?(target: string | FileSystemDirectoryHandle, newName?: string): Promise }