Skip to content

Commit c09921b

Browse files
committed
Ext2FS: Don't hog FS lock while reading/writing inodes
There are two locks in the Ext2FS implementation: * The FS lock (Ext2FS::m_lock) This governs access to the superblock, block group descriptors, and the block & inode bitmap blocks. It's held while allocating or freeing blocks/inodes. * The inode lock (Ext2FSInode::m_lock) This governs access to the inode metadata, including the block list, and to the content data as well. It's held while doing basically anything with the inode. Once an on-disk block/inode is allocated, it logically belongs to the in-memory Inode object, so there's no need for the FS lock to be taken while manipulating them, the inode lock is all you need. This dramatically reduces the impact of disk I/O on path resolution and various other things that look at individual inodes.
1 parent c7c6372 commit c09921b

File tree

2 files changed

+46
-50
lines changed

2 files changed

+46
-50
lines changed

Kernel/FileSystem/Ext2FileSystem.cpp

Lines changed: 45 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -230,68 +230,68 @@ Ext2FS::BlockListShape Ext2FS::compute_block_list_shape(unsigned blocks) const
230230
return shape;
231231
}
232232

233-
KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e2inode, const Vector<BlockIndex>& blocks)
233+
KResult Ext2FSInode::flush_block_list()
234234
{
235235
LOCKER(m_lock);
236236

237-
if (blocks.is_empty()) {
238-
e2inode.i_blocks = 0;
239-
memset(e2inode.i_block, 0, sizeof(e2inode.i_block));
240-
write_ext2_inode(inode_index, e2inode);
237+
if (m_block_list.is_empty()) {
238+
m_raw_inode.i_blocks = 0;
239+
memset(m_raw_inode.i_block, 0, sizeof(m_raw_inode.i_block));
240+
fs().write_ext2_inode(index(), m_raw_inode);
241241
return KSuccess;
242242
}
243243

244244
// NOTE: There is a mismatch between i_blocks and blocks.size() since i_blocks includes meta blocks and blocks.size() does not.
245-
auto old_block_count = ceil_div(static_cast<size_t>(e2inode.i_size), block_size());
245+
auto old_block_count = ceil_div(static_cast<size_t>(m_raw_inode.i_size), fs().block_size());
246246

247-
auto old_shape = compute_block_list_shape(old_block_count);
248-
auto new_shape = compute_block_list_shape(blocks.size());
247+
auto old_shape = fs().compute_block_list_shape(old_block_count);
248+
auto new_shape = fs().compute_block_list_shape(m_block_list.size());
249249

250-
Vector<BlockIndex> new_meta_blocks;
250+
Vector<Ext2FS::BlockIndex> new_meta_blocks;
251251
if (new_shape.meta_blocks > old_shape.meta_blocks) {
252-
auto blocks_or_error = allocate_blocks(group_index_from_inode(inode_index), new_shape.meta_blocks - old_shape.meta_blocks);
252+
auto blocks_or_error = fs().allocate_blocks(fs().group_index_from_inode(index()), new_shape.meta_blocks - old_shape.meta_blocks);
253253
if (blocks_or_error.is_error())
254254
return blocks_or_error.error();
255255
new_meta_blocks = blocks_or_error.release_value();
256256
}
257257

258-
e2inode.i_blocks = (blocks.size() + new_shape.meta_blocks) * (block_size() / 512);
258+
m_raw_inode.i_blocks = (m_block_list.size() + new_shape.meta_blocks) * (fs().block_size() / 512);
259259

260260
bool inode_dirty = false;
261261

262262
unsigned output_block_index = 0;
263-
unsigned remaining_blocks = blocks.size();
263+
unsigned remaining_blocks = m_block_list.size();
264264
for (unsigned i = 0; i < new_shape.direct_blocks; ++i) {
265-
if (e2inode.i_block[i] != blocks[output_block_index])
265+
if (m_raw_inode.i_block[i] != m_block_list[output_block_index])
266266
inode_dirty = true;
267-
e2inode.i_block[i] = blocks[output_block_index].value();
267+
m_raw_inode.i_block[i] = m_block_list[output_block_index].value();
268268
++output_block_index;
269269
--remaining_blocks;
270270
}
271271
if (inode_dirty) {
272272
if constexpr (EXT2_DEBUG) {
273-
dbgln("Ext2FS: Writing {} direct block(s) to i_block array of inode {}", min((size_t)EXT2_NDIR_BLOCKS, blocks.size()), inode_index);
274-
for (size_t i = 0; i < min((size_t)EXT2_NDIR_BLOCKS, blocks.size()); ++i)
275-
dbgln(" + {}", blocks[i]);
273+
dbgln("Ext2FS: Writing {} direct block(s) to i_block array of inode {}", min((size_t)EXT2_NDIR_BLOCKS, m_block_list.size()), index());
274+
for (size_t i = 0; i < min((size_t)EXT2_NDIR_BLOCKS, m_block_list.size()); ++i)
275+
dbgln(" + {}", m_block_list[i]);
276276
}
277-
write_ext2_inode(inode_index, e2inode);
277+
fs().write_ext2_inode(index(), m_raw_inode);
278278
inode_dirty = false;
279279
}
280280

281281
if (!remaining_blocks)
282282
return KSuccess;
283283

284-
const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
284+
const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&fs().super_block());
285285

286-
bool ind_block_new = !e2inode.i_block[EXT2_IND_BLOCK];
286+
bool ind_block_new = !m_raw_inode.i_block[EXT2_IND_BLOCK];
287287
if (ind_block_new) {
288-
BlockIndex new_indirect_block = new_meta_blocks.take_last();
289-
if (e2inode.i_block[EXT2_IND_BLOCK] != new_indirect_block)
288+
Ext2FS::BlockIndex new_indirect_block = new_meta_blocks.take_last();
289+
if (m_raw_inode.i_block[EXT2_IND_BLOCK] != new_indirect_block)
290290
inode_dirty = true;
291-
e2inode.i_block[EXT2_IND_BLOCK] = new_indirect_block.value();
291+
m_raw_inode.i_block[EXT2_IND_BLOCK] = new_indirect_block.value();
292292
if (inode_dirty) {
293-
dbgln_if(EXT2_DEBUG, "Ext2FS: Adding the indirect block to i_block array of inode {}", inode_index);
294-
write_ext2_inode(inode_index, e2inode);
293+
dbgln_if(EXT2_DEBUG, "Ext2FS: Adding the indirect block to i_block array of inode {}", index());
294+
fs().write_ext2_inode(index(), m_raw_inode);
295295
inode_dirty = false;
296296
}
297297
}
@@ -301,19 +301,19 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
301301
remaining_blocks -= new_shape.indirect_blocks;
302302
output_block_index += new_shape.indirect_blocks;
303303
} else {
304-
auto block_contents = ByteBuffer::create_uninitialized(block_size());
304+
auto block_contents = ByteBuffer::create_uninitialized(fs().block_size());
305305
OutputMemoryStream stream { block_contents };
306306

307307
VERIFY(new_shape.indirect_blocks <= entries_per_block);
308308
for (unsigned i = 0; i < new_shape.indirect_blocks; ++i) {
309-
stream << blocks[output_block_index++].value();
309+
stream << m_block_list[output_block_index++].value();
310310
--remaining_blocks;
311311
}
312312

313313
stream.fill_to_end(0);
314314

315315
auto buffer = UserOrKernelBuffer::for_kernel_buffer(stream.data());
316-
auto result = write_block(e2inode.i_block[EXT2_IND_BLOCK], buffer, stream.size());
316+
auto result = fs().write_block(m_raw_inode.i_block[EXT2_IND_BLOCK], buffer, stream.size());
317317
if (result.is_error())
318318
return result;
319319
}
@@ -323,15 +323,15 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
323323

324324
bool dind_block_dirty = false;
325325

326-
bool dind_block_new = !e2inode.i_block[EXT2_DIND_BLOCK];
326+
bool dind_block_new = !m_raw_inode.i_block[EXT2_DIND_BLOCK];
327327
if (dind_block_new) {
328-
BlockIndex new_dindirect_block = new_meta_blocks.take_last();
329-
if (e2inode.i_block[EXT2_DIND_BLOCK] != new_dindirect_block)
328+
Ext2FS::BlockIndex new_dindirect_block = new_meta_blocks.take_last();
329+
if (m_raw_inode.i_block[EXT2_DIND_BLOCK] != new_dindirect_block)
330330
inode_dirty = true;
331-
e2inode.i_block[EXT2_DIND_BLOCK] = new_dindirect_block.value();
331+
m_raw_inode.i_block[EXT2_DIND_BLOCK] = new_dindirect_block.value();
332332
if (inode_dirty) {
333-
dbgln_if(EXT2_DEBUG, "Ext2FS: Adding the doubly-indirect block to i_block array of inode {}", inode_index);
334-
write_ext2_inode(inode_index, e2inode);
333+
dbgln_if(EXT2_DEBUG, "Ext2FS: Adding the doubly-indirect block to i_block array of inode {}", index());
334+
fs().write_ext2_inode(index(), m_raw_inode);
335335
inode_dirty = false;
336336
}
337337
}
@@ -343,13 +343,13 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
343343
} else {
344344
unsigned indirect_block_count = divide_rounded_up(new_shape.doubly_indirect_blocks, entries_per_block);
345345

346-
auto dind_block_contents = ByteBuffer::create_uninitialized(block_size());
346+
auto dind_block_contents = ByteBuffer::create_uninitialized(fs().block_size());
347347
if (dind_block_new) {
348348
dind_block_contents.zero_fill();
349349
dind_block_dirty = true;
350350
} else {
351351
auto buffer = UserOrKernelBuffer::for_kernel_buffer(dind_block_contents.data());
352-
auto result = read_block(e2inode.i_block[EXT2_DIND_BLOCK], &buffer, block_size());
352+
auto result = fs().read_block(m_raw_inode.i_block[EXT2_DIND_BLOCK], &buffer, fs().block_size());
353353
if (result.is_error()) {
354354
dbgln("Ext2FS: write_block_list_for_inode had error: {}", result.error());
355355
return result;
@@ -361,7 +361,7 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
361361
for (unsigned i = 0; i < indirect_block_count; ++i) {
362362
bool ind_block_dirty = false;
363363

364-
BlockIndex indirect_block_index = dind_block_as_pointers[i];
364+
Ext2FS::BlockIndex indirect_block_index = dind_block_as_pointers[i];
365365

366366
bool ind_block_new = !indirect_block_index;
367367
if (ind_block_new) {
@@ -370,13 +370,13 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
370370
dind_block_dirty = true;
371371
}
372372

373-
auto ind_block_contents = ByteBuffer::create_uninitialized(block_size());
373+
auto ind_block_contents = ByteBuffer::create_uninitialized(fs().block_size());
374374
if (ind_block_new) {
375375
ind_block_contents.zero_fill();
376376
ind_block_dirty = true;
377377
} else {
378378
auto buffer = UserOrKernelBuffer::for_kernel_buffer(ind_block_contents.data());
379-
auto result = read_block(indirect_block_index, &buffer, block_size());
379+
auto result = fs().read_block(indirect_block_index, &buffer, fs().block_size());
380380
if (result.is_error()) {
381381
dbgln("Ext2FS: write_block_list_for_inode had error: {}", result.error());
382382
return result;
@@ -390,7 +390,7 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
390390

391391
VERIFY(entries_to_write <= entries_per_block);
392392
for (unsigned j = 0; j < entries_to_write; ++j) {
393-
BlockIndex output_block = blocks[output_block_index++];
393+
Ext2FS::BlockIndex output_block = m_block_list[output_block_index++];
394394
if (ind_block_as_pointers[j] != output_block) {
395395
ind_block_as_pointers[j] = output_block.value();
396396
ind_block_dirty = true;
@@ -406,7 +406,7 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
406406

407407
if (ind_block_dirty) {
408408
auto buffer = UserOrKernelBuffer::for_kernel_buffer(ind_block_contents.data());
409-
int err = write_block(indirect_block_index, buffer, block_size());
409+
int err = fs().write_block(indirect_block_index, buffer, fs().block_size());
410410
VERIFY(err >= 0);
411411
}
412412
}
@@ -419,7 +419,7 @@ KResult Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e
419419

420420
if (dind_block_dirty) {
421421
auto buffer = UserOrKernelBuffer::for_kernel_buffer(dind_block_contents.data());
422-
int err = write_block(e2inode.i_block[EXT2_DIND_BLOCK], buffer, block_size());
422+
int err = fs().write_block(m_raw_inode.i_block[EXT2_DIND_BLOCK], buffer, fs().block_size());
423423
VERIFY(err >= 0);
424424
}
425425
}
@@ -733,8 +733,6 @@ ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, UserOrKernelBuffer&
733733
return nread;
734734
}
735735

736-
Locker fs_locker(fs().m_lock);
737-
738736
if (m_block_list.is_empty())
739737
m_block_list = fs().block_list_for_inode(m_raw_inode);
740738

@@ -828,15 +826,15 @@ KResult Ext2FSInode::resize(u64 new_size)
828826
}
829827
}
830828

831-
auto result = fs().write_block_list_for_inode(index(), m_raw_inode, block_list);
829+
m_block_list = move(block_list);
830+
831+
auto result = flush_block_list();
832832
if (result.is_error())
833833
return result;
834834

835835
m_raw_inode.i_size = new_size;
836836
set_metadata_dirty(true);
837837

838-
m_block_list = move(block_list);
839-
840838
if (new_size > old_size) {
841839
// If we're growing the inode, make sure we zero out all the new space.
842840
// FIXME: There are definitely more efficient ways to achieve this.
@@ -862,7 +860,6 @@ ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const UserOrKernel
862860
VERIFY(count >= 0);
863861

864862
Locker inode_locker(m_lock);
865-
Locker fs_locker(fs().m_lock);
866863

867864
auto result = prepare_to_write_data();
868865
if (result.is_error())

Kernel/FileSystem/Ext2FileSystem.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ class Ext2FSInode final : public Inode {
7676
virtual KResult chmod(mode_t) override;
7777
virtual KResult chown(uid_t, gid_t) override;
7878
virtual KResult truncate(u64) override;
79-
8079
virtual KResultOr<int> get_block_address(int) override;
8180

8281
KResult write_directory(const Vector<Ext2FSDirectoryEntry>&);
8382
bool populate_lookup_cache() const;
8483
KResult resize(u64);
84+
KResult flush_block_list();
8585

8686
Ext2FS& fs();
8787
const Ext2FS& fs() const;
@@ -147,7 +147,6 @@ class Ext2FS final : public BlockBasedFS {
147147

148148
Vector<BlockIndex> block_list_for_inode_impl(const ext2_inode&, bool include_block_list_blocks = false) const;
149149
Vector<BlockIndex> block_list_for_inode(const ext2_inode&, bool include_block_list_blocks = false) const;
150-
KResult write_block_list_for_inode(InodeIndex, ext2_inode&, const Vector<BlockIndex>&);
151150

152151
KResultOr<bool> get_inode_allocation_state(InodeIndex) const;
153152
KResult set_inode_allocation_state(InodeIndex, bool);

0 commit comments

Comments
 (0)