Skip to content

Commit

Permalink
Improve tiered storage related logging
Browse files Browse the repository at this point in the history
- add info logging for each storage dir on init, which will be very
useful for debugging worker tiered storage
- prevent duplicated error info in worker.log as seen in the following.
The same error caused two logging messages, one in tiered block store
and on in blockwritehandler. This is because previous code catches an
exception from allocator, log it and returns null, then in its caller,
based on null, another exception is created and thrown. This PR simply
keep the original exception from allocator.
- remove `freeSpace` from `BlockWorker` interface as no external callers
depend on this

```
021-03-23 03:39:52,791 ERROR TieredBlockStore - Allocation failure.
Options: AllocateOptions{Location=BlockStoreLocation{TierAlias=MEM,
DirIndex=0, MediumType=MEM}, Size=335544320, ForceLocation=true,
EvictionAllowed=true, UseReservedSpace=false}. Error:
alluxio.exception.WorkerOutOfSpaceException: Failed to free 335544320
bytes space at location MEM. Min contiguous requested: 335544320, Min
available requested: 335544320, Blocks iterated: 2, Blocks removed: 2,
Space freed: 268435456
at
alluxio.worker.block.TieredBlockStore.freeSpaceInternal(TieredBlockStore.java:825)
at
alluxio.worker.block.TieredBlockStore.freeSpace(TieredBlockStore.java:455)
at
alluxio.worker.block.TieredBlockStore.allocateSpace(TieredBlockStore.java:651)
at
alluxio.worker.block.TieredBlockStore.requestSpace(TieredBlockStore.java:318)
at
alluxio.worker.block.DefaultBlockWorker.requestSpace(DefaultBlockWorker.java:483)
at
alluxio.worker.grpc.BlockWriteHandler.writeBuf(BlockWriteHandler.java:132)
at
alluxio.worker.grpc.BlockWriteHandler.writeBuf(BlockWriteHandler.java:36)
at
alluxio.worker.grpc.AbstractWriteHandler.writeData(AbstractWriteHandler.java:274)
at
alluxio.worker.grpc.AbstractWriteHandler.lambda$writeDataMessage$1(AbstractWriteHandler.java:175)
at
io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)
at
alluxio.worker.grpc.GrpcExecutors$ImpersonateThreadPoolExecutor.lambda$execute$0(GrpcExecutors.java:91)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2021-03-23 03:39:52,796 ERROR AbstractWriteHandler - Failed to write
data for request BlockWriteRequest{id=3489660931,
sessionId=4499861889621204869, tier=0, createUfsBlockOptions=null}
alluxio.exception.WorkerOutOfSpaceException: Can't reserve more space
for block: 3489660931 under session: 4499861889621204869.
at
alluxio.worker.block.TieredBlockStore.requestSpace(TieredBlockStore.java:321)
at
alluxio.worker.block.DefaultBlockWorker.requestSpace(DefaultBlockWorker.java:483)
at
alluxio.worker.grpc.BlockWriteHandler.writeBuf(BlockWriteHandler.java:132)
at
alluxio.worker.grpc.BlockWriteHandler.writeBuf(BlockWriteHandler.java:36)
at
alluxio.worker.grpc.AbstractWriteHandler.writeData(AbstractWriteHandler.java:274)
at
alluxio.worker.grpc.AbstractWriteHandler.lambda$writeDataMessage$1(AbstractWriteHandler.java:175)
at
io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)
at
alluxio.worker.grpc.GrpcExecutors$ImpersonateThreadPoolExecutor.lambda$execute$0(GrpcExecutors.java:91)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
```

pr-link: #13154
change-id: cid-6412a80556e8757835940f8c4b3853c5fa39bd37
  • Loading branch information
apc999 committed Apr 9, 2021
1 parent 7824e89 commit 647c671
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 89 deletions.
Expand Up @@ -17,7 +17,6 @@
import alluxio.exception.WorkerOutOfSpaceException;
import alluxio.exception.status.DeadlineExceededException;
import alluxio.worker.SessionCleanable;
import alluxio.worker.block.evictor.EvictionPlan;
import alluxio.worker.block.io.BlockReader;
import alluxio.worker.block.io.BlockWriter;
import alluxio.worker.block.meta.BlockMeta;
Expand Down Expand Up @@ -337,20 +336,6 @@ void removeBlock(long sessionId, long blockId, BlockStoreLocation location)
@Override
void cleanupSession(long sessionId);

/**
* Frees space to make a specific amount of bytes available in the location.
*
* @param sessionId the session id
* @param minContigiousBytes the minimum amount of contigious free space in bytes
* @param minAvailableBytes the maximum amount of free space in bytes
* @param location the location to free space
* @throws WorkerOutOfSpaceException if there is not enough space to fulfill minimum requirement
* @throws BlockDoesNotExistException if blocks in {@link EvictionPlan} can not be found
*/
void freeSpace(long sessionId, long minContigiousBytes, long minAvailableBytes,
BlockStoreLocation location)
throws WorkerOutOfSpaceException, BlockDoesNotExistException, IOException;

/**
* Registers a {@link BlockStoreEventListener} to this block store.
*
Expand Down
Expand Up @@ -57,7 +57,6 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

/**
Expand All @@ -82,14 +81,16 @@
* <li>Method {@link #abortBlock(long, long)} does not acquire the block lock, because only
* temporary blocks can be aborted, and they are only visible to their writers (thus no concurrent
* access).
* <li>Eviction is done in {@link #freeSpaceInternal} and it is on the basis of best effort. For
* <li>Eviction is done in {@link #freeSpace} and it is on the basis of best effort. For
* operations that may trigger this eviction (e.g., move, create, requestSpace), retry is used</li>
* </ul>
*/
@NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1624)
public class TieredBlockStore implements BlockStore {
private static final Logger LOG = LoggerFactory.getLogger(TieredBlockStore.class);
private static final long REMOVE_BLOCK_TIMEOUT_MS = 60_000;
private static final long FREE_AHEAD_BYTETS =
ServerConfiguration.getBytes(PropertyKey.WORKER_TIERED_STORE_FREE_AHEAD_BYTES);
private final BlockMetadataManager mMetaManager;
private final BlockLockManager mLockManager;
private final Allocator mAllocator;
Expand Down Expand Up @@ -216,14 +217,8 @@ public TempBlockMeta createBlock(long sessionId, long blockId, AllocateOptions o
throws BlockAlreadyExistsException, WorkerOutOfSpaceException, IOException {
LOG.debug("createBlock: sessionId={}, blockId={}, options={}", sessionId, blockId, options);
TempBlockMeta tempBlockMeta = createBlockMetaInternal(sessionId, blockId, true, options);
if (tempBlockMeta != null) {
createBlockFile(tempBlockMeta.getPath());
return tempBlockMeta;
}
// TODO(bin): We are probably seeing a rare transient failure, maybe define and throw some
// other types of exception to indicate this case.
throw new WorkerOutOfSpaceException(ExceptionMessage.NO_SPACE_FOR_BLOCK_ALLOCATION,
options.getSize(), options.getLocation(), blockId);
createBlockFile(tempBlockMeta.getPath());
return tempBlockMeta;
}

// TODO(bin): Make this method to return a snapshot.
Expand Down Expand Up @@ -311,19 +306,16 @@ public void requestSpace(long sessionId, long blockId, long additionalBytes)
throws BlockDoesNotExistException, WorkerOutOfSpaceException, IOException {
LOG.debug("requestSpace: sessionId={}, blockId={}, additionalBytes={}", sessionId, blockId,
additionalBytes);

if (additionalBytes <= 0) {
return;
}
// NOTE: a temp block is only visible to its own writer, unnecessary to acquire
// block lock here since no sharing
try (LockResource r = new LockResource(mMetadataWriteLock)) {
TempBlockMeta tempBlockMeta = mMetaManager.getTempBlockMeta(blockId);

StorageDirView allocationDir = allocateSpace(sessionId,
AllocateOptions.forRequestSpace(additionalBytes, tempBlockMeta.getBlockLocation()));
if (allocationDir == null) {
throw new WorkerOutOfSpaceException(String.format(
"Can't reserve more space for block: %d under session: %d.", blockId, sessionId));
}

if (!allocationDir.toBlockStoreLocation().equals(tempBlockMeta.getBlockLocation())) {
// If reached here, allocateSpace() failed to enforce 'forceLocation' flag.
throw new IllegalStateException(
Expand Down Expand Up @@ -440,31 +432,6 @@ public void accessBlock(long sessionId, long blockId) throws BlockDoesNotExistEx
}
}

/**
* Free space is the entry for immediate block deletion in order to open up space for
* new or ongoing blocks.
*
* - New blocks creations will not try to free space until all tiers are out of space.
* - Ongoing blocks could end up freeing space oftenly, when the file's origin location is
* low on space.
*
* This method is synchronized in order to prevent race in its only client, allocations.
* If not synchronized, new allocations could steal space reserved by ongoing ones.
* Removing synchronized requires implementing retries to this call along with an optimal
* locking strategy for fairness.
*
* TODO(ggezer): Remove synchronized.
* TODO(ggezer): Make it a private API.
*/
@Override
public synchronized void freeSpace(long sessionId, long minContiguousBytes,
long minAvailableBytes, BlockStoreLocation location)
throws BlockDoesNotExistException, WorkerOutOfSpaceException, IOException {
LOG.debug("freeSpace: sessionId={}, minContiguousBytes={}, minAvailableBytes={}, location={}",
sessionId, minAvailableBytes, minAvailableBytes, location);
freeSpaceInternal(sessionId, minContiguousBytes, minAvailableBytes, location);
}

@Override
public void cleanupSession(long sessionId) {
LOG.debug("cleanupSession: sessionId={}", sessionId);
Expand Down Expand Up @@ -641,12 +608,13 @@ private BlockStoreLocation commitBlockInternal(long sessionId, long blockId, boo
return loc;
}

@Nullable
private StorageDirView allocateSpace(long sessionId, AllocateOptions options) {
StorageDirView dirView = null;
private StorageDirView allocateSpace(long sessionId, AllocateOptions options)
throws WorkerOutOfSpaceException, IOException {
StorageDirView dirView;
BlockMetadataView allocatorView =
new BlockMetadataAllocatorView(mMetaManager, options.canUseReservedSpace());
try {
// Convenient way to break on failure cases, no intention to loop
while (true) {
if (options.isForceLocation()) {
// Try allocating from given location. Skip the review because the location is forced.
dirView = mAllocator.allocateBlockWithView(sessionId, options.getSize(),
Expand All @@ -668,14 +636,14 @@ private StorageDirView allocateSpace(long sessionId, AllocateOptions options) {
if (dirView == null) {
LOG.error("Target tier: {} has no evictable space to store {} bytes for session: {}",
options.getLocation(), options.getSize(), sessionId);
return null;
break;
}
} else {
// We are not evicting in the target tier so having no available space just
// means the tier is currently full.
LOG.warn("Target tier: {} has no available space to store {} bytes for session: {}",
options.getLocation(), options.getSize(), sessionId);
return null;
break;
}
} else {
// Try allocating from given location. This may be rejected by the review logic.
Expand All @@ -688,17 +656,14 @@ private StorageDirView allocateSpace(long sessionId, AllocateOptions options) {
options.getLocation());
dirView = mAllocator.allocateBlockWithView(sessionId, options.getSize(),
BlockStoreLocation.anyTier(), allocatorView, false);

if (dirView != null) {
return dirView;
}

if (options.isEvictionAllowed()) {
// There is no space left on worker.
// Free more than requested by configured free-ahead size.
long freeAheadBytes =
ServerConfiguration.getBytes(PropertyKey.WORKER_TIERED_STORE_FREE_AHEAD_BYTES);
long toFreeBytes = options.getSize() + freeAheadBytes;
long toFreeBytes = options.getSize() + FREE_AHEAD_BYTETS;
LOG.debug("Allocation on anyTier failed. Free space for {} bytes on anyTier",
toFreeBytes);
freeSpace(sessionId, options.getSize(), toFreeBytes,
Expand All @@ -709,12 +674,13 @@ private StorageDirView allocateSpace(long sessionId, AllocateOptions options) {
LOG.debug("Allocation after freeing space for block creation: {}", dirView);
}
}
} catch (Exception e) {
LOG.error("Allocation failure. Options: {}. Error:", options, e);
return null;
if (dirView == null) {
break;
}
return dirView;
}

return dirView;
throw new WorkerOutOfSpaceException(
String.format("Allocation failure. Options: %s. Error:", options.toString()));
}

/**
Expand All @@ -729,9 +695,9 @@ private StorageDirView allocateSpace(long sessionId, AllocateOptions options) {
* {@link WorkerOutOfSpaceException} because allocation failure could be an expected case)
* @throws BlockAlreadyExistsException if there is already a block with the same block id
*/
@Nullable
private TempBlockMeta createBlockMetaInternal(long sessionId, long blockId, boolean newBlock,
AllocateOptions options) throws BlockAlreadyExistsException {
AllocateOptions options)
throws BlockAlreadyExistsException, WorkerOutOfSpaceException, IOException {
try (LockResource r = new LockResource(mMetadataWriteLock)) {
// NOTE: a temp block is supposed to be visible for its own writer,
// unnecessary to acquire block lock here since no sharing.
Expand All @@ -742,10 +708,6 @@ private TempBlockMeta createBlockMetaInternal(long sessionId, long blockId, bool
// Allocate space.
StorageDirView dirView = allocateSpace(sessionId, options);

if (dirView == null) {
return null;
}

// TODO(carson): Add tempBlock to corresponding storageDir and remove the use of
// StorageDirView.createTempBlockMeta.
TempBlockMeta tempBlock = dirView.createTempBlockMeta(sessionId, blockId, options.getSize());
Expand All @@ -764,21 +726,34 @@ private TempBlockMeta createBlockMetaInternal(long sessionId, long blockId, bool
}

/**
* Tries to free a certain amount of space in the given location.
* Free space is the entry for immediate block deletion in order to open up space for
* new or ongoing blocks.
*
* - New blocks creations will not try to free space until all tiers are out of space.
* - Ongoing blocks could end up freeing space oftenly, when the file's origin location is
* low on space.
*
* This method is synchronized in order to prevent race in its only client, allocations.
* If not synchronized, new allocations could steal space reserved by ongoing ones.
* Removing synchronized requires implementing retries to this call along with an optimal
* locking strategy for fairness.
*
* TODO(ggezer): Remove synchronized.
*
* @param sessionId the session id
* @param minContiguousBytes the minimum amount of contiguous space in bytes to set available
* @param minAvailableBytes the minimum amount of space in bytes to set available
* @param location location of space
* @throws WorkerOutOfSpaceException if it is impossible to achieve minimum space requirement
* @param minContiguousBytes the minimum amount of contigious free space in bytes
* @param minAvailableBytes the minimum amount of free space in bytes
* @param location the location to free space
* @throws WorkerOutOfSpaceException if there is not enough space to fulfill minimum requirement
*/
private void freeSpaceInternal(long sessionId, long minContiguousBytes, long minAvailableBytes,
BlockStoreLocation location) throws WorkerOutOfSpaceException, IOException {
@VisibleForTesting
public synchronized void freeSpace(long sessionId, long minContiguousBytes,
long minAvailableBytes, BlockStoreLocation location)
throws WorkerOutOfSpaceException, IOException {
LOG.debug("freeSpace: sessionId={}, minContiguousBytes={}, minAvailableBytes={}, location={}",
sessionId, minAvailableBytes, minAvailableBytes, location);
// TODO(ggezer): Too much memory pressure when pinned-inodes list is large.
BlockMetadataEvictorView evictorView = getUpdatedView();
LOG.debug(
"freeSpaceInternal - locAvailableBytes: {}, minContiguousBytes: {}, minAvailableBytes: {}",
evictorView.getAvailableBytes(location), minContiguousBytes, minAvailableBytes);
boolean contiguousSpaceFound = false;
boolean availableBytesFound = false;

Expand Down Expand Up @@ -908,8 +883,10 @@ private MoveBlockResult moveBlockInternal(long sessionId, long blockId,
return new MoveBlockResult(true, blockSize, srcLocation, srcLocation);
}

TempBlockMeta dstTempBlock = createBlockMetaInternal(sessionId, blockId, false, moveOptions);
if (dstTempBlock == null) {
TempBlockMeta dstTempBlock;
try {
dstTempBlock = createBlockMetaInternal(sessionId, blockId, false, moveOptions);
} catch (Exception e) {
return new MoveBlockResult(false, blockSize, null, null);
}

Expand Down
Expand Up @@ -108,6 +108,9 @@ public static StorageDir newStorageDir(StorageTier tier, int dirIndex, long capa
DefaultStorageDir dir =
new DefaultStorageDir(tier, dirIndex, capacityBytes, reservedBytes, dirPath, dirMedium);
dir.initializeMeta();
LOG.info("StorageDir initialized: path={}, tier={}, dirIndex={}, medium={}, capacityBytes={}, "
+ "reservedBytes={}, availableBytes={}",
dirPath, tier, dirIndex, dirMedium, capacityBytes, reservedBytes, dir.mAvailableBytes);
return dir;
}

Expand Down

0 comments on commit 647c671

Please sign in to comment.