Skip to content

Tar: Adjust the way we write GNU longlink and longpath metadata #114940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 24, 2025
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -354,26 +354,33 @@ internal async Task WriteAsPaxAsync(Stream archiveStream, Memory<byte> buffer, C
await WriteWithSeekableDataStreamAsync(TarEntryFormat.Pax, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
}
}
// Reports whether the link name cannot be stored in the regular header field.
// The size that is compared against FieldLengths.LinkName is the UTF-8 byte count of the link name
// plus one extra byte for the null terminator, which .NET strings do not carry by themselves.
// A null link name is never considered too long.
private bool IsLinkNameTooLongForRegularField()
{
    if (_linkName == null)
    {
        return false;
    }

    int requiredBytes = Encoding.UTF8.GetByteCount(_linkName) + 1;
    return requiredBytes > FieldLengths.LinkName;
}

// Reports whether the entry name cannot be stored in the regular header field.
// The size that is compared against FieldLengths.Name is the UTF-8 byte count of the name
// plus one extra byte for the null terminator, which .NET strings do not carry by themselves.
private bool IsNameTooLongForRegularField()
{
    int requiredBytes = Encoding.UTF8.GetByteCount(_name) + 1;
    return requiredBytes > FieldLengths.Name;
}

// Writes the current header as a Gnu entry into the archive stream.
// Makes sure to add the preceding LongLink and/or LongPath entries if necessary, before the actual entry.
internal void WriteAsGnu(Stream archiveStream, Span<byte> buffer)
{
Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek);

// First, we determine if we need a preceding LongLink, and write it if needed
if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName)
if (IsLinkNameTooLongForRegularField())
{
TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName);
// Linkname is too long for the regular header field, create a longlink entry where the linkname will be stored.
TarHeader longLinkHeader = GetGnuLongLinkMetadataHeader();
Debug.Assert(longLinkHeader._dataStream != null && longLinkHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
longLinkHeader.WriteWithSeekableDataStream(TarEntryFormat.Gnu, archiveStream, buffer);
buffer.Clear(); // Reset it to reuse it
}

// Second, we determine if we need a preceding LongPath, and write it if needed
if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name)
if (IsNameTooLongForRegularField())
{
TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name);
// Name is too long for the regular header field, create a longpath entry where the name will be stored.
TarHeader longPathHeader = GetGnuLongPathMetadataHeader();
Debug.Assert(longPathHeader._dataStream != null && longPathHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
longPathHeader.WriteWithSeekableDataStream(TarEntryFormat.Gnu, archiveStream, buffer);
buffer.Clear(); // Reset it to reuse it
Expand All @@ -397,19 +404,19 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory<byte> buffer, C
Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek);
cancellationToken.ThrowIfCancellationRequested();

// First, we determine if we need a preceding LongLink, and write it if needed
if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName)
if (IsLinkNameTooLongForRegularField())
{
TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName);
// Linkname is too long for the regular header field, create a longlink entry where the linkname will be stored.
TarHeader longLinkHeader = GetGnuLongLinkMetadataHeader();
Debug.Assert(longLinkHeader._dataStream != null && longLinkHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
await longLinkHeader.WriteWithSeekableDataStreamAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
buffer.Span.Clear(); // Reset it to reuse it
}

// Second, we determine if we need a preceding LongPath, and write it if needed
if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name)
if (IsNameTooLongForRegularField())
{
TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name);
// Name is too long for the regular header field, create a longpath entry where the name will be stored.
TarHeader longPathHeader = GetGnuLongPathMetadataHeader();
Debug.Assert(longPathHeader._dataStream != null && longPathHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
await longPathHeader.WriteWithSeekableDataStreamAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
buffer.Span.Clear(); // Reset it to reuse it
Expand All @@ -426,20 +433,46 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory<byte> buffer, C
}
}

// Builds the in-memory data stream for a long metadata entry: the UTF-8 bytes of the text
// followed by a single null terminator byte. The returned stream is rewound to position 0.
private static MemoryStream GetLongMetadataStream(string text)
{
    byte[] encodedText = Encoding.UTF8.GetBytes(text);

    MemoryStream stream = new();
    stream.Write(encodedText, 0, encodedText.Length);
    stream.WriteByte(0); // Null terminator that ends the string inside the entry data
    stream.Seek(0, SeekOrigin.Begin);

    return stream;
}

// Creates the GNU LongLink metadata header for this entry. Its data stream holds the
// null-terminated link name, and it receives this header's uid, gid, uname and gname.
// The link name must not be null when this method is called.
private TarHeader GetGnuLongLinkMetadataHeader()
{
    Debug.Assert(_linkName != null);

    return GetGnuLongMetadataHeader(
        dataStream: GetLongMetadataStream(_linkName),
        entryType: TarEntryType.LongLink,
        mainEntryUid: _uid,
        mainEntryGid: _gid,
        mainEntryUname: _uName,
        mainEntryGname: _gName);
}

// Creates the GNU LongPath metadata header for this entry. Its data stream holds the
// null-terminated entry name, and it receives this header's uid, gid, uname and gname.
private TarHeader GetGnuLongPathMetadataHeader()
{
    return GetGnuLongMetadataHeader(
        dataStream: GetLongMetadataStream(_name),
        entryType: TarEntryType.LongPath,
        mainEntryUid: _uid,
        mainEntryGid: _gid,
        mainEntryUname: _uName,
        mainEntryGname: _gName);
}

// Creates and returns a GNU long metadata header, with the specified long text written into its data stream (seekable).
// NOTE(review): this hunk interleaves the previous and the updated lines of the method (two signatures,
// duplicated _uid/_gid/_mTime/_dataStream initializers). The updated lines are the ones that take the
// data stream and the main entry's uid/gid/uname/gname as arguments.
private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText)
private static TarHeader GetGnuLongMetadataHeader(MemoryStream dataStream, TarEntryType entryType, int mainEntryUid, int mainEntryGid, string? mainEntryUname, string? mainEntryGname)
{
// Only the two GNU long metadata entry types are valid here.
Debug.Assert(entryType is TarEntryType.LongPath or TarEntryType.LongLink);

return new(TarEntryFormat.Gnu)
{
_name = GnuLongMetadataName, // Same name for both longpath or longlink
_mode = TarHelpers.GetDefaultMode(entryType),
_uid = 0,
_gid = 0,
_mTime = DateTimeOffset.MinValue, // 0
// Ownership values are taken from the arguments; the visible callers pass the main entry's _uid and _gid.
_uid = mainEntryUid,
_gid = mainEntryGid,
_mTime = DateTimeOffset.UnixEpoch, // 0 seconds since the Unix epoch
_typeFlag = entryType,
_dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText))
// The caller-provided stream is stored as is; the visible callers build it with GetLongMetadataStream.
_dataStream = dataStream,
_uName = mainEntryUname,
_gName = mainEntryGname,
_aTime = DateTimeOffset.UnixEpoch, // 0 seconds since the Unix epoch
_cTime = DateTimeOffset.UnixEpoch, // 0 seconds since the Unix epoch
};
}

Expand Down Expand Up @@ -614,17 +647,17 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)

int checksum = 0;

if (_mode > 0)
if (_mode >= 0)
{
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
}

if (_uid > 0)
if (_uid >= 0)
{
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
}

if (_gid > 0)
if (_gid >= 0)
{
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
}
Expand Down Expand Up @@ -750,8 +783,8 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)
// Saves the gnu-specific fields into the specified spans.
private int WriteGnuFields(Span<byte> buffer)
{
int checksum = WriteAsGnuTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
checksum += WriteAsGnuTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));

if (_gnuUnusedBytes != null)
{
Expand Down Expand Up @@ -1060,7 +1093,6 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
}
return checksum;
}

private int FormatNumeric(int value, Span<byte> destination)
{
Debug.Assert(destination.Length == 8, "8 byte field expected.");
Expand Down Expand Up @@ -1118,6 +1150,7 @@ private static int FormatOctal(long value, Span<byte> destination)
Span<byte> digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong

int i = digits.Length - 1;

while (true)
{
digits[i] = (byte)('0' + (remaining % 8));
Expand All @@ -1136,24 +1169,6 @@ private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
return FormatNumeric(unixTimeSeconds, destination);
}

// Writes the Unix time seconds of the specified DateTimeOffset into the destination span and returns its checksum.
// When the timestamp is UnixEpoch nothing is written and 0 is returned; the destination span is
// expected to be zeroed already, which is verified in debug builds only.
private int WriteAsGnuTimestamp(DateTimeOffset timestamp, Span<byte> destination)
{
    if (timestamp != DateTimeOffset.UnixEpoch)
    {
        return WriteAsTimestamp(timestamp, destination);
    }

#if DEBUG
    foreach (byte current in destination)
    {
        Debug.Assert(current == 0, "Destination span should be zeroed.");
    }
#endif

    return 0;
}

// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
private static int WriteAsUtf8String(ReadOnlySpan<char> text, Span<byte> buffer)
{
Expand Down
Loading
Loading