Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cmd/elasticsearch/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ minio:
secretKey: minioadmin
stackgraph:
bucket: stackgraph-bucket
multipartArchive: true
restore:
scaleDownLabelSelector: "app=stackgraph"
loggingConfigConfigMap: logging-config
Expand Down Expand Up @@ -148,7 +147,6 @@ storage:
secretKey: storageadmin
stackgraph:
bucket: stackgraph-bucket
multipartArchive: true
restore:
scaleDownLabelSelector: "app=stackgraph"
loggingConfigConfigMap: logging-config
Expand Down
6 changes: 2 additions & 4 deletions cmd/settings/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
)

const (
isMultiPartArchive = false
expectedListJobPodCount = 1
expectedListJobContainerCount = 1
backupFileNameRegex = `^sts-backup-.*\.sty$`
Expand Down Expand Up @@ -183,8 +182,7 @@ func getBackupListFromS3(appCtx *app.Context) ([]BackupFileInfo, error) {
return nil, fmt.Errorf("failed to list S3 objects: %w", err)
}

// Filter objects based on whether the archive is split or not
filteredObjects := s3client.FilterMultipartBackupObjects(result.Contents, isMultiPartArchive)
filteredObjects := s3client.FilterBackupObjects(result.Contents)

// Filter to only include direct children of the prefix that match the backup filename pattern,
// and strip the prefix from the key
Expand Down Expand Up @@ -238,7 +236,7 @@ func getBackupListFromLocalBucket(appCtx *app.Context) ([]BackupFileInfo, error)
return nil, fmt.Errorf("failed to list objects in local bucket: %w", err)
}

filteredObjects := s3client.FilterMultipartBackupObjects(result.Contents, isMultiPartArchive)
filteredObjects := s3client.FilterBackupObjects(result.Contents)

filteredObjects, err = s3client.FilterByPrefixAndRegex(filteredObjects, "", backupFileNameRegex)
if err != nil {
Expand Down
4 changes: 1 addition & 3 deletions cmd/stackgraph/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ func runList(appCtx *app.Context) error {
// List objects in bucket
bucket := appCtx.Config.Stackgraph.Bucket
prefix := appCtx.Config.Stackgraph.S3Prefix
multipartArchive := appCtx.Config.Stackgraph.MultipartArchive

appCtx.Logger.Infof("Listing Stackgraph backups in bucket '%s'...", bucket)

Expand All @@ -66,8 +65,7 @@ func runList(appCtx *app.Context) error {
return fmt.Errorf("failed to list S3 objects: %w", err)
}

// Filter objects based on whether the archive is split or not
filteredObjects := s3client.FilterMultipartBackupObjects(result.Contents, multipartArchive)
filteredObjects := s3client.FilterBackupObjects(result.Contents)

// Filter to only include direct children of the prefix that match the backup filename pattern,
// and strip the prefix from the key
Expand Down
5 changes: 1 addition & 4 deletions cmd/stackgraph/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ func getLatestBackup(k8sClient *k8s.Client, namespace string, config *config.Con
// List objects in bucket
bucket := config.Stackgraph.Bucket
prefix := config.Stackgraph.S3Prefix
multipartArchive := config.Stackgraph.MultipartArchive

input := &s3.ListObjectsV2Input{
Bucket: aws.String(bucket),
Expand All @@ -181,8 +180,7 @@ func getLatestBackup(k8sClient *k8s.Client, namespace string, config *config.Con
return "", fmt.Errorf("failed to list S3 objects: %w", err)
}

// Filter objects based on whether the archive is split or not
filteredObjects := s3client.FilterMultipartBackupObjects(result.Contents, multipartArchive)
filteredObjects := s3client.FilterBackupObjects(result.Contents)

// Filter to only include direct children of the prefix that match the backup filename pattern,
// and strip the prefix from the key
Expand Down Expand Up @@ -278,7 +276,6 @@ func buildRestoreEnvVars(backupFile string, config *config.Config) []corev1.EnvV
{Name: "FORCE_DELETE", Value: purgeStackgraphDataFlag},
{Name: "BACKUP_STACKGRAPH_BUCKET_NAME", Value: config.Stackgraph.Bucket},
{Name: "BACKUP_STACKGRAPH_S3_PREFIX", Value: config.Stackgraph.S3Prefix},
{Name: "BACKUP_STACKGRAPH_MULTIPART_ARCHIVE", Value: strconv.FormatBool(config.Stackgraph.MultipartArchive)},
{Name: "MINIO_ENDPOINT", Value: fmt.Sprintf("%s:%d", storageService.Name, storageService.Port)},
{Name: "STACKSTATE_BASE_URL", Value: config.GetBaseURL()},
{Name: "RECEIVER_BASE_URL", Value: config.GetReceiverBaseURL()},
Expand Down
112 changes: 14 additions & 98 deletions internal/clients/s3/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,22 @@ import (
s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
)

const (
multipartArchiveSuffixLength = 2
)

// Object represents a simplified S3 object with key metadata.
// It carries only the three fields the backup filters in this file populate
// from an s3types.Object: the key, the modification time, and the size.
type Object struct {
// Key is the object key; for aggregated multipart archives it is the base
// name with the numeric part suffix removed.
Key string
// LastModified is the object's modification time; for aggregated multipart
// archives it is the most recent time among the parts.
LastModified time.Time
// Size is the object's size as reported by S3; for aggregated multipart
// archives it is the sum of the sizes of all parts.
Size int64
}

// FilterMultipartBackupObjects filters S3 objects based on whether the archive is split or not
// If it is not multipartArchive, it filters out multipart archives (files ending with .digits)
// Otherwise, it groups multipart archives by base name and sums their sizes
func FilterMultipartBackupObjects(objects []s3types.Object, multipartArchive bool) []Object {
if !multipartArchive {
return filterNonMultipart(objects)
}
return aggregateMultipart(objects)
}

// filterNonMultipart filters out multipart archives (files ending with .digits)
func filterNonMultipart(objects []s3types.Object) []Object {
// FilterBackupObjects filters out backup part files ending with .digits.
func FilterBackupObjects(objects []s3types.Object) []Object {
var filteredObjects []Object

for _, obj := range objects {
key := aws.ToString(obj.Key)

// Skip if it ends with .digits (multipart archive)
if strings.Contains(key, ".") {
parts := strings.Split(key, ".")
lastPart := parts[len(parts)-1]
isDigits := true
for _, c := range lastPart {
if c < '0' || c > '9' {
isDigits = false
break
}
}
if isDigits && len(lastPart) > 0 {
continue
}
if hasNumericFileSuffix(key) {
continue
}

filteredObjects = append(filteredObjects, Object{
Expand All @@ -64,83 +38,25 @@ func filterNonMultipart(objects []s3types.Object) []Object {
return filteredObjects
}

// aggregateMultipart collapses multipart archive parts into one entry per archive.
//
// Every object whose key getBaseName reports as a multipart part is folded into
// a single Object keyed by the base name: the part sizes are summed and the most
// recent LastModified among the parts is kept. Objects that are not multipart
// parts are passed through under their own key.
//
// Entries are returned in the order in which their key (or base name) first
// appears in objects, so the result is deterministic for a given input instead
// of following Go's randomized map iteration order. A nil slice is returned
// when objects is empty.
func aggregateMultipart(objects []s3types.Object) []Object {
	// archiveMap groups entries by key / base name; order remembers the first
	// appearance of each key so the output order is stable.
	archiveMap := make(map[string]*Object, len(objects))
	order := make([]string, 0, len(objects))

	for _, obj := range objects {
		key := aws.ToString(obj.Key)
		modified := aws.ToTime(obj.LastModified)
		size := aws.ToInt64(obj.Size)

		baseName, isMultipart := getBaseName(key)
		if !isMultipart {
			// Not a multipart part: include as-is. An entry already stored
			// under the same key is replaced, matching the previous behavior.
			if _, seen := archiveMap[key]; !seen {
				order = append(order, key)
			}
			archiveMap[key] = &Object{
				Key:          key,
				LastModified: modified,
				Size:         size,
			}
			continue
		}

		existing, seen := archiveMap[baseName]
		if !seen {
			// First part of this archive: start a new aggregate entry.
			order = append(order, baseName)
			archiveMap[baseName] = &Object{
				Key:          baseName,
				LastModified: modified,
				Size:         size,
			}
			continue
		}

		// Further part of a known archive: accumulate the size and keep the
		// most recent modification time.
		existing.Size += size
		if modified.After(existing.LastModified) {
			existing.LastModified = modified
		}
	}

	if len(order) == 0 {
		return nil
	}

	// Emit entries in first-appearance order rather than ranging over the map.
	filteredObjects := make([]Object, 0, len(order))
	for _, key := range order {
		filteredObjects = append(filteredObjects, *archiveMap[key])
	}

	return filteredObjects
}

// getBaseName extracts the base name from a multipart archive filename
// Returns (baseName, isMultipart)
// Example: "backup.graph.00" -> ("backup.graph", true)
//
// "backup.graph" -> ("backup.graph", false)
func getBaseName(key string) (string, bool) {
func hasNumericFileSuffix(key string) bool {
if !strings.Contains(key, ".") {
return key, false
return false
}

parts := strings.Split(key, ".")
lastPart := parts[len(parts)-1]

// Check if last part is all digits (2 digits for part numbers like .00, .01, etc.)
if len(lastPart) == multipartArchiveSuffixLength {
isDigits := true
for _, c := range lastPart {
if c < '0' || c > '9' {
isDigits = false
break
}
}
if isDigits {
// Remove the .NN suffix to get base name
baseName := strings.Join(parts[:len(parts)-1], ".")
return baseName, true
if len(lastPart) == 0 {
return false
}

for _, c := range lastPart {
if c < '0' || c > '9' {
return false
}
}

return key, false
return true
}

// FilterByPrefixAndRegex filters objects to only include direct children of the given prefix
Expand Down
Loading
Loading