diff --git a/sdk/adapters/supernodeservice/adapter.go b/sdk/adapters/supernodeservice/adapter.go
index f9e9e6da..0d326a17 100644
--- a/sdk/adapters/supernodeservice/adapter.go
+++ b/sdk/adapters/supernodeservice/adapter.go
@@ -446,6 +446,7 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 		bytesWritten   int64
 		chunkIndex     int
 		startedEmitted bool
+		downloadStart  time.Time
 	)
 
 	// 3. Receive streamed responses
@@ -509,7 +510,11 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 					}
 				}
 			}
-			in.EventLogger(ctx, toSdkEvent(x.Event.EventType), x.Event.Message, edata)
+			// Avoid blocking Recv loop on event handling; dispatch asynchronously
+			evtType := toSdkEvent(x.Event.EventType)
+			go func(ed event.EventData, et event.EventType, msg string) {
+				in.EventLogger(ctx, et, msg, ed)
+			}(edata, evtType, x.Event.Message)
 		}
 
 		// 3b. Actual data chunk
@@ -520,7 +525,10 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 			}
 			if !startedEmitted {
 				if in.EventLogger != nil {
-					in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
+					// mark start to compute throughput at completion
+					downloadStart = time.Now()
+					// Emit started asynchronously to avoid blocking
+					go in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
 				}
 				startedEmitted = true
 			}
@@ -538,7 +546,25 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 	a.logger.Info(ctx, "download complete", "bytes_written", bytesWritten, "path", in.OutputPath, "action_id", in.ActionID)
 
 	if in.EventLogger != nil {
-		in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{event.KeyActionID: in.ActionID, event.KeyOutputPath: in.OutputPath})
+		// Compute metrics if we marked a start
+		var elapsed float64
+		var throughput float64
+		if !downloadStart.IsZero() {
+			elapsed = time.Since(downloadStart).Seconds()
+			mb := float64(bytesWritten) / (1024.0 * 1024.0)
+			if elapsed > 0 {
+				throughput = mb / elapsed
+			}
+		}
+		// Emit completion asynchronously with metrics
+		go in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{
+			event.KeyActionID:       in.ActionID,
+			event.KeyOutputPath:     in.OutputPath,
+			event.KeyBytesTotal:     bytesWritten,
+			event.KeyChunks:         chunkIndex,
+			event.KeyElapsedSeconds: elapsed,
+			event.KeyThroughputMBS:  throughput,
+		})
 	}
 	return &CascadeSupernodeDownloadResponse{
 		Success: true,
diff --git a/sdk/net/factory.go b/sdk/net/factory.go
index b9fad9fd..f3486780 100644
--- a/sdk/net/factory.go
+++ b/sdk/net/factory.go
@@ -39,9 +39,10 @@ func NewClientFactory(ctx context.Context, logger log.Logger, keyring keyring.Ke
 	// Tuned for 1GB max files with 4MB chunks
 	// Reduce in-flight memory by aligning windows and msg sizes to chunk size.
 	opts := client.DefaultClientOptions()
-	opts.MaxRecvMsgSize = 8 * 1024 * 1024     // 8MB: supports 4MB chunks + overhead
-	opts.MaxSendMsgSize = 8 * 1024 * 1024     // 8MB: supports 4MB chunks + overhead
-	opts.InitialWindowSize = 4 * 1024 * 1024  // 4MB per-stream window ≈ chunk size
+	opts.MaxRecvMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+	opts.MaxSendMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+	// Increase per-stream window to provide headroom for first data chunk + events
+	opts.InitialWindowSize = 12 * 1024 * 1024 // 12MB per-stream window
 	opts.InitialConnWindowSize = 64 * 1024 * 1024 // 64MB per-connection window
 
 	return &ClientFactory{
diff --git a/sdk/task/download.go b/sdk/task/download.go
index 3e85007a..2c727ae9 100644
--- a/sdk/task/download.go
+++ b/sdk/task/download.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"os"
-	"sort"
 	"time"
 
 	"github.com/LumeraProtocol/supernode/v2/sdk/adapters/lumera"
@@ -77,51 +76,6 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern
 		}
 	}
 
-	// Optionally rank supernodes by available memory to improve success for large files
-	// We keep a short timeout per status fetch to avoid delaying downloads.
-	type rankedSN struct {
-		sn        lumera.Supernode
-		availGB   float64
-		hasStatus bool
-	}
-	ranked := make([]rankedSN, 0, len(supernodes))
-	for _, sn := range supernodes {
-		ranked = append(ranked, rankedSN{sn: sn})
-	}
-
-	// Probe supernode status with short timeouts and close clients promptly
-	for i := range ranked {
-		sn := ranked[i].sn
-		// 2s status timeout to keep this pass fast
-		stx, cancel := context.WithTimeout(ctx, 2*time.Second)
-		client, err := clientFactory.CreateClient(stx, sn)
-		if err != nil {
-			cancel()
-			continue
-		}
-		status, err := client.GetSupernodeStatus(stx)
-		_ = client.Close(stx)
-		cancel()
-		if err != nil {
-			continue
-		}
-		ranked[i].hasStatus = true
-		ranked[i].availGB = status.Resources.Memory.AvailableGB
-	}
-
-	// Sort: nodes with status first, higher available memory first
-	sort.Slice(ranked, func(i, j int) bool {
-		if ranked[i].hasStatus != ranked[j].hasStatus {
-			return ranked[i].hasStatus && !ranked[j].hasStatus
-		}
-		return ranked[i].availGB > ranked[j].availGB
-	})
-
-	// Rebuild the supernodes list in the sorted order
-	for i := range ranked {
-		supernodes[i] = ranked[i].sn
-	}
-
-	// Try supernodes sequentially, one by one (now sorted)
+	// Try supernodes sequentially, one by one
 	var lastErr error
 	for idx, sn := range supernodes {
@@ -146,8 +100,8 @@
 			continue
 		}
 
-		// Success; return to caller
-		return nil
+		// Success; return to caller
+		return nil
 	}
 
 	if lastErr != nil {
@@ -176,15 +130,15 @@ func (t *CascadeDownloadTask) attemptDownload(
 		t.LogEvent(ctx, evt, msg, data)
 	}
 
-	resp, err := client.Download(ctx, req)
-	if err != nil {
-		return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
-	}
-	if !resp.Success {
-		return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
-	}
+	resp, err := client.Download(ctx, req)
+	if err != nil {
+		return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
+	}
+	if !resp.Success {
+		return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
+	}
 
-	return nil
+	return nil
 }
 
 // downloadResult holds the result of a successful download attempt
diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go
index a99fbf0a..449f4c42 100644
--- a/supernode/node/action/server/cascade/cascade_action_server.go
+++ b/supernode/node/action/server/cascade/cascade_action_server.go
@@ -313,7 +313,14 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
 		"chunk_size": chunkSize,
 	})
 
-	// Announce: file is ready to be served to the client
+	// Pre-read first chunk to avoid any delay between SERVE_READY and first data
+	buf := make([]byte, chunkSize)
+	n, readErr := f.Read(buf)
+	if readErr != nil && readErr != io.EOF {
+		return fmt.Errorf("chunked read failed: %w", readErr)
+	}
+
+	// Announce: file is ready to be served to the client (right before first data)
 	if err := stream.Send(&pb.DownloadResponse{
 		ResponseType: &pb.DownloadResponse_Event{
 			Event: &pb.DownloadEvent{
@@ -326,10 +333,27 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
 		return err
 	}
 
-	// Stream the file in fixed-size chunks
-	buf := make([]byte, chunkSize)
+	// Send pre-read first chunk if available
+	if n > 0 {
+		if err := stream.Send(&pb.DownloadResponse{
+			ResponseType: &pb.DownloadResponse_Chunk{
+				Chunk: &pb.DataChunk{Data: buf[:n]},
+			},
+		}); err != nil {
+			logtrace.Error(ctx, "failed to stream first chunk", logtrace.Fields{logtrace.FieldError: err.Error()})
+			return err
+		}
+	}
+
+	// If EOF after first read, we're done
+	if readErr == io.EOF {
+		logtrace.Info(ctx, "completed streaming all chunks", fields)
+		return nil
+	}
+
+	// Continue streaming remaining chunks
 	for {
-		n, readErr := f.Read(buf)
+		n, readErr = f.Read(buf)
 		if n > 0 {
 			if err := stream.Send(&pb.DownloadResponse{
 				ResponseType: &pb.DownloadResponse_Chunk{
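
For orientation, below is a minimal, self-contained sketch of the pre-read/announce/stream pattern that the final (truncated) cascade_action_server.go hunk implements. It is not part of the diff; the chunkSender interface and streamFile helper are hypothetical stand-ins for the real pb stream and server handler, used here only to show the ordering of reads and sends.

// Sketch only; chunkSender stands in for the gRPC download stream.
package cascadesketch

import (
	"fmt"
	"io"
	"os"
)

type chunkSender interface {
	SendServeReady() error       // announce that data is about to flow
	SendChunk(data []byte) error // send one data chunk to the client
}

// streamFile pre-reads the first chunk so the readiness event is emitted
// immediately before the first byte of data, then streams the rest of the
// file in fixed-size chunks.
func streamFile(path string, chunkSize int, s chunkSender) error {
	f, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer f.Close()

	buf := make([]byte, chunkSize)

	// Pre-read the first chunk before announcing readiness.
	n, readErr := f.Read(buf)
	if readErr != nil && readErr != io.EOF {
		return fmt.Errorf("chunked read failed: %w", readErr)
	}

	// Announce readiness right before sending data.
	if err := s.SendServeReady(); err != nil {
		return err
	}
	if n > 0 {
		if err := s.SendChunk(buf[:n]); err != nil {
			return err
		}
	}
	if readErr == io.EOF {
		return nil // file fit in a single chunk
	}

	// Stream the remaining chunks.
	for {
		n, readErr = f.Read(buf)
		if n > 0 {
			if err := s.SendChunk(buf[:n]); err != nil {
				return err
			}
		}
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return fmt.Errorf("chunked read failed: %w", readErr)
		}
	}
}

The design point, per the hunk's own comment, is that any latency on the first read is paid before SERVE_READY is emitted, so the client never waits between the readiness event and the first data chunk.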