diff --git a/sdk/adapters/supernodeservice/adapter.go b/sdk/adapters/supernodeservice/adapter.go
index f9e9e6da..0d326a17 100644
--- a/sdk/adapters/supernodeservice/adapter.go
+++ b/sdk/adapters/supernodeservice/adapter.go
@@ -446,6 +446,7 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 		bytesWritten   int64
 		chunkIndex     int
 		startedEmitted bool
+		downloadStart  time.Time
 	)
 
 	// 3. Receive streamed responses
@@ -509,7 +510,11 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 					}
 				}
 			}
-			in.EventLogger(ctx, toSdkEvent(x.Event.EventType), x.Event.Message, edata)
+			// Avoid blocking Recv loop on event handling; dispatch asynchronously
+			evtType := toSdkEvent(x.Event.EventType)
+			go func(ed event.EventData, et event.EventType, msg string) {
+				in.EventLogger(ctx, et, msg, ed)
+			}(edata, evtType, x.Event.Message)
 		}
 
 		// 3b. Actual data chunk
@@ -520,7 +525,10 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 			}
 			if !startedEmitted {
 				if in.EventLogger != nil {
-					in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
+					// mark start to compute throughput at completion
+					downloadStart = time.Now()
+					// Emit started asynchronously to avoid blocking
+					go in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
 				}
 				startedEmitted = true
 			}
@@ -538,7 +546,25 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
 	a.logger.Info(ctx, "download complete", "bytes_written", bytesWritten, "path", in.OutputPath, "action_id", in.ActionID)
 
 	if in.EventLogger != nil {
-		in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{event.KeyActionID: in.ActionID, event.KeyOutputPath: in.OutputPath})
+		// Compute metrics if we marked a start
+		var elapsed float64
+		var throughput float64
+		if !downloadStart.IsZero() {
+			elapsed = time.Since(downloadStart).Seconds()
+			mb := float64(bytesWritten) / (1024.0 * 1024.0)
+			if elapsed > 0 {
+				throughput = mb / elapsed
+			}
+		}
+		// Emit completion asynchronously with metrics
+		go in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{
+			event.KeyActionID:       in.ActionID,
+			event.KeyOutputPath:     in.OutputPath,
+			event.KeyBytesTotal:     bytesWritten,
+			event.KeyChunks:         chunkIndex,
+			event.KeyElapsedSeconds: elapsed,
+			event.KeyThroughputMBS:  throughput,
+		})
 	}
 	return &CascadeSupernodeDownloadResponse{
 		Success: true,
diff --git a/sdk/net/factory.go b/sdk/net/factory.go
index b9fad9fd..f3486780 100644
--- a/sdk/net/factory.go
+++ b/sdk/net/factory.go
@@ -39,9 +39,10 @@ func NewClientFactory(ctx context.Context, logger log.Logger, keyring keyring.Ke
 	// Tuned for 1GB max files with 4MB chunks
 	// Reduce in-flight memory by aligning windows and msg sizes to chunk size.
 	opts := client.DefaultClientOptions()
-	opts.MaxRecvMsgSize = 8 * 1024 * 1024     // 8MB: supports 4MB chunks + overhead
-	opts.MaxSendMsgSize = 8 * 1024 * 1024     // 8MB: supports 4MB chunks + overhead
-	opts.InitialWindowSize = 4 * 1024 * 1024  // 4MB per-stream window ≈ chunk size
+	opts.MaxRecvMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+	opts.MaxSendMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+	// Increase per-stream window to provide headroom for first data chunk + events
+	opts.InitialWindowSize = 12 * 1024 * 1024 // 12MB per-stream window
 	opts.InitialConnWindowSize = 64 * 1024 * 1024 // 64MB per-connection window
 
 	return &ClientFactory{
diff --git a/sdk/task/download.go b/sdk/task/download.go
index 3e85007a..2c727ae9 100644
--- a/sdk/task/download.go
+++ b/sdk/task/download.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"os"
-	"sort"
 	"time"
 
 	"github.com/LumeraProtocol/supernode/v2/sdk/adapters/lumera"
@@ -77,51 +76,6 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern
 		}
 	}
 
-	// Optionally rank supernodes by available memory to improve success for large files
-	// We keep a short timeout per status fetch to avoid delaying downloads.
-	type rankedSN struct {
-		sn        lumera.Supernode
-		availGB   float64
-		hasStatus bool
-	}
-	ranked := make([]rankedSN, 0, len(supernodes))
-	for _, sn := range supernodes {
-		ranked = append(ranked, rankedSN{sn: sn})
-	}
-
-	// Probe supernode status with short timeouts and close clients promptly
-	for i := range ranked {
-		sn := ranked[i].sn
-		// 2s status timeout to keep this pass fast
-		stx, cancel := context.WithTimeout(ctx, 2*time.Second)
-		client, err := clientFactory.CreateClient(stx, sn)
-		if err != nil {
-			cancel()
-			continue
-		}
-		status, err := client.GetSupernodeStatus(stx)
-		_ = client.Close(stx)
-		cancel()
-		if err != nil {
-			continue
-		}
-		ranked[i].hasStatus = true
-		ranked[i].availGB = status.Resources.Memory.AvailableGB
-	}
-
-	// Sort: nodes with status first, higher available memory first
-	sort.Slice(ranked, func(i, j int) bool {
-		if ranked[i].hasStatus != ranked[j].hasStatus {
-			return ranked[i].hasStatus && !ranked[j].hasStatus
-		}
-		return ranked[i].availGB > ranked[j].availGB
-	})
-
-	// Rebuild the supernodes list in the sorted order
-	for i := range ranked {
-		supernodes[i] = ranked[i].sn
-	}
-
-	// Try supernodes sequentially, one by one (now sorted)
+	// Try supernodes sequentially, one by one
 	var lastErr error
 	for idx, sn := range supernodes {
@@ -146,8 +100,8 @@
 			continue
 		}
 
-		// Success; return to caller
-		return nil
+		// Success; return to caller
+		return nil
 	}
 
 	if lastErr != nil {
@@ -176,15 +130,15 @@ func (t *CascadeDownloadTask) attemptDownload(
 		t.LogEvent(ctx, evt, msg, data)
 	}
 
-	resp, err := client.Download(ctx, req)
-	if err != nil {
-		return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
-	}
-	if !resp.Success {
-		return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
-	}
+	resp, err := client.Download(ctx, req)
+	if err != nil {
+		return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
+	}
+	if !resp.Success {
+		return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
+	}
 
-	return nil
+	return nil
 }
 
 // downloadResult holds the result of a successful download attempt
diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go
index a99fbf0a..449f4c42 100644
--- a/supernode/node/action/server/cascade/cascade_action_server.go
+++ b/supernode/node/action/server/cascade/cascade_action_server.go
@@ -313,7 +313,14 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
 		"chunk_size": chunkSize,
 	})
 
-	// Announce: file is ready to be served to the client
+	// Pre-read first chunk to avoid any delay between SERVE_READY and first data
+	buf := make([]byte, chunkSize)
+	n, readErr := f.Read(buf)
+	if readErr != nil && readErr != io.EOF {
+		return fmt.Errorf("chunked read failed: %w", readErr)
+	}
+
+	// Announce: file is ready to be served to the client (right before first data)
 	if err := stream.Send(&pb.DownloadResponse{
 		ResponseType: &pb.DownloadResponse_Event{
 			Event: &pb.DownloadEvent{
@@ -326,10 +333,27 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
 		return err
 	}
 
-	// Stream the file in fixed-size chunks
-	buf := make([]byte, chunkSize)
+	// Send pre-read first chunk if available
+	if n > 0 {
+		if err := stream.Send(&pb.DownloadResponse{
+			ResponseType: &pb.DownloadResponse_Chunk{
+				Chunk: &pb.DataChunk{Data: buf[:n]},
+			},
+		}); err != nil {
+			logtrace.Error(ctx, "failed to stream first chunk", logtrace.Fields{logtrace.FieldError: err.Error()})
+			return err
+		}
+	}
+
+	// If EOF after first read, we're done
+	if readErr == io.EOF {
+		logtrace.Info(ctx, "completed streaming all chunks", fields)
+		return nil
+	}
+
+	// Continue streaming remaining chunks
 	for {
-		n, readErr := f.Read(buf)
+		n, readErr = f.Read(buf)
 		if n > 0 {
 			if err := stream.Send(&pb.DownloadResponse{
 				ResponseType: &pb.DownloadResponse_Chunk{
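
For orientation, below is a minimal, self-contained sketch of the pre-read/announce/stream pattern that the final (truncated) cascade_action_server.go hunk implements. It is not part of the diff; the chunkSender interface and streamFile helper are hypothetical stand-ins for the real pb stream and server handler, used here only to show the ordering of reads and sends.

// Sketch only; chunkSender stands in for the gRPC download stream.
package cascadesketch

import (
	"fmt"
	"io"
	"os"
)

type chunkSender interface {
	SendServeReady() error       // announce that data is about to flow
	SendChunk(data []byte) error // send one data chunk to the client
}

// streamFile pre-reads the first chunk so the readiness event is emitted
// immediately before the first byte of data, then streams the rest of the
// file in fixed-size chunks.
func streamFile(path string, chunkSize int, s chunkSender) error {
	f, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer f.Close()

	buf := make([]byte, chunkSize)

	// Pre-read the first chunk before announcing readiness.
	n, readErr := f.Read(buf)
	if readErr != nil && readErr != io.EOF {
		return fmt.Errorf("chunked read failed: %w", readErr)
	}

	// Announce readiness right before sending data.
	if err := s.SendServeReady(); err != nil {
		return err
	}
	if n > 0 {
		if err := s.SendChunk(buf[:n]); err != nil {
			return err
		}
	}
	if readErr == io.EOF {
		return nil // file fit in a single chunk
	}

	// Stream the remaining chunks.
	for {
		n, readErr = f.Read(buf)
		if n > 0 {
			if err := s.SendChunk(buf[:n]); err != nil {
				return err
			}
		}
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return fmt.Errorf("chunked read failed: %w", readErr)
		}
	}
}

The design point, per the hunk's own comment, is that any latency on the first read is paid before SERVE_READY is emitted, so the client never waits between the readiness event and the first data chunk.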