# In [None]:
import socketio
import os
import base64
import time
import uuid
import pandas as pd
from IPython.display import display
import logging
import threading
from typing import Optional
import mimetypes

# --- Configuration ---
# Target server and the per-project Socket.IO namespace
PROJECT_ID = "101"
SERVER_URL = "http://localhost:5055"
PROJECT_NAMESPACE = "/project/" + PROJECT_ID

# Candidate input files, one per supported upload type
ANNDATA_FILE_PATH = "scanpy-pbmc3k.h5ad"
LARGE_CSV_FILE_PATH = "generated_cells_data_large_1gb.csv"

# Upload parameters
CHUNK_SIZE = 256 * 1024  # Bytes read from the file per chunk (256KB)
DATASOURCE_NAME = None  # None -> use the filename; otherwise a name string
REPLACE_DATASOURCE = True  # True -> overwrite a datasource that has the same name

# Which kind of file to upload: "csv" or "anndata"
UPLOAD_FILE_TYPE = "anndata"

# Retry parameters
MAX_RETRIES = 50
RETRY_DELAY_SECONDS = 15  # Seconds to wait between retries

# Client-side logging: INFO and above, tagged so client lines are easy to spot
logging.basicConfig(
    format='%(asctime)s - SOCKETIO_CLIENT - %(levelname)s - %(message)s',
    level=logging.INFO,
)
client_logger = logging.getLogger(__name__)

def detect_file_type(file_path):
    """Classify a file as CSV or AnnData from its name.

    Only the file name is inspected; the file's contents are never read.

    Args:
        file_path: Path of the file to classify.

    Returns:
        A ``(file_type, content_type)`` tuple:

        * ``(None, None)`` if ``file_path`` does not exist;
        * ``("anndata", "application/x-hdf")`` for names ending in ``.h5ad``;
        * ``("csv", "text/csv")`` for names ending in ``.csv`` or names that
          ``mimetypes`` maps to ``text/csv``;
        * ``("unknown", "application/octet-stream")`` for names that carry a
          compression suffix (for example ``data.csv.gz``);
        * ``("unknown", <guessed type or "application/octet-stream">)``
          for everything else.
    """
    if not os.path.exists(file_path):
        return None, None

    filename = os.path.basename(file_path).lower()

    if filename.endswith('.h5ad'):
        return "anndata", "application/x-hdf"
    if filename.endswith('.csv'):
        return "csv", "text/csv"

    # Fall back to the mime type guessed from the name
    mime_type, encoding = mimetypes.guess_type(file_path)
    if encoding is not None:
        # "data.csv.gz" guesses as ("text/csv", "gzip"): the bytes on disk are
        # compressed, so labelling them text/csv would misdescribe the payload.
        return "unknown", "application/octet-stream"
    if mime_type == "text/csv":
        return "csv", "text/csv"
    return "unknown", mime_type or "application/octet-stream"

class SocketIOUploader:
    """
    SocketIO-based file uploader with resumability support for CSV and AnnData files.

    One instance uploads one file.  ``upload()`` connects to the server, asks it
    what it already holds for ``file_id`` (``upload_query``), sends the remaining
    bytes as base64 ``upload_chunk`` messages bracketed by ``upload_start`` and
    ``upload_end``, and then waits for ``upload_success`` or ``upload_error``.

    Server events are handled by callbacks registered in
    ``_setup_event_handlers``; they communicate with ``upload()`` through the
    instance's flags and ``threading.Event`` objects.
    """

    def __init__(self, server_url: str, namespace: str, file_path: str, name: Optional[str] = None,
                 file_id: Optional[str] = None, view: str = "default",
                 replace: bool = True, supplied_only: bool = False):
        """Initialize the SocketIO uploader.

        Args:
            server_url: Base URL of the Socket.IO server.
            namespace: Socket.IO namespace used for every event.
            file_path: Path of the file to upload; its size is read immediately.
            name: Datasource name (CSV only). Defaults to the file name
                without its extension.
            file_id: Identifier sent with every message. Pass the id of an
                earlier attempt to let the server report a resume offset;
                a fresh UUID is generated when omitted.
            view: Sent as ``view`` in ``upload_start`` (CSV only).
            replace: Sent as ``replace`` in ``upload_start`` (CSV only).
            supplied_only: Sent as ``supplied_only`` in ``upload_start`` (CSV only).

        Raises:
            OSError: If the size of ``file_path`` cannot be read (for example
                because the file does not exist).
        """
        self.server_url = server_url
        self.namespace = namespace
        self.file_path = file_path
        self.file_size = os.path.getsize(file_path)
        self.file_name = os.path.basename(file_path)

        # Auto-detect file type
        self.file_type, self.content_type = detect_file_type(file_path)
        client_logger.info(f"Detected file type: {self.file_type}, content type: {self.content_type}")

        # Set datasource name (only used for CSV files)
        if self.file_type == "csv":
            self.name = name or os.path.splitext(self.file_name)[0]
            self.view = view
            self.replace = replace
            self.supplied_only = supplied_only
        else:
            # For AnnData files, these parameters are not needed
            self.name = None
            self.view = None
            self.replace = None
            self.supplied_only = None

        # File ID for tracking and resuming
        self.file_id = file_id or str(uuid.uuid4())
        client_logger.info(f"Initialized SocketIO uploader for file: {self.file_name}, File ID: {self.file_id}")

        # State variables
        self.progress = 0
        self.uploaded_bytes = 0
        self.resume_offset = 0
        self.start_time = None
        self.end_time = None
        self.processing_complete = False
        self.upload_transfer_complete = False
        self.server_will_process = False
        self.is_resuming = False
        self.final_result = None
        self.upload_success = False
        self.should_exit_processing_wait = False
        # Highest progress percentage already logged; -1 means nothing logged yet
        self._last_logged_progress = -1

        # Threading events
        self.connection_established = threading.Event()
        self.upload_acknowledged = threading.Event()
        self.server_responded_to_query = threading.Event()
        self.stop_event = threading.Event()
        self.lock = threading.Lock()

        # SocketIO client with automatic reconnection disabled; upload() does
        # its own connection handling
        self.sio = socketio.Client(
            logger=False,
            engineio_logger=False,
            reconnection=False,
            reconnection_attempts=0
        )
        self._setup_event_handlers()

    def _setup_event_handlers(self):
        """Register the callbacks for every server event on ``self.namespace``.

        Handlers for upload events ignore messages whose ``file_id`` differs
        from this uploader's id (``upload_error`` also accepts messages that
        carry no ``file_id``).
        """

        @self.sio.on('connect', namespace=self.namespace)
        def on_connect():
            client_logger.info(f"Connected to SocketIO server at {self.server_url}{self.namespace}")
            self.connection_established.set()

        @self.sio.on('disconnect', namespace=self.namespace)
        def on_disconnect():
            client_logger.info("Disconnected from SocketIO server")

        @self.sio.on('connected', namespace=self.namespace)
        def on_connected(data):
            client_logger.info(f"Server connection acknowledged: {data}")

        @self.sio.on('upload_start_ack', namespace=self.namespace)
        def on_upload_start_ack(data):
            client_logger.info(f"Upload start acknowledged: {data}")
            if data.get('file_id') == self.file_id:
                self.upload_acknowledged.set()

        @self.sio.on('upload_resume_ack', namespace=self.namespace)
        def on_upload_resume_ack(data):
            client_logger.info(f"Upload resume acknowledged: {data}")
            if data.get('file_id') == self.file_id:
                self.resume_offset = data.get('received_bytes', 0)
                self.is_resuming = True
                self.upload_acknowledged.set()

        @self.sio.on('upload_end_ack', namespace=self.namespace)
        def on_upload_end_ack(data):
            client_logger.info(f"Upload end acknowledged: {data}")
            if data.get('file_id') == self.file_id:
                with self.lock:
                    self.upload_transfer_complete = True

        @self.sio.on('upload_progress', namespace=self.namespace)
        def on_upload_progress(data):
            if data.get('file_id') == self.file_id:
                current_progress = data.get('progress', 0)
                # Log only multiples of ten, and each value at most once
                if current_progress % 10 == 0 and current_progress > self._last_logged_progress:
                    client_logger.info(f"Upload progress: {current_progress}% ({data.get('received')} / {data.get('total')} bytes)")
                    self._last_logged_progress = current_progress

        @self.sio.on('upload_processing_initiated', namespace=self.namespace)
        def on_upload_processing_initiated(data):
            client_logger.info(f"Server initiated processing: {data}")
            if data.get('file_id') == self.file_id:
                with self.lock:
                    self.processing_complete = False
                    self.server_will_process = True
                self.server_responded_to_query.set()

        @self.sio.on('upload_processing', namespace=self.namespace)
        def on_upload_processing(data):
            client_logger.info(f"Server processing file: {data}")
            if data.get('file_id') == self.file_id:
                with self.lock:
                    self.processing_complete = False

        @self.sio.on('upload_success', namespace=self.namespace)
        def on_upload_success(data):
            client_logger.info(f"Processing successful: {data}")
            if data.get('file_id') == self.file_id:
                with self.lock:
                    self.processing_complete = True
                    self.upload_success = True
                    self.final_result = data
                    self.should_exit_processing_wait = True

        @self.sio.on('upload_error', namespace=self.namespace)
        def on_upload_error(data):
            client_logger.error(f"Server error: {data}")
            if data.get('file_id') == self.file_id or not data.get('file_id'):
                with self.lock:
                    self.processing_complete = True
                    self.upload_success = False
                    self.final_result = data
                    self.should_exit_processing_wait = True

        @self.sio.on('upload_resume_info', namespace=self.namespace)
        def on_upload_resume_info(data):
            client_logger.info(f"Resume info received: {data}")
            if data.get('file_id') == self.file_id:
                self.resume_offset = data.get('received_bytes', 0)
                self.is_resuming = True
                self.upload_transfer_complete = False
                self.server_will_process = False
                self.server_responded_to_query.set()

        @self.sio.on('upload_not_found', namespace=self.namespace)
        def on_upload_not_found(data):
            client_logger.info(f"Server does not have state for file: {data}")
            if data.get('file_id') == self.file_id:
                self.resume_offset = 0
                self.is_resuming = False
                self.upload_transfer_complete = False
                self.server_will_process = False
                self.server_responded_to_query.set()

        @self.sio.on('pong', namespace=self.namespace)
        def on_pong(data):
            client_logger.debug("Received pong from server")

    def _disconnect_quietly(self, announce: bool = False):
        """Disconnect the client if it is connected; never raise.

        Args:
            announce: Log an INFO line before disconnecting.
        """
        try:
            if self.sio.connected:
                if announce:
                    client_logger.info("Disconnecting from server")
                self.sio.disconnect()
        except Exception as e:
            # Best effort: a failed disconnect must not mask the upload result
            client_logger.debug(f"Ignoring error while disconnecting: {e}")

    def _connect_with_retries(self, attempts: int = 3):
        """Connect to the server, making up to ``attempts`` tries.

        Raises:
            Exception: If no attempt produced a connection.
        """
        for attempt in range(1, attempts + 1):
            try:
                client_logger.info(f"Connection attempt {attempt}/{attempts}")
                # Clear first so the wait below can only be satisfied by this attempt
                self.connection_established.clear()
                self.sio.connect(
                    self.server_url,
                    namespaces=[self.namespace],
                    transports=['websocket', 'polling'],
                    wait_timeout=60
                )

                # Wait for the 'connect' handler to fire
                if self.connection_established.wait(timeout=15):
                    client_logger.info("Connection established successfully")
                    return
                raise Exception("Connection timeout")

            except Exception as e:
                client_logger.warning(f"Connection attempt {attempt} failed: {e}")
                # Drop any half-open connection so the next connect() starts clean
                self._disconnect_quietly()
                if attempt < attempts:
                    time.sleep(2)  # Wait before retry
                else:
                    raise Exception(f"Failed to establish connection after {attempts} attempts") from e

    def _query_upload_status(self) -> bool:
        """Query server for upload status. Returns True if client needs to send data.

        Returns False when the server announced it will process the file
        itself (``server_will_process`` is then True), or when the query could
        not be sent (``final_result`` then holds an error dict).  If the server
        does not answer within 20 seconds the upload is treated as a fresh one
        and True is returned.
        """
        self.server_will_process = False
        self.server_responded_to_query.clear()

        query_msg = {"file_id": self.file_id}
        client_logger.info(f"Querying server status for file_id: {self.file_id}")

        try:
            self.sio.emit('upload_query', query_msg, namespace=self.namespace)
        except Exception as e:
            client_logger.error(f"Failed to send query: {e}")
            with self.lock:
                self.processing_complete = True
                self.upload_success = False
                self.final_result = {'type': 'error', 'message': f'Failed to send query: {e}'}
            return False

        # Wait for response
        if self.server_responded_to_query.wait(timeout=20):
            with self.lock:
                if self.server_will_process:
                    client_logger.info("Server will handle processing. No data transfer needed.")
                    return False
            client_logger.info(f"Query processed. Resuming: {self.is_resuming}, Offset: {self.resume_offset}")
            return True

        client_logger.error("Timeout waiting for server response to query")
        self.resume_offset = 0
        self.is_resuming = False
        return True

    def _start_upload(self):
        """Send the ``upload_start`` message and record the start time."""
        self.start_time = time.time()

        # Fields common to every file type
        start_msg = {
            "file_id": self.file_id,
            "filename": self.file_name,
            "size": self.file_size,
            "content_type": self.content_type
        }

        # Add CSV-specific parameters
        if self.file_type == "csv":
            start_msg.update({
                "name": self.name,
                "view": self.view,
                "replace": self.replace,
                "supplied_only": self.supplied_only
            })

        client_logger.info(f"Sending upload start for {self.file_type} file, ID {self.file_id}")
        self.sio.emit('upload_start', start_msg, namespace=self.namespace)

    def _send_file_chunks(self):
        """Send the file to the server as base64 ``upload_chunk`` messages.

        Starts at ``resume_offset`` when resuming, otherwise at byte 0, and
        stops at end of file, after ``file_size`` bytes, or when
        ``stop_event`` is set.  ``uploaded_bytes`` tracks the absolute offset
        reached.  Any exception is logged and re-raised.
        """
        chunk_num = 0  # Counts chunks sent in this session; restarts at 0 on resume
        client_logger.info(f"Starting file transmission from offset {self.resume_offset}")

        try:
            with open(self.file_path, 'rb') as file:
                if self.is_resuming and self.resume_offset > 0:
                    if self.resume_offset >= self.file_size:
                        client_logger.warning(f"Resume offset {self.resume_offset} >= file size {self.file_size}")
                        self.uploaded_bytes = self.resume_offset
                        return
                    client_logger.info(f"Seeking to resume offset: {self.resume_offset}")
                    file.seek(self.resume_offset)
                    self.uploaded_bytes = self.resume_offset
                else:
                    self.uploaded_bytes = 0

                while not self.stop_event.is_set():
                    if self.uploaded_bytes >= self.file_size:
                        client_logger.info("File transfer complete")
                        break

                    chunk = file.read(CHUNK_SIZE)
                    if not chunk:
                        client_logger.info("Reached end of file")
                        break

                    # Never send more than the size announced in upload_start,
                    # even if the file grew since the uploader was created
                    bytes_to_send = len(chunk)
                    if self.uploaded_bytes + bytes_to_send > self.file_size:
                        bytes_to_send = self.file_size - self.uploaded_bytes
                        chunk = chunk[:bytes_to_send]

                    if bytes_to_send <= 0:
                        break

                    chunk_b64 = base64.b64encode(chunk).decode('utf-8')
                    chunk_msg = {
                        "file_id": self.file_id,
                        "chunk_num": chunk_num,
                        "data": chunk_b64
                    }

                    self.sio.emit('upload_chunk', chunk_msg, namespace=self.namespace)

                    self.uploaded_bytes += bytes_to_send
                    chunk_num += 1

                    # Small delay to prevent overwhelming the server
                    time.sleep(0.01)

            client_logger.info(f"Finished sending chunks. Total bytes uploaded: {self.uploaded_bytes}")

        except Exception as e:
            client_logger.exception(f"Error sending file chunks: {e}")
            raise

    def _end_upload(self):
        """Send ``upload_end`` if every byte was sent; otherwise only log a warning."""
        if self.uploaded_bytes >= self.file_size:
            end_msg = {"file_id": self.file_id}
            client_logger.info(f"Sending upload end for file_id: {self.file_id}")
            self.sio.emit('upload_end', end_msg, namespace=self.namespace)
        else:
            client_logger.warning(f"Upload incomplete ({self.uploaded_bytes}/{self.file_size})")

    def _transfer_file(self):
        """Run start -> chunks -> end and wait for the server's end acknowledgement.

        Raises:
            Exception: If the server does not acknowledge ``upload_start``
                within 15 seconds, or if sending a message fails.
        """
        self._start_upload()

        # Wait for acknowledgment
        if not self.upload_acknowledged.wait(timeout=15):
            raise Exception("Did not receive upload acknowledgment")

        self._send_file_chunks()

        # Only close the upload when it was neither cancelled nor cut short
        if self.stop_event.is_set() or self.uploaded_bytes < self.file_size:
            return

        self._end_upload()
        # Wait for upload_end_ack before proceeding
        end_ack_timeout = 30
        end_ack_start = time.time()
        while not self.upload_transfer_complete and not self.stop_event.is_set():
            if time.time() - end_ack_start > end_ack_timeout:
                client_logger.warning("Timeout waiting for upload_end_ack, assuming transfer complete")
                with self.lock:
                    self.upload_transfer_complete = True
                break
            time.sleep(0.5)

    def _wait_for_processing(self, needs_processing_wait: bool):
        """Block until an outcome is known, the upload is cancelled, or a limit is hit.

        The loop ends when ``upload_success``/``upload_error`` arrives, when
        ``stop_event`` is set, or after three consecutive failed connection
        checks.  In the last case the upload is *assumed* successful and the
        server is assumed to keep processing in the background.

        Args:
            needs_processing_wait: When True, a keepalive ``ping`` is emitted
                every 30 seconds while waiting.

        Raises:
            Exception: If nothing ended the wait within one hour.
        """
        timeout_seconds = 3600  # 1 hour timeout
        ping_interval = 30  # Send ping every 30 seconds
        max_consecutive_connection_failures = 3
        consecutive_failures = 0
        start_wait_time = time.time()
        last_ping_time = start_wait_time

        while not self.should_exit_processing_wait and not self.stop_event.is_set():
            current_time = time.time()

            # Check timeout
            if current_time - start_wait_time > timeout_seconds:
                client_logger.error(f"Timeout ({timeout_seconds}s) waiting for processing")
                raise Exception("Client timeout waiting for completion")

            if not self.sio.connected:
                consecutive_failures += 1
                client_logger.warning(f"Connection lost (failure {consecutive_failures}/{max_consecutive_connection_failures})")

                if consecutive_failures >= max_consecutive_connection_failures:
                    client_logger.info("Too many connection failures during processing wait. Assuming server is processing in background.")
                    # Instead of failing, assume the server is processing and exit gracefully
                    with self.lock:
                        self.upload_success = True
                        self.final_result = {
                            'type': 'info',
                            'message': 'Upload completed, server processing in background. Connection lost but upload was successful.'
                        }
                        self.should_exit_processing_wait = True
                    break

                # Try to reconnect once
                try:
                    client_logger.info("Attempting to reconnect to check processing status...")
                    # The flag is still set from the first connection; clear it
                    # so the wait below reflects this reconnect only
                    self.connection_established.clear()
                    self.sio.connect(
                        self.server_url,
                        namespaces=[self.namespace],
                        transports=['websocket', 'polling'],
                        wait_timeout=10
                    )
                    if self.connection_established.wait(timeout=5):
                        client_logger.info("Reconnected successfully")
                        consecutive_failures = 0  # Reset failure count
                        continue
                    client_logger.warning("Reconnection timeout")
                except Exception as e:
                    client_logger.warning(f"Reconnection failed: {e}")

                # Wait before next attempt
                time.sleep(5)
                continue

            consecutive_failures = 0  # Connection is up: reset failure count

            # Send periodic pings to keep connection alive while waiting for processing
            if needs_processing_wait and current_time - last_ping_time > ping_interval:
                if self.sio.connected:
                    try:
                        client_logger.info("Sending keepalive ping during processing wait")
                        self.sio.emit('ping', {'message': 'keepalive'}, namespace=self.namespace)
                        last_ping_time = current_time
                    except Exception as e:
                        client_logger.warning(f"Failed to send keepalive ping: {e}")

            time.sleep(1)

    def upload(self):
        """Upload the file and wait for the server to finish processing it.

        Returns:
            A ``(success, result)`` tuple.  ``result`` is the payload of the
            server's ``upload_success``/``upload_error`` event, or a dict with
            ``type`` and ``message`` keys produced by the client, or ``None``
            when the upload was cancelled before any result existed.
            Exceptions are caught, logged and reported as
            ``(False, {'type': 'error', ...})``; the client is always
            disconnected before returning.
        """
        client_logger.info(f"Starting SocketIO upload process for {self.file_type} file, ID: {self.file_id}")

        # Reset state
        self.stop_event.clear()
        self.processing_complete = False
        self.upload_transfer_complete = False
        self.server_will_process = False
        self.upload_success = False
        self.should_exit_processing_wait = False
        self.final_result = None
        self.connection_established.clear()
        self.upload_acknowledged.clear()
        self.server_responded_to_query.clear()
        self._last_logged_progress = -1

        try:
            # Ensure we're disconnected first
            self._disconnect_quietly()
            time.sleep(1)  # Brief pause before reconnecting

            self._connect_with_retries(3)

            # Verify connection is still active
            if not self.sio.connected:
                raise Exception("Connection lost after establishment")

            waiting_for_server_processing = False

            if self._query_upload_status():
                client_logger.info("Proceeding with file data transfer")
                self._transfer_file()
            else:
                with self.lock:
                    should_wait = self.server_will_process
                    # Set by _query_upload_status when the query could not be sent
                    query_failure = (
                        self.final_result
                        if self.processing_complete and not self.upload_success
                        else None
                    )
                if should_wait:
                    client_logger.info("Server will handle processing. Waiting for completion.")
                    waiting_for_server_processing = True
                elif query_failure is not None:
                    # A query that never reached the server is a failure,
                    # not "nothing to transfer"
                    client_logger.warning(f"Upload failed: {query_failure}")
                    return False, query_failure
                else:
                    client_logger.info("No data transfer needed.")
                    return True, {'type': 'info', 'message': 'No transfer needed'}

            # Check if we need to wait for processing
            with self.lock:
                needs_processing_wait = (self.upload_transfer_complete or
                                         self.server_will_process or
                                         waiting_for_server_processing)

            client_logger.info(f"Waiting for processing completion. needs_processing_wait: {needs_processing_wait}")
            client_logger.info(f"State: upload_transfer_complete={self.upload_transfer_complete}, server_will_process={self.server_will_process}")

            self._wait_for_processing(needs_processing_wait)

            # Return final result
            with self.lock:
                success = self.upload_success
                result = self.final_result

            if success:
                client_logger.info(f"Upload completed successfully: {result}")
            else:
                client_logger.warning(f"Upload failed: {result}")

            return success, result

        except Exception as e:
            client_logger.exception(f"Error during upload: {e}")
            with self.lock:
                # Keep a result the server already delivered; otherwise record this error
                if not self.processing_complete:
                    self.processing_complete = True
                    self.upload_success = False
                    self.final_result = {'type': 'error', 'message': f'Upload error: {e}'}
                result = self.final_result
            return False, result

        finally:
            self._disconnect_quietly(announce=True)

    def cancel_upload(self):
        """Cancel the current upload.

        Sends ``upload_cancel`` to the server and sets ``stop_event`` so the
        loops in ``upload()`` stop.  The event is set even when the server
        cannot be notified (for example because the connection is down).
        """
        try:
            cancel_msg = {"file_id": self.file_id}
            self.sio.emit('upload_cancel', cancel_msg, namespace=self.namespace)
        except Exception as e:
            client_logger.error(f"Error cancelling upload: {e}")
        finally:
            # Notify first (while the connection is still up), but always stop locally
            self.stop_event.set()

# --- Helper Functions ---
def preview_csv(file_path):
    """Log a CSV file's size and display its first rows.

    Returns:
        ``None`` if the file does not exist, ``True`` once the preview has
        been shown, ``False`` if reading or displaying it raised.
    """
    if os.path.exists(file_path):
        try:
            sample = pd.read_csv(file_path, nrows=10)
            size_in_mb = os.path.getsize(file_path) / (1024 * 1024)
            client_logger.info(f"CSV File: {os.path.basename(file_path)} | Size: {size_in_mb:.2f} MB")
            print("\n--- CSV Preview (first 5 rows) ---")
            display(sample.head())
            print("----------------------------------\n")
        except Exception as e:
            client_logger.error(f"Error previewing CSV: {e}")
            return False
        else:
            return True

    client_logger.error(f"File not found: {file_path}")
    return None

def preview_anndata(file_path):
    """Log an AnnData file's size and print a short summary of its contents.

    The summary covers the shape, the number of variables and observations,
    and up to ten column names from each of ``obs`` and ``var``.

    Returns:
        ``None`` if the file does not exist, ``True`` once the summary has
        been printed, ``False`` if an ``ImportError`` (reported as scanpy
        not being installed) or any other exception was raised.
    """
    if os.path.exists(file_path):
        try:
            import scanpy as sc
            adata = sc.read(file_path)
            size_in_mb = os.path.getsize(file_path) / (1024 * 1024)

            client_logger.info(f"AnnData File: {os.path.basename(file_path)} | Size: {size_in_mb:.2f} MB")
            print("\n--- AnnData Preview ---")
            print(f"Shape: {adata.shape}")
            print(f"Variables (genes): {adata.n_vars}")
            print(f"Observations (cells): {adata.n_obs}")

            # Per-observation annotation columns, capped at ten names
            if adata.obs.shape[1] > 0:
                print(f"\nObservation columns: {list(adata.obs.columns[:10])}")
                hidden_obs = len(adata.obs.columns) - 10
                if hidden_obs > 0:
                    print(f"... and {hidden_obs} more")

            # Per-variable annotation columns, capped at ten names
            if adata.var.shape[1] > 0:
                print(f"\nVariable columns: {list(adata.var.columns[:10])}")
                hidden_var = len(adata.var.columns) - 10
                if hidden_var > 0:
                    print(f"... and {hidden_var} more")

            print("----------------------\n")
        except ImportError:
            client_logger.error("scanpy not installed. Cannot preview AnnData files.")
            return False
        except Exception as e:
            client_logger.error(f"Error previewing AnnData: {e}")
            return False
        else:
            return True

    client_logger.error(f"File not found: {file_path}")
    return None

# --- Main Execution Logic ---
if __name__ == "__main__" and "get_ipython" in locals():
    # Runs only when this is the main module AND ``get_ipython`` is present in
    # the module namespace, i.e. inside an IPython/Jupyter session.
    print("--- Starting SocketIO File Upload Script ---")
    print(f"Target Server: {SERVER_URL}{PROJECT_NAMESPACE}")
    print(f"Upload File Type: {UPLOAD_FILE_TYPE}")

    upload_kind = UPLOAD_FILE_TYPE.lower()

    # Select file based on type
    if upload_kind == "csv":
        file_path = LARGE_CSV_FILE_PATH
        print(f"CSV File to Upload: {file_path}")
    elif upload_kind == "anndata":
        file_path = ANNDATA_FILE_PATH
        print(f"AnnData File to Upload: {file_path}")
    else:
        print(f"Error: Unknown file type '{UPLOAD_FILE_TYPE}'. Use 'csv' or 'anndata'")
        # Abort this run by raising SystemExit instead of calling exit(), so
        # the interactive session is not asked to shut down.
        raise SystemExit(1)

    # One id is shared by every attempt, so a retry queries the server about
    # the same upload and can continue from the offset the server reports.
    file_id_for_upload = str(uuid.uuid4())

    if not os.path.exists(file_path):
        print(f"\nERROR: File not found at {file_path}")
    else:
        # Preview file based on type
        if upload_kind == "csv":
            if not preview_csv(file_path):
                print("Failed to preview CSV file")
                raise SystemExit(1)
            ds_name = DATASOURCE_NAME or os.path.splitext(os.path.basename(file_path))[0]
            print(f"Using Datasource Name: {ds_name}")
            print(f"Replace if exists: {REPLACE_DATASOURCE}")
        elif upload_kind == "anndata":
            if not preview_anndata(file_path):
                print("Failed to preview AnnData file")
                raise SystemExit(1)
            ds_name = None  # Not used for AnnData
            print("AnnData files don't use datasource names - will be processed automatically")

        overall_success = False
        final_upload_result = None
        attempts_made = 0  # Reported on failure; the loop may stop before MAX_RETRIES

        # Substrings (matched against the lower-cased message) that mark a
        # failure as worth retrying
        retriable_error_patterns = (
            'connection', 'timeout', 'websocket error', 'disconnect',
            'namespace', 'network', 'socket', 'broken pipe',
            'connection reset', 'connection refused', 'connection aborted',
            'bad namespace', 'not a connected namespace'
        )

        # Substrings that mean the "failure" is really a success report
        success_patterns = (
            'file processed successfully', 'processing successful', 'upload successful',
            'server processing in background', 'anndata processed successfully'
        )

        for attempt in range(MAX_RETRIES):
            attempts_made = attempt + 1
            client_logger.info(f"--- Upload Attempt {attempt + 1} of {MAX_RETRIES} ---")

            print(f"Generated File ID for attempt {attempt + 1}: {file_id_for_upload}")

            # A fresh uploader (and Socket.IO client) per attempt
            uploader = SocketIOUploader(
                server_url=SERVER_URL,
                namespace=PROJECT_NAMESPACE,
                file_path=file_path,
                name=ds_name,  # Only used for CSV files
                file_id=file_id_for_upload,
                replace=REPLACE_DATASOURCE,  # Only used for CSV files
                supplied_only=False  # Only used for CSV files
            )

            success, result = uploader.upload()
            final_upload_result = result

            if success:
                client_logger.info(f"Upload successful! Result: {result}")
                overall_success = True
                break

            client_logger.warning(f"Upload attempt {attempt + 1} failed: {result}")

            # Classify the failure from its message text
            should_retry = False
            if result and 'message' in result:
                msg_lower = result['message'].lower()

                # Check if this is actually a success message being misclassified
                if any(pattern in msg_lower for pattern in success_patterns):
                    client_logger.info("Success message detected, stopping retries")
                    overall_success = True
                    break

                # Check if this is a retriable error
                if any(pattern in msg_lower for pattern in retriable_error_patterns):
                    should_retry = True
                    client_logger.info(f"Detected retriable error: {result['message']}")
                else:
                    client_logger.warning(f"Non-retriable error detected: {result['message']}")

            # Retry logic
            if attempt >= MAX_RETRIES - 1:
                client_logger.error("Max retries reached")
                break

            if should_retry:
                client_logger.info(f"Retrying in {RETRY_DELAY_SECONDS} seconds... (attempt {attempt + 1}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY_SECONDS)
            elif attempt < 3:
                # Even "non-retriable" errors get a few more tries for large files
                client_logger.info(f"Retrying anyway for large file in {RETRY_DELAY_SECONDS} seconds...")
                time.sleep(RETRY_DELAY_SECONDS)
            else:
                client_logger.error("Non-retriable error and max retry attempts for non-retriable reached")
                break

        # Final result reporting
        if overall_success:
            print("\n UPLOAD COMPLETED SUCCESSFULLY! ")
            print(f"File Type: {UPLOAD_FILE_TYPE.upper()}")
            print(f"Final result: {final_upload_result}")

            if upload_kind == "csv":
                print(f" CSV datasource '{ds_name}' has been added to the project")
            elif upload_kind == "anndata":
                print(" AnnData file has been processed and converted to MDV format")
        else:
            # Report the attempts actually made, not the configured maximum
            print(f"\n UPLOAD FAILED after {attempts_made} attempt(s)")
            print(f"File Type: {UPLOAD_FILE_TYPE.upper()}")
            print(f"Final result: {final_upload_result}")

# --- Configuration Helper Functions ---
def set_csv_upload(csv_path, datasource_name=None, replace=True):
    """Point the module-level upload settings at a CSV file.

    Sets ``UPLOAD_FILE_TYPE`` to ``"csv"`` and stores the path, the
    datasource name and the replace flag in the matching module globals,
    then prints a confirmation.
    """
    global UPLOAD_FILE_TYPE, LARGE_CSV_FILE_PATH, DATASOURCE_NAME, REPLACE_DATASOURCE
    LARGE_CSV_FILE_PATH = csv_path
    DATASOURCE_NAME = datasource_name
    REPLACE_DATASOURCE = replace
    UPLOAD_FILE_TYPE = "csv"
    confirmation = f"Configured for CSV upload: {csv_path}"
    print(confirmation)

def set_anndata_upload(anndata_path):
    """Point the module-level upload settings at an AnnData file.

    Sets ``UPLOAD_FILE_TYPE`` to ``"anndata"`` and stores the path in
    ``ANNDATA_FILE_PATH``, then prints a confirmation.
    """
    global UPLOAD_FILE_TYPE, ANNDATA_FILE_PATH
    ANNDATA_FILE_PATH = anndata_path
    UPLOAD_FILE_TYPE = "anndata"
    confirmation = f"Configured for AnnData upload: {anndata_path}"
    print(confirmation)

# --- Example Usage ---
"""
To use this script:

1. For CSV files:
   set_csv_upload("path/to/your/data.csv", "MyDataset", replace=True)
   
2. For AnnData files:
   set_anndata_upload("path/to/your/data.h5ad")

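3. Then run the main execution block. The helpers above only change the
   module-level settings, so call them after the configuration section has run
   and before the main block. Alternatively, skip steps 1-2 and edit
   UPLOAD_FILE_TYPE and the file path constants in the configuration section.

UPLOAD_FILE_TYPE selects which configured file is uploaded; the uploader then detects
the file type from the file extension and handles the appropriate upload process.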
"""