## File Upload Testing in Azure OpenAI (AI Foundry)

### Environment Setup

In [1]:
# Import required packages
import os
import shutil
import time
from concurrent.futures import ThreadPoolExecutor
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

In [None]:
# Set file batch configuration
TOTAL_FILES = 210
FILE_SIZE_KB = 5

In [3]:
# Set up environment variables for Azure OpenAI
AOAI_API_BASE = os.getenv("AZURE_OPENAI_API_BASE")
AOAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AOAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_API_DEPLOY")

### Azure OpenAI Client Setup

In [4]:
# Initialise token provider
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default"
)

In [5]:
# Initialise Azure OpenAI client
client = AzureOpenAI(  
    azure_endpoint = AOAI_API_BASE,
    azure_ad_token_provider = token_provider,
    api_version = AOAI_API_VERSION,
)

### File Batch and Vector Store Setup

In [None]:
# Create test files
def create_test_files():
    test_dir = os.path.join(os.getcwd(), "test_files")
    
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)
    os.makedirs(test_dir)
    
    files = []
    content_base = "This is test content for vector store batch upload testing. " * (FILE_SIZE_KB * 15)
    
    print(f"Creating {TOTAL_FILES} test files ...")
    for i in range(TOTAL_FILES):
        file_path = os.path.join(test_dir, f"test_doc_{i:03d}.txt")
        content = f"Document {i+1}\n{content_base}\nEnd of document {i+1}\nTimestamp: {time.time()}"
        
        with open(file_path, 'w') as f:
            f.write(content)
        files.append(file_path)
    
    print(f"Created {TOTAL_FILES} files")
    return files, test_dir

test_files, temp_dir = create_test_files()

Creating 200 test files in c:\Users\lturakulov\Downloads\ZZZ_TEMP\ZZZ_CASES_WITH_RCA\Bosch_File_Uploads\test_files...
Created 200 files


In [7]:
# Create vector store
def create_vector_store():
    vector_store = client.vector_stores.create(
        name=f"Rate Test Vector Store - {int(time.time())}"
    )
    print(f"Created vector store: {vector_store.id}")
    return vector_store.id

vector_store_id = create_vector_store()

Created vector store: vs_1VRTU0m5gDFo8jNI3n858QHr


### Option 1: Individual Upload of Files to Azure OpenAI

In [8]:
# Upload files to Azure OpenAI individually
def upload_files_individually_with_timing(file_paths):
    print(f"Individual upload test: {len(file_paths)} files")
    file_ids = []
    request_times = []
    completion_times = []
    overall_start = time.time()
    
    for i, file_path in enumerate(file_paths):
        request_start = time.time()
        try:
            with open(file_path, 'rb') as f:
                file_obj = client.files.create(file=f, purpose="assistants")
                file_ids.append(file_obj.id)
            
            request_end = time.time()
            request_times.append(request_end - request_start)
            completion_times.append(request_end - overall_start)
            
            print(f"  File {i+1}: Request took {request_times[-1]:.3f}s, completed at {completion_times[-1]:.3f}s")
            
        except Exception as e:
            print(f"  Failed to upload file {i+1}: {e}")
    
    total_duration = time.time() - overall_start
    
    print(f"\nIndividual Upload Results:")
    print(f"  Files uploaded: {len(file_ids)}")
    print(f"  Total time: {total_duration:.2f}s")
    print(f"  Average request time: {sum(request_times)/len(request_times):.3f}s")
    print(f"  Completion rate: {len(file_ids)/total_duration:.2f} files/second")
    print(f"  Request submission rate: {len(file_ids)/sum(request_times):.2f} requests/second")
    
    return file_ids, total_duration

# Test with only 10 files
individual_file_ids, individual_duration = upload_files_individually_with_timing(test_files[:10])


Individual upload test: 10 files
  File 1: Request took 1.228s, completed at 1.228s
  File 2: Request took 0.956s, completed at 2.185s
  File 3: Request took 1.400s, completed at 3.585s
  File 4: Request took 1.090s, completed at 4.674s
  File 5: Request took 1.388s, completed at 6.062s
  File 6: Request took 1.305s, completed at 7.368s
  File 7: Request took 1.552s, completed at 8.920s
  File 8: Request took 1.515s, completed at 10.435s
  File 9: Request took 1.672s, completed at 12.108s
  File 10: Request took 1.799s, completed at 13.909s

Individual Upload Results:
  Files uploaded: 10
  Total time: 13.91s
  Average request time: 1.391s
  Completion rate: 0.72 files/second
  Request submission rate: 0.72 requests/second


### Option 2: Individual Upload of Files to Vector Store

In [9]:
# Add files to Vector Store individually
def add_files_to_vector_store_individually(file_ids, vector_store_id):
    print(f"\nAdding {len(file_ids)} files to vector store individually...")
    start_time = time.time()
    successful = 0
    
    for i, file_id in enumerate(file_ids):
        try:
            result = client.vector_stores.files.create(
                vector_store_id=vector_store_id,
                file_id=file_id
            )
            successful += 1
            print(f"  Added {i+1}/{len(file_ids)}: {result.id}")
        except Exception as e:
            print(f"  Failed to add file {i+1}: {e}")
    
    duration = time.time() - start_time
    print(f"Vector store addition: {successful} files in {duration:.2f}s")
    print(f"Rate: {successful/duration:.2f} files/second")
    return successful, duration

individual_vs_count, individual_vs_duration = add_files_to_vector_store_individually(individual_file_ids, vector_store_id)


Adding 10 files to vector store individually...
  Added 1/10: assistant-CgBC3W6ZCMuisX1zUdFgEM
  Added 2/10: assistant-4LaKvhetbiEaPLVwj3igB9
  Added 3/10: assistant-FGhKMVM5EpX6HMf2PpLwsa
  Added 4/10: assistant-7c7TJ8kG2oBVWB8z4G657z
  Added 5/10: assistant-F6zoMRDgM1ZXHLKrK42NaR
  Added 6/10: assistant-A4EtCS58cwsbz1SiyPvyJ4
  Added 7/10: assistant-2ruNouJgW4xde9cdY8QWWc
  Added 8/10: assistant-VQjYza73Mdc7S5Ls9fdPw9
  Added 9/10: assistant-LHptsxTwittyRpBVWQGbB5
  Added 10/10: assistant-MbXrP4qhxRVMYYDHJYpXYq
Vector store addition: 10 files in 4.90s
Rate: 2.04 files/second


### Option 3: Batch Upload of Files to Azure OpenAI and its Vector Store

In [None]:
# Concurrently upload files to Azure OpenAI
def concurrent_upload_with_rate_measurement(file_paths, max_workers=30):
    print(f"\nConcurrent upload: {len(file_paths)} files, {max_workers} workers")
    print("Measuring REQUEST SUBMISSION RATE vs COMPLETION RATE")
    
    request_submission_times = []
    completion_times = []
    results = []
    
    def upload_single_file_with_timing(file_info):
        file_path, file_index = file_info
        
        # Record when we START the request
        request_start = time.time()
        request_submission_times.append(request_start)
        
        try:
            with open(file_path, 'rb') as f:
                file_obj = client.files.create(file=f, purpose="assistants")
            
            # Record when request COMPLETES
            completion_time = time.time()
            completion_times.append(completion_time)
            
            return {
                'success': True,
                'file_id': file_obj.id,
                'request_start': request_start,
                'completion_time': completion_time,
                'duration': completion_time - request_start,
                'file_index': file_index
            }
        except Exception as e:
            completion_time = time.time()
            completion_times.append(completion_time)
            return {
                'success': False,
                'file_id': None,
                'request_start': request_start,
                'completion_time': completion_time,
                'duration': completion_time - request_start,
                'error': str(e),
                'file_index': file_index
            }
    
    overall_start = time.time()
    file_info_list = [(fp, i) for i, fp in enumerate(file_paths)]
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(upload_single_file_with_timing, fi) for fi in file_info_list]
        
        for future in futures:
            result = future.result()
            results.append(result)
    
    overall_end = time.time()
    total_duration = overall_end - overall_start
    
    # Calculate request submission rate (30 RPS target)
    request_submission_times.sort()
    first_request = min(request_submission_times)
    last_request = max(request_submission_times)
    request_submission_duration = last_request - first_request
    
    # Calculate completion rate
    successful = [r for r in results if r['success']]
    
    print(f"\nConcurrent Upload Analysis:")
    print(f"  Total files: {len(file_paths)}")
    print(f"  Successful: {len(successful)}")
    print(f"  Failed: {len(file_paths) - len(successful)}")
    print(f"\nRATE ANALYSIS:")
    print(f"  REQUEST SUBMISSION:")
    print(f"    Time to submit all requests: {request_submission_duration:.3f}s")
    print(f"    Request submission rate: {len(file_paths)/request_submission_duration:.2f} requests/second")
    print(f"  COMPLETION RATE:")
    print(f"    Total completion time: {total_duration:.2f}s") 
    print(f"    Completion rate: {len(successful)/total_duration:.2f} files/second")
    
    return [r['file_id'] for r in successful if r['file_id']], len(successful), total_duration, request_submission_duration

# Test concurrent upload with 200 files
concurrent_file_ids, concurrent_success, concurrent_duration, request_duration = concurrent_upload_with_rate_measurement(test_files[10:210], max_workers=50)


Concurrent upload: 190 files, 50 workers
Measuring REQUEST SUBMISSION RATE vs COMPLETION RATE

Concurrent Upload Analysis:
  Total files: 190
  Successful: 190
  Failed: 0

RATE ANALYSIS:
  REQUEST SUBMISSION:
    Time to submit all requests: 9.576s
    Request submission rate: 19.84 requests/second
  COMPLETION RATE:
    Total completion time: 14.01s
    Completion rate: 13.56 files/second

KEY INSIGHT: Request submission rate measures how fast we send requests
Completion rate measures how fast requests finish (includes processing time)


In [None]:
# Concurrently upload files to Vector Store
def batch_upload_with_measurements(file_paths, batch_size, test_name):
    print(f"\n{test_name}: {len(file_paths)} files in batches of {batch_size}")
    
    all_file_ids = []
    total_upload_time = 0
    batch_times = []
    
    # Step 1: Upload files in batches
    for i in range(0, len(file_paths), batch_size):
        batch_files = file_paths[i:i+batch_size]
        batch_num = i//batch_size + 1
        
        print(f"  Batch {batch_num}: Uploading {len(batch_files)} files...")
        batch_start = time.time()
        batch_file_ids = []
        
        for file_path in batch_files:
            try:
                with open(file_path, 'rb') as f:
                    file_obj = client.files.create(file=f, purpose="assistants")
                    batch_file_ids.append(file_obj.id)
            except Exception as e:
                print(f"    Failed to upload file: {e}")
        
        batch_end = time.time()
        batch_duration = batch_end - batch_start
        batch_times.append(batch_duration)
        total_upload_time += batch_duration
        all_file_ids.extend(batch_file_ids)
        
        print(f"    Uploaded {len(batch_file_ids)} files in {batch_duration:.2f}s")
        print(f"    Batch rate: {len(batch_file_ids)/batch_duration:.2f} files/second")
    
    # Step 2: Create vector store batch
    print(f"\n  Creating vector store batch with {len(all_file_ids)} files...")
    batch_create_start = time.time()
    
    try:
        file_batch = client.vector_stores.file_batches.create(
            vector_store_id=vector_store_id,
            file_ids=all_file_ids
        )
        batch_create_time = time.time() - batch_create_start
        
        print(f"  Vector store batch created: {file_batch.id}")
        print(f"  Batch creation time: {batch_create_time:.2f}s")
        
        total_time = total_upload_time + batch_create_time
        print(f"\n{test_name} Results:")
        print(f"  Files processed: {len(all_file_ids)}")
        print(f"  Upload time: {total_upload_time:.2f}s")
        print(f"  Batch creation time: {batch_create_time:.2f}s")
        print(f"  Total time: {total_time:.2f}s")
        print(f"  Overall rate: {len(all_file_ids)/total_time:.2f} files/second")
        
        return file_batch, len(all_file_ids), total_time
        
    except Exception as e:
        print(f"  Failed to create vector store batch: {e}")
        return None, len(all_file_ids), total_upload_time


In [12]:
# Test different batch sizes with 200 files each
print("\n" + "="*70)
print("BATCH SIZE COMPARISON TESTS")
print("="*70)

# Test 1: Batch size 20
batch_20_result, batch_20_count, batch_20_time = batch_upload_with_measurements(
    test_files[10:210], 20, "BATCH TEST 1 (Size 20)")

# Test 2: Batch size 50  
batch_50_result, batch_50_count, batch_50_time = batch_upload_with_measurements(
    test_files[10:210], 50, "BATCH TEST 2 (Size 50)")

# Test 3: Batch size 100
batch_100_result, batch_100_count, batch_100_time = batch_upload_with_measurements(
    test_files[10:210], 100, "BATCH TEST 3 (Size 100)")


BATCH SIZE COMPARISON TESTS

BATCH TEST 1 (Size 20): 190 files in batches of 20
  Batch 1: Uploading 20 files...
    Uploaded 20 files in 27.00s
    Batch rate: 0.74 files/second
  Batch 2: Uploading 20 files...
    Uploaded 20 files in 27.88s
    Batch rate: 0.72 files/second
  Batch 3: Uploading 20 files...
    Uploaded 20 files in 22.95s
    Batch rate: 0.87 files/second
  Batch 4: Uploading 20 files...
    Uploaded 20 files in 25.97s
    Batch rate: 0.77 files/second
  Batch 5: Uploading 20 files...
    Uploaded 20 files in 24.99s
    Batch rate: 0.80 files/second
  Batch 6: Uploading 20 files...
    Uploaded 20 files in 26.42s
    Batch rate: 0.76 files/second
  Batch 7: Uploading 20 files...
    Uploaded 20 files in 26.19s
    Batch rate: 0.76 files/second
  Batch 8: Uploading 20 files...
    Uploaded 20 files in 26.54s
    Batch rate: 0.75 files/second
  Batch 9: Uploading 20 files...
    Uploaded 20 files in 26.35s
    Batch rate: 0.76 files/second
  Batch 10: Uploading 10 fil

### Request Rate Demonstration

In [13]:
# Test 30 RPS capability
def demonstrate_30_rps_capability():
    print("\n" + "="*70)
    print("DEMONSTRATING 30 REQUESTS PER SECOND CAPABILITY")
    print("="*70)
    
    # Use small subset of files for clear demonstration
    demo_files = test_files[:60]  # 60 files to show 2 seconds at 30 RPS
    
    def timed_upload(file_path):
        request_start = time.time()
        try:
            with open(file_path, 'rb') as f:
                file_obj = client.files.create(file=f, purpose="assistants")
            return {
                'success': True,
                'file_id': file_obj.id,
                'request_time': request_start,
                'completion_time': time.time()
            }
        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'request_time': request_start,
                'completion_time': time.time()
            }
    
    print(f"Uploading {len(demo_files)} files with maximum concurrency...")
    
    overall_start = time.time()
    results = []
    
    # Use high concurrency to maximize request submission rate
    with ThreadPoolExecutor(max_workers=60) as executor:
        futures = [executor.submit(timed_upload, fp) for fp in demo_files]
        
        for i, future in enumerate(futures):
            result = future.result()
            results.append(result)
            
            # Show progress for first 10 files
            if i < 10:
                status = "SUCCESS" if result['success'] else f"FAILED: {result.get('error', '')[:20]}"
                print(f"  File {i+1}: {status}")
    
    overall_end = time.time()
    
    # Analyze request submission timing
    request_times = [r['request_time'] for r in results]
    completion_times = [r['completion_time'] for r in results]
    
    first_request = min(request_times) 
    last_request = max(request_times)
    request_submission_window = last_request - first_request
    
    first_completion = min(completion_times)
    last_completion = max(completion_times)
    completion_window = last_completion - first_completion
    
    successful = [r for r in results if r['success']]
    
    print(f"\nDEMONSTRATION RESULTS:")
    print(f"  Files attempted: {len(demo_files)}")
    print(f"  Successful: {len(successful)}")
    print(f"  Failed: {len(demo_files) - len(successful)}")
    
    print(f"\nREQUEST SUBMISSION ANALYSIS:")
    print(f"  Time to submit all {len(demo_files)} requests: {request_submission_window:.3f}s")
    print(f"  Request submission rate: {len(demo_files)/request_submission_window:.2f} requests/second")
    print(f"  {'ACHIEVED 30+ RPS' if len(demo_files)/request_submission_window >= 30 else 'Below 30 RPS'}")
    
    print(f"\nCOMPLETION ANALYSIS:")
    print(f"  Time for all requests to complete: {completion_window:.3f}s")
    print(f"  Completion rate: {len(successful)/completion_window:.2f} files/second")
    
    print(f"\nKEY EXPLANATION:")
    print(f"  - Request submission rate ({len(demo_files)/request_submission_window:.2f} RPS) shows we can SEND requests at 30+ RPS")
    print(f"  - Completion rate ({len(successful)/completion_window:.2f} files/sec) is limited by server processing time")
    print(f"  - The 30 RPS limit applies to REQUEST SUBMISSION, not completion")
    
    return results

# Run the demo
demo_results = demonstrate_30_rps_capability()


DEMONSTRATING 30 REQUESTS PER SECOND CAPABILITY
Uploading 60 files with maximum concurrency...
  File 1: SUCCESS
  File 2: SUCCESS
  File 3: SUCCESS
  File 4: SUCCESS
  File 5: SUCCESS
  File 6: SUCCESS
  File 7: SUCCESS
  File 8: SUCCESS
  File 9: SUCCESS
  File 10: SUCCESS

DEMONSTRATION RESULTS:
  Files attempted: 60
  Successful: 60
  Failed: 0

REQUEST SUBMISSION ANALYSIS:
  Time to submit all 60 requests: 0.027s
  Request submission rate: 2236.15 requests/second
  ACHIEVED 30+ RPS

COMPLETION ANALYSIS:
  Time for all requests to complete: 4.762s
  Completion rate: 12.60 files/second

KEY EXPLANATION:
  - Request submission rate (2236.15 RPS) shows we can SEND requests at 30+ RPS
  - Completion rate (12.60 files/sec) is limited by server processing time
  - The 30 RPS limit applies to REQUEST SUBMISSION, not completion


### Statistical Analysis

In [None]:
# Performance Summary
print("\n" + "="*80)
print("COMPREHENSIVE ANALYSIS SUMMARY")
print("="*80)

print("\nTEST RESULTS SUMMARY:")
print(f"{'Test Type':<30} {'Files':<8} {'Success':<8} {'Time (s)':<10} {'Rate':<12}")
print("-" * 75)

# Individual tests (10 files)
print(f"{'Individual Upload':<30} {10:<8} {len(individual_file_ids):<8} {individual_duration:<10.2f} {len(individual_file_ids)/individual_duration:<12.2f}")
print(f"{'Individual Vector Store':<30} {10:<8} {individual_vs_count:<8} {individual_vs_duration:<10.2f} {individual_vs_count/individual_vs_duration:<12.2f}")

# Concurrent test (200 files) 
print(f"{'Concurrent Upload':<30} {200:<8} {concurrent_success:<8} {concurrent_duration:<10.2f} {concurrent_success/concurrent_duration:<12.2f}")

# Batch tests (200 files each)
print(f"{'Batch Size 20':<30} {200:<8} {batch_20_count:<8} {batch_20_time:<10.2f} {batch_20_count/batch_20_time:<12.2f}")
print(f"{'Batch Size 50':<30} {200:<8} {batch_50_count:<8} {batch_50_time:<10.2f} {batch_50_count/batch_50_time:<12.2f}")
print(f"{'Batch Size 100':<30} {200:<8} {batch_100_count:<8} {batch_100_time:<10.2f} {batch_100_count/batch_100_time:<12.2f}")

print(f"\nCRITICAL UNDERSTANDING - REQUEST RATE vs COMPLETION RATE:")
print(f"----------------------------------------")
print(f"REQUEST RATE (what the 30 RPS limit measures):")
print(f"  - How fast you can SUBMIT requests to the API")
print(f"  - Our demonstration showed: {len(demo_results)/((max([r['request_time'] for r in demo_results]) - min([r['request_time'] for r in demo_results]))):.2f} requests/second")
print(f"  - This is the rate that hits the 30 RPS limit")

print(f"\nCOMPLETION RATE (what end users often measure incorrectly):")
print(f"  - How fast requests finish processing (files/total_time)")
print(f"  - This includes network latency, server processing, etc.")
print(f"  - Always slower than request rate due to processing overhead")



COMPREHENSIVE ANALYSIS SUMMARY

TEST RESULTS SUMMARY:
Test Type                      Files    Success  Time (s)   Rate        
---------------------------------------------------------------------------
Individual Upload              10       10       13.91      0.72        
Individual Vector Store        10       10       4.90       2.04        
Concurrent Upload              200      190      14.01      13.56       
Batch Size 20                  200      190      289.77     0.66        
Batch Size 50                  200      190      301.04     0.63        
Batch Size 100                 200      190      299.60     0.63        

CRITICAL UNDERSTANDING - REQUEST RATE vs COMPLETION RATE:
----------------------------------------
REQUEST RATE (what the 30 RPS limit measures):
  - How fast you can SUBMIT requests to the API
  - Our demonstration showed: 2236.15 requests/second
  - This is the rate that hits the 30 RPS limit

COMPLETION RATE (what end users often measure incorrectly):


### Housekeeping

In [15]:
# Remove test files and vector store
def cleanup():
    print("\n" + "="*50)
    print("CLEANUP")
    print("="*50)
    
    # Clean up local files
    if temp_dir and os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
        print(f"Deleted test files in the local directory")
    
    # Clean up uploaded files from Azure OpenAI
    print("Deleting uploaded files from Azure OpenAI...")
    all_uploaded_files = []
    
    # Collect all file IDs
    if 'individual_file_ids' in globals():
        all_uploaded_files.extend(individual_file_ids)
    if 'concurrent_file_ids' in globals():
        all_uploaded_files.extend([fid for fid in concurrent_file_ids if fid])
    if 'demo_results' in globals():
        demo_file_ids = [r['file_id'] for r in demo_results if r['success'] and r.get('file_id')]
        all_uploaded_files.extend(demo_file_ids)
    
    # Get files from vector store
    try:
        vs_files = client.vector_stores.files.list(vector_store_id=vector_store_id)
        vs_file_ids = [f.id for f in vs_files.data]
        all_uploaded_files.extend(vs_file_ids)
    except Exception as e:
        print(f"Could not retrieve vector store files: {e}")
    
    # Remove duplicates and delete
    unique_file_ids = list(set(all_uploaded_files))
    print(f"Found {len(unique_file_ids)} unique files to delete")
    
    deleted_count = 0
    for file_id in unique_file_ids:
        try:
            client.files.delete(file_id)
            deleted_count += 1
        except Exception as e:
            print(f"Failed to delete {file_id}: {e}")
    
    print(f"Successfully deleted {deleted_count}/{len(unique_file_ids)} files")
    
    # Delete vector store
    try:
        client.vector_stores.delete(vector_store_id)
        print(f"Deleted vector store: {vector_store_id}")
    except Exception as e:
        print(f"Failed to delete vector store: {e}")

cleanup()


CLEANUP
Deleted test files directory: c:\Users\lturakulov\Downloads\ZZZ_TEMP\ZZZ_CASES_WITH_RCA\Bosch_File_Uploads\test_files
Deleting uploaded files from Azure OpenAI...
Found 280 unique files to delete
Successfully deleted 280/280 files
Deleted vector store: vs_1VRTU0m5gDFo8jNI3n858QHr
