## Example - Simple Station Distance Calculation

A clean, simple example that simulates your bike rental scenario:
- **Reader**: Loads data files and creates station lookup table
- **Workers**: Calculate distances between stations with caching
- **Single lock**: Simple approach for shared data access

In [10]:
import multiprocessing as mp
import random
import time

def data_reader(queue, lookup_table, lock, num_pairs=30, total_files=3):
    """Simulate loading bike rental data files and enqueue trips for workers.

    For every simulated file ``file_num`` (1-based) the reader:

    1. registers stations ``1 .. 5 + file_num`` in ``lookup_table``, giving
       each station that is not yet present a random coordinate, and
    2. draws ``num_pairs`` random (start, end) station pairs and enqueues
       every pair whose two stations differ as
       ``(trip_id, start_station, end_station)``.

    A single ``None`` sentinel is put on the queue when the reader exits.
    This happens in a ``finally`` clause, so the sentinel is sent even when
    the reader fails part-way through; otherwise consumers blocked on
    ``queue.get()`` would wait forever.

    Args:
        queue: Queue that receives the trip tuples and, last, ``None``.
        lookup_table: Mapping station_id -> coordinate; modified in place
            while ``lock`` is held.
        lock: Context-manager lock guarding ``lookup_table``.
        num_pairs: Number of candidate station pairs drawn per file.
        total_files: Number of simulated files to process.
    """
    print("Reader: Starting...")

    try:
        for file_num in range(1, total_files + 1):
            print(f"Reader: Processing file {file_num}/{total_files}")

            # Every file makes one more station available than the previous one
            highest_station = 5 + file_num

            # Add new stations to lookup table
            with lock:
                for station_id in range(1, highest_station + 1):
                    if station_id not in lookup_table:
                        # Simulate station coordinates
                        lookup_table[station_id] = random.uniform(10.0, 100.0)

            # Generate trip pairs (start_station, end_station)
            for i in range(num_pairs):
                start_station = random.randint(1, highest_station)
                end_station = random.randint(1, highest_station)
                # A trip that starts and ends at the same station is skipped
                if start_station != end_station:
                    trip_id = file_num * 1000 + i
                    queue.put((trip_id, start_station, end_station))
                time.sleep(0.1)  # simulate file reading

            time.sleep(0.5)  # simulate file processing
    finally:
        # Signal workers to stop -- also on failure, so they never hang
        queue.put(None)

    print("Reader: Finished")

def distance_worker(worker_id, queue, lookup_table, distance_cache, results, lock):
    """Consume trips from the queue and record a distance for each of them.

    Every queue item is a ``(trip_id, start_station, end_station)`` tuple.
    If the (start, end) route is already in ``distance_cache`` the stored
    value is reused and the result is tagged "cached"; otherwise the distance
    is derived from the two coordinates in ``lookup_table``, written to the
    cache and tagged "calculated". Receiving ``None`` ends the loop; the
    ``None`` is put back on the queue so other consumers receive it too.
    """
    print(f"Worker {worker_id}: Starting...")
    worker_tag = f"Worker{worker_id}"
    handled = 0

    # iter() with a sentinel keeps calling queue.get() until it returns None
    for trip_id, origin, destination in iter(queue.get, None):
        route = (origin, destination)  # directional: (1,2) != (2,1)

        # Fast path: the route has been computed before
        with lock:
            cache_hit = route in distance_cache
            if cache_hit:
                known_distance = distance_cache[route]
                results.append(
                    (trip_id, origin, destination, known_distance, worker_tag, "cached")
                )
        if cache_hit:
            handled += 1
            continue

        # Slow path: fetch both coordinates, then compute outside the lock
        with lock:
            origin_coord = lookup_table[origin]
            destination_coord = lookup_table[destination]

        time.sleep(0.5)  # stand-in for a slow routing call (like OSM routing)
        new_distance = abs(origin_coord - destination_coord)

        # Publish the fresh value to the cache and to the results
        with lock:
            distance_cache[route] = new_distance
            results.append(
                (trip_id, origin, destination, new_distance, worker_tag, "calculated")
            )

        handled += 1

    # Pass the shutdown signal on to the other workers
    queue.put(None)

    print(f"Worker {worker_id}: Processed {handled} items")

In [11]:
import multiprocessing as mp
import threading
import time

# Shared state: every container comes from one manager so that the reader
# and all workers operate on the same objects
manager = mp.Manager()
queue = manager.Queue()
lookup_table = manager.dict()    # station_id -> coordinate
distance_cache = manager.dict()  # (start_station, end_station) -> distance
results = manager.list()         # one tuple per processed trip
lock = manager.Lock()            # single lock for all shared data

# Run parameters
num_workers = 3
total_files = 3

# One reader that feeds the queue (20 candidate pairs per file) ...
reader_process = mp.Process(
    target=data_reader,
    args=(queue, lookup_table, lock, 20, total_files),
)

# ... and num_workers workers that drain it
worker_processes = []
for worker_index in range(num_workers):
    worker_processes.append(
        mp.Process(
            target=distance_worker,
            args=(worker_index, queue, lookup_table, distance_cache, results, lock),
        )
    )

print(f"Starting 1 reader and {num_workers} workers...")
start_time = time.time()

# Reader first, then the workers; the same order is used for start and join
all_processes = [reader_process, *worker_processes]
for proc in all_processes:
    proc.start()
for proc in all_processes:
    proc.join()

end_time = time.time()
print(f"\n✅ Completed in {end_time - start_time:.2f} seconds")

Starting 1 reader and 3 workers...
Reader: Starting...Reader: Processing file 1/3



Worker 0: Starting...
Worker 1: Starting...Worker 2: Starting...

Reader: Processing file 2/3
Reader: Processing file 3/3
Reader: FinishedWorker 0: Processed 19 itemsWorker 1: Processed 16 items
Worker 2: Processed 21 items



✅ Completed in 7.59 seconds


In [13]:
# Headline numbers for the run
print("\n📊 Results Summary:")
print(f"Total trips processed: {len(results)}")
print(f"Stations in lookup table: {len(lookup_table)}")
print(f"Cached routes: {len(distance_cache)}")

# Tally how each result was obtained (field 5 is "cached" or "calculated")
cached_count = 0
calculated_count = 0
for record in results:
    if record[5] == "cached":
        cached_count += 1
    elif record[5] == "calculated":
        calculated_count += 1

total_count = cached_count + calculated_count
if total_count > 0:
    print(f"Cached results: {cached_count}")
    print(f"Calculated results: {calculated_count}")
    print(f"Cache hit rate: {cached_count / total_count * 100:.1f}%")

# First eight results in sorted order (trip_id is the first tuple field)
print("\n🔍 Sample results (first 8):")
for trip_id, start, end, dist, worker, source in sorted(results)[:8]:
    status = "🔄" if source != "cached" else "💾"
    print(f"  {status} Trip {trip_id}: {start}→{end}, Distance: {dist:.2f}, {worker}")

# Number of trips handled by each worker (field 4 is the worker label)
worker_counts = {}
for record in results:
    label = record[4]
    worker_counts.setdefault(label, 0)
    worker_counts[label] += 1

print("\n👥 Work distribution:")
for label in sorted(worker_counts):
    print(f"  {label}: {worker_counts[label]} trips")

# Recap of the design used in this example
print("\n🔧 Architecture:")
for summary_line in (
    "  ✓ Single lock for simplicity",
    "  ✓ Shared results list",
    "  ✓ Directional cache: (1,2) ≠ (2,1)",
    "  ✓ Queue-based work distribution",
):
    print(summary_line)


📊 Results Summary:
Total trips processed: 56
Stations in lookup table: 8
Cached routes: 34
Cached results: 20
Calculated results: 36
Cache hit rate: 35.7%

🔍 Sample results (first 8):
  🔄 Trip 1000: 6→5, Distance: 61.73, Worker0
  🔄 Trip 1001: 6→2, Distance: 19.29, Worker2
  🔄 Trip 1002: 2→4, Distance: 55.13, Worker1
  🔄 Trip 1003: 6→1, Distance: 5.01, Worker0
  🔄 Trip 1004: 2→4, Distance: 55.13, Worker2
  🔄 Trip 1005: 2→3, Distance: 23.39, Worker1
  💾 Trip 1006: 6→2, Distance: 19.29, Worker0
  🔄 Trip 1007: 1→2, Distance: 24.30, Worker0

👥 Work distribution:
  Worker0: 19 trips
  Worker1: 16 trips
  Worker2: 21 trips

🔧 Architecture:
  ✓ Single lock for simplicity
  ✓ Shared results list
  ✓ Directional cache: (1,2) ≠ (2,1)
  ✓ Queue-based work distribution


In [None]:
# Print the final station lookup table, one station per line in ID order.
# The header line is restored so the cell reproduces its recorded output.
print("\n📍 Final Station Lookup Table:")
for station_id, coord in sorted(lookup_table.items()):
    print(f"  Station {station_id}: Coordinate {coord:.2f}")


📍 Final Station Lookup Table:
  Station 1: Coordinate 15.65
  Station 2: Coordinate 39.94
  Station 3: Coordinate 63.33
  Station 4: Coordinate 95.07
  Station 5: Coordinate 82.38
  Station 6: Coordinate 20.65
  Station 7: Coordinate 10.67
  Station 8: Coordinate 63.43
