In [6]:
import numpy as np
import time
import os
import psutil
import mmap

# -------------------------------
# Helper: Check memory usage
# -------------------------------
def memory_usage_mb():
    """Return the resident set size (RSS) of the current process in MiB."""
    bytes_per_mib = 1024 ** 2
    # RSS is read from psutil for this interpreter's own PID.
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mib

# -------------------------------
# Save numpy embeddings to .mmap file
# -------------------------------
def save_embeddings_mmap(embeddings, filename):
    """Write embeddings to ``filename`` as raw float32 values.

    The file has no header: it contains only the array elements, written in
    C (row-major) order, so the returned shape must be kept by the caller in
    order to interpret the file later.

    Args:
        embeddings: A NumPy array or any array-like (e.g. nested lists) of
            numeric values.
        filename: Destination path (or open file object) passed to
            ``ndarray.tofile``.

    Returns:
        The shape of the saved array as a tuple.
    """
    # np.asarray accepts any array-like and, unlike astype(), does not make
    # a second full copy when the input is already a float32 ndarray.
    data = np.asarray(embeddings, dtype=np.float32)
    data.tofile(filename)
    return data.shape

# -------------------------------
# Memory-mapped embeddings loader
# -------------------------------
class MemoryMappedEmbeddings:
    """Read-only, memory-mapped view over a raw embeddings file.

    The file is expected to contain nothing but array elements of ``dtype``
    (as written by ``ndarray.tofile``). The data is exposed through
    ``self.embeddings``, a NumPy array backed directly by the mapping, so
    rows are read from disk only when they are accessed.

    The object can be used as a context manager; leaving the ``with`` block
    calls ``close()``.

    Args:
        embeddings_file: Path of the raw binary file to map.
        shape: Shape to give the mapped array (passed to ``reshape``).
        dtype: Element type stored in the file. Defaults to ``np.float32``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty or its size does not match
            ``shape``/``dtype``.
    """

    def __init__(self, embeddings_file, shape, dtype=np.float32):
        # A loader only needs read access: opening read-only works on files
        # without write permission and makes the resulting array
        # non-writeable, so a stray assignment cannot modify the file.
        self.file = open(embeddings_file, 'rb')
        self.mmap = None
        self.embeddings = None
        try:
            self.mmap = mmap.mmap(self.file.fileno(), 0, access=mmap.ACCESS_READ)
            # Kept as one expression on purpose: no local variable holds the
            # intermediate array, so nothing pins the buffer if reshape fails.
            self.embeddings = np.frombuffer(self.mmap, dtype=dtype).reshape(shape)
        except Exception:
            # Do not leak the file handle / mapping on a failed construction.
            self.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __getitem__(self, idx):
        """Return ``self.embeddings[idx]`` (a view into the mapping for basic indexing)."""
        return self.embeddings[idx]

    def close(self):
        """Release the array, the mapping and the file handle.

        Safe to call more than once. ``mmap.close()`` raises ``BufferError``
        if other arrays still reference the mapped buffer; the file handle
        is closed even in that case.
        """
        self.embeddings = None  # release view before closing mmap
        try:
            if self.mmap is not None:
                self.mmap.close()
        finally:
            self.file.close()

# -------------------------------
# Generate random embeddings
# -------------------------------
embeddings = np.random.rand(10000, 768).astype(np.float32)
print(f"Embeddings shape: {embeddings.shape}")

# -------------------------------
# Save to file for memory mapping
# -------------------------------
filename = "embeddings.mmap"
shape = save_embeddings_mmap(embeddings, filename)
print(f"Saved embeddings with shape {shape}")

# -------------------------------
# Benchmark full load
# -------------------------------
# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() is wall-clock time and can be too coarse for
# the sub-millisecond durations measured below.
start = time.perf_counter()
normal_loaded = np.fromfile(filename, dtype=np.float32).reshape(shape)
load_time = time.perf_counter() - start
# NOTE: this is the RSS of the whole process, so it also includes the
# `embeddings` array generated above, not just the loaded copy.
mem_used = memory_usage_mb()
print(f"Normal load time: {load_time:.2f} sec, memory usage: {mem_used:.2f} MB")

# -------------------------------
# Benchmark memory-mapped init
# -------------------------------
start = time.perf_counter()
memmap_embeddings = MemoryMappedEmbeddings(filename, shape)
init_time = time.perf_counter() - start
try:
    # NOTE: `embeddings` and `normal_loaded` are still alive here, so this
    # RSS figure cannot drop below the value printed for the full load.
    mem_used = memory_usage_mb()
    print(f"Memory-mapped init time: {init_time:.2f} sec, memory usage: {mem_used:.2f} MB")

    # -------------------------------
    # Benchmark access latency
    # -------------------------------
    start = time.perf_counter()
    # .copy() detaches the row from the mapping so that close() below is not
    # blocked by a live view of the mapped buffer.
    vec = memmap_embeddings[42].copy()
    latency = time.perf_counter() - start
    print(f"Memory-mapped single vector access latency: {latency:.6f} sec")
finally:
    # -------------------------------
    # Cleanup
    # -------------------------------
    # Always release the mapping and file handle, even if a benchmark fails.
    memmap_embeddings.close()



Embeddings shape: (10000, 768)
Saved embeddings with shape (10000, 768)
Normal load time: 0.01 sec, memory usage: 149.65 MB
Memory-mapped init time: 0.00 sec, memory usage: 149.65 MB
Memory-mapped single vector access latency: 0.000096 sec


#  Memory-Mapped Embeddings: Fast & Efficient Access

This notebook shows how **memory mapping** improves performance when working with large embedding files.

We compare:
- **Standard loading** (loads entire file into RAM)
- **Memory-mapped loading** (lazy-loads on-demand)

---

## Why Use Memory Mapping?

When working with large files like sentence embeddings or model output vectors:
- Loading the full `.npy` or `.pt` file can consume GBs of memory.
- It’s slow if you only need to use a subset of data.

🔹 **Memory Mapping** keeps the file on disk and reads only the part you need — just-in-time!

Benefits:
-  Instant load time
-  Lower memory usage
-  Fast access to specific rows

---

##  What We Do Here

- Generate random 10,000 x 768 embeddings.
- Save them to disk as a raw `float32` binary file (named `embeddings.mmap`).
- Measure:
  - Time to load normally
  - Time to initialize memory-mapped version
  - Time to access a single vector


## When to Use This

- You work with huge embeddings or model outputs (GBs).
- You don’t need to load everything at once.
- You care about startup time or memory footprint.

---

## Caution

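- Treat the memory-mapped array as read-only — don't write to it directly; it is backed by the file on disk.
- If you keep references to views of the mapped array (e.g. slices or single rows), closing the mmap may raise `BufferError`. Call `.copy()` on anything you need to keep after closing.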

---

**In one line**: Memory-mapping is perfect for large-scale embedding handling without RAM overload.

