# 🧩 Notebook-08: File I/O, Memory Mapping, Views vs Copies in NumPy

In [1]:
import numpy as np
import os
import sys
from pathlib import Path

# ✅ Locate the project folders
# Assumes the kernel's working directory sits one level below the project root.
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT.joinpath("datasets")
SCRIPT_DIR = PROJECT_ROOT.joinpath("scripts")

# Put the scripts folder at the front of the import search path so the helper
# module imported below can be found; the guard keeps re-runs of this cell from
# adding duplicate entries.
if sys.path.count(str(SCRIPT_DIR)) == 0:
    sys.path.insert(0, str(SCRIPT_DIR))

# ✅ Import utility functions
from io_utils import (
    save_npy, load_npy, save_npz, load_npz, save_txt, load_txt,
    write_csv_with_missing_values, load_genfromtxt,
    create_memmap, load_memmap
)

# Banner line naming the notebook's topic in the rendered output.
print("💾 NumPy File I/O & Memory Management\n")

💾 NumPy File I/O & Memory Management



In [2]:
# ✅ 1. Round-trip a single array through a .npy file (via the io_utils helpers)
npy_path = DATA_DIR / "sample_array.npy"
arr = np.arange(10)  # also reused by the text-file example further down

save_npy(npy_path, arr)
loaded = load_npy(npy_path)
print("Saved .npy array:", loaded)

Saved .npy array: [0 1 2 3 4 5 6 7 8 9]


In [3]:
# ✅ 2. Bundle several named arrays into one .npz file and read them back
npz_path = DATA_DIR / "multiple_arrays.npz"
a, b = np.array([1, 2, 3]), np.array([4, 5, 6])
save_npz(npz_path, array1=a, array2=b)

# The keyword names used when saving are the keys used to look the arrays up again.
data = load_npz(npz_path)
print("\nArray1 from .npz:", data["array1"])
print("Array2 from .npz:", data["array2"])


Array1 from .npz: [1 2 3]
Array2 from .npz: [4 5 6]


In [4]:
# ✅ 3. Round-trip the array from step 1 through a plain-text file
# Note: in the recorded output the values come back as floats, not the original ints.
txt_path = DATA_DIR / "sample_data.txt"
save_txt(txt_path, arr)
loaded_txt = load_txt(txt_path)
print("\nLoaded from .txt:", loaded_txt)


Loaded from .txt: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


In [5]:
# ✅ 4. Read a CSV that contains a missing field, using the genfromtxt-based helper
# NOTE: this rebinds `data`, which previously held the .npz contents from step 2.
csv_path = DATA_DIR / "missing_data.csv"
write_csv_with_missing_values(csv_path)
data = load_genfromtxt(csv_path)  # the missing field appears as nan in the recorded output
print("\nData with missing value handled:\n", data)


Data with missing value handled:
 [[ 1.  2.  3.]
 [ 4. nan  6.]
 [ 7.  8.  9.]]


In [6]:
# ✅ 5. Memory-mapped files (efficient large file I/O)
memmap_path = DATA_DIR / "memmap_array.dat"
mmap_array = create_memmap(memmap_path)
print("\nMemory-mapped array:\n", mmap_array)

# Drop the reference so pending writes are flushed to disk
# (assumes create_memmap returns an np.memmap — TODO confirm).
del mmap_array


Memory-mapped array:
 [[0.65761036 0.47393706 0.48731482]
 [0.84995455 0.0568013  0.3221486 ]
 [0.21267675 0.81114644 0.7193266 ]]


In [7]:
# ✅ 6. Re-open the memory-mapped file written in step 5
memmap_path = DATA_DIR / "memmap_array.dat"
mmap_loaded = load_memmap(memmap_path)
print("Reloaded memmap array:\n", mmap_loaded)

Reloaded memmap array:
 [[0.65761036 0.47393706 0.48731482]
 [0.84995455 0.0568013  0.3221486 ]
 [0.21267675 0.81114644 0.7193266 ]]


In [8]:
# ✅ 7. Views vs. Copies
arr = np.array([1, 2, 3])
view, copy = arr.view(), arr.copy()  # view aliases arr's buffer; copy gets its own

# Write through the original: the view observes the change, the copy does not.
arr[0] = 100

print("\nOriginal array modified:", arr)
print("View (shares memory):", view)
print("Copy (independent):", copy)


Original array modified: [100   2   3]
View (shares memory): [100   2   3]
Copy (independent): [1 2 3]


In [9]:
# ✅ 8. Inspect `.base` to see which arrays own their memory
# `.base` is None for an array that owns its data and points at the source
# array for a view.
arr_owns_memory = arr.base is None
view_borrows_from_arr = view.base is arr
copy_owns_memory = copy.base is None

print("arr.base is None:", arr_owns_memory)
print("view.base is arr:", view_borrows_from_arr)
print("copy.base is None:", copy_owns_memory)

arr.base is None: True
view.base is arr: True
copy.base is None: True


In [10]:
# ✅ 9. Using np.vectorize() to apply custom functions element-wise
def safe_divide(x, y):
    """Divide ``x`` by ``y``, returning ``0.0`` instead of raising when ``y`` is zero.

    Args:
        x: Numerator (scalar).
        y: Denominator (scalar).

    Returns:
        ``x / y``, or the float ``0.0`` when ``y == 0``. The fallback is a float
        so both branches return the same type.
    """
    return 0.0 if y == 0 else x / y

a = np.array([10, 20, 30])
b = np.array([2, 0, 5])

# Pin the output dtype with `otypes`. Without it, np.vectorize infers the dtype
# from the result for the FIRST element, so a zero divisor in position 0 would
# make the whole result integer and silently truncate later quotients. Setting
# `otypes` also lets the vectorized function accept empty inputs.
# Note: np.vectorize is a convenience wrapper around a Python-level loop, not a
# performance optimisation.
vec_divide = np.vectorize(safe_divide, otypes=[float])
print("\nVectorized divide:", vec_divide(a, b))


Vectorized divide: [5. 0. 6.]
