In [1]:
# Import the NumPy library
import numpy as np
import os # Import os module to manage files

# --- Sample data shared by every section below ---
arr1 = np.arange(0, 10)                                     # 1-D integers 0..9
arr2 = np.random.rand(3, 4)                                 # 3x4 random floats (no seed is set here, so values change per run)
arr3 = np.array(['apple', 'banana', 'cherry'], dtype=str)   # 1-D array of strings

# Show all three arrays with a single call; sep="\n" puts each item on its own line,
# which produces the same text as one print() per item.
print(
    "--- Sample Arrays ---",
    f"arr1: {arr1}",
    f"arr2:\n{arr2}",
    f"arr3: {arr3}",
    "-" * 30,
    sep="\n",
)

# Output paths (relative to the current working directory), grouped by format.
npy_file = 'single_array.npy'                     # binary, one array
npz_uncompressed_file, npz_compressed_file = (    # binary archives, several arrays
    'multi_array_uncompressed.npz',
    'multi_array_compressed.npz',
)
txt_file, csv_file = 'array_data.txt', 'array_data.csv'   # plain text


# --- 1. NumPy Binary Files (.npy) ---
# A .npy file holds exactly one array in NumPy's own binary format.
# dtype and shape survive the round trip; the file is not human-readable.

print("--- Saving/Loading Single Array (.npy) ---")

# Write arr2 to disk ...
np.save(file=npy_file, arr=arr2)
print(f"Saved arr2 to '{npy_file}'")

# ... read it straight back, then report the loaded values, an equality check
# against the in-memory original, and the section separator in one print call.
loaded_arr2 = np.load(file=npy_file)
print(
    f"\nLoaded array from '{npy_file}':\n{loaded_arr2}",
    f"Are loaded and original arr2 the same? {np.array_equal(arr2, loaded_arr2)}",
    "-" * 30,
    sep="\n",
)


# --- 2. NumPy Archive Files (.npz) ---
# Stores *multiple* NumPy arrays in a single file.
# np.savez() writes the members uncompressed; np.savez_compressed() is the
# compressed variant.

print("--- Saving/Loading Multiple Arrays (.npz uncompressed) ---")
# Save arr1, arr3 and arr2 into one uncompressed .npz archive.
# Each keyword argument becomes the name of that array inside the archive.
np.savez(npz_uncompressed_file, first_array=arr1, string_array=arr3, unnamed_array=arr2)
print(f"Saved multiple arrays to '{npz_uncompressed_file}'")

# np.load() on a .npz returns a dictionary-like NpzFile that keeps the file open
# and reads a member only when it is indexed. Using it as a context manager
# guarantees the handle is released even if a lookup or print below raises.
with np.load(npz_uncompressed_file) as loaded_npz:
    print(f"\nLoaded archive '{npz_uncompressed_file}'")
    print(f"Arrays available in the archive: {loaded_npz.files}") # List saved array names

    # Access individual arrays using their names (like dictionary keys).
    # The returned arrays are ordinary in-memory ndarrays and stay valid after
    # the archive is closed.
    loaded_arr1_from_npz = loaded_npz['first_array']
    loaded_arr3_from_npz = loaded_npz['string_array']
    # Despite its key, this array WAS given an explicit keyword name above
    # ('unnamed_array'); arrays passed positionally to np.savez would instead
    # be stored as 'arr_0', 'arr_1', ...
    loaded_arr2_from_npz = loaded_npz['unnamed_array']

    print(f"\nLoaded 'first_array': {loaded_arr1_from_npz}")
    print(f"Loaded 'string_array': {loaded_arr3_from_npz}")
    print(f"\nLoaded 'unnamed_array':\n{loaded_arr2_from_npz}")
print("-" * 20)

# --- Compressed .npz Files ---
# np.savez_compressed() stores multiple arrays like np.savez(), but compresses
# each member. Useful for large arrays to save disk space, at the cost of
# slower saving/loading.

print("--- Saving/Loading Multiple Arrays (.npz compressed) ---")
np.savez_compressed(npz_compressed_file, array_a=arr1, array_b=arr2)
print(f"Saved multiple arrays compressed to '{npz_compressed_file}'")

# Loading is the same as for an uncompressed .npz. The `with` block closes the
# archive automatically, including when one of the statements inside raises.
with np.load(npz_compressed_file) as loaded_compressed:
    print(f"\nLoaded compressed archive '{npz_compressed_file}'")
    print(f"Arrays available: {loaded_compressed.files}")
    print(f"Loaded 'array_a': {loaded_compressed['array_a']}")
print("-" * 30)


# --- 3. Text Files (.txt, .csv, etc.) ---
# Plain text is human-readable but slower and larger than the binary formats.
# Floats are written through a format string, so digits beyond that format are
# lost (here '%.5f' keeps five decimals). np.savetxt handles 1-D and 2-D arrays.

print("--- Saving to Text File (.txt / .csv) ---")
# np.savetxt(filename, array, fmt='%.18e', delimiter=' ', ...)
#   fmt       : per-element format ('%.2f' -> 2 decimals, '%d' -> integer)
#   delimiter : column separator (' ' space, ',' comma, '\t' tab)
#   comments  : prefix put in front of the header and footer lines

# arr2 -> space-delimited text, five decimals, '# '-prefixed header and footer.
np.savetxt(
    txt_file,
    arr2,
    fmt='%.5f',
    delimiter=' ',
    header='Col1 Col2 Col3 Col4',
    footer='End of data',
    comments='# ',
)
print(f"Saved arr2 to '{txt_file}' with custom format, header, footer.")

# arr1 -> one-column CSV of integers. The explicit column view makes the
# intended layout obvious (np.savetxt would also write a 1-D array one value per
# line). comments='' leaves the header as a bare "Value" line with no prefix.
arr1_col = np.expand_dims(arr1, axis=1)  # same result as arr1[:, np.newaxis]
np.savetxt(csv_file, arr1_col, fmt='%d', delimiter=',', header='Value', comments='')
print(f"\nSaved arr1 (as column) to '{csv_file}' as CSV.")
print("-" * 20)

print("--- Loading from Text File (.txt / .csv) ---")
# np.loadtxt(filename, dtype=<class 'float'>, comments='#', delimiter=None, skiprows=0, ...)
#   dtype     : type the parsed values are converted to
#   comments  : marker for comment text; lines starting with it are ignored
#   delimiter : column separator (any whitespace when None)
#   skiprows  : number of leading lines to drop, e.g. a header

# Text file: drop the first (header) line. The footer line begins with '#', so
# the default comments='#' skips it without any extra argument.
loaded_txt = np.loadtxt(txt_file, skiprows=1, delimiter=' ', dtype=np.float64)
print(
    f"Loaded data from '{txt_file}':\n{loaded_txt}",
    f"Shape: {loaded_txt.shape}",
    sep="\n",
)

# CSV file: drop the "Value" header line. A file with a single column comes
# back as a 1-D array.
loaded_csv = np.loadtxt(csv_file, skiprows=1, delimiter=',', dtype=int)
print(
    f"\nLoaded data from '{csv_file}':\n{loaded_csv}",
    f"Shape: {loaded_csv.shape}",
    sep="\n",
)

# For CSVs with mixed types or missing values, pandas.read_csv is usually the more robust choice.
print("-" * 30)


# --- Clean up created files ---
print("--- Cleaning up created files ---")
# Remove every file this cell created. Each removal has its own try/except so
# that a failure on one file (for example it was never written, or it is still
# locked) is reported and the remaining files are still cleaned up, instead of
# the first error aborting the whole cleanup and leaving stale files behind.
for created_file in (
    npy_file,
    npz_uncompressed_file,
    npz_compressed_file,
    txt_file,
    csv_file,
):
    try:
        os.remove(created_file)
        print(f"Removed '{created_file}'")
    except OSError as e:
        # The OSError message already names the path that could not be removed.
        print(f"Error removing files: {e}")
print("-" * 30)

--- Sample Arrays ---
arr1: [0 1 2 3 4 5 6 7 8 9]
arr2:
[[0.20507578 0.62692333 0.31293911 0.23656929]
 [0.14998491 0.65965589 0.2058041  0.14057722]
 [0.17982907 0.96187003 0.19164681 0.44407939]]
arr3: ['apple' 'banana' 'cherry']
------------------------------
--- Saving/Loading Single Array (.npy) ---
Saved arr2 to 'single_array.npy'

Loaded array from 'single_array.npy':
[[0.20507578 0.62692333 0.31293911 0.23656929]
 [0.14998491 0.65965589 0.2058041  0.14057722]
 [0.17982907 0.96187003 0.19164681 0.44407939]]
Are loaded and original arr2 the same? True
------------------------------
--- Saving/Loading Multiple Arrays (.npz uncompressed) ---
Saved multiple arrays to 'multi_array_uncompressed.npz'

Loaded archive 'multi_array_uncompressed.npz'
Arrays available in the archive: ['first_array', 'string_array', 'unnamed_array']

Loaded 'first_array': [0 1 2 3 4 5 6 7 8 9]
Loaded 'string_array': ['apple' 'banana' 'cherry']

Loaded 'unnamed_array':
[[0.20507578 0.62692333 0.31293911 0.236