In [1]:
""" Imports """
import numpy as np
import zlib
import gc
import time
import sys

In [2]:
""" Compression Functions """
def compress_array(array):
    """ 
    input: array
    output: bytes 

    subfunctions: 
    - zlib.compress()
    """
    compressed = zlib.compress(array)
    return compressed


def decompress_array(compressed_array):
    """ 
    input: bytes
    output: np.array 

    subfunctions:
    - zlib.decompress()
    - np.frombuffer()
    """
    decompressed = zlib.decompress(compressed_array)
    return np.frombuffer(decompressed, dtype=np.int64)

In [3]:
""" Declarations & Initial Memory Check"""
# Set the seed for random number generation
np.random.seed(123)

# Create a large array of random integers
array = np.random.randint(0, 100, size=(100, 100))

# Print the type of the original array
print("Original array Type: ", type(array))

# Print the size of the original array
print("Original array size using .nbytes:", array.nbytes, "bytes")
print("Original array size using .getsizeof():", sys.getsizeof(array), "bytes")

words = "\nMemory checks found <array>.nbytes and sys.getsizeof()"
words += "\n\t <array>.nbytes: only works on np.arrays and returns size of stored data"
words += "\n\t sys.getsizeof(): returns size of stored data + overhead"
print(words)

Original array Type:  <class 'numpy.ndarray'>
Original array size using .nbytes: 80000 bytes
Original array size using .getsizeof(): 80128 bytes

Memory checks found <array>.nbytes and sys.getsizeof()
	 <array>.nbytes: only works on np.arrays and returns size of stored data
	 sys.getsizeof(): returns size of stored data + overhead


In [12]:
""" Compression Quick Example """

# Compress the array
compressed_array = compress_array(array)

# Calculate memory usage 
compressed_array_memory = sys.getsizeof(compressed_array)

print("Original array Type: ", type(array))
print("Original array Size: ",sys.getsizeof(array), "bytes" )

print("\nNew Compressed Type: ", type(compressed_array))
print("New Compressed Size: ", sys.getsizeof(compressed_array), "bytes")

decompressed_array = decompress_array(compressed_array)
decompressed_array_memory = sys.getsizeof(decompressed_array)

print("\nDecompressed Type: ", type(decompressed_array))
print("Decompressed Size: ", sys.getsizeof(decompressed_array), "bytes")


Original array Type:  <class 'numpy.ndarray'>
Original array Size:  8000128 bytes

New Compressed Type:  <class 'bytes'>
New Compressed Size:  1399256 bytes

Decompressed Type:  <class 'numpy.ndarray'>
Decompressed Size:  112 bytes


In [5]:
"""
Composite Program
- Memory usage checks 
    - Here we stress sys.getsizeof() to check data memory + overhead
- Timing checks
- Use of Garbage Collector (gc)

Relevant Functions used
- <array>.nbytes
- gc.disable()
- gc.collect()
- gc.enable()
- compress_array(<array>)
- decompress_array(<array>)
- sys.getsizeof(<object>)
"""

# Set the seed for random number generation
np.random.seed(123)

# Create a large array of random integers
array = np.random.randint(0, 100, size=(1000, 1000))

# Print the size of the original array
print("Original array size using .nbytes:", array.nbytes, "bytes")
print("Original array size using .getsizeof():", sys.getsizeof(array), "bytes")

# Disable automatic garbage collection
# gc.disable()

# Compress the array
start_time = time.time()
compressed_array = compress_array(array)
compression_time = time.time() - start_time

# Print the size of the compressed array
print("Compressed array size:", len(compressed_array), "bytes")

# Explicitly trigger garbage collection
# gc.collect()

# Decompress the array
start_time = time.time()
decompressed_array = decompress_array(compressed_array)
decompression_time = time.time() - start_time

# Check if the decompressed array matches the original array
print("Arrays are equal:", np.array_equal(array, decompressed_array))

# Calculate memory usage
array_memory = sys.getsizeof(array)
compressed_array_memory = sys.getsizeof(compressed_array)
decompressed_array_memory = sys.getsizeof(decompressed_array)
total_memory_usage = array_memory + compressed_array_memory + decompressed_array_memory

# Print the breakdown of memory usage
print("Array memory usage:", array_memory, "bytes")
print("Compressed array memory usage:", compressed_array_memory, "bytes")
print("Decompressed array memory usage:", decompressed_array_memory, "bytes")
print("Total memory usage:", total_memory_usage, "bytes")

# Re-enable automatic garbage collection
# gc.enable()

# Calculate the total execution time
total_execution_time = compression_time + decompression_time

# Print the compression and decompression times
print("Compression time:", compression_time, "seconds")
print("Decompression time:", decompression_time, "seconds")

# Print the total execution time
print("Total execution time:", total_execution_time, "seconds")



Original array size using .nbytes: 8000000 bytes
Original array size using .getsizeof(): 8000128 bytes
Compressed array size: 1399223 bytes
Arrays are equal: False
Array memory usage: 8000128 bytes
Compressed array memory usage: 1399256 bytes
Decompressed array memory usage: 112 bytes
Total memory usage: 9399496 bytes
Compression time: 0.9768402576446533 seconds
Decompression time: 0.04046893119812012 seconds
Total execution time: 1.0173091888427734 seconds


In [14]:
"""
Composite Program
- Memory usage checks 
    - Here we stress sys.getsizeof() to check data memory + overhead
- Timing checks
- Use of Garbage Collector (gc)

Relevant Functions used
- <array>.nbytes
- gc.disable()
- gc.collect()
- gc.enable()
- compress_array(<array>)
- decompress_array(<array>)
- sys.getsizeof(<object>)
"""

# Set the seed for random number generation
np.random.seed(123)

# Create a large array of random integers
array = np.random.randint(0, 100, size=(1000, 1000))

# Print the size of the original array
print("Original array size using .nbytes:", array.nbytes, "bytes")
print("Original array size using .getsizeof():", sys.getsizeof(array), "bytes")

# Disable automatic garbage collection
# gc.disable()



# Compress the array & track timing 
start_time = time.time()
compressed_array = compress_array(array)
compression_time = time.time() - start_time

print("\nCompressed array size using .getsizeof():", sys.getsizeof(compressed_array), "bytes")

# Explicitly trigger garbage collector 
# gc.collect()            # return memory right away

# Decompress the array and track timing 
start_time = time.time()
decompressed_array = decompress_array(compressed_array)
decompression_time = time.time() - start_time

# Check if the decompressed array matches the original array
print("Arrays are equal:", np.array_equal(array, decompressed_array))

# Calculate memory usage
array_memory = sys.getsizeof(array)
compressed_array_memory = sys.getsizeof(compressed_array)
decompressed_array_memory = sys.getsizeof(decompressed_array)
total_memory_usage = array_memory + compressed_array_memory + decompressed_array_memory




Original array size using .nbytes: 8000000 bytes
Original array size using .getsizeof(): 8000128 bytes
Compressed array size using .getsizeof(): 1399256 bytes
