In [1]:
""" Imports """
import numpy as np
import zlib
import gc
import time
import sys

# Compression Functions

In [2]:
""" Compression Functions """
def compress_array(array):
    """ 
    input: array
    output: bytes 

    subfunctions: 
    - zlib.compress()
    """
    compressed = zlib.compress(array)
    return compressed


def decompress_array(compressed_array):
    """ 
    input: bytes
    output: np.array 

    subfunctions:
    - zlib.decompress()
    - np.frombuffer()
    """
    decompressed = zlib.decompress(compressed_array)
    return np.frombuffer(decompressed, dtype=np.int64)

# Declarations & Initial Memory Check

In [3]:
""" Declarations & Initial Memory Check"""
# Set the seed for random number generation
np.random.seed(123)

# Create a large array of random integers
array = np.random.randint(0, 100, size=(3, 3))
print(" Just to visualize the array: \n", array)
# array = np.random.randint(0, 100, size=(100, 100))


#------------------------------------------------------------------------------



# Print the type of the original array
print("Original array Type: ", type(array))

# Print the size of the original array
print("Original array size using .nbytes:", array.nbytes, "bytes")
print("Original array size using .getsizeof():", sys.getsizeof(array), "bytes")

words = "\nMemory checks found <array>.nbytes and sys.getsizeof()"
words += "\n\t <array>.nbytes: only works on np.arrays and returns size of stored data"
words += "\n\t sys.getsizeof(): returns size of stored data + overhead"
print(words)

 Just to visualize the array: 
 [[66 92 98]
 [17 83 57]
 [86 97 96]]
Original array Type:  <class 'numpy.ndarray'>
Original array size using .nbytes: 72 bytes
Original array size using .getsizeof(): 200 bytes

Memory checks found <array>.nbytes and sys.getsizeof()
	 <array>.nbytes: only works on np.arrays and returns size of stored data
	 sys.getsizeof(): returns size of stored data + overhead


# Garbage Collector & Reference Counting: Automatic 

In [4]:
""" Automatic """
import sys
import gc

# Create an object
x = [1, 2, 3]

# Get reference count
ref_count = sys.getrefcount(x)

print("Reference count of x:", ref_count)

# get the current collection
# thresholds as a tuple
print("Garbage collection thresholds:",
                    gc.get_threshold())

Geeks = "\nHere, the default threshold on the above system is 700. " 
Geeks += "\nThis means when the number of allocations vs. the number of deallocations "
Geeks += "\nis greater than 700 the automatic garbage collector will run. "
Geeks += "\nThus any portion of your code which frees up large blocks of memory is a "
Geeks += "\ngood candidate for running manual garbage collection."

print(Geeks)

Reference count of x: 2
Garbage collection thresholds: (700, 10, 10)

Here, the default threshold on the above system is 700. 
This means when the number of allocations vs. the number of deallocations 
is greater than 700 the automatic garbage collector will run. 
Thus any portion of your code which frees up large blocks of memory is a 
good candidate for running manual garbage collection.


# Garbage Collector & Reference Counting: Manual 

In [5]:
""" Manual one variable vs cycling """

import sys
import gc

# Create an object
x = [1, 2, 3]

# Get reference count
ref_count = sys.getrefcount(x)

# Returns the number of
# objects it has collected
# and deallocated
collected = gc.collect()

print("Reference count of x:", ref_count)



i = 0
 
# create a cycle and on each iteration x as a dictionary
# assigned to 1
def create_cycle():
    x = { }
    x[i+1] = x
    print(x)
 
# lists are cleared whenever a full collection or
# collection of the highest generation (2) is run
collected = gc.collect() # or gc.collect(2)
print("Garbage collector: collected %d objects." % (collected))
 
print("Creating cycles...")
for i in range(10):
    create_cycle()
 
collected = gc.collect()
 
print("Garbage collector: collected %d objects." % (collected))

Reference count of x: 2
Garbage collector: collected 0 objects.
Creating cycles...
{1: {...}}
{2: {...}}
{3: {...}}
{4: {...}}
{5: {...}}
{6: {...}}
{7: {...}}
{8: {...}}
{9: {...}}
{10: {...}}
Garbage collector: collected 10 objects.


In [6]:
""" Compression Quick Example """

# Compress the array
compressed_array = compress_array(array)

# Calculate memory usage 
compressed_array_memory = sys.getsizeof(compressed_array)

# Decompress the bytes and reformat into an array 
decompressed_array = decompress_array(compressed_array)

# Calculate the memory usage 
decompressed_array_memory = sys.getsizeof(decompressed_array)

#------------------------------------------------------------------------------

print("Original array Type: ", type(array))
print("Original array Size: ",sys.getsizeof(array), "bytes" )

print("\nCompressed Type: ", type(compressed_array))
print("Compressed Size: ", sys.getsizeof(compressed_array), "bytes")


print("\nDecompressed Type: ", type(decompressed_array))
print("Decompressed Size: ", sys.getsizeof(decompressed_array), "bytes")




Original array Type:  <class 'numpy.ndarray'>
Original array Size:  200 bytes

Compressed Type:  <class 'bytes'>
Compressed Size:  65 bytes

Decompressed Type:  <class 'numpy.ndarray'>
Decompressed Size:  112 bytes


In [7]:
"""
Composite Program
- Memory usage checks 
    - Here we stress sys.getsizeof() to check data memory + overhead
- Timing checks
- Use of Garbage Collector (gc)

Relevant Functions used
- <array>.nbytes
- gc.disable()
- gc.collect()
- gc.enable()
- compress_array(<array>)
- decompress_array(<array>)
- sys.getsizeof(<object>)
"""
import numpy as np
import zlib
import gc
import time
import sys

""" Compression Functions """
def compress_array(array):
    """ 
    input: array
    output: bytes 

    subfunctions: 
    - zlib.compress()
    """
    compressed = zlib.compress(array)
    return compressed


def decompress_array(compressed_array):
    """ 
    input: bytes
    output: np.array 

    subfunctions:
    - zlib.decompress()
    - np.frombuffer()
    """
    decompressed = zlib.decompress(compressed_array)
    return np.frombuffer(decompressed, dtype=np.int64)


total_start_time = time.time()
# Set the seed for random number generation
np.random.seed(123)

# Create a large array of random integers
array = np.random.randint(0, 100, size=(50, 50))

# Print the size of the original array
print("Original array size using .nbytes:", array.nbytes, "bytes")
print("Original array size using .getsizeof():", sys.getsizeof(array), "bytes")


# Compress the array & track timing 
start_time = time.time()
compressed_array = compress_array(array)
compression_time = time.time() - start_time

print("\nCompressed array size using .getsizeof():", sys.getsizeof(compressed_array), "bytes")

# Explicitly trigger garbage collector 
gc.collect()           

# Decompress the array and track timing 
start_time = time.time()
decompressed_array = decompress_array(compressed_array)
decompression_time = time.time() - start_time

# Operation on decompressed array 
zeros = decompressed_array * 0

# Check if the decompressed array matches the original array
print("Arrays are equal:", np.array_equal(array, decompressed_array))

# Calculate memory usage
array_memory = sys.getsizeof(array)
compressed_array_memory = sys.getsizeof(compressed_array)
decompressed_array_memory = sys.getsizeof(decompressed_array)
total_memory_usage = array_memory + compressed_array_memory + decompressed_array_memory
zeros_memory = sys.getsizeof(zeros)

# Print the breakdown of memory usage
print("\nArray memory usage:", array_memory, "bytes")
print("Compressed array memory usage:", compressed_array_memory, "bytes")
print("Decompressed array memory usage:", decompressed_array_memory, "bytes")
print("Total memory usage:", total_memory_usage, "bytes")

# Calculate the total execution time
total_execution_time = compression_time + decompression_time

# Print the compression and decompression times
print("\nCompression time:", compression_time, "seconds")
print("Decompression time:", decompression_time, "seconds")

# Print the total execution time
print("Total execution time:", total_execution_time, "seconds")


print(array)
print(decompressed_array)
print(zeros)


sim_time = time.time() - total_start_time
print("Total sim time:", sim_time, "seconds")

Original array size using .nbytes: 20000 bytes
Original array size using .getsizeof(): 20128 bytes

Compressed array size using .getsizeof(): 3783 bytes
Arrays are equal: False

Array memory usage: 20128 bytes
Compressed array memory usage: 3783 bytes
Decompressed array memory usage: 112 bytes
Total memory usage: 24023 bytes

Compression time: 0.0018534660339355469 seconds
Decompression time: 0.00031828880310058594 seconds
Total execution time: 0.002171754837036133 seconds
[[66 92 98 ... 75 34 58]
 [10 22 77 ...  9 87 14]
 [83 70 12 ... 40 88 63]
 ...
 [48 60 94 ... 12 80 92]
 [58 45 82 ... 27 62 86]
 [16 68 73 ... 89 98 63]]
[66 92 98 ... 89 98 63]
[0 0 0 ... 0 0 0]
Total sim time: 0.05933976173400879 seconds
