### 1. Understanding Python Memory Management
How Python allocates and manages memory.

In [None]:
import sys

# Show the shallow size that sys.getsizeof reports for common built-ins
print("Memory size of different Python objects:\n")

# One sample object per built-in type
num = 42
float_num = 3.14
text = "Hello"
my_list = [1, 2, 3, 4, 5]
my_dict = {'a': 1, 'b': 2}

# A single print call; sep="\n" puts every report on its own line
print(
    f"Integer (42): {sys.getsizeof(num)} bytes",
    f"Float (3.14): {sys.getsizeof(float_num)} bytes",
    f"String ('Hello'): {sys.getsizeof(text)} bytes",
    f"List [1,2,3,4,5]: {sys.getsizeof(my_list)} bytes",
    f"Dictionary: {sys.getsizeof(my_dict)} bytes",
    sep="\n",
)

# An empty container next to one holding 100 items
empty_list = []
filled_list = list(range(100))
print(
    "\nContainer size comparison:",
    f"Empty list: {sys.getsizeof(empty_list)} bytes",
    f"List with 100 items: {sys.getsizeof(filled_list)} bytes",
    sep="\n",
)

### 2. Reference Counting
Reference counting is CPython's primary memory management mechanism: an object is freed as soon as its reference count drops to zero.

In [None]:
import sys

# Reference counting demonstration: print sys.getrefcount(a) after each step
# that adds or removes a reference to the same list object.
print("Reference Counting Example:\n")

# Create an object (the name 'a' is its first reference)
a = [1, 2, 3]
print(f"Created list 'a': {a}")
print(f"Reference count: {sys.getrefcount(a)}")
# Note: getrefcount itself creates a temporary reference, so count is +1

# Add another reference: 'b' is a second name for the same list, not a copy
b = a
print(f"\nCreated reference 'b = a'")
print(f"Reference count of 'a': {sys.getrefcount(a)}")
print(f"a is b: {a is b}")

# Add to a list: 'c' stores the list twice, i.e. two more references
c = [a, a]
print(f"\nAdded 'a' to list 'c' twice")
print(f"Reference count of 'a': {sys.getrefcount(a)}")

# Delete a reference: removes the name 'b' only, the list itself stays
del b
print(f"\nDeleted reference 'b'")
print(f"Reference count of 'a': {sys.getrefcount(a)}")

# Delete the container 'c', dropping both references it held.
# The name 'a' still refers to the list, so the list is not freed here.
del c
print(f"\nDeleted reference 'c'")
print(f"Reference count of 'a': {sys.getrefcount(a)}")

### 3. Object Identity and Memory Address
Understanding object identity in memory.

In [None]:
# Object identity and memory addresses: compare id() and `is` for ints,
# lists and strings.
print("Object Identity and Memory Addresses:\n")

# Small integers are cached (CPython keeps -5..256 pre-allocated), so every
# occurrence of 5 is the same object.
a = 5
b = 5
print("a = 5, b = 5")
print(f"id(a): {id(a)}")
print(f"id(b): {id(b)}")
print(f"a is b: {a is b}  # Same object in memory")

# Large integers are not cached.
# Two equal literals inside one cell are merged into a single constant by the
# compiler, which would make `x is y` True and hide the effect. Building the
# values at runtime gives each name its own int object.
x = int("1000")
y = int("1000")
print("\nx = 1000, y = 1000")
print(f"id(x): {id(x)}")
print(f"id(y): {id(y)}")
print(f"x is y: {x is y}  # May be different objects")

# Lists always create new objects, even when their contents are equal
list1 = [1, 2, 3]
list2 = [1, 2, 3]
print("\nlist1 = [1, 2, 3], list2 = [1, 2, 3]")
print(f"id(list1): {id(list1)}")
print(f"id(list2): {id(list2)}")
print(f"list1 is list2: {list1 is list2}  # Different objects")
print(f"list1 == list2: {list1 == list2}  # But equal values")

# String interning: identical identifier-like literals share one object
s1 = "hello"
s2 = "hello"
print("\ns1 = 'hello', s2 = 'hello'")
print(f"id(s1): {id(s1)}")
print(f"id(s2): {id(s2)}")
print(f"s1 is s2: {s1 is s2}  # Same object (interned)")

### 4. Garbage Collection Basics
Introduction to Python's generational garbage collector, which finds and frees reference cycles that reference counting alone cannot reclaim.

In [None]:
import gc

# Report whether the garbage collector is currently switched on
print("Garbage collection enabled: {}".format(gc.isenabled()))

# Thresholds configured for the three generations
thresholds = gc.get_threshold()
print("\nGC Thresholds (gen0, gen1, gen2): {}".format(thresholds))

# Per-generation statistics, one entry per generation
print("\nGC Statistics:")
stats = gc.get_stats()
for i in range(len(stats)):
    stat = stats[i]
    print("Generation {}: {}".format(i, stat))

# How many objects the collector is tracking right now
print("\nNumber of objects tracked: {}".format(len(gc.get_objects())))

# Current collection counters, followed by a legend for each position
counts = gc.get_count()
print(
    "\nGC counts (gen0, gen1, gen2): {}".format(counts),
    "  gen0: objects created since last gen0 collection",
    "  gen1: number of gen0 collections since last gen1 collection",
    "  gen2: number of gen1 collections since last gen2 collection",
    sep="\n",
)

### 5. Circular References
Understanding and detecting circular references.

In [None]:
import gc
import sys

# Example of circular reference
class Node:
    """Linked-list style node that announces its own destruction.

    Attributes:
        value: payload shown in the repr.
        next: reference to another node, None until assigned.
    """

    def __init__(self, value):
        self.next = None
        self.value = value

    def __repr__(self):
        return "Node({})".format(self.value)

    def __del__(self):
        # Printed when the object is finalized, making collection visible
        print("Deleting {}".format(self))

# Build two Node objects that point at each other, drop the variable names,
# then run the collector explicitly and report how many objects it collected.
print("Creating circular reference:")

# Create nodes
node1 = Node(1)
node2 = Node(2)

# Create circular reference: each node's .next refers to the other node
node1.next = node2
node2.next = node1

print(f"node1.next = {node1.next}")
print(f"node2.next = {node2.next}")
# Each node is referenced by its variable and by the other node's .next
# (the printed count also includes getrefcount's own temporary reference)
print(f"Reference count of node1: {sys.getrefcount(node1)}")
print(f"Reference count of node2: {sys.getrefcount(node2)}")

# Delete references: only the names go away; the nodes still reference
# each other through .next
print("\nDeleting node1 and node2 variables...")
del node1
del node2

print("Variables deleted, but objects still in memory due to circular reference")

# Force garbage collection; Node.__del__ prints a line for each node freed
print("\nForcing garbage collection...")
collected = gc.collect()
print(f"Collected {collected} objects")
print("Now the circular reference is broken and objects are freed")

### 6. Manual Garbage Collection
Controlling garbage collection manually.

In [None]:
import gc
import time

# Watch the collector's counters around a burst of allocations, then time an
# explicit gc.collect(). Automatic collection stays enabled for this part.
print("Testing manual garbage collection:\n")

# Get initial counts
print(f"Initial GC counts: {gc.get_count()}")

# Create many objects (10000 lists of 100 items each)
print("\nCreating 10000 objects...")
objects = []
for i in range(10000):
    objects.append([i] * 100)

print(f"GC counts after creation: {gc.get_count()}")

# Clear references: emptying the outer list releases every inner list
print("\nClearing references...")
objects.clear()

print(f"GC counts after clearing: {gc.get_count()}")

# Manual collection, timed with perf_counter: it is monotonic and has the
# resolution needed for an interval this short (time.time() has neither
# guarantee).
print("\nPerforming manual garbage collection...")
start_time = time.perf_counter()
collected = gc.collect()
end_time = time.perf_counter()

print(f"Collected {collected} objects")
print(f"Collection took {(end_time - start_time) * 1000:.2f} ms")
print(f"GC counts after collection: {gc.get_count()}")

# Disabling/enabling GC
print("\n" + "="*50)
print("Testing GC enable/disable:\n")

# Remember the state found on entry so the demo leaves the kernel unchanged
gc_was_enabled = gc.isenabled()
print(f"GC enabled: {gc_was_enabled}")

gc.disable()
print(f"After gc.disable(): {gc.isenabled()}")

gc.enable()
print(f"After gc.enable(): {gc.isenabled()}")

# Restore the original setting if the collector was off before this cell
if not gc_was_enabled:
    gc.disable()

### 7. Memory Profiling with tracemalloc
Tracking memory allocations.

In [None]:
import tracemalloc

# Trace allocations around three large objects and report, per source line,
# how much memory was added between two snapshots.

# Start tracing memory allocations
tracemalloc.start()

# Take a snapshot before allocation (the baseline for the comparison below)
snapshot1 = tracemalloc.take_snapshot()

# Allocate memory: a 100000-item list, a 10000-entry dict and a
# 1000000-character string
print("Allocating memory...\n")
large_list = [i for i in range(100000)]
large_dict = {i: i**2 for i in range(10000)}
large_string = "x" * 1000000

# Take a snapshot after allocation
snapshot2 = tracemalloc.take_snapshot()

# Compare snapshots, grouping the differences with the 'lineno' key
top_stats = snapshot2.compare_to(snapshot1, 'lineno')

print("Top 10 memory allocations:")
for stat in top_stats[:10]:
    print(stat)

# Get current memory usage; values are in bytes and converted to MB below
current, peak = tracemalloc.get_traced_memory()
print(f"\nCurrent memory usage: {current / 1024 / 1024:.2f} MB")
print(f"Peak memory usage: {peak / 1024 / 1024:.2f} MB")

# Stop tracing
tracemalloc.stop()

# Clean up: drop the names so the large objects can be released
del large_list, large_dict, large_string

### 8. Weak References
Using weak references to avoid keeping objects alive.

In [None]:
import weakref
import gc

class LargeObject:
    """Named object carrying a large list, used to make deletion visible.

    Attributes:
        name: label used in the repr and in the deletion message.
        data: list with one million slots, all set to 0.
    """

    def __init__(self, name):
        self.name = name
        self.data = 1000000 * [0]  # 1 million integers

    def __repr__(self):
        return "LargeObject({})".format(self.name)

    def __del__(self):
        # Printed when the object is finalized
        print("Deleting {}".format(self.name))

# Three short demos using LargeObject (whose __del__ prints a message):
# a second strong reference, a weakref.ref, and a WeakValueDictionary.
print("Strong Reference Example:")
print("="*50)

# Strong reference keeps object alive: strong_ref is a second name for obj1
obj1 = LargeObject("obj1")
strong_ref = obj1

print(f"Created {obj1}")
print(f"Deleting obj1 variable...")
del obj1

print("Object still exists via strong_ref")
print(f"strong_ref: {strong_ref}")

# strong_ref was the last remaining name for the object
del strong_ref
print("Now object is deleted\n")

gc.collect()

print("\nWeak Reference Example:")
print("="*50)

# Weak reference doesn't keep object alive
obj2 = LargeObject("obj2")
weak_ref = weakref.ref(obj2)

print(f"Created {obj2}")
# Calling the weak reference yields the referent while it is still alive
print(f"Weak reference: {weak_ref()}")
print(f"\nDeleting obj2 variable...")
del obj2

gc.collect()

print("Object is deleted, weak reference is now dead")
print(f"Weak reference: {weak_ref()}  # Returns None")

print("\nWeakref Dictionary Example:")
print("="*50)

# WeakValueDictionary: the only strong reference to the value is obj3
cache = weakref.WeakValueDictionary()

obj3 = LargeObject("obj3")
cache['key1'] = obj3

print(f"Added to cache: {cache['key1']}")
print(f"Cache keys: {list(cache.keys())}")

# Dropping obj3 leaves the cache entry as the only remaining link
del obj3
gc.collect()

print(f"\nAfter deleting obj3:")
print(f"Cache keys: {list(cache.keys())}  # Empty - object was collected")

### 9. Memory Leaks and Detection
Identifying and fixing memory leaks.

In [None]:
import gc
import sys

# Example 1: Global variable memory leak
print("Example 1: Global Variable Memory Leak")
print("="*50)

# Module-level list: everything appended here stays alive until it is cleared
leaked_list = []

def memory_leak_example():
    """Function that leaks memory by appending to global list.

    Each call appends 1000 inner lists of 100 items to ``leaked_list``;
    nothing is returned and nothing is ever removed.
    """
    for i in range(1000):
        leaked_list.append([i] * 100)

print(f"Initial list size: {len(leaked_list)}")
memory_leak_example()
print(f"After function call: {len(leaked_list)} items")

# sys.getsizeof is shallow: on the outer list it counts only the pointer
# array. Add the size of every inner list to report what is actually held.
# The int objects inside the inner lists are still not counted.
leaked_bytes = sys.getsizeof(leaked_list) + sum(
    sys.getsizeof(chunk) for chunk in leaked_list
)
print(f"Memory used by list: {leaked_bytes / 1024:.2f} KB\n")

# Fix: Clear the list
leaked_list.clear()
print("After clearing: memory can be reclaimed\n")

# Example 2: Circular reference memory leak
print("Example 2: Circular Reference", "=" * 50, sep="\n")

class Parent:
    """Owner side of the cycle: keeps a list of its children."""

    def __init__(self):
        self.children = []

class Child:
    """Member side of the cycle: stores its parent and registers with it."""

    def __init__(self, parent):
        self.parent = parent
        parent.children.append(self)

# Create circular reference (parent -> children list -> child -> parent)
parent = Parent()
child1 = Child(parent)
child2 = Child(parent)

print("Parent has {} children".format(len(parent.children)))
print("Each child references parent: {}".format(child1.parent is parent))

# Remove the three names one at a time, then let the collector run
del parent
del child1
del child2
print("Variables deleted, running GC...")
collected = gc.collect()
print("Collected {} objects\n".format(collected))

# Example 3: Using gc.get_referrers to debug
print("Example 3: Debugging with gc.get_referrers", "=" * 50, sep="\n")

class MyObject:
    """Empty class whose instance is the target of the referrer lookup."""

# One instance, referenced from a list and from a dict value
obj = MyObject()
list1 = [obj]
dict1 = {'key': obj}

print(f"Object: {obj}")
print("\nReferrers to this object:")
for referrer in gc.get_referrers(obj):
    # Only dicts whose text form reaches 100 characters are abbreviated
    if isinstance(referrer, dict) and len(str(referrer)) >= 100:
        print(f"  {type(referrer)}: dict...")
    else:
        print(f"  {type(referrer)}: {referrer}")

# Clean up
del obj
del list1
del dict1

### 10. Best Practices: Using Context Managers
Automatic resource cleanup.

In [None]:
import sys

# Bad Practice: Manual resource management
print("❌ Bad Practice: Manual File Handling", "=" * 50, sep="\n")

def bad_file_handling():
    """Write to test_file.txt with explicit open/close calls.

    Kept as the counter-example: nothing guards the close call.
    """
    handle = open('test_file.txt', 'w')
    handle.write('Hello, World!')
    handle.close()  # Easy to forget!
    # If an exception occurs, file won't be closed

# Good Practice: Using context manager
print("✅ Good Practice: Context Manager", "=" * 50, sep="\n")

def good_file_handling():
    """Write to test_file.txt inside a ``with`` block."""
    with open('test_file.txt', 'w') as handle:
        handle.write('Hello, World!')
    # File is automatically closed, even if exception occurs

good_file_handling()
print("File handled safely with context manager\n")

# Custom context manager for memory tracking
print("Custom Context Manager for Memory Tracking:")
print("="*50)

class MemoryTracker:
    """Context manager that prints tracemalloc usage for the enclosed block.

    On entry it makes sure tracemalloc is tracing and stores a baseline
    snapshot in ``snapshot1``. On exit it stores ``snapshot2``, prints the
    current and peak traced memory in KB, and stops tracing only if this
    instance started it, so an already running tracing session survives.
    Exceptions raised inside the block are never suppressed.
    """

    def __enter__(self):
        import tracemalloc
        # Remember who owns the tracing session so __exit__ does not switch
        # off tracing that was started elsewhere (e.g. by an outer tracker).
        self._owns_tracing = not tracemalloc.is_tracing()
        if self._owns_tracing:
            tracemalloc.start()
        self.snapshot1 = tracemalloc.take_snapshot()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        import tracemalloc
        try:
            self.snapshot2 = tracemalloc.take_snapshot()

            current, peak = tracemalloc.get_traced_memory()
            print("\nMemory usage during block:")
            print(f"  Current: {current / 1024:.2f} KB")
            print(f"  Peak: {peak / 1024:.2f} KB")
        finally:
            # Always release our own session, even if reporting failed
            if self._owns_tracing:
                tracemalloc.stop()
        # False lets any exception from the with-block propagate
        return False

# Use the custom context manager
with MemoryTracker():
    # Create some objects
    data = [i**2 for i in range(10000)]
    print(f"Created list with {len(data)} items")

### 11. Best Practices: Generator vs List
Memory-efficient iteration.

In [None]:
import sys

# Compare memory usage: List vs Generator
print("Memory Comparison: List vs Generator", "=" * 50, sep="\n")

# Eager version: every square is stored in the list
list_comp = [x**2 for x in range(100000)]
print("List comprehension memory: {:.2f} KB".format(sys.getsizeof(list_comp) / 1024))

# Lazy version: squares are produced one at a time on demand
gen_expr = (x**2 for x in range(100000))
print("Generator expression memory: {:.2f} KB".format(sys.getsizeof(gen_expr) / 1024))

print("\nMemory saved: {:.2f} KB".format(
    (sys.getsizeof(list_comp) - sys.getsizeof(gen_expr)) / 1024
))

# Example: Processing large dataset
print("\n" + "=" * 50, "Processing Large Dataset Example:\n", sep="\n")

# Bad: Load all data into memory
def process_data_bad(n=1000000):
    """Sum the doubled values of ``range(n)`` by materialising two lists.

    This is the memory-hungry counter-example: both the input values and
    the doubled values exist as complete lists at the same time.

    Args:
        n: number of values to process; defaults to the original 1000000.

    Returns:
        The sum of ``2 * i`` for every ``i`` in ``range(n)``.
    """
    data = [i for i in range(n)]         # All in memory
    results = [x * 2 for x in data]      # Another copy in memory
    return sum(results)

# Good: Use generators
def process_data_good(n=1000000):
    """Sum the doubled values of ``range(n)`` through chained generators.

    No intermediate list is built: each value is produced, doubled and
    added to the running total before the next one is generated.

    Args:
        n: number of values to process; defaults to the original 1000000.

    Returns:
        The sum of ``2 * i`` for every ``i`` in ``range(n)``.
    """
    data = (i for i in range(n))          # Generator
    results = (x * 2 for x in data)       # Generator chain
    return sum(results)                   # Computed on-the-fly

import tracemalloc

# List-based version: trace, run, read the peak, stop tracing
tracemalloc.start()
result_bad = process_data_bad()
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("Bad approach peak memory: {:.2f} MB".format(peak / (1024 * 1024)))

# Generator-based version, measured the same way in a fresh tracing session
tracemalloc.start()
result_good = process_data_good()
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("Good approach peak memory: {:.2f} MB".format(peak / (1024 * 1024)))

print("\nBoth produce same result: {}".format(result_bad == result_good))

### 12. Best Practices: Slots for Classes
Reducing memory overhead in classes.

In [None]:
import sys

# Regular class with __dict__
class RegularPoint:
    """2-D point whose attributes are stored in the per-instance __dict__."""

    def __init__(self, x, y):
        self.x, self.y = x, y

# Class with __slots__
class SlottedPoint:
    """2-D point that declares its two attributes through __slots__."""

    __slots__ = ['x', 'y']

    def __init__(self, x, y):
        self.x, self.y = x, y

# Compare single instance
print("Single Instance Comparison:", "=" * 50, sep="\n")

regular = RegularPoint(10, 20)
slotted = SlottedPoint(10, 20)

# Sizes are read in the same order as before: object first, then its __dict__
print("Regular class size: {} bytes".format(sys.getsizeof(regular)))
print("Regular __dict__ size: {} bytes".format(sys.getsizeof(regular.__dict__)))
print("Total regular: {} bytes".format(
    sys.getsizeof(regular) + sys.getsizeof(regular.__dict__)
))

print("\nSlotted class size: {} bytes".format(sys.getsizeof(slotted)))
print("No __dict__ attribute (uses slots)")

# Compare many instances
print("\n" + "=" * 50, "Many Instances Comparison:\n", sep="\n")

import tracemalloc

# Regular classes: peak traced memory while 100000 instances are built
tracemalloc.start()
regular_points = [RegularPoint(i, i*2) for i in range(100000)]
regular_memory = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()

del regular_points

# Slotted classes: same measurement in a fresh tracing session
tracemalloc.start()
slotted_points = [SlottedPoint(i, i*2) for i in range(100000)]
current, peak = tracemalloc.get_traced_memory()
slotted_memory = peak
tracemalloc.stop()

print("100,000 regular instances: {:.2f} MB".format(regular_memory / (1024 * 1024)))
print("100,000 slotted instances: {:.2f} MB".format(slotted_memory / (1024 * 1024)))
print("\nMemory saved with slots: {:.2f} MB".format(
    (regular_memory - slotted_memory) / (1024 * 1024)
))
print("Reduction: {:.1f}%".format(
    (regular_memory - slotted_memory) / regular_memory * 100
))

del slotted_points

### 13. Best Practices: String Interning
Optimizing string memory usage.

In [None]:
import sys

# Automatic string interning: identical identifier-like literals share one
# object
print("Automatic String Interning:")
print("="*50)

s1 = "hello"
s2 = "hello"
print("s1 = 'hello'")
print("s2 = 'hello'")
print(f"s1 is s2: {s1 is s2}  # Same object")
print(f"id(s1): {id(s1)}")
print(f"id(s2): {id(s2)}")

# String concatenation doesn't intern
print("\nString Concatenation:")
print("="*50)

# Concatenating two literals is folded into one constant at compile time,
# so both names would end up on the same object. Starting from the variable
# s1 forces the concatenation to happen at runtime, producing a new string
# each time.
s3 = s1 + " world"
s4 = s1 + " world"
print("s3 = 'hello' + ' world'")
print("s4 = 'hello' + ' world'")
print(f"s3 is s4: {s3 is s4}  # Different objects")
print(f"id(s3): {id(s3)}")
print(f"id(s4): {id(s4)}")

# Manual interning: sys.intern returns the canonical shared copy
print("\nManual String Interning:")
print("="*50)

s5 = sys.intern("hello world")
s6 = sys.intern("hello world")
print("s5 = sys.intern('hello world')")
print("s6 = sys.intern('hello world')")
print(f"s5 is s6: {s5 is s6}  # Same object after interning")
print(f"id(s5): {id(s5)}")
print(f"id(s6): {id(s6)}")

# Practical example: Many duplicate strings
print("\n" + "="*50)
print("Practical Example: Processing Log Files\n")

import tracemalloc

# Without interning
tracemalloc.start()
logs_without_intern = []
for i in range(10000):
    # Simulate log entries with repeated strings.
    # Each entry is formatted at runtime, as it would be when parsing a real
    # log, so every append stores its own string object. Appending a bare
    # literal would reuse one shared constant and make both measurements
    # identical.
    logs_without_intern.append("{}: {}".format("ERROR", "Connection timeout"))
    logs_without_intern.append("{}: {}".format("INFO", "Request processed"))
    logs_without_intern.append("{}: {}".format("WARNING", "High memory usage"))

current, peak = tracemalloc.get_traced_memory()
memory_without = peak
tracemalloc.stop()

# With interning: every entry points at one of three shared strings
tracemalloc.start()
logs_with_intern = []
error_msg = sys.intern("ERROR: Connection timeout")
info_msg = sys.intern("INFO: Request processed")
warning_msg = sys.intern("WARNING: High memory usage")

for i in range(10000):
    logs_with_intern.append(error_msg)
    logs_with_intern.append(info_msg)
    logs_with_intern.append(warning_msg)

current, peak = tracemalloc.get_traced_memory()
memory_with = peak
tracemalloc.stop()

print(f"Without interning: {memory_without / 1024:.2f} KB")
print(f"With interning: {memory_with / 1024:.2f} KB")
print(f"Memory saved: {(memory_without - memory_with) / 1024:.2f} KB")
print(f"Reduction: {((memory_without - memory_with) / memory_without * 100):.1f}%")

### 14. Best Practices: Efficient Data Structures
Choosing the right data structure for memory efficiency.

In [None]:
import sys
from array import array
from collections import deque
import numpy as np

print("Memory Comparison of Data Structures")
print("="*50)

# Create same data in different structures
n = 100000

# List (sys.getsizeof is shallow here: it counts the pointer array only,
# not the int objects the list refers to)
python_list = list(range(n))
list_size = sys.getsizeof(python_list)

# Tuple (shallow as well)
python_tuple = tuple(range(n))
tuple_size = sys.getsizeof(python_tuple)

# Array (the typed values are stored inside the array's own buffer)
python_array = array('i', range(n))
array_size = sys.getsizeof(python_array)

# NumPy array
# For an array that owns its data, sys.getsizeof already includes the data
# buffer, so adding .nbytes on top would count the buffer twice. The buffer
# is added only for arrays that do not own their data (views).
numpy_array = np.arange(n, dtype=np.int32)
numpy_size = sys.getsizeof(numpy_array)
if not numpy_array.flags.owndata:
    numpy_size += numpy_array.nbytes

print("\n100,000 integers stored in different structures:\n")
print(f"List:        {list_size / 1024:.2f} KB")
print(f"Tuple:       {tuple_size / 1024:.2f} KB  (immutable, slightly smaller)")
print(f"Array:       {array_size / 1024:.2f} KB  (typed, more efficient)")
print(f"NumPy Array: {numpy_size / 1024:.2f} KB  (most efficient for numbers)")

# Deque vs List for frequent insertions/deletions
print("\n" + "="*50)
print("Deque vs List for Operations:\n")

import time

# List - slow insertions at beginning (every insert shifts all items).
# perf_counter is used because the intervals are far below the resolution
# time.time() guarantees.
test_list = list(range(10000))
start = time.perf_counter()
for i in range(1000):
    test_list.insert(0, i)
list_time = time.perf_counter() - start

# Deque - fast insertions at both ends
test_deque = deque(range(10000))
start = time.perf_counter()
for i in range(1000):
    test_deque.appendleft(i)
deque_time = time.perf_counter() - start

# Guard the ratio: a zero-length measurement must not crash the cell
speedup = list_time / deque_time if deque_time > 0 else float("inf")

print("1000 insertions at beginning:")
print(f"  List:  {list_time*1000:.2f} ms")
print(f"  Deque: {deque_time*1000:.2f} ms")
print(f"  Speedup: {speedup:.1f}x")

# Set vs List for membership testing
print("\n" + "="*50)
print("Set vs List for Membership Testing:\n")

test_list = list(range(10000))
test_set = set(range(10000))

# List lookup: 9999 is the last element, so each test scans the whole list.
# The result goes into a named variable rather than `_`, which IPython uses
# for the last cell output.
start = time.perf_counter()
for i in range(1000):
    found = 9999 in test_list
list_time = time.perf_counter() - start

# Set lookup: hash-based, independent of the element's position
start = time.perf_counter()
for i in range(1000):
    found = 9999 in test_set
set_time = time.perf_counter() - start

# Guard the ratio: a zero-length measurement must not crash the cell
speedup = list_time / set_time if set_time > 0 else float("inf")

print("1000 membership tests:")
print(f"  List: {list_time*1000:.2f} ms")
print(f"  Set:  {set_time*1000:.2f} ms")
print(f"  Speedup: {speedup:.0f}x")

print("\nKey Takeaways:")
print("  • Use tuples for immutable data (slightly more memory efficient)")
print("  • Use array/NumPy for numeric data (much more efficient)")
print("  • Use deque for frequent insertions/deletions")
print("  • Use set for membership testing")

### 15. Memory Management Best Practices Summary

In [None]:
# Summary of best practices
print("Memory Management Best Practices in Python", "=" * 60, sep="\n")

# (title, description) pairs; the "N. " prefix is generated from the position
best_practices = [
    (f"{number}. {name}", advice)
    for number, (name, advice) in enumerate(
        [
            ("Use Generators",
             "For large datasets, use generators instead of lists to process data on-the-fly"),
            ("Use Context Managers",
             "Always use 'with' statement for files and resources to ensure cleanup"),
            ("Use __slots__",
             "For classes with many instances, use __slots__ to reduce memory overhead"),
            ("String Interning",
             "Use sys.intern() for frequently repeated strings to save memory"),
            ("Choose Efficient Data Structures",
             "Use array/NumPy for numbers, deque for queues, set for membership tests"),
            ("Avoid Circular References",
             "Break circular references or use weak references when appropriate"),
            ("Delete Large Objects",
             "Explicitly delete large objects when no longer needed (del variable)"),
            ("Use Local Variables",
             "Local variables are faster and cleaned up automatically"),
            ("Profile Memory Usage",
             "Use tracemalloc and memory_profiler to identify memory bottlenecks"),
            ("Lazy Loading",
             "Load data only when needed, not all at once"),
        ],
        start=1,
    )
]

# Each practice: blank line, title, then the indented description
for title, description in best_practices:
    print(f"\n{title}\n  → {description}")

# Pitfalls section (separator line first)
print(
    "\n" + "=" * 60,
    "\nCommon Memory Issues to Avoid:",
    "  ❌ Global variables holding large data",
    "  ❌ Circular references without cleanup",
    "  ❌ Loading entire files into memory",
    "  ❌ Creating unnecessary copies of data",
    "  ❌ Using lists when generators would suffice",
    "  ❌ Not closing file handles and connections",
    "  ❌ Accumulating data in caches without limits",
    sep="\n",
)

# Guidance on explicit collection (separator line first)
print(
    "\n" + "=" * 60,
    "\nWhen to Manually Call gc.collect():",
    "  • After deleting many large objects",
    "  • Before memory-intensive operations",
    "  • In long-running applications with memory constraints",
    "  • After processing large batches of data",
    "\n  Note: Usually not needed - Python's automatic GC is efficient",
    sep="\n",
)