In [2]:
# Module 5: Performance Optimization Techniques 

# 1. • Profiling and Benchmarking 
# o Using cProfile, line_profiler, and timeit 
# o Visualizing performance with SnakeViz 
# • Memory Profiling 
# o Using memory_profiler and tracemalloc 
# o Reducing memory footprint 

# 2. • Code Optimization Strategies 
# o Efficient data handling (NumPy, array, memoryviews) 
# o Vectorization and batching 
# o Caching and memoization techniques 

# 3. • Accelerating Python 
# o Just-in-Time compilation with Numba 
# o Using Cython for performance-critical modules 
# o Integrating with C/C++ via ctypes or cffi

In [None]:
# What is “performance optimization”?
# Golden Rule:
# Don’t guess what is slow. Measure it first.
# That measurement is done using profiling and benchmarking.

In [4]:
# Benchmarking:
# “How long does this code take?”
# Tools: timeit, simple timers.

In [None]:
# Profiling:
# “Where is the time going inside my program?”
# Tools: cProfile (function-level), line_profiler (line-level).

In [None]:
# timeit (benchmarking)
# - Best for measuring small code snippets
# - Runs code many times to give more reliable timing
# - Helps compare two approaches

# cProfile (profiling)
# - Built-in profiler
# - Shows which functions take time
# - Great first step for real programs

# line_profiler (profiling, line-by-line)
# - Shows time spent on each line inside a function
# - Very useful when one function is slow and you need exact line-level detail
# - Requires installing (pip install line_profiler)

# SnakeViz (visualization)
# - Turns cProfile results into an interactive visual view
# - Easier than reading long text tables
# - Requires installing (pip install snakeviz)

In [6]:
# A) Benchmarking with timeit
# Goal: compare two ways of building a list

import timeit
# timeit is used to measure execution time of small code snippets reliably

# Code snippet 1: using a for-loop
code_loop = """
result = []
for i in range(10000):
    result.append(i * i)
"""

# Code snippet 2: using list comprehension (often faster in Python)
code_comp = """
result = [i * i for i in range(10000)]
"""

# Run each snippet multiple times and measure total time
t1 = timeit.timeit(code_loop, number=1000)
# timeit() executes the code_loop snippet 1000 times and returns total seconds

t2 = timeit.timeit(code_comp, number=1000)
# timeit() executes the code_comp snippet 1000 times and returns total seconds

print("For-loop total time:", t1)
print("List comprehension total time:", t2)

# OUTPUT (example; will vary by machine):
# For-loop total time: 0.80
# List comprehension total time: 0.55

# Both snippets do the same work
# timeit measures performance accurately
# List comprehensions are usually faster than for-loops in Python
# Running code many times gives reliable benchmarks

For-loop total time: 0.1833622089761775
List comprehension total time: 0.13026441598776728


In [8]:
# A) Benchmarking with timeit
# Goal: compare two ways of building a list

import timeit
# timeit is used to measure execution time of small code snippets reliably

# Code snippet 1: using a for-loop
code_loop = """
result = []
for i in range(10000):
    result.append(i * i)
"""

# Code snippet 2: using list comprehension (often faster in Python)
code_comp = """
result = [i * i for i in range(10000)]
"""

# Run each snippet multiple times and measure total time
t1 = timeit.timeit(code_loop, number=10000)
# timeit() executes the code_loop snippet 1000 times and returns total seconds

t2 = timeit.timeit(code_comp, number=10000)
# timeit() executes the code_comp snippet 1000 times and returns total seconds

print("For-loop total time:", t1)
print("List comprehension total time:", t2)

# OUTPUT (example; will vary by machine):
# For-loop total time: 0.80
# List comprehension total time: 0.55

# Both snippets do the same work
# timeit measures performance accurately
# List comprehensions are usually faster than for-loops in Python
# Running code many times gives reliable benchmarks

# Note:
# timeit helps you compare approaches
# It’s not telling you “why” it’s slow—only “how long”

For-loop total time: 1.557621084008133
List comprehension total time: 1.3023948750051204


In [None]:
# B) Profiling with cProfile (find slow functions)
# Goal: find which functions are slow in a bigger program

import cProfile
# cProfile measures where time is spent across functions

import pstats
# pstats helps format and sort profiling results
# pstats is used to read, sort, and display profiling results nicely.

def slow_task():
    # This function does extra work slowly on purpose
    total = 0
    for i in range(200_000_000):
        total += i
    return total

def fast_task():
    # This function is quicker on purpose
    return sum(range(200_000_000))

def main():
    # main() calls both tasks so profiler can measure them
    slow_task()
    fast_task()

if __name__ == "__main__":
    profiler = cProfile.Profile()
    # Create a profiler object

    profiler.enable()
    # Start collecting profiling stats
    # Starts recording performance data.

    main()
    # Run the program section you want to profile

    profiler.disable()
    # Stop collecting stats

    stats = pstats.Stats(profiler).strip_dirs().sort_stats("cumtime")
    # strip_dirs(): cleaner output (removes long paths)
    # sort_stats("cumtime"): sorts by cumulative time (time in function + subcalls)

    stats.print_stats(10)
    # Print top 10 entries

# OUTPUT (example format):
# ncalls  tottime  percall  cumtime  percall  filename:lineno(function)
# 1       ...      ...      ...      ...      yourfile.py:..(slow_task)
# 1       ...      ...      ...      ...      yourfile.py:..(fast_task)


# To run this, we need to generate some profiling data i.e. a ".prof" file
# python -m cProfile -o program.prof demo.py
# This command runs demo.py and saves profiling data to program.prof

# Visualize using SnakeViz
# snakeviz program.prof
# This opens a web-based visualization of the profiling data

# SnakeViz shines when:
# Runtime is longer
# Functions differ more significantly in cost

In [None]:
# D) Line-by-line profiling with line_profiler (when one function is the problem)
# When to use it
# Use it after cProfile shows:
# “This function is slow.”
# Now you want:
# “Which line inside it is slow?”
# Install line_profiler:
# pip install line_profiler
# your_script.py
@profile
def heavy_function():
    total = 0                 # line 1: initialization
    for i in range(2_000_000):# line 2: loop
        total += i            # line 3: HOT LINE (likely)
    return total              # line 4: return



def main():
    heavy_function()


if __name__ == "__main__":
#     main()
# (base) ingledarshan@MacBook-Pro Resources % kernprof -l -v demo.py
# Wrote profile results to 'demo.py.lprof'
# Timer unit: 1e-06 s

# Total time: 0.442432 s
# File: demo.py
# Function: heavy_function at line 2

# Line #      Hits         Time  Per Hit   % Time  Line Contents
# ==============================================================
#      2                                           @profile
#      3                                           def heavy_function():
#      4         1          0.0      0.0      0.0      total = 0                 # line 1: initialization
#      5   2000001     219293.0      0.1     49.6      for i in range(2_000_000):# line 2: loop
#      6   2000000     223139.0      0.1     50.4          total += i            # line 3: HOT LINE (likely)
#      7         1          0.0      0.0      0.0      return total              # line 4: return

In [9]:
# What to use when:
# - Start with timeit for small comparisons
# - Use cProfile for the whole program to find slow functions
# - Use SnakeViz to visualize and explore quickly
# - Use line_profiler only for the specific slow functions

# timeit tells you how long, cProfile tells you where, line_profiler tells you which line, and SnakeViz helps you see it visually.

In [10]:
# Memory Profiling 
# o Using memory_profiler and tracemalloc 
# o Reducing memory footprint

In [12]:
# What is "memory" in a program?
# Memory is the space your program uses to store:
# - variables
# - lists, dicts, objects
# - temporary data while code runs

# If a program:
# - uses too much memory → it becomes slow or crashes
# - leaks memory → memory keeps growing over time
# So we measure memory usage, then reduce it.

In [13]:
# What is "memory profiling"?
# “How much memory is my code using, and where is it being used?”

# This is different from speed profiling:
# - Speed profiling → time
# - Memory profiling → RAM usage

In [14]:
# Overview of the tools used here:

# 1. memory_profiler
# - Shows memory usage line by line
# - Very easy to understand
# - Great for beginners
# - Needs installation

# 2. tracemalloc
# - Built into Python (no install)
# - Tracks where memory is allocated
# - Compares memory usage between code points
# - More powerful, slightly more advanced

In [15]:
# pip install memory_profiler
# pip install psutil


In [16]:
from memory_profiler import profile
# profile decorator tells memory_profiler to track memory usage

@profile
def create_list():
    data = []                     # start with empty list
    for i in range(1_000_000):
        data.append(i)            # list keeps growing → memory increases
    return data

if __name__ == "__main__":
    create_list()

# To run this:
# python -m memory_profiler your_script.py

# Mem usage → total memory at that line
# Increment → memory added by that line
# You immediately see which line increases memory most

# Why memory_profiler is useful
# It tells you:
# - which line is allocating memory
# - whether memory keeps growing
# - where optimization is needed

ERROR: Could not find file /var/folders/5g/9xpg7d6d4114s98tv10y9rgm0000gn/T/ipykernel_62586/4000661648.py


In [None]:
# tracemalloc is built into Python and answers:
# “Which lines allocated memory?”

import tracemalloc
# tracemalloc tracks memory allocations

def build_data():
    return [i * i for i in range(1_000_000)]

tracemalloc.start()
# Start tracking memory allocations

data = build_data()
# Memory is allocated here

current, peak = tracemalloc.get_traced_memory()
# current = memory currently used
# peak = highest memory usage during execution

print(f"Current memory: {current / 1_000_000:.2f} MB")
print(f"Peak memory: {peak / 1_000_000:.2f} MB")

tracemalloc.stop()
# Stop tracking

# Current memory → how much memory is in use now

# Peak memory → maximum memory used at any point
# Peak memory is very important for:
# - containers
# - cloud limits
# - avoiding crashes

In [None]:
# Comparing memory usage between two points

import tracemalloc # built-in module to track memory allocations

tracemalloc.start() # Start tracking memory allocations

data1 = [i for i in range(500_000)] # Create a list of 500,000 integers
snapshot1 = tracemalloc.take_snapshot() # take a snapshot of memory usage after creating data1

data2 = [i for i in range(1_000_000)] # Create a list of 1,000,000 integers
snapshot2 = tracemalloc.take_snapshot() # take a snapshot of memory usage after creating data2

stats = snapshot2.compare_to(snapshot1, "lineno") # compare the two snapshots and get stats by line number

for stat in stats[:3]: # print the top 3 lines that increased memory usage the most
    print(stat)


In [19]:
# 7) Reducing Memory Footprint
# A) Avoid storing large data when you don’t need it
# Avoid:
# numbers = [i * i for i in range(10_000_000)]
# Better (uses generator, one value at a time):
# numbers = (i * i for i in range(10_000_000))

# B) Use generators instead of lists
# def squares_gen(n):
#     for i in range(n):
#         yield i * i

# C) Delete large objects when done
# data = [i for i in range(5_000_000)]
# # use data
# del data    # free memory early

# D) Reuse objects instead of creating new ones repeatedly
# Creates many temporary lists:
# result = []
# for i in range(1000):
#     result = result + [i]
# Reuses same list:
# result = []
# for i in range(1000):
#     result.append(i)

# E) Prefer smaller data structures
# - tuple uses less memory than list
# - array uses less memory than list of numbers
# - namedtuple uses less memory than normal class
# - __slots__ reduces object memory
# Example:
# from array import array
# nums = array("i", range(1_000_000))

# F) Stream data instead of loading everything into memory
# Avoid:
# lines = open("bigfile.txt").readlines()
# Better:
# with open("bigfile.txt") as f:
#     for line in f:
#         process(line)  # process one line at a time


In [20]:
# Performance Optimization Techniques: 
# Code Optimization Strategies 
# o Efficient data handling (NumPy, array, memoryviews) 
# o Vectorization and batching 
# o Caching and memoization techniques

In [21]:
# Efficient Data Handling (NumPy, array, memoryview)

# A) list vs array (basic idea)
# A Python list stores references to Python objects (more overhead).
# An array stores raw typed values (less memory, often faster for numeric data).

from array import array
# array stores numbers in a compact C-style format

nums = array("i", [1, 2, 3, 4])
# "i" means signed integer

print(nums)
# OUTPUT: array('i', [1, 2, 3, 4])

nums.append(5)          # add a number
print(nums)
# OUTPUT: array('i', [1, 2, 3, 4, 5])

# When to use array:
# - lots of numbers
# - need less memory than list
# - want faster numeric operations

array('i', [1, 2, 3, 4])
array('i', [1, 2, 3, 4, 5])


In [23]:
# B) memoryview (zero-copy view of data)
# Sometimes copying data is expensive.
# memoryview lets you look at and slice binary data without copying.

# Example: memoryview with bytes
data = b"abcdef"
# bytes is immutable binary data. There's no copying when slicing bytes.

mv = memoryview(data) # create a memoryview of the bytes data
# mv is a view, not a copy

print(mv[0]) # access first byte (ASCII code for 'a')
# OUTPUT: 97   (ASCII code for 'a')

print(mv[1:4].tobytes()) # slice memoryview and convert back to bytes
# OUTPUT: b'bcd'

# The reason to use memoryview:
# - avoid copying large data
# - work with binary data efficiently
# - useful in performance-critical code where copying is a bottleneck

# Slicing bytes normally creates copies, but memoryview allows you to slice without copying, saving time and memory.
# memoryview slicing is cheap because it just creates a new view, not a new copy of the data.

97
b'bcd'


In [24]:
# C) NumPy (fast numeric operations)
# Num - Numerical
# Py - Python
# NumPy is a powerful library for numerical computing in Python.
# It provides:
# - efficient array data structures
# - fast operations on arrays (vectorized)
# - many mathematical functions
# - support for multi-dimensional arrays
# - integration with C/C++ and Fortran code for even faster performance

# Python loop vs NumPy vectorized
import numpy as np
# numpy is used for fast vectorized operations

nums = np.arange(1_000_000)
# creates array [0, 1, 2, ... 999999]

squares = nums * nums
# vectorized: multiplies entire array at once (fast C code)

print(squares[:5])
# OUTPUT: [ 0  1  4  9 16]


[ 0  1  4  9 16]


In [None]:
# List vs numpy array - speed demo

n = 1000000000

# List comprehension
def list_squares(n):
    return [i * i for i in range(n)]

# NumPy vectorized
def numpy_squares(n):
    arr = np.arange(n)
    return arr * arr

# Calculate time for list comprehension
t1 = timeit.timeit(lambda: list_squares(n), number=1)
# Calculate time for NumPy vectorized
t2 = timeit.timeit(lambda: numpy_squares(n), number=1)


In [26]:

# Print results
print(f"List comprehension time: {t1:.2f} seconds")
print(f"NumPy vectorized time: {t2:.2f} seconds")

List comprehension time: 49.05 seconds
NumPy vectorized time: 6.38 seconds


In [27]:
# Some important functions of numpy:
# 1. np.arange() - creates an array of evenly spaced values
# eg: np.arange(5) → array([0, 1, 2, 3, 4])
# 2. np.zeros() - creates an array filled with zeros
# eg: np.zeros(5) → array([0., 0., 0., 0., 0.])
# 3. np.ones() - creates an array filled with ones
# eg: np.ones(5) → array([1., 1., 1., 1., 1.])
# 4. np.linspace() - creates an array of evenly spaced values between two endpoints
# eg: np.linspace(0, 1, 5) → array([0., 0.25, 0.5, 0.75, 1.])
# 5. np.random.rand() - creates an array of random floats in the range [0, 1)
# eg: np.random.rand(5) → array([0.123, 0.456, 0.789, 0.012, 0.345]) (values will vary)
# 6. np.sum() - computes the sum of array elements
# eg: np.sum(np.array([1, 2, 3])) → 6
# 7. np.mean() - computes the mean of array elements
# eg: np.mean(np.array([1, 2, 3])) → 2
# 8. np.dot() - computes the dot product of two arrays
# eg: np.dot(np.array([1, 2]), np.array([3, 4])) → 11
# 9. np.reshape() - gives a new shape to an array without changing its data
# eg: np.reshape(np.array([1, 2, 3, 4]), (2, 2)) → array([[1, 2], [3, 4]])

In [29]:
# B) Batching (process in chunks)
# Batching means: instead of processing one item at a time, process a group of items together.
# Why batching helps
# - fewer function calls
# - fewer overhead operations
# - better CPU cache usage
# - better network/database efficiency (very common)

# Example: batching a list into chunks
def batch_items(items, batch_size):
    # yield batches of size batch_size
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]   # slice returns a batch

items = list(range(1, 11))

for batch in batch_items(items, 3):
    print(batch)

# OUTPUT:
# [1, 2, 3]
# [4, 5, 6]
# [7, 8, 9]
# [10]

# Real-world usage:
# insert 1000 DB rows at once instead of 1-by-1
# send one API request with 100 items instead of 100 requests
# compute in chunks to reduce peak memory

[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
[10]


In [30]:
# Caching means: store the result of an expensive operation, so next time you don’t recompute it.
# This helps when:
# - same inputs repeat
# - computation is expensive

# B) Memoization (caching function results)
# Memoization is caching specifically for function calls:
# - input → output stored
# - same input again → return stored output

# Example: expensive Fibonacci without cache (slow)
def fib(n):
    # very slow for bigger n (recomputes same work again and again)
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

# Memoized Fibonacci using lru_cache (fast)
from functools import lru_cache
# lru_cache stores results of function calls

@lru_cache(maxsize=None)
def fib(n):
    # now repeated calls reuse stored results
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

print(fib(40))
# OUTPUT: 102334155

# lru_cache:
# - saves results by input argument
# - avoids repeated work
# - maxsize=None means unlimited cache (use carefully)

102334155


In [31]:
# C) Manual caching (dictionary cache)
# Sometimes you want custom control.

cache = {}  # dictionary to store results

def expensive_square(n):
    if n in cache:              # if already computed
        return cache[n]         # return cached result

    result = n * n              # expensive computation (example)
    cache[n] = result           # store for next time
    return result

print(expensive_square(10))
print(expensive_square(10))

# OUTPUT:
# 100
# 100


100
100


In [32]:
# Use array when:
# you only store numbers
# you want less memory than list
# operations are simple

# Use memoryview when:
# working with bytes/binary data
# you want to slice without copying
# performance matters (large buffers)

# Use NumPy when:
# heavy numeric work
# large arrays
# math operations need to be fast

# Use vectorization when:
# you have numeric arrays
# you are using NumPy
# you want to avoid Python loops

# Use batching when:
# calling APIs / DB operations
# processing large data streams
# reducing overhead and memory spikes

# Use caching/memoization when:
# same inputs repeat
# expensive function calls happen often
# you want fast repeated lookups

In [33]:
# Accelerating Python
# o Just-in-Time compilation with Numba
# o Using Cython for performance-critical modules
# o Integrating with C/C++ via ctypes or cffi

In [34]:
# Why Python can be slow for some tasks
# Python code is usually executed by an interpreter (CPython).
# For heavy numeric loops and CPU-heavy work, that can be slow.

# So we “accelerate” Python by running the heavy part as:
# - compiled machine code
# - or fast C-level code

# Use these only when:
# - You already profiled your code (you know what is slow)
# - The slow part is CPU-bound (math, loops, processing)
# - You need more speed than pure Python can give

# Typical cases:
# - large numeric loops
# - simulations
# - image/audio processing
# - data science feature engineering
# - heavy computations in production

In [None]:
# Three common acceleration options

# A) Numba (JIT compilation)
# You write mostly normal Python
# Numba compiles certain functions at runtime into fast machine code
# Best for numeric code, loops, NumPy arrays
# Easiest entry point

# B) Cython (compile Python-like code)
# You write Python-like code but can add types
# It compiles into a C extension module
# Very fast for loops and typed variables
# Requires build step (more setup)

# C) ctypes / cffi (call existing C/C++ code)
# You already have a C library, or want to use one
# Python calls C functions directly
# Great when you want to reuse existing high-performance code
# Setup depends on platform/library

In [None]:
# Option A — Numba (JIT) Step-by-Step

# What is JIT?
# Python code is compiled into machine code when you run it, so the next runs are much faster.
!pip install numba -q

# Example: speed up a loop
import time
from numba import njit
# njit tells Numba to compile the function (No Python mode)

def py_sum_squares(n):
    # Pure Python loop (usually slower)
    total = 0
    for i in range(n):
        total += i * i
    return total

@njit
def nb_sum_squares(n):
    # Same logic, but Numba compiles it to fast machine code
    total = 0
    for i in range(n):
        total += i * i
    return total

if __name__ == "__main__":
    n = 20_000_000

    start = time.time()
    py_sum_squares(n)              # run Python version
    print("Python time:", time.time() - start)

    start = time.time()
    nb_sum_squares(n)              # first Numba run compiles (may feel slower)
    print("Numba time (1st run):", time.time() - start)

    start = time.time()
    nb_sum_squares(n)              # second run uses compiled machine code (fast)
    print("Numba time (2nd run):", time.time() - start)


In [None]:
# 5) Option B — Cython (compile performance-critical modules)

# What is Cython in simple words?
# Cython lets you write Python-like code and compile it into C.
# You can add type hints like cdef int to make loops extremely fast.

# Typical structure (real-world)
# You usually create:
# - a .pyx file (Cython code)
# - a setup.py (build config)
# - build it to create a compiled module
# Because building is a multi-file setup, here’s the key idea in the simplest possible form.

# Example Cython code (in a file called example.pyx):
# This is Cython code (looks like Python but supports C types)

# def sum_squares(int n):
#     cdef long total = 0       # typed variable (fast)
#     cdef int i
#     for i in range(n):        # loop becomes C-level fast loop
#         total += i * i
#     return total


| Version                | Speed                   |
| ---------------------- | ----------------------- |
| Pure Python loop       | Slow                    |
| NumPy                  | Fast for vectorized ops |
| Numba                  | Fast after JIT          |
| **Cython (this code)** | **Near C speed**        |


In [35]:
# Why Cython is used
# You want speed close to C
# You are okay with build steps
# You want to accelerate only a few hot functions

In [36]:
# Option C — Calling C/C++ using ctypes or cffi

# Instead of rewriting everything, you can:
# - write a small C function
# - compile it into a shared library
# - call it from Python

# When will one use this:
# - you already have C/C++ code
# - you need maximum performance
# - you want to reuse existing libraries

In [37]:
# A) ctypes (built-in, works with C libraries)
# ctypes lets Python call functions from a compiled .so (Linux/macOS) or .dll (Windows).

# # Python side:
# import ctypes
# # ctypes loads compiled C libraries and calls their functions

# # lib = ctypes.CDLL("./mylib.so")   # Linux/macOS example
# # lib = ctypes.CDLL("mylib.dll")    # Windows example

# # Suppose C has: int add(int a, int b)
# # lib.add.argtypes = (ctypes.c_int, ctypes.c_int)
# # lib.add.restype = ctypes.c_int

# # print(lib.add(10, 20))            # would output 30


In [None]:
# B) cffi (more modern for complex C)
# cffi is often easier than ctypes for larger C APIs.
# pip install cffi


In [None]:
# Choose Numba when:
# mostly numeric code
# you use NumPy arrays
# you want easiest speed boost with minimal setup

# Choose Cython when:
# you need big speedups in loops
# you are okay with compilation/build steps
# you want typed variables and C-speed

# Choose ctypes/cffi when:
# you already have C/C++ library
# or need to use an existing system library
# you need maximum performance and reuse

In [38]:
# Practical warnings

# 1) Don’t do this too early
# First optimize with:
# algorithm improvements
# better data structures
# caching
# NumPy/vectorization
# profiling
# Then accelerate hot spots only.

# 2. Setup complexity increases
# From easiest to hardest typically:
# Numba → Cython → ctypes/cffi (depends)

# Conclusion: Numba speeds up numeric Python using JIT, Cython compiles Python-like code to C for very fast loops, and ctypes/cffi let Python call existing C/C++ code for maximum performance.

In [40]:
# Module 6: Debugging, Testing, and Reliability 
# • Advanced Debugging Tools (pdb, ipdb, pdbpp) 
# • Logging and tracing strategies 
# • Using faulthandler and exception chaining (raise from) 
# • Unit Testing and Test Automation 
# o unittest, pytest, and parameterized tests 
# o Mocking and patching 
# o Code coverage and CI tips 
# • Error Handling Patterns and Fail-Safes 
# o Graceful degradation 
# o Retrying patterns 
# • Unit Testing Basics and Code Coverage Overview 
# o Purpose of unit tests and test structure 
# o Introduction to unittest framework 
# o Concept of code coverage and how to measure it with coverage.py 
# o Simple hands-on mini-demo (testing a class from the ABC/meta class module) 

In [41]:
# Advanced Debugging Tools (pdb, ipdb, pdbpp)

# What is pdp?
# pdb is Python’s built-in interactive debugger.

# It allows you to:
# - stop execution at a specific point
# - see variable values
# - execute code step by step

# No installation needed (built into Python)

def divide(a, b):
    result = a / b
    return result

print(divide(10, 0))   # Bug: division by zero


ZeroDivisionError: division by zero

In [None]:
# Debugging exceptions automatically
# Instead of adding pdb.set_trace(), you can tell Python:
# “Start debugger when an exception happens.”

# Lets use pdb to debug this code. We will set a breakpoint at the line where the division happens.

import pdb

def divide(a, b):
    result = a / b
    return result

print(divide(10, 0))
# When you run this, it will pause at the set_trace() line and give you a (Pdb) prompt where you can inspect variables and step through code.

| Command | Meaning                       |
| ------- | ----------------------------- |
| `l`     | show code around current line |
| `n`     | execute next line             |
| `s`     | step into function            |
| `p var` | print variable value          |
| `c`     | continue execution            |
| `q`     | quit debugger                 |


In [None]:
# Debugging exceptions automatically
# Instead of adding pdb.set_trace(), you can tell Python:
# “Start debugger when an exception happens.”
# python -m pdb your_script.py

In [44]:
# ipdb — pdb with better experience

# ipdb is pdb + IPython features:
# - tab completion
# - syntax highlighting
# - better variable inspection

# Install with:
# pip install ipdb

In [None]:
import ipdb

def add(a, b):
    ipdb.set_trace()     # same idea as pdb, but nicer
    return a + b

print(add(5, 7))
# When you run this, it will pause at the set_trace() line and give you an enhanced interactive prompt with IPython features.

In [45]:
# pdbpp — debugger power-up

# pdbpp (pdb++) is an enhanced debugger:
# - colorful output
# - sticky context (code stays visible)
# - smart variable display

# Install with:
# pip install pdbpp

In [None]:
import pdb

def calc(x):
    y = x * 2
    z = y + 5
    return z

pdb.set_trace()
print(calc(10))
# When you run this, it will pause at the set_trace() line and give you a more user-friendly debugging experience with colored output and better variable display.

# To run this script, save it as demo.py and execute:
# python demo.py

# Debugging Tips (Very Important)
# 1. Don’t debug blindly
# First understand expected behavior
# Then inspect actual behavior
# 2. Don’t leave debugger in production code
# Always remove set_trace() before committing.
# 3. Use debugger instead of print for complex logic
# Debugger shows live state, not static output.

In [46]:
# Logging and tracing strategies 

In [None]:
# print() is not good for real applications because:
# you cannot easily control levels (info vs error)
# you cannot easily write to files + console properly
# no timestamps by default
# no structured format
# hard to filter/search
# So we use the logging module.

In [None]:
# What is tracing?
# Tracing means following a request or operation through multiple steps/components.

# Example:
# A web request might go through:
# API handler → service layer → DB call → external API call

# Tracing helps you answer:
# “Where did time go?”
# “Which step failed?”
# “Which logs belong to the same request?”

# A common tracing technique is using a correlation id / request id.

In [None]:
# Logs have importance levels:
# DEBUG → detailed developer info (used during development)
# INFO → normal events (“started”, “done”)
# WARNING → something odd but not fatal
# ERROR → request failed or operation failed
# CRITICAL → app is in serious trouble

# You control how much logs you want by setting a log level.

In [47]:
# Example 1: Basic logging to console

import logging
# logging is Python's built-in system for structured messages

logging.basicConfig(
    level=logging.INFO,
    # INFO means show INFO, WARNING, ERROR, CRITICAL (but hide DEBUG)
    format="%(asctime)s %(levelname)s %(name)s - %(message)s"
    # format adds timestamp, level, logger name, and message
)

logger = logging.getLogger("app")
# Create a named logger (helps identify where logs come from)

logger.info("Application started")
# INFO log message

logger.warning("This is a warning")
# WARNING message

logger.error("This is an error")
# ERROR message


2026-02-06 13:55:28,144 INFO app - Application started
2026-02-06 13:55:28,146 ERROR app - This is an error


In [48]:
# Logging exceptions properly

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("app")

def divide(a, b):
    return a / b

try:
    divide(10, 0)
except Exception:
    logger.exception("Something went wrong while dividing")
    # logger.exception automatically includes stack trace


2026-02-06 13:56:33,736 ERROR app - Something went wrong while dividing
Traceback (most recent call last):
  File "/var/folders/5g/9xpg7d6d4114s98tv10y9rgm0000gn/T/ipykernel_62586/4146551221.py", line 12, in <module>
    divide(10, 0)
  File "/var/folders/5g/9xpg7d6d4114s98tv10y9rgm0000gn/T/ipykernel_62586/4146551221.py", line 9, in divide
    return a / b
           ~~^~~
ZeroDivisionError: division by zero


In [49]:
# Logging exceptions properly

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("app")

def divide(a, b):
    return a / b

try:
    divide(10, 0)
except Exception:
    print("Something went wrong while dividing")
    # logger.exception automatically includes stack trace


Something went wrong while dividing


In [None]:
# Logging to a file (common production requirement)

import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    filename="app.log",     # write logs to a file
    filemode="a"            # append mode (do not overwrite)
)

logger = logging.getLogger("app")

logger.info("This will go into app.log")

2026-02-06 14:01:10,527 INFO app - This will go into app.log


In [None]:
# Rules / Strategies for logging
# Rule 1: Log events, not noise
# Rule 2: Always include context (request id, user id)
# Rule 3: Use appropriate log levels
# Rule 4: Avoid logging sensitive information
# Rule 5: Use structured logging if possible (e.g. JSON format)

In [None]:
# Tracing strategy: Correlation ID (request id)

# Why do we need correlation ids?
# When many requests happen at the same time, logs get mixed.
# A correlation id helps you filter logs for one request.

# Example: Add a request_id to every log line using LoggerAdapter

import logging
import uuid

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s request_id=%(request_id)s - %(message)s"
)
# Notice: format expects request_id in the log record

base_logger = logging.getLogger("app")

def handle_request():
    request_id = str(uuid.uuid4())
    # Generate a unique id for this "request"

    logger = logging.LoggerAdapter(base_logger, {"request_id": request_id})
    # LoggerAdapter injects request_id into all logs

    logger.info("Request started")

    # simulate steps
    logger.info("Calling database")
    logger.info("Calling external API")

    logger.info("Request finished")

handle_request()


In [None]:
# Measuring time in logs

import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("app")

def slow_step():
    start = time.time()
    time.sleep(0.4)  # simulate slow work
    elapsed = time.time() - start

    logger.info("slow_step finished in %.3f seconds", elapsed)

slow_step()


In [None]:
# Production-friendly logging (rotation)
# If logs write to a file, it can grow huge. Use rotating logs.
# Here use RotatingFileHandler to rotate logs after they reach a certain size.
# Example: rotate after 1 MB, keep 5 backups
import logging
from logging.handlers import RotatingFileHandler

handler = RotatingFileHandler("app.log", maxBytes=1_000_000, backupCount=5)
# maxBytes=1_000_000 means rotate after 1 MB
# backupCount=5 means keep 5 old log files (app.log.1, app.log.2, ...)

logger = logging.getLogger("app")
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.info("This log will go to app.log and rotate when it reaches 1 MB")

In [None]:
# “Good Logging and Tracing” Checklist
# Use logging, not print
# Use appropriate log level (INFO in prod, DEBUG in dev)
# Log exceptions with logger.exception
# Include context (ids, user, filename, step name)
# Add correlation id / request id for tracing
# Log durations for slow operations
# Rotate logs in production

In [None]:
# Using faulthandler and exception chaining (raise from)

# faulthandler → helps when Python crashes/hangs in a “hard” way
# exception chaining (raise from) → helps preserve the real cause of an error

In [None]:
# faulthandler is a built-in module that helps you get a stack trace when Python crashes due to a segmentation fault or hangs.
# It’s useful when you have C extensions or native code that might cause crashes.
# You can enable it at the start of your program to get better crash diagnostics.

# Storyline:
# Usually, when an exception happens, Python prints a traceback and exits.

# But sometimes:
# - the program hangs (stuck forever)
# - the program crashes at C level (segfault)
# - the program is running in production and you need more info
# That’s where faulthandler helps.

In [54]:
# A) Enable faulthandler

import faulthandler
# faulthandler helps print Python stack traces even on hard crashes/hangs

faulthandler.enable()
# enable it early so it can help if something goes wrong later


In [None]:
# B) Dump stack trace if program seems stuck (timeout dump)
# This is very useful for “program is frozen” situations.

import faulthandler
import time

faulthandler.enable()
# Turn on faulthandler to allow stack trace dumps

faulthandler.dump_traceback_later(3, repeat=False)
# After 3 seconds, automatically print stack traces of all threads
# repeat=False means do it once (not repeatedly)

print("Starting work that will hang...")
time.sleep(10)
# This simulates a hang / long wait

faulthandler.cancel_dump_traceback_later()
# Cancel future scheduled dumps (good practice if code finishes earlier)


In [None]:
# C) Enable using command line (no code changes)
# You can run any Python program with faulthandler enabled:
# python -X faulthandler your_script.py
# This is helpful when you cannot edit code easily.

In [None]:
# What is exception chaining (raise from)?
# When you catch an exception and want to raise a new one, you can use “raise from” to preserve the original exception as the cause.

# Storyline:
# When you catch an exception and raise a new one, you should keep the original error as the cause.

# raise NewError(...) from original_error means:
# “I’m raising a nicer/higher-level error, but the real reason is this original error.”
# This preserves the real cause in the traceback

# Bad Example - A) Problem: losing the original error
def read_number(text):
    try:
        return int(text)
    except ValueError:
        # Raising a new error WITHOUT keeping original cause
        raise RuntimeError("Invalid input, expected a number")
# If you run read_number("abc"), you get a RuntimeError, but the original ValueError is lost, making debugging harder.

# Good Example - B) Solution: using raise from to keep original error
def read_number(text):
    try:
        return int(text)
    except ValueError as e:
        # Raising a new error but keeping original cause with "from"
        raise RuntimeError("Invalid input, expected a number") from e
# Now if you run read_number("abc"), you get a RuntimeError that says "Invalid input, expected a number", but the traceback also shows the original ValueError that caused it, making debugging easier.


# Now Python prints both:
# the original ValueError (real cause)
# plus your RuntimeError (higher-level meaning)
# This is much easier to debug.

In [55]:
# When to use raise from:
# Use it when you:
# - want to convert low-level errors into domain-specific errors
# - want clearer messages for users
# - still need the real cause for debugging

# Example real world:
# - DB layer raises ConnectionError
# - You raise ServiceUnavailableError from it

In [None]:
# Use faulthandler when:
# program freezes (deadlock/hang)
# program crashes unexpectedly
# you need reliable stack traces in production

# Use raise from when:
# you catch an exception and raise a different one
# you want a clean error message but still keep the original cause

In [56]:
# Unit Testing and Test Automation
# o unittest, pytest, and parameterized tests
# o Mocking and patching
# o Code coverage and CI tips

In [57]:
# Unit Testing and Test Automation

# What is unit testing?
# A unit test checks a small piece of code (a “unit”), usually a function or method.
# Example:
# function add(a, b) should return a+b
# you write a test that verifies that behavior

In [58]:
# Why do we need unit tests?

# Without tests:
# you change code
# something breaks somewhere else
# you find it late (or in production)

# With tests:
# you run tests in seconds
# you catch bugs early
# you refactor safely

In [None]:
# Test automation means:
# tests run automatically (not manually)
# usually on every commit / pull request
# often in CI (GitHub Actions, GitLab CI, Jenkins)

In [None]:
# calculator.py

def add(a, b):
    # Return sum of two numbers
    return a + b

def divide(a, b):
    # Raise error if dividing by zero
    if b == 0:
        raise ValueError("b cannot be zero")
    return a / b


In [None]:
# test_calculator_unittest.py

import unittest
# unittest is Python's built-in testing framework

from calculator import add, divide
# import functions we want to test

class TestCalculator(unittest.TestCase):
    # A test class groups related tests

    def test_add(self):
        # test method names must start with "test_"
        self.assertEqual(add(2, 3), 5)
        # assertEqual checks expected vs actual

    def test_divide(self):
        self.assertEqual(divide(10, 2), 5)

    def test_divide_by_zero(self):
        # assertRaises checks if an error is raised
        with self.assertRaises(ValueError):
            divide(10, 0)

if __name__ == "__main__":
    unittest.main()
    # runs all tests in this file


In [None]:
# Why pytest?
# less boilerplate than unittest
# better error output
# easy fixtures and parameterization

# Install pytest:
# pip install pytest

# A) Same tests using pytest (file: test_calculator_pytest.py)

import pytest
# pytest is a third-party testing framework

from calculator import add, divide

def test_add():
    # simple assert works in pytest
    assert add(2, 3) == 5

def test_divide():
    assert divide(10, 2) == 5

def test_divide_by_zero():
    # pytest.raises checks exceptions
    with pytest.raises(ValueError):
        divide(10, 0)

# To run tests, use: pytest test_calculator_pytest.py
# Try: pytest -v to see more details about which tests ran and their results

In [None]:
# Parameterized tests (same test with many inputs)
# Why parameterization?
# Instead of writing many similar tests, you write one test and give it many input sets.

import pytest
from calculator import add

@pytest.mark.parametrize(
    "a,b,expected",
    [
        (1, 2, 3),
        (0, 0, 0),
        (-1, 5, 4),
    ]
)
def test_add_param(a, b, expected):
    assert add(a, b) == expected
# This runs the same test three times with different values for a, b, and expected.
# To run: pytest test_calculator_pytest.py -v

In [59]:
# Mocking and patching 

In [None]:
# What is mocking?
# Replace a real dependency (API/DB/time/random/file) with a fake one so tests are fast and predictable.

# You mock when your code depends on something that is:
# - slow (API call)
# - unstable (network)
# - external (database)
# - random (random numbers)
# - time-based (current time)

# lets mock a function which is mocking a weather API call
# weather.py
import requests

def get_temperature(city):
    # Calls an external service (bad for unit tests)
    response = requests.get(f"https://example.com/weather?city={city}")
    data = response.json()
    return data["temp"]

# test_weather.py
from unittest.mock import patch
# patch temporarily replaces a function/object during the test

from weather import get_temperature

def test_get_temperature():
    fake_json = {"temp": 30}

    # patch "requests.get" inside the weather module context
    with patch("weather.requests.get") as mock_get:
        # configure what the mocked get() should return
        mock_get.return_value.json.return_value = fake_json

        temp = get_temperature("Delhi")
        assert temp == 30


In [None]:
# Code coverage and CI tips

In [None]:
# Code Coverage means how much of code is tested?
# Coverage answers:
# “How many lines of my code were executed by tests?”

# High coverage does NOT guarantee no bugs, but low coverage usually means many untested paths.

# Install coverage library:
# pip install coverage

# Run tests with coverage (unittest):
# coverage run -m unittest
# coverage report -m

# Run tests with coverage (pytest):
# coverage run -m pytest
# coverage report -m

# Generate HTML report:
# coverage html
# This creates an htmlcov/index.html file you can open in a browser to see which lines are covered (green) and which are missed (red).

In [None]:
# CI Tips (Continuous Integration)

# What is CI?
# When you push code to GitHub/GitLab, it automatically runs tests and checks (like coverage) before merging.
# This helps catch issues early and maintain code quality.

# CI “must do” checklist
# - install dependencies
# - run tests (pytest or unittest)
# - run linting (optional but common)
# - run coverage and fail if too low

# Simple Rules to follow:
# 1.   Keep tests fast (unit tests should run in seconds)
# 2.   Avoid real network/DB calls in unit tests (mock them)
# 3.   Separate slow tests as “integration tests”
# 4.   Run CI on every pull request

In [60]:
# Error Handling Patterns and Fail-Safes 
# o Graceful degradation 
# o Retrying patterns

In [None]:
# Graceful Degradation

# Examples:
# API down → show cached data
# image fails → show placeholder
# recommendation service fails → show popular items
# DB read fails → return empty list with warning

# A) Bad approach (crash on error)
def get_username(data):
    return data["user"]["name"]   # crashes if key missing
# If user is missing → program stops.

# B) Graceful degradation using safe defaults
def get_username(data):
    # get() returns None if key is missing
    user = data.get("user", {})
    return user.get("name", "Guest")
# If user is missing → returns "Guest" instead of crashing.

# C) Graceful degradation with try/except
def load_config(path):
    try:
        with open(path) as f:
            return f.read()
    except FileNotFoundError:
        # fallback behavior
        print("Config file not found, using defaults")
        return "{}"
# If config file is missing → prints warning and returns empty config instead of crashing.

In [None]:
# 3) Graceful degradation with feature fallback
def get_profile_picture(user):
    try:
        return download_image(user["photo_url"])
    except Exception:
        # fallback image
        return "default_profile.png"
# If photo_url is missing or download fails → returns default image instead of crashing.

In [None]:
# Retrying Patterns
# Why retry?
# Sometimes operations fail due to temporary issues (network glitch, DB lock, API rate limit).
# Retrying can help recover from these transient errors without user intervention.

# Useful for temporary failures, such as:
# network glitches
# API timeouts
# database connection issues

# Not useful for:
# invalid input
# programming errors
# permission issues

In [None]:
# Naive retry

import time

def fetch_data():
    # pretend this sometimes fails
    raise ConnectionError("Network issue")

def fetch_with_retry():
    for attempt in range(3):
        try:
            return fetch_data()
        except ConnectionError:
            print("Attempt failed, retrying...")
            time.sleep(1)
    raise RuntimeError("All retries failed")
# This tries to fetch data up to 3 times, waiting 1 second between attempts. If all attempts fail, it raises a RuntimeError.

In [None]:
# 6) Better retry pattern (with attempt number)

import time

def fetch_with_retry(max_retries=3):
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempt {attempt}")
            return fetch_data()
        except ConnectionError as e:
            print("Error:", e)
            time.sleep(1)
    return None   # graceful failure instead of crash
# This version prints the attempt number and error message, and returns None if all retries fail instead of raising an exception.

In [None]:
# Exponential backoff - useful in production to avoid overwhelming services
# What is exponential backoff?
# Instead of waiting a fixed time between retries, you wait longer after each failure (e.g. 1s, then 2s, then 4s).
# This helps reduce load on a struggling service and increases chances of recovery.

# If many clients retry at the same time:
# - they overload the server
# - retries keep failing
# - Exponential backoff waits longer each time.

import time

def fetch_with_backoff(max_retries=4):
    delay = 1
    for attempt in range(1, max_retries + 1):
        try:
            return fetch_data()
        except ConnectionError:
            print(f"Retry {attempt}, waiting {delay} seconds")
            time.sleep(delay)
            delay *= 2   # increase delay exponentially
    return None
# This version waits 1s after the first failure, 2s after the second, and 4s after the third before giving up.

In [None]:
# 8) Retrying + graceful degradation (together)

def get_user_data():
    data = fetch_with_backoff()
    if data is None:
        # fallback behavior
        print("Service unavailable, returning cached data")
        return {"name": "Guest"}
    return data
# This tries to fetch user data with retries. If it ultimately fails, it returns a default user instead of crashing.

In [None]:
# Do NOT retry:
# invalid input errors
# permission errors
# syntax/logic bugs

# Retry only when:
# failure is likely temporary

In [None]:
# Real-world example:
# Log every failure and retry
# Set a maximum retry limit
# Use exponential backoff
# Always provide fallback behavior
# Never hide serious errors silently

| Situation               | Best Pattern         |
| ----------------------- | -------------------- |
| Missing optional data   | Graceful degradation |
| Temporary network issue | Retry                |
| External API down       | Retry + fallback     |
| Invalid input           | Fail fast            |
| Critical corruption     | Stop and alert       |


In [61]:
# Unit Testing Basics and Code Coverage Overview 
# o Purpose of unit tests and test structure 
# o Introduction to unittest framework 
# o Concept of code coverage and how to measure it with coverage.py 
# o Simple hands-on mini-demo (testing a class from the ABC/meta class module) 

In [None]:
# Mini Demo: Testing a Class from an ABC Module

# We’ll create:
# an ABC interface (contract)
# a concrete class that implements it
# a small test suite for it using unittest

# Step A: Create the ABC module (file: payments.py)
from abc import ABC, abstractmethod
# ABC lets us create abstract base classes
# abstractmethod marks methods that must be implemented by child classes

class PaymentGateway(ABC):
    # This is an abstract base class (a contract)

    @abstractmethod
    def pay(self, amount):
        # Every child class MUST implement pay()
        pass

    @abstractmethod
    def refund(self, amount):
        # Every child class MUST implement refund()
        pass


class CreditCardGateway(PaymentGateway):
    # This class implements the contract, so it can be instantiated

    def pay(self, amount):
        # basic validation
        if amount <= 0:
            raise ValueError("amount must be positive")
        return f"paid {amount} by credit card"

    def refund(self, amount):
        # basic validation
        if amount <= 0:
            raise ValueError("amount must be positive")
        return f"refunded {amount} to credit card"
# PaymentGateway defines required methods
# CreditCardGateway must implement them
# We also added simple validation to test both success and failure cases

# Step B: Write unit tests using unittest (file: test_payments.py)
import unittest
# unittest provides TestCase and assertions

from payments import CreditCardGateway
# import the class we want to test

class TestCreditCardGateway(unittest.TestCase):
    # Group tests related to CreditCardGateway

    def setUp(self):
        # setUp runs before each test method
        self.gateway = CreditCardGateway()

    def test_pay_success(self):
        # Arrange
        amount = 100

        # Act
        result = self.gateway.pay(amount)

        # Assert
        self.assertEqual(result, "paid 100 by credit card")

    def test_refund_success(self):
        amount = 50
        result = self.gateway.refund(amount)
        self.assertEqual(result, "refunded 50 to credit card")

    def test_pay_invalid_amount(self):
        # Check that invalid input raises the right error
        with self.assertRaises(ValueError):
            self.gateway.pay(0)

    def test_refund_invalid_amount(self):
        with self.assertRaises(ValueError):
            self.gateway.refund(-10)

if __name__ == "__main__":
    unittest.main()
    # runs all tests in this file

# Step C: Run the tests
# python -m unittest -v


In [62]:
# Module 7: Writing Production-Grade Python Code
# • Code Organization & Architecture
# • Modular structure, packaging, and dependency management
# • Virtual environments, poetry, and pip-tools
# • Clean Code and Best Practices
# o PEP8, type hints, static analysis (mypy, flake8, pylint)
# • Design patterns for Python (Singleton, Factory, Observer)
# • Logging, configuration management, and environment variables
# • Deploying and Monitoring Python Applications
# o Building CLI tools and daemons
# o Dockerizing Python apps
# o Health checks, observability, and error monitoring

In [63]:
# • Code Organization & Architecture
# • Modular structure, packaging, and dependency management

In [64]:
# What is Code Organization?
# It means arranging your code so that:
# - it’s easy to find things
# - easy to change without breaking everything
# - easy for other developers to understand
# Instead of one huge file, we split code into modules and packages.

# A module is a single Python file: something.py
# Example: math_utils.py is a module.

In [None]:
# What is a package?
# A package is a folder containing Python code, usually with an __init__.py.
# Example:
myapp/
  __init__.py
  services.py
  utils.py

# Keep business logic separate from I/O, so it can be tested easily.

In [None]:
# A Small “Production-Style” Project Example
# Order total calculator

# Folder Structure:
# myshop/
#   pyproject.toml
#   src/
#     myshop/
#       __init__.py
#       main.py
#       services/
#         __init__.py
#         pricing.py
#       models/
#         __init__.py
#         order.py
#   tests/
#     test_pricing.py


In [None]:
# A) src/myshop/models/order.py
# This file contains data structures (models).
from dataclasses import dataclass
# dataclass makes a small "data container" class easily

@dataclass(frozen=True)
class Order:
    # Order is a simple model holding order data
    items_count: int        # number of items in the order
    price_per_item: float   # cost per item


# B) src/myshop/services/pricing.py
# This file contains business logic (pricing rules).
from myshop.models.order import Order
# import the Order model

def calculate_total(order: Order) -> float:
    # calculate total price for an order

    if order.items_count < 0:
        raise ValueError("items_count cannot be negative")
        # fail fast on invalid input

    if order.price_per_item < 0:
        raise ValueError("price_per_item cannot be negative")

    subtotal = order.items_count * order.price_per_item
    # base total

    # example production rule: discount for bulk orders
    if order.items_count >= 10:
        subtotal *= 0.9  # 10% discount

    return round(subtotal, 2)
    # round to 2 decimals for currency-like values


# C) src/myshop/main.py
# This file is the entry point (like CLI or app start).
from myshop.models.order import Order
# import model

from myshop.services.pricing import calculate_total
# import business logic

def main():
    # create a sample order (in real apps, input comes from user/api/db)
    order = Order(items_count=12, price_per_item=99.99)

    total = calculate_total(order)
    # run business logic

    print("Order total:", total)
    # output only in main layer (not inside pricing logic)

if __name__ == "__main__":
    main()
    # run only when executed directly


In [65]:
# 6) Packaging and Dependency Management

In [66]:
# What is Packaging?
# Making your code installable like a product.

# So you can do:
# - pip install myshop
# - import it anywhere
# - deploy it reliably

In [67]:
# What is a dependency?
# A dependency is a library your project needs, like:
# - requests
# - pytest
# - numpy

# Dependency management means:
# tracking what libraries are required
# keeping versions consistent across machines

In [None]:
# 7) pyproject.toml (modern packaging config)
# This is the standard file for packaging and dependencies (used by Poetry, uv, pip-tools, etc.).

# Example minimal pyproject.toml (Poetry-style):

[tool.poetry]
name = "myshop"
version = "0.1.0"
description = "A simple order calculator"
authors = ["You <you@example.com>"]

[tool.poetry.dependencies]
python = "^3.11"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


| Section                                | Purpose          |
| -------------------------------------- | ---------------- |
| `[tool.poetry]`                        | Project metadata |
| `[tool.poetry.dependencies]`           | Runtime needs    |
| `[tool.poetry.group.dev.dependencies]` | Dev tools        |
| `[build-system]`                       | How to build     |


In [None]:
# 8) How to run tests in this structure
# Example test file: tests/test_pricing.py
from myshop.models.order import Order
from myshop.services.pricing import calculate_total

def test_discount_applies_for_10_or_more():
    order = Order(items_count=10, price_per_item=10.0)
    assert calculate_total(order) == 90.0


In [None]:
# Virtual Environments (venv)
python -m venv .venv
# Creates a virtual environment folder named .venv in your project

# Windows:
.\.venv\Scripts\Activate.ps1
# macOS/Linux:
source .venv/bin/activate

pip install requests
# Installs requests only inside this venv

pip freeze > requirements.txt
# Saves exact installed versions into requirements.txt

pip install -r requirements.txt
# Installs exact versions listed


In [None]:
# Poetry — “project manager” for Python dependencies

# Poetry is a tool that:
# - creates and manages virtual environments automatically
# - manages dependencies cleanly
# - locks exact versions in a lock file
# - helps package and publish projects

# Poetry uses:
# pyproject.toml → what you want (high-level dependency rules)
# poetry.lock → what you actually got (exact pinned versions)

# Using Poetry
# Step 1: Create a new project
poetry new myproject
# Creates a standard project structure

# Step 2: Or initialize Poetry in an existing project
poetry init
# Creates pyproject.toml interactively

# Step 3: Add a dependency
poetry add requests
# Adds requests to pyproject.toml and pins versions in poetry.lock

# Step 4: Install dependencies
poetry install
# Creates/uses venv and installs dependencies exactly as locked

# Step 5: Run your program inside Poetry’s environment
poetry run python app.py
# Runs app.py using the Poetry-managed venv

# Step 6: Open a shell in Poetry environment
poetry shell
# Activates the venv shell for this project

Why Poetry is useful
- You don’t manually manage requirements.txt most of the time
- Lock file ensures all developers get the same versions
- One command sets up the whole project

In [None]:
# pip-tools
# pip-tools is a simpler alternative to Poetry for dependency management.
# It helps you maintain:
# - a human-written list of direct dependencies (requirements.in)
# - an auto-generated list of all dependencies with exact versions (requirements.txt)

# It typically uses two files:
# requirements.in → what you choose (direct dependencies)
# requirements.txt → what gets installed (fully pinned + sub-dependencies)

# Step 1: Install pip-tools
pip install pip-tools
# Installs pip-compile and pip-sync

# Step 2: Create requirements.in
# Example requirements.in (you write this):
requests
flask

# Step 3: Compile pinned requirements
pip-compile requirements.in
# Produces requirements.txt with exact versions (including sub-dependencies)

# Step 4: Install exactly what’s pinned
pip-sync
# Makes your environment match requirements.txt exactly (adds/removes packages)

Why pip-tools is useful
- Keeps dependencies clean and reproducible
- You edit only requirements.in
- requirements.txt is machine-generated and consistent

In [None]:
# Use venv + pip when:
# small scripts
# simple projects
# you want minimal tooling

# Use Poetry when:
# you want an all-in-one tool (env + deps + packaging)
# team projects
# you like pyproject.toml + lock file workflow

# Use pip-tools when:
# you want to stay in the “pip ecosystem”
# you prefer requirements files
# you want reliable pinned versions with a simple workflow

In [68]:
# Why best practices matter

# When code grows:
# - many people read it
# - bugs hide in unclear logic
# - poor style causes misunderstandings

# Best practices help:
# - humans understand code faster
# - tools catch bugs early
# - teams work consistently

In [None]:
# PEP 8 — Python Style Guide (The Foundation)
# PEP 8 is the official style guide for Python code.

# It defines:
# - how to name variables
# - how to format code
# - how to structure imports
# - how long lines should be

In [None]:
# A) Naming conventions (most important PEP 8 rule)
# Function names: lowercase_with_underscores
def calculate_total_price():
    pass

# Variable names: lowercase_with_underscores
item_count = 10

# Class names: CapitalizedWords
class OrderService:
    pass

# Constants: ALL_CAPS
MAX_RETRIES = 3


In [None]:
# B) Indentation and spacing
def add(a, b):
    result = a + b      # 4 spaces for indentation
    return result

# Blank lines improve readability

# Rules:
# - 4 spaces per indent
# - blank lines between functions/classes

In [None]:
# C) Line length and readability
# Bad (too long, hard to read)
total = price * quantity * tax_rate * discount_factor * service_charge

# Good (broken into readable lines)
total = (
    price * quantity
    * tax_rate
    * discount_factor
    * service_charge
)


In [None]:
# D) Imports order (PEP 8)
# Standard library imports
import os
import sys

# Third-party imports
import requests

# Local application imports
from myapp.utils import helpers


In [None]:
# 4) Type Hints — Make Code Self-Documenting

# Type hints tell:
# - what type of data a variable expects
# - what type a function returns

# A) Basic type hints example
def add(a: int, b: int) -> int:
    # a and b must be integers
    # function returns an integer
    return a + b
# This makes it clear to readers and tools that add() works with integers.

# B) Type hints for collections
from typing import List, Dict

def average(scores: List[int]) -> float:
    return sum(scores) / len(scores)

def get_user(user: Dict[str, str]) -> str:
    return user["name"]
# This shows that average() expects a list of integers and returns a float, while get_user() expects a dictionary with string keys and values.

# C) Optional and Union types
from typing import Optional

def find_user(user_id: int) -> Optional[str]:
    # returns a string or None
    if user_id == 0:
        return None
    return "Alice"
# This indicates that find_user() might return a string (username) or None if the user is not found.

In [None]:
# Static Analysis — Catch Problems Before Running Code
# Static analysis tools analyze code without executing it.

# They help find:
# - type errors
# - unused variables
# - unreachable code
# - style violations
# - suspicious patterns

In [None]:
# 6) mypy — Type Checker
# It:
# “Does the code use values in a way that matches the type hints?”

# Install mypy:
pip install mypy

# Example Code:
def greet(name: str) -> str:
    return "Hello " + name

greet(10) # This will cause a type error because 10 is not a string.

# To run mypy:
mypy your_script.py
# mypy will report that greet(10) is a type error because 10 is an int, not a str.
# Output:
# error: Argument 1 to "greet" has incompatible type "int"; expected "str"



In [None]:
# flake8 — Style + Simple Errors

# What flake8 checks
# - PEP 8 formatting
# - unused imports
# - unused variables
# - basic mistakes

# Install flake8:
pip install flake8

# Example:
import math

def square(x):
    y = x * x
    return x * x
# This code has two issues:
# 1. The variable y is assigned but never used.
# 2. The math import is unused.

# To run flake8:
flake8 your_script.py
# flake8 will report:
# your_script.py:1:1: F401 'math' imported but unused
# your_script.py:3:5: F841 local variable 'y' is assigned to but never used

In [None]:
# pylint — Deeper Code Quality Checks
# It does:
# - more opinionated than flake8
# - checks naming, complexity, structure
# - gives a score (quality indicator)

# Install pylint:
# pip install pylint

# Example:
def f(x):
    return x + 1
# This function is very simple, but pylint might complain about:
# - function name is too short (f)
# - missing docstring

# To run pylint:
pylint your_script.py
# pylint will report:
# your_script.py:1:0: C0103: Function name "f" doesn't conform to snake_case naming style (invalid-name)
# your_script.py:1:0: C0114: Missing function docstring (missing-function-docstring)

| Tool       | Purpose                    |
| ---------- | -------------------------- |
| PEP 8      | Human-readable style rules |
| Type hints | Self-documenting + safety  |
| mypy       | Type correctness           |
| flake8     | Style + small mistakes     |
| pylint     | Overall code quality       |


In [None]:
# One Simple Complete Example

from typing import List

def calculate_average(scores: List[int]) -> float:
    if not scores:
        raise ValueError("scores list cannot be empty")

    total = sum(scores)
    count = len(scores)

    return total / count

# Why this is good:
# - Type hints make it clear what the function expects and returns.
# - It checks for empty input and raises a clear error.
# - It uses built-in functions for clarity and efficiency.

In [70]:
# Design patterns for Python (Singleton, Factory, Observer) 

In [None]:
# 1) Singleton Pattern (One instance only)
# “Only one object of this class should exist in the whole program.”

# Example real-world usage:
# - App configuration object
# - Logger object
# - Database connection manager (sometimes)

class Singleton:
    _instance = None
    # _instance will store the single created object

    def __new__(cls):
        # __new__ controls object creation (runs before __init__)
        if cls._instance is None:
            # If no instance exists yet, create one
            cls._instance = super().__new__(cls)
        return cls._instance
        # Always return the same instance

# Create objects
a = Singleton()
b = Singleton()

print(a is b)
# OUTPUT: True


In [None]:
# 2) Factory Pattern (Create objects in a clean way)
# “A function or class that creates and returns objects for you.”
# Instead of writing:
if type == "car": ...
elif type == "bike": ...
# everywhere, we centralize object creation in one place.

In [None]:
# Step 1: Create a base concept
class Payment:
    def pay(self, amount):
        raise NotImplementedError

# Step 2: Create concrete implementations
class CardPayment(Payment):
    def pay(self, amount):
        return f"Paid {amount} using Card"

class UPIPayment(Payment):
    def pay(self, amount):
        return f"Paid {amount} using UPI"

# Step 3: Create a factory function
def payment_factory(method):
    # Decide which object to create based on method
    if method == "card":
        return CardPayment()
    if method == "upi":
        return UPIPayment()
    raise ValueError("Unknown payment method")

# Step 4: Use the factory
p1 = payment_factory("card")
print(p1.pay(100))
# OUTPUT: Paid 100 using Card

p2 = payment_factory("upi")
print(p2.pay(200))
# OUTPUT: Paid 200 using UPI


In [None]:
# 3) Observer Pattern (Notify many listeners)
# Observer means:
# “When one thing changes, automatically notify many other things.”

# Example real-world usage:
# - Event systems (button click → notify all listeners)
# - Pub/Sub systems (message published → notify all subscribers)
# - YouTube channel (new video → notify all subscribers)
# - Stock price updates (price change → notify all traders)

# Step 1: Create the Subject (the thing being watched)
class NewsChannel:
    def __init__(self):
        self.subscribers = []
        # list of observer functions

    def subscribe(self, fn):
        # add a subscriber (observer)
        self.subscribers.append(fn)

    def unsubscribe(self, fn):
        # remove a subscriber
        self.subscribers.remove(fn)

    def publish(self, message):
        # notify all subscribers
        for fn in self.subscribers:
            fn(message)

# Step 2: Create observers (subscribers)
def mobile_user(msg):
    print("Mobile notification:", msg)

def email_user(msg):
    print("Email notification:", msg)

# Step 3: Connect them and publish updates
channel = NewsChannel()

channel.subscribe(mobile_user)
channel.subscribe(email_user)

channel.publish("New video uploaded!")
# OUTPUT:
# Mobile notification: New video uploaded!
# Email notification: New video uploaded!
# This way, when the news channel publishes a message, all subscribers are automatically notified without the channel needing to know about them individually.

In [None]:
# Use Singleton when:
# exactly one shared instance makes sense
# you need shared state/resource manager

# Use Factory when:
# object creation depends on input/config
# you want to avoid repeated if/elif object creation logic

# Use Observer when:
# one event should notify many receivers
# you want event-driven design

In [None]:
# Deploying and Monitoring Python Applications
# o Building CLI tools and daemons
# o Dockerizing Python apps
# o Health checks, observability, and error monitoring

In [None]:
# Building CLI tools and daemons
# A CLI tool is a Python program you run like this:
# python mytool.py --name Alice

# Simple CLI using argparse
import argparse
# argparse helps read arguments from command line

def main():
    parser = argparse.ArgumentParser(description="Simple greeting tool")
    # Create argument parser with a description

    parser.add_argument("--name", required=True, help="Your name")
    # --name is a required command-line argument

    args = parser.parse_args()
    # Parse arguments from command line

    print(f"Hello, {args.name}")
    # Use the argument

if __name__ == "__main__":
    main()

# python app.py --name Bob
# OuTPUT: Hello, Bob

In [None]:
import time
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("worker")

def run_worker():
    logger.info("Worker started")
    while True:
        logger.info("Doing background work...")
        time.sleep(5)
        # simulate periodic task

if __name__ == "__main__":
    run_worker()


In [None]:
# What is Dockerization?
# Docker lets you:
# Package your app + Python + dependencies into one container.

# Why Docker is used
# - same environment everywhere
# - easy scaling
# - simple CI/CD integration
# - cloud-friendly

# Simple Dockerized Python app
# A) Python app (app.py)
print("Hello from Dockerized Python app")

# B) Dockerfile (instructions for Docker)
# Use official Python image
FROM python:3.11-slim

# Set working directory inside container
WORKDIR /app

# Copy files into container
COPY app.py .

# Command to run when container starts
CMD ["python", "app.py"]

# C) Build Docker image
docker build -t my-python-app .

# D) Run Docker container
docker run my-python-app

# Output: 
# Hello from Dockerized Python app


In [None]:
# A health check answers:
# “Is the app running and ready to serve requests?”

# Used by:
# - Docker
# - Kubernetes
# - load balancers
# - monitoring systems

# Simple health check example
from http.server import BaseHTTPRequestHandler, HTTPServer

class HealthHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        if self.path == "/health":
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b"OK")
        else:
            self.send_response(404)
            self.end_headers()

server = HTTPServer(("0.0.0.0", 8000), HealthHandler)
server.serve_forever()
# This creates a simple HTTP server that responds to GET /health with 200 OK and "OK" in the body. You can use this endpoint for health checks in Docker/Kubernetes.

# Access it:
# curl http://localhost:8000/health

# Output: OK

In [None]:
# Observability means understanding your app through:
# logs
# metrics
# traces

# This helps answer:
# what happened?
# where did it fail?
# why is it slow?

# Metrics are numbers like:
# requests per second
# error count
# response time

In [None]:
# Error monitoring:
# - captures unhandled exceptions
# - sends alerts
# - stores stack traces
# - helps debug production issues

# Example: Capture and log errors
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("app")

def process_order(order_id):
    try:
        if order_id == 0:
            raise ValueError("Invalid order id")
    except Exception:
        logger.exception("Order processing failed")
# This will log the error with a stack trace, which is crucial for debugging production issues. You can integrate this with error monitoring services like Sentry for real-time alerts and tracking.