[Reference](https://blog.stackademic.com/optimizing-python-performance-tips-and-tricks-for-faster-data-processing-38992cf57dcd)

In [1]:
import cProfile

def my_function():
    """Placeholder workload for the profiler; it currently does nothing.

    Replace the body with the code whose performance should be measured.
    """
    pass

if __name__ == "__main__":
    # Profile a single call of my_function(); cProfile prints the report to
    # stdout, ordered by standard name (the default sort).
    cProfile.run(statement='my_function()')

         4 function calls in 0.000 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <ipython-input-1-367c8f714e24>:3(my_function)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [2]:
import numpy as np

# Baseline: double every integer with an explicit Python-level loop
result = list()
for i in range(1_000_000):
    result.append(2 * i)

# Same computation expressed as a single NumPy array operation
arr = np.arange(1_000_000)
result = np.multiply(arr, 2)

In [4]:
import multiprocessing

def process_data(data):
    """Placeholder for the CPU-intensive work applied to one chunk of data.

    The body is a stub: it ignores ``data`` and returns ``None``. Replace it
    with the real per-chunk computation.
    """
    return None

if __name__ == '__main__':
    # NOTE(review): `data` is not defined in any visible cell of this notebook
    # (there is no In [3]); it must be a sliceable sequence created beforehand.
    # Split the input into fixed-size chunks *before* starting the pool so a
    # failure here cannot leave worker processes behind.
    chunk_size = 1000
    data_chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

    # The context manager shuts the worker processes down even when
    # process_data raises inside map(); map() itself blocks until every chunk
    # has been processed, so no result is lost on exit.
    with multiprocessing.Pool() as pool:
        results = pool.map(process_data, data_chunks)

In [5]:
# cython_example.pyx
def sum_squares(int n):
    """Return the sum of i*i for every i in range(n); 0 when n <= 0.

    The loop counter and accumulator are 64-bit C integers. With a plain C
    ``int`` (typically 32-bit) the running total silently wraps as soon as it
    exceeds 2**31 - 1, which already happens for n = 1862. With ``long long``
    the result stays exact while the sum fits in a signed 64-bit integer
    (n up to roughly three million).
    """
    cdef long long i, total = 0
    for i in range(n):
        total += i * i
    return total

# setup.py
from distutils.core import setup
from Cython.Build import cythonize

# Translate cython_example.pyx into an extension module description and hand
# it to setup() so it gets compiled when the setup script is run.
# NOTE(review): the `distutils` import used by this cell was removed from the
# standard library in Python 3.12 (PEP 632); on modern interpreters `setup`
# has to come from setuptools instead.
extensions = cythonize("cython_example.pyx")
setup(ext_modules=extensions)

In [6]:
from numba import jit

@jit(nopython=True)
def sum_squares(n):
    """Return the sum of k*k for every k in range(n).

    Compiled by Numba in nopython mode.
    """
    acc = 0
    for k in range(n):
        acc = acc + k * k
    return acc

In [8]:
def process_data(file_path):
    """Lazily yield one result per line of the text file at ``file_path``.

    This is a generator: the file is read line by line while the caller
    iterates, so the whole file is never held in memory at once. The file is
    closed when the generator is exhausted or closed.

    Args:
        file_path: Path of the text file to read.

    Yields:
        The processed form of each line. The processing step is a placeholder
        that passes the line through unchanged (trailing newline included).

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path) as file:
        for line in file:
            # Placeholder processing step. `result` has to be assigned here:
            # without it the `yield` would raise NameError (or silently pick
            # up a stale notebook-level variable of the same name).
            result = line
            yield result

# Consume the generator one result at a time; nothing is read from the file
# until the loop asks for the next item.
results_stream = process_data('large_file.txt')
for result in results_stream:
    # Do something with each result
    pass

In [9]:
from functools import lru_cache

@lru_cache(maxsize=None)
def fibonacci(n):
    """Return the n-th Fibonacci number (fibonacci(0) == 0, fibonacci(1) == 1).

    Any n below 2 is returned unchanged. Results are memoised without a size
    limit, so each distinct n is computed only once.
    """
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)

In [11]:
import asyncio
import aiohttp

async def fetch(url, session=None):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        session: Optional session object to issue the request through. Passing
            one lets many requests share a single connection pool. When
            omitted, a temporary ``aiohttp.ClientSession`` is created for this
            one call and closed afterwards.

    Returns:
        The decoded response body.
    """
    if session is None:
        # Backward-compatible path: own a short-lived session for this call.
        async with aiohttp.ClientSession() as own_session:
            return await fetch(url, own_session)
    async with session.get(url) as response:
        return await response.text()


async def main():
    """Fetch all example URLs concurrently and print each response body."""
    urls = [
        'http://example.com',
        'http://example.org',
        'http://example.net'
    ]
    # One session for the whole batch instead of one per request, so the
    # requests share a connection pool.
    async with aiohttp.ClientSession() as session:
        # gather() runs the requests concurrently and returns the bodies in
        # the same order as `urls`.
        results = await asyncio.gather(*(fetch(url, session) for url in urls))
    for result in results:
        print(result)

# asyncio.run() creates a fresh event loop, runs main() to completion and
# closes the loop again; calling asyncio.get_event_loop() without a running
# loop is deprecated. asyncio.run() cannot be used while another event loop is
# already running in the same thread (e.g. inside a Jupyter kernel) - there,
# use `await main()` instead.
asyncio.run(main())

In [12]:
import dask.array as da

# Describe a 10000 x 10000 array of random values, split into 1000 x 1000 chunks
x = da.random.random((10000, 10000), chunks=(1000, 1000))

# Build the mean reduction first, then trigger the actual computation
mean_graph = x.mean()
result = mean_graph.compute()
print(result)

0.49992790469129333


In [14]:
import io

# open(..., 'rb') with default buffering already returns an io.BufferedReader,
# so wrapping that object again would stack two buffers on top of each other.
# Opening the file unbuffered (buffering=0) yields the raw stream, leaving the
# explicit io.BufferedReader as the only buffering layer.
with open('large_file.txt', 'rb', buffering=0) as file:
    buffer = io.BufferedReader(file)
    for line in buffer:
        # Process each line (each `line` is a bytes object)
        ...

In [15]:
import numpy as np
import multiprocessing
from functools import lru_cache
from numba import jit

# Vectorized function using NumPy
def vectorized_operation(arr):
    """Return sqrt(arr) + sin(arr), computed element-wise with NumPy."""
    roots = np.sqrt(arr)
    sines = np.sin(arr)
    return roots + sines

# CPU-bound function for parallel processing
def cpu_bound_operation(n):
    """Return the sum of i*i for every i in range(n), using pure Python."""
    return sum(i * i for i in range(n))

# Memoized recursive function
@lru_cache(maxsize=None)
def fibonacci(n):
    """Return the n-th Fibonacci number, memoising every computed value.

    Inputs below 2 are returned as-is (so fibonacci(0) == 0 and
    fibonacci(1) == 1); larger inputs are the sum of the two preceding values.
    """
    if n < 2:
        return n
    previous = fibonacci(n - 1)
    before_previous = fibonacci(n - 2)
    return previous + before_previous

# JIT-compiled function using Numba
@jit(nopython=True)
def sum_squares(n):
    """Return the sum of k*k for every k in range(n).

    Compiled by Numba in nopython mode.
    """
    acc = 0
    for k in range(n):
        acc = acc + k * k
    return acc

# Parallel processing using multiprocessing
def parallel_processing(n):
    """Compute cpu_bound_operation(k) for every k in range(n) in a process pool.

    Args:
        n: Number of tasks; task k receives the integer k.

    Returns:
        The sum of all per-task results (0 when n <= 0).
    """
    # The context manager shuts the worker processes down even if a task
    # raises inside map(); map() blocks until all results are available, so
    # leaving the block afterwards loses nothing.
    with multiprocessing.Pool() as pool:
        results = pool.map(cpu_bound_operation, range(n))
    return sum(results)

# Main function
def main():
    """Run each optimisation demo in turn and print its result."""
    # Vectorised NumPy arithmetic on one million random samples; only the
    # first ten values are shown.
    samples = np.random.rand(1000000)
    transformed = vectorized_operation(samples)
    print("Vectorized operation result:", transformed[:10])

    # 1000 CPU-bound tasks spread over a process pool
    parallel_total = parallel_processing(1000)
    print("Parallel processing result:", parallel_total)

    # Memoised recursion
    fib_value = fibonacci(30)
    print("Fibonacci result:", fib_value)

    # Numba JIT-compiled loop
    squares_total = sum_squares(1000000)
    print("Sum of squares result:", squares_total)

# Run the demos only when executed as the top-level script/cell, not on import.
if __name__ == "__main__":
    main()