In [0]:
%pip install matplotlib numpy

In [0]:
import numpy as np
import matplotlib.pyplot as plt

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pyarrow as pa
import pyarrow.compute as pc
import time

def _format_duration(seconds):
    """Format a duration given in seconds with a unit that keeps it readable.

    Sub-second values are rendered in milliseconds or microseconds so that
    fast measurements do not collapse to "0.00" on the chart.
    """
    if seconds >= 1.0:
        return f"{seconds:.2f} s"
    if seconds >= 1e-3:
        return f"{seconds * 1e3:.2f} ms"
    return f"{seconds * 1e6:.1f} µs"


def generate_vectorization_visual(n=1_000_000, seed=None):
    """Time squaring ``n`` random floats three ways and show a bar chart.

    The three approaches are a Python list comprehension, a NumPy array
    expression, and ``pyarrow.compute.power``. Each is timed once and the
    elapsed times are plotted side by side.

    Args:
        n: Number of random values to generate. Defaults to 1,000,000.
        seed: Optional seed for the random generator. ``None`` (the default)
            draws fresh random data on every call.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x = np.random.default_rng(seed).random(n)

    # perf_counter is used instead of time.time(): it is the clock intended
    # for measuring short intervals.
    # Each result is bound to a name so that releasing it is not part of the
    # timed region.

    # Loop-based computation (inefficient): one Python-level power per element
    start = time.perf_counter()
    y_loop = [value**2 for value in x]
    loop_time = time.perf_counter() - start

    # NumPy vectorized computation: a single element-wise array operation
    start = time.perf_counter()
    y_numpy = x**2
    numpy_time = time.perf_counter() - start

    # PyArrow vectorized computation; the NumPy -> Arrow conversion happens
    # before the timer starts, so it is excluded from the measurement
    x_arrow = pa.array(x)
    start = time.perf_counter()
    y_arrow = pc.power(x_arrow, 2)
    arrow_time = time.perf_counter() - start

    # The computed values are not needed for the chart; free them early.
    del y_loop, y_numpy, y_arrow

    fig, ax = plt.subplots(figsize=(8, 5))
    bars = ax.bar(
        ["Loop-based", "NumPy", "PyArrow"],
        [loop_time, numpy_time, arrow_time],
        color=["red", "green", "blue"],
        edgecolor="black",
        linewidth=1.2,
        alpha=0.75,
    )

    # Label each bar just above its top. The offset is given in points, so it
    # looks the same whether the bar is seconds or microseconds tall.
    for bar in bars:
        height = bar.get_height()
        ax.annotate(
            _format_duration(height),
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 4),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=12,
            fontweight="bold",
        )

    # Leave headroom so the tallest bar's label does not run into the title.
    ax.margins(y=0.15)

    ax.set_ylabel("Execution Time (seconds)", fontsize=12, fontweight="bold")
    ax.set_title(
        "Loop vs. NumPy vs. PyArrow Vectorization in Python",
        fontsize=14,
        fontweight="bold",
    )

    # Horizontal guide lines make the bar heights easier to compare
    ax.grid(axis="y", linestyle="--", alpha=0.6)

    plt.show()

In [0]:
if __name__ == "__main__":
    # Run the timing comparison and display the resulting bar chart
    generate_vectorization_visual()