[Reference](https://python.plainenglish.io/vectorization-in-pandas-simplifying-data-operations-3a4fda08a184)

[Reference](https://python.plainenglish.io/vectorization-in-pandas-simplifying-data-operations-3a4fda08a184)



# Example 1: Basic Arithmetic Operations

In [1]:
import pandas as pd

data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)
# Using vectorization to add columns 'A' and 'B'
df['C'] = df['A'] + df['B']
print(df['C'])

0    5
1    7
2    9
Name: C, dtype: int64


# Example 2: Applying Functions

In [2]:
import pandas as pd

data = {'A': [1, 2, 3]}
df = pd.DataFrame(data)
# Define a custom function
def square(x):
    return x ** 2

# Applying the 'square' function to the 'A' column
df['A_squared'] = df['A'].apply(square)
print(df['A_squared'])

0    1
1    4
2    9
Name: A_squared, dtype: int64


# Example 3: Conditional Operations

In [3]:
import pandas as pd

data = {'A': [1, 2, 3]}
df = pd.DataFrame(data)

# Creating a new column 'D' based on a condition in column 'A'
df['D'] = df['A'].apply(lambda x: 'Even' if x % 2 == 0 else 'Odd')

print(df)

   A     D
0  1   Odd
1  2  Even
2  3   Odd


# Traditional Loop-Based Processing

In [4]:
# Adding two lists element-wise without vectorization
list1 = [1, 2, 3, 4, 5]
list2 = [6, 7, 8, 9, 10]
result = []

for i in range(len(list1)):
    result.append(list1[i] + list2[i])
print(result)

[7, 9, 11, 13, 15]


# Vectorized Processing with NumPy

In [5]:
import numpy as np

# Adding two NumPy arrays element-wise with vectorization
array1 = np.array([1, 2, 3, 4, 5])
array2 = np.array([6, 7, 8, 9, 10])
result = array1 + array2
print(result)

[ 7  9 11 13 15]


# Efficiency Comparison: NumPy Vectorized vs. Traditional Loop-Based Element-Wise Addition

In [6]:
import numpy as np
import timeit

# Create two NumPy arrays and two lists for the comparison
array1 = np.random.randint(1, 100, size=1000000)
array2 = np.random.randint(1, 100, size=1000000)
list1 = list(array1)
list2 = list(array2)

# Vectorized processing with NumPy
def numpy_vectorized():
    result = array1 + array2

# Traditional loop-based processing
def loop_based():
    result = []
    for i in range(len(list1)):
        result.append(list1[i] + list2[i])

# Measure execution time for NumPy vectorized approach
numpy_time = timeit.timeit(numpy_vectorized, number=100)

# Measure execution time for traditional loop-based approach
loop_time = timeit.timeit(loop_based, number=100)

print(f"NumPy Vectorized Approach: {numpy_time:.5f} seconds")
print(f"Traditional Loop-Based Approach: {loop_time:.5f} seconds")

NumPy Vectorized Approach: 0.19820 seconds
Traditional Loop-Based Approach: 24.03206 seconds
