[Reference](https://medium.com/codex/say-goodbye-to-loops-in-python-and-welcome-vectorization-e4df66615a52)

# USE CASE 1: Finding the Sum of numbers


In [1]:
import time

# Baseline: add up the integers 0..1499999 with an explicit Python loop.
# perf_counter() is used rather than time() because it is monotonic and has
# the highest available resolution, which is what a short benchmark needs.
start = time.perf_counter()

# iterative sum over 1.5 million numbers
total = 0
for item in range(0, 1500000):
    total += item

# Stop the clock before printing so console I/O is not part of the measurement.
end = time.perf_counter()

print('sum is:' + str(total))
print(end - start)

sum is:1124999250000
0.3833932876586914


In [2]:
import numpy as np

# Same sum as the loop version, computed by NumPy instead of a Python loop.
start = time.perf_counter()

# np.arange creates the sequence of numbers from 0 to 1499999.
# The dtype is pinned to int64 because the result (1124999250000) does not fit
# in 32 bits; where NumPy's default integer is 32-bit, the sum would otherwise
# wrap around silently.
vectorized_total = np.sum(np.arange(1500000, dtype=np.int64))

# Stop the clock before printing so console I/O is not part of the measurement.
end = time.perf_counter()

print(vectorized_total)
print(end - start)

1124999250000
0.028107166290283203


# USE CASE 2: Mathematical Operations (on DataFrame)


In [3]:
import numpy as np
import pandas as pd

# 5 million rows x 4 integer columns. np.random.randint(0, 50) draws from
# 0..49, so zeros can occur in any column, including 'c'.
df = pd.DataFrame(np.random.randint(0, 50, size=(5000000, 4)), columns=['a', 'b', 'c', 'd'])

# A bare `df.shape` in the middle of a cell is never displayed, so the shape
# is checked explicitly instead: 5,000,000 rows and 4 columns.
assert df.shape == (5000000, 4)

df.head()

Unnamed: 0,a,b,c,d
0,29,25,36,46
1,42,17,32,41
2,49,11,44,32
3,7,28,6,46
4,11,19,38,23


In [4]:
import time
start = time.time()

# Row-by-row baseline: walk the frame with iterrows() and fill the new
# 'ratio' column one cell at a time (d expressed as a percentage of c).
for row_label, record in df.iterrows():
    d_over_c = record["d"] / record["c"]
    df.at[row_label, 'ratio'] = 100 * d_over_c
end = time.time()
print(end - start)
### Slow: this loop visits all 5 million rows one by one

  df.at[idx,'ratio'] = 100 * (row["d"] / row["c"])
  df.at[idx,'ratio'] = 100 * (row["d"] / row["c"])


336.6297433376312


In [5]:
start = time.time()
# Vectorized version: divide the whole 'd' column by 'c' in one step and
# scale the result to a percentage, with no Python-level loop.
df["ratio"] = (df["d"] / df["c"]) * 100

end = time.time()
print(end - start)

0.06600117683410645


# USE CASE 3: If-else Statements (on DataFrame)


In [6]:
import time
start = time.time()

# Row-by-row baseline for the conditional column 'e':
#   a == 0       -> d
#   0 < a <= 25  -> b - c
#   otherwise    -> b + c
for row_label, record in df.iterrows():
    a_value = record.a
    if a_value == 0:
        e_value = record.d
    elif 0 < a_value <= 25:
        e_value = record.b - record.c
    else:
        e_value = record.b + record.c
    df.at[row_label, 'e'] = e_value

end = time.time()

print(end - start)

478.7003917694092


In [8]:
# using vectorization

start = time.time()
# Start from the "else" result (b + c) for every row, then overwrite the rows
# selected by each mask. The a == 0 rule is applied last so that it takes
# precedence over the broader a <= 25 rule.
a_le_25 = df['a'] <= 25
a_is_zero = df['a'] == 0
df['e'] = df['b'] + df['c']
df.loc[a_le_25, 'e'] = df['b'] - df['c']
df.loc[a_is_zero, 'e'] = df['d']
end = time.time()
print(end - start)

0.5808093547821045


# USE CASE 4 (Advanced): Solving Machine Learning/Deep Learning Networks


In [9]:
import numpy as np

# Random data for the dot-product comparison:
#   m - a single row of 5 values (the initial values of m)
#   x - 5 million input rows with 5 values each
m_shape = (1, 5)
x_shape = (5000000, 5)

m = np.random.rand(*m_shape)
x = np.random.rand(*x_shape)

In [13]:
import time

import numpy as np

m = np.random.rand(1, 5)
x = np.random.rand(5000000, 5)

# Result buffer for the per-row dot products. It has to be a float array:
# storing a float into an integer array (which is what np.arange produces)
# silently drops the fractional part of every result.
zer = np.zeros(x.shape[0])
tic = time.process_time()

# Explicit double loop: for every row i, accumulate sum_j x[i, j] * m[0, j].
for i in range(0, x.shape[0]):
    total = 0
    for j in range(0, x.shape[1]):
        total = total + x[i, j] * m[0, j]

    zer[i] = total

toc = time.process_time()
print ("Computation time = " + str((toc - tic)) + "seconds")

Computation time = 23.000777673000016seconds


In [14]:
tic = time.process_time()

# Vectorized equivalent of the double loop: a single matrix product of x with
# the transpose of m. The product is computed only for timing and is not kept.
m_transposed = m.T
np.dot(x, m_transposed)

toc = time.process_time()
elapsed = toc - tic
print ("Computation time = " + str(elapsed) + "seconds")

Computation time = 0.08809572099994512seconds
