# Comparaciones de rendimiento

Calculemos raíces cuadradas de varios números de distintas maneras:

## Python normal:

In [None]:
from math import sqrt
import time as tm

In [None]:
# Problem size shared by the benchmark cells: each one computes the square
# roots of the integers 0 .. n-1.
n = 10_000_000

In [None]:
# Baseline: plain Python, one math.sqrt call per integer in a list
# comprehension.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
normal_results = [sqrt(i) for i in range(n)]
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

## Numpy:

In [None]:
import numpy as np
import time as tm

In [None]:
# NumPy version: build the integer array and take the square root of the
# whole array in one vectorized call. Array creation is inside the timed
# region, as in the original measurement.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
data = np.arange(n)
numpy_results = np.sqrt(data)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

## Joblib

In [None]:
import numpy as np
from math import sqrt
from joblib import Parallel
from joblib import delayed
import time as tm

Dos trabajadores:

In [None]:
# joblib with two workers: every math.sqrt call is submitted as its own task.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
parallel_pool = Parallel(n_jobs=2)
# delayed(sqrt)(i) does not compute anything yet; it only describes the call
# so that the Parallel object can execute it later.
parallel_sqrt = delayed(sqrt)
# Building the task list is part of the timed region on purpose: it is
# overhead that the non-parallel versions do not pay.
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

Cuatro trabajadores:

In [None]:
# joblib with four workers: same workload as the two-worker run, only n_jobs
# changes.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
parallel_pool = Parallel(n_jobs=4)
# delayed(sqrt)(i) only describes the call; Parallel executes it later.
parallel_sqrt = delayed(sqrt)
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

¿Qué ocurre si usamos la función de raíz cuadrada de NumPy (`np.sqrt`) en lugar de `math.sqrt`?

In [None]:
# joblib with two workers again, but each task calls NumPy's sqrt on a single
# Python int instead of math.sqrt.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
parallel_pool = Parallel(n_jobs=2)
parallel_sqrt = delayed(np.sqrt)  # note the difference: np.sqrt, not math.sqrt
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

Finalmente, con `batch_size` fijo:

In [None]:
# joblib with two workers and an explicit batch_size of 100000 instead of the
# library default.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
parallel_pool = Parallel(n_jobs=2, batch_size=100000)
# delayed(sqrt)(i) only describes the call; Parallel executes it later.
parallel_sqrt = delayed(sqrt)
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

In [None]:
# joblib with two workers and a larger explicit batch_size of 500000.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
parallel_pool = Parallel(n_jobs=2, batch_size=500000)
# delayed(sqrt)(i) only describes the call; Parallel executes it later.
parallel_sqrt = delayed(sqrt)
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")

In [None]:
# joblib with four workers and batch_size set to a quarter of the workload,
# i.e. one batch per worker when n is divisible by 4.
print("Comenzando a calcular...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = tm.perf_counter()
# Floor division keeps the batch size an exact int; int(n/4) goes through a
# float and would lose precision for very large n.
parallel_pool = Parallel(n_jobs=4, batch_size=n // 4)
# delayed(sqrt)(i) only describes the call; Parallel executes it later.
parallel_sqrt = delayed(sqrt)
parallel_tasks = [parallel_sqrt(i) for i in range(n)]
parallel_results = parallel_pool(parallel_tasks)
end = tm.perf_counter()
print(f"Tiempo total: {end - start}s")