### Define an employee structured datatype

In [80]:
import numpy as np

# Record layout for a single employee (one element of the structured array).
npEmpDataDef = np.dtype(
    [
        ('id', np.uint),       # unsigned integer employee number
        ('name', 'S64'),       # fixed-width byte string, up to 64 bytes (np.str_ would store unicode instead)
        ('age', np.ubyte),     # unsigned 8-bit integer
        ('height', np.ubyte),  # unsigned 8-bit integer
    ]
)

### Define initialisation and helper functions

In [81]:
def assignEmpName(npEmpArr):
    """Fill the 'name' field of every record with ``"name<i>"``.

    ``i`` is the zero-based position of the record, so the first record
    becomes ``name0``, the second ``name1`` and so on.

    Args:
        npEmpArr: 1-D structured array with a string field called 'name'.
            It is modified in place.

    Returns:
        None.
    """
    names = ["name" + str(i) for i in range(len(npEmpArr))]
    # A single column assignment avoids creating and writing through one
    # structured scalar per record, which dominates the cost for large arrays.
    # dtype=str keeps the temporary a string array even when it is empty.
    npEmpArr['name'] = np.array(names, dtype=str)


def assignEmpID(npEmpArr):
    """Number the records consecutively, starting at 1, in the 'id' field.

    Args:
        npEmpArr: 1-D structured array with an integer field called 'id'.
            It is modified in place.

    Returns:
        None.
    """
    # Vectorised replacement for the per-record loop: record i receives i + 1.
    npEmpArr['id'] = np.arange(1, len(npEmpArr) + 1)


def assignEmpAge(npEmpArr):
    """Fill the 'age' field with random integers from 20 to 59 inclusive.

    Values come from NumPy's global random state (``np.random.randint``),
    whose upper bound ``high=60`` is exclusive.

    Args:
        npEmpArr: 1-D structured array with an integer field called 'age'.
            It is modified in place.

    Returns:
        None.
    """
    # One call with size=... draws every value at once instead of invoking
    # the generator once per record from a Python loop.
    npEmpArr['age'] = np.random.randint(20, high=60, size=len(npEmpArr))


def assignEmpHeight(npEmpArr):
    """Fill the 'height' field with random integers from 150 to 179 inclusive.

    Values come from NumPy's global random state (``np.random.randint``),
    whose upper bound ``high=180`` is exclusive.

    Args:
        npEmpArr: 1-D structured array with an integer field called 'height'.
            It is modified in place.

    Returns:
        None.
    """
    # One call with size=... draws every value at once instead of invoking
    # the generator once per record from a Python loop.
    npEmpArr['height'] = np.random.randint(150, high=180, size=len(npEmpArr))


def findAvgs(npEmpArr) -> tuple:
    """Compute the average age and height with an explicit Python loop.

    This is the hand-written baseline that the notebook times against
    ``findAvgsUsingNumpyRoutine``, so the loop is kept on purpose.

    Args:
        npEmpArr: structured array with numeric 'age' and 'height' fields.

    Returns:
        A tuple ``(average_age, average_height)``.

    Raises:
        ZeroDivisionError: if ``npEmpArr`` is empty.
    """
    count = len(npEmpArr)
    sum_height = 0
    sum_age = 0
    for i in range(count):
        record = npEmpArr[i]
        # .item() converts the NumPy scalar to a plain Python number first.
        # Adding raw uint8 scalars to a Python int keeps the running total in
        # uint8 under NumPy 2 promotion rules, so it would wrap around at 255.
        sum_height += record['height'].item()
        sum_age += record['age'].item()

    return sum_age / count, sum_height / count

def findAvgsUsingNumpyRoutine(npEmpArr) -> tuple:
    """Compute the average age and height with ``np.average``.

    Args:
        npEmpArr: structured array with numeric 'age' and 'height' fields.

    Returns:
        A tuple ``(average_age, average_height)``.
    """
    # Each field is averaged as a whole column by NumPy.
    ages = npEmpArr["age"]
    heights = npEmpArr["height"]
    mean_age = np.average(ages)
    mean_height = np.average(heights)
    return mean_age, mean_height

### Using our functions on an array

In [82]:
# Number of employee records to generate.
empNum = 1_000_000

# Zero-initialised structured array with one record per employee.
npEmpArr = np.zeros(shape=empNum, dtype=npEmpDataDef)

In [83]:
# Show the array right after np.zeros: every field of every record is still zero/empty.
npEmpArr

array([(0, b'', 0, 0), (0, b'', 0, 0), (0, b'', 0, 0), ...,
       (0, b'', 0, 0), (0, b'', 0, 0), (0, b'', 0, 0)],
      dtype=[('id', '<u4'), ('name', 'S64'), ('age', 'u1'), ('height', 'u1')])

In [84]:
# Populate every field in place. The ID and name fills are deterministic;
# age is filled before height so the random draws happen in the same order.
assignEmpID(npEmpArr)
assignEmpName(npEmpArr)
assignEmpAge(npEmpArr)
assignEmpHeight(npEmpArr)

In [85]:
# Peek at the first five populated records.
npEmpArr[:5]

array([(1, b'name0', 58, 162), (2, b'name1', 43, 157),
       (3, b'name2', 37, 155), (4, b'name3', 44, 152),
       (5, b'name4', 48, 160)],
      dtype=[('id', '<u4'), ('name', 'S64'), ('age', 'u1'), ('height', 'u1')])

In [86]:
import time

# perf_counter_ns() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals. time_ns() reports wall-clock time, which can have coarse
# resolution and may jump if the system clock is adjusted.

# Time the hand-written Python loop.
start = time.perf_counter_ns()
age_avg, height_avg = findAvgs(npEmpArr)
time_taken_normal = time.perf_counter_ns() - start

# Time the NumPy implementation on the same data.
start = time.perf_counter_ns()
age_avg_np, height_avg_np = findAvgsUsingNumpyRoutine(npEmpArr)
time_taken_np = time.perf_counter_ns() - start

Our custom function's results

In [87]:
# (average age, average height) from the Python-loop implementation, then its elapsed time in nanoseconds.
display((age_avg, height_avg))
print("time taken(ns) :",time_taken_normal)

(39.489147, 164.497963)

time taken(ns) : 3814146300


NumPy's results

In [88]:
# (average age, average height) from the NumPy implementation, then its elapsed time in nanoseconds.
display((age_avg_np, height_avg_np))
print("time taken using numpy(ns):",time_taken_np)

(39.489147, 164.497963)

time taken using numpy(ns): 18152800


In [89]:
# Absolute gap between the two timings in nanoseconds (positive when the Python loop is slower).
time_difference = time_taken_normal - time_taken_np
print("difference in computation time(ns) :", time_difference)

difference in computation time(ns) : 3795993500


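NumPy performs far better here (roughly 200× faster in the run above), most likely because `for` loops in Python are slow: every iteration is executed by the interpreter and handles one element at a time as a separate object. NumPy runs its loops in compiled C code under the hood, so they are almost always faster than equivalent Python loops. Moreover, NumPy uses an optimized implementation for calculating the average.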