Python Lists vs NumPy Arrays:
- NumPy arrays are faster (optimized in C).
- NumPy stores data in continuous memory blocks.
- Uses less memory (optimized storage).
- Built-in mathemetical functions.
- Supports direct vectorized operations.

In [3]:
# execution performance
import numpy as np
import time

size = 1_000_000

py_list = [i for i in range(size)]

st = time.time()
sq_list = [x**2 for x in py_list]
end = time.time()

print(f"time for py_list: {end-st}")

np_arr = np.array(py_list)
st = time.time()
sq_array = np_arr**2
end = time.time()

print(f"time for py_list: {end-st}")

time for py_list: 0.0825190544128418
time for py_list: 0.009235858917236328


In [4]:
import sys

print(f"size of python list: {sys.getsizeof(py_list) * len(py_list)} bytes.")

print(f"size of python list: {np_arr.nbytes} bytes.")

size of python list: 8448728000000 bytes.
size of python list: 8000000 bytes.


So why is Numpy faster?
- Homogeneous Data Types and Contiguous Memory
- Vector Operations & C Implementations
- Reduced Overhead

<b>Creation of numpy arrays</b>

In [None]:

arr1 = np.array([1,2,3,4,5])
print(arr1, type(arr1), arr1.dtype, arr1.shape)

arr2 = np.array([1,2,3,4,5,"prime"]) # numpy try to convert all ele of arr to one datatype
print(arr2, type(arr2), arr2.dtype, arr2.shape)

arr2D = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(arr2D, type(arr2D), arr2D.dtype, arr2D.shape)

[1 2 3 4 5] <class 'numpy.ndarray'> int64 (5,)
['1' '2' '3' '4' '5' 'prime'] <class 'numpy.ndarray'> <U21 (6,)
[[1 2 3]
 [4 5 6]
 [7 8 9]] <class 'numpy.ndarray'> int64 (3, 3)


In [None]:
arr0 = np.zeros((2,3), dtype="int64") # create arr with 0
print(arr0, type(arr0), arr0.dtype, arr0.shape)

arr1 = np.ones((2,)) # create arr with 1
print(arr1, type(arr1), arr1.dtype, arr1.shape)

arr2 = np.full((3,4), 100) # create arr with given val
print(arr2, type(arr2), arr2.dtype, arr2.shape)

arr3 = np.eye((3))  # creates identity matrix
print(arr3, type(arr3), arr3.dtype, arr3.shape)

arr4 = np.arange(1,11) #(st, stop(exclusive), step)
print(arr4, type(arr4), arr4.dtype, arr4.shape)

arr5 = np.linspace(1, 100, 4) # (st, end(inclusive), size_of_arr)
print(arr5, type(arr5), arr5.dtype, arr5.shape)


[[0 0 0]
 [0 0 0]] <class 'numpy.ndarray'> int64 (2, 3)
[1. 1.] <class 'numpy.ndarray'> float64 (2,)
[[100 100 100 100]
 [100 100 100 100]
 [100 100 100 100]] <class 'numpy.ndarray'> int64 (3, 4)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] <class 'numpy.ndarray'> float64 (3, 3)
[ 1  2  3  4  5  6  7  8  9 10] <class 'numpy.ndarray'> int64 (10,)
[  1.  34.  67. 100.] <class 'numpy.ndarray'> float64 (4,)


<b><ul>Properties</ul></b>

In [22]:
np_arr = np.array([[1,2,3],[4,5,6],[7,8,9]])

print(np_arr.shape) # dimensions m X n
print(np_arr.size) # no. of ele
print(np_arr.dtype) # datatype
print(np_arr.ndim)

float_arr = np_arr.astype(np.float64)
print(float_arr, type(float_arr), float_arr.dtype)

(3, 3)
9
int64
2
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]] <class 'numpy.ndarray'> float64
