Python Lists vs NumPy Arrays:
- NumPy arrays are faster (optimized in C).
- NumPy stores data in continuous memory blocks.
- Uses less memory (optimized storage).
- Built-in mathemetical functions.
- Supports direct vectorized operations.

In [2]:
# execution performance
import numpy as np
import time

size = 1_000_000

py_list = [i for i in range(size)]

st = time.time()
sq_list = [x**2 for x in py_list]
end = time.time()

print(f"time for py_list: {end-st}")

np_arr = np.array(py_list)
st = time.time()
sq_array = np_arr**2
end = time.time()

print(f"time for py_list: {end-st}")

time for py_list: 0.1991126537322998
time for py_list: 0.015485048294067383


In [3]:
import sys

print(f"size of python list: {sys.getsizeof(py_list) * len(py_list)} bytes.")

print(f"size of python list: {np_arr.nbytes} bytes.")

size of python list: 8448728000000 bytes.
size of python list: 8000000 bytes.


So why is Numpy faster?
- Homogeneous Data Types and Contiguous Memory
- Vector Operations & C Implementations
- Reduced Overhead

<b>Creation of numpy arrays</b>

In [4]:

arr1 = np.array([1,2,3,4,5])
print(arr1, type(arr1), arr1.dtype, arr1.shape)

arr2 = np.array([1,2,3,4,5,"prime"]) # numpy try to convert all ele of arr to one datatype
print(arr2, type(arr2), arr2.dtype, arr2.shape)

arr2D = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(arr2D, type(arr2D), arr2D.dtype, arr2D.shape)

[1 2 3 4 5] <class 'numpy.ndarray'> int64 (5,)
['1' '2' '3' '4' '5' 'prime'] <class 'numpy.ndarray'> <U21 (6,)
[[1 2 3]
 [4 5 6]
 [7 8 9]] <class 'numpy.ndarray'> int64 (3, 3)


In [5]:
arr0 = np.zeros((2,3), dtype="int64") # create arr with 0
print(arr0, type(arr0), arr0.dtype, arr0.shape)

arr1 = np.ones((2,)) # create arr with 1
print(arr1, type(arr1), arr1.dtype, arr1.shape)

arr2 = np.full((3,4), 100) # create arr with given val
print(arr2, type(arr2), arr2.dtype, arr2.shape)

arr3 = np.eye((3))  # creates identity matrix
print(arr3, type(arr3), arr3.dtype, arr3.shape)

arr4 = np.arange(1,11) #(st, stop(exclusive), step)
print(arr4, type(arr4), arr4.dtype, arr4.shape)

arr5 = np.linspace(1, 100, 4) # (st, end(inclusive), size_of_arr)
print(arr5, type(arr5), arr5.dtype, arr5.shape)


[[0 0 0]
 [0 0 0]] <class 'numpy.ndarray'> int64 (2, 3)
[1. 1.] <class 'numpy.ndarray'> float64 (2,)
[[100 100 100 100]
 [100 100 100 100]
 [100 100 100 100]] <class 'numpy.ndarray'> int64 (3, 4)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] <class 'numpy.ndarray'> float64 (3, 3)
[ 1  2  3  4  5  6  7  8  9 10] <class 'numpy.ndarray'> int64 (10,)
[  1.  34.  67. 100.] <class 'numpy.ndarray'> float64 (4,)


<b><ul>Properties</ul></b>

In [6]:
np_arr = np.array([[1,2,3],[4,5,6],[7,8,9]])

print(np_arr.shape) # dimensions m X n
print(np_arr.size) # no. of ele
print(np_arr.dtype) # datatype
print(np_arr.ndim)

float_arr = np_arr.astype(np.float64)
print(float_arr, type(float_arr), float_arr.dtype)

(3, 3)
9
int64
2
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]] <class 'numpy.ndarray'> float64


<b>Reshaping Arrays</b>

In [11]:
arr = np.array([[1,2,3],[1,2,3]])
print(arr)

reshaped = arr.reshape((3,2))
print(reshaped)

# we should have same size of both arrays

flattened = arr.flatten() # 2d => 1d
print(flattened)

[[1 2 3]
 [1 2 3]]
[[1 2]
 [3 1]
 [2 3]]
[1 2 3 1 2 3]


<b>Indexing</b>

In [None]:
# similar to normal arrays

# Fancy Indexing

arr = np.array([1,2,3,0,5,6])
idx = [0, 3, 5]

print(arr[idx])

# Boolean Indexing

print(arr[arr > 2]) 
print(arr[arr > 3])
print(arr[arr%2 == 0]) # all even ele.


[1 0 6]
[3 5 6]
[5 6]
[2 0 6]


<b>Slicing</b>

In [None]:
print(arr[1:6:2]) # [st : end : step]

# py_list when done slicing on them creates a copy of that so that why they are slower
# np_arr when done slicing they form a view no more memory user no any operations hence fast


[2 0 6]


In [27]:
# ---------------------------Python List-----------------

print("py_list creates a new array: ")
py_list = [1,2,3,4,5]
sub_list = py_list[1:4]

sub_list[1] = 200

print(py_list)
print(sub_list)

# ---------------------Numpy Array-----------------------

print("np_arr creates view: ")
np_arr = np.array([1,2,3,4,5])
sub_arr = np_arr[1:4]

sub_arr[0] = 200

print(np_arr)
print(sub_arr)

py_list creates a new array: 
[1, 2, 3, 4, 5]
[2, 200, 4]
np_arr creates view: 
[  1 200   3   4   5]
[200   3   4]
