In [1]:
!pip install numpy



In [1]:
import numpy as np

In [5]:
# creating a numpy array
arr = np.array([1, 2, 3, 4, 5])
print(arr, type(arr))

[1 2 3 4 5] <class 'numpy.ndarray'>


In [6]:
import time

# execution performance
size = 10_000_000

# python list
py_list = list(range(size))
start = time.time()
sq_list = [x**2 for x in py_list]
end = time.time()
print(f"python list time = {end-start} seconds")

# numpy array
np_arr = np.array(py_list)
start = time.time()
sq_array = np_arr**2
end = time.time()
print(f"numpy array time = {end-start} seconds")

python list time = 0.5795071125030518 seconds
numpy array time = 0.05550050735473633 seconds


In [7]:
#memory usage
import sys
print(f"python list size: {sys.getsizeof(py_list) * len(py_list)} bytes") # it have additional metadata 
print(f"numpy array size: {np_arr.nbytes} bytes")

python list size: 800000560000000 bytes
numpy array size: 80000000 bytes


In [4]:
# create - from list
arr = np.array([1, 2, 3, 4, 5])
print(arr, type(arr), arr.dtype, arr.shape)

arr2 = np.array([1, 2, 3, 4, 5, "prime"])
[print(arr2, type(arr2), arr2.dtype, arr2.shape)] # all converted to string, U means unicode 

arr2D = np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
print(arr2D, type(arr2D), arr2D.dtype, arr2D.shape)

[1 2 3 4 5] <class 'numpy.ndarray'> int64 (5,)
['1' '2' '3' '4' '5' 'prime'] <class 'numpy.ndarray'> <U21 (6,)
[[1 2 3]
 [4 5 6]
 [7 8 9]] <class 'numpy.ndarray'> int64 (3, 3)


In [13]:
# creating numpy arrays from scratch
arr1 = np.zeros((2, 3)) # prefilled with zeros # initially its float64
arr1 = np.zeros((2, 3), dtype=np.int32) #changed from float64 to int32
print(arr1, arr1.dtype, arr1.shape)

arr2 = np.ones((3, 4)) # prefilled with ones
print(arr2, arr2.dtype, arr2.shape)

arr3 = np.full((2, 2), 7) # prefilled with any specific value 7
print(arr3, arr3.dtype, arr3.shape)

arr4 = np.eye(3) # identity matrix
print(arr4, arr4.dtype, arr4.shape)

arr5 = np.arange(1, 11, 2) # start, stop, step - range elements
print(arr5, arr5.dtype, arr5.shape)

arr6 = np.linspace(0, 1, 5) # start, stop, number of elements - evenly spaced
print(arr6, arr6.dtype, arr6.shape)

[[0 0 0]
 [0 0 0]] int32 (2, 3)
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]] float64 (3, 4)
[[7 7]
 [7 7]] int64 (2, 2)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] float64 (3, 3)
[1 3 5 7 9] int64 (5,)
[0.   0.25 0.5  0.75 1.  ] float64 (5,)


In [14]:
# properties
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print(arr.shape) # (3, 3) - rows, columns
print(arr.size) # 9 - total elements
print(arr.dtype) # int64 - data type of elements
print(arr.ndim) # 2D - number of dimensions

float_arr = arr.astype(np.float64)
print(float_arr, float_arr.dtype) # converted to float64

(3, 3)
9
int64
2
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]] float64


In [16]:
# operations on arrays
# 1. reshaping arrays
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr, arr.shape)

reshaped = arr.reshape((3, 2))
print(reshaped, reshaped.shape)

flattened = arr.flatten() # to 1D array
print(flattened, flattened.shape)

[[1 2 3]
 [4 5 6]] (2, 3)
[[1 2]
 [3 4]
 [5 6]] (3, 2)
[1 2 3 4 5 6] (6,)


In [None]:
# operations on arrays
# 2. indexing on arrays
arr = np.array([1, 2, 3, 4, 5])

print(arr[0]) # first element
print(arr[-1]) # last element

print(".........................")

arr2D = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2D[0][2]) # first row, third column
print(arr2D[1, 1]) # second row, second column

print(".........................")

#3. fancy indexing - selecting multiple elements at once
arr = np.array([1, 2, 3, 4, 5])
idx = [0, 2, 4]
print(arr[idx]) # elements at index 0, 2, 4

print(".........................")

# boolean indexing
arr = np.array([1, 2, 3, 4, 5])
print(arr[arr > 3]) # elements greater than 3
print(arr[(arr % 2 == 0)]) # even elements
print(arr[(arr % 2 != 0)]) # odd elements

1
5
.........................
3
5
.........................
[1 3 5]
.........................
[4 5]
[2 4]
[1 3 5]


In [23]:
#4. slicing arrays
arr = np.array([1, 2, 3, 4, 5])

# [start:stop:step]
print(arr[1:4])
print(arr[:3]) # first 3 elements
print(arr[2:]) # from index 2 to end
print(arr[::2]) # every second element

[2 3 4]
[1 2 3]
[3 4 5]
[1 3 5]


In [26]:
# copy vs view
nums = [1, 2, 3, 4, 5]
sub_list = nums[1:3] # slicing creates a new list (copy)
print(sub_list)
sub_list[0] = 200
print(sub_list)
print(nums) # original list unchanged

arr = np.array([1, 2, 3, 4, 5])
sub_arr = arr[1:3].copy() # slicing creates a view (not a copy) . Use .copy() to create a copy
print(sub_arr)
sub_arr[0] = 200
print(sub_arr)
print(arr)

[2, 3]
[200, 3]
[1, 2, 3, 4, 5]
[2 3]
[200   3]
[1 2 3 4 5]


In [4]:
# data type
arr = np.array([1, 2, 3, 4, 5])
print(arr,arr.dtype) # int64

arr_comp = np.array([3+5j])
print(arr_comp, arr_comp.dtype) # complex128

arr_obj = np.array(["hello", {1, 2, 3}, 3.14])
print(arr_obj, arr_obj.dtype) # object

[1 2 3 4 5] int64
[3.+5.j] complex128
['hello' {1, 2, 3} 3.14] object


In [7]:
# multidimensional arrays 
arr2D = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2D)

print(np.sum(arr2D))          # sum of all elements
sum_of_coloumns = np.sum(arr2D, axis=0)  # sum of each column
print(sum_of_coloumns)

sum_of_rows = np.sum(arr2D, axis=1)  # sum of each row
print(sum_of_rows)

print(arr2D[0:2, 1:2]) # slicing 2D array - first 2 rows, second column 0:2, 1:2 means row1 to row2 and column2 to column2

[[1 2 3]
 [4 5 6]
 [7 8 9]]
45
[12 15 18]
[ 6 15 24]
[[2]
 [5]]


In [12]:
# 3D array
arr3D = np.array([[[1, 2], [3, 4],[5, 6]], [[7, 8], [9, 10], [11, 12]]])# 2 blocks, each with 3 rows and 2 columns 2x3x2
print(arr3D, arr3D.shape)

#indexing
print(arr3D[1,1,0])

#slicing
print(arr3D[:, 0, :]) # first row from both layers/blocks
print(arr3D[ :, :, 0]) # first col from both layers/blocks

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]] (2, 3, 2)
9
[[1 2]
 [7 8]]
[[ 1  3  5]
 [ 7  9 11]]


***

### Vectorization & Broadcasting

<b>1. Vectorization</b>: It is the process of applying operations to entire arrays or sequences of data at once, rather than iterating through individual elements using explicit loops. 

<b>2. Broadcasting</b>: It is a mechanism that allows NumPy to perform operations on arrays of different shapes without explicitly reshaping them (creating copies of the smaller array to match the larger array's shape). 

<b> Broadcasting Condition <b>

For broadcasting to happen the dimensions should be compatible. Numpy compares shape element-wise. It starts dimension comparison with the trailing (i.e. rightmost) dimension and works its way left. 
Two dimensions are compatible when <ol> <li>they are equal, or</li> <li>one of them is 1. </li>

In [15]:
# vectorization 
arr1 = np.array([1, 2, 3, 4, 5])
arr2 = np.array([10, 20, 30, 40, 50])
print(arr1 ** 2)
print(arr1 + arr2)

[ 1  4  9 16 25]
[11 22 33 44 55]


In [20]:
# broadcasting - scaling arrays without using extra memory & allows operations on array of diff shapes
print(arr1 + 5)
arr3 = np.array([1, 2, 3, 4, 5])
arr4 = np.array([[10, 20, 30, 40, 50],[10, 20, 30, 40, 50]])
print(arr3 + arr4)

[ 6  7  8  9 10]
[[11 22 33 44 55]
 [11 22 33 44 55]]


In [None]:
# normalize - mean = 0, std_dev = 1 
arr = np.array([[1, 2], [3, 4]])

mean = np.mean(arr)
std_dev = np.std(arr)

normalized_arr = (arr - mean) / std_dev
print(normalized_arr)

# mean = 0, std_dev = 1
print(mean)
print(std_dev)

[[-1.34164079 -0.4472136 ]
 [ 0.4472136   1.34164079]]
2.5
1.118033988749895


In [None]:
# mathematical functions
# aggregate functions
arr = np.array([1, 2, 3, 4, 5])
print(np.sum(arr))     
print(np.prod(arr))
print(np.mean(arr))
print(np.std(arr))
print(np.var(arr))
print(np.min(arr)) 
print(np.max(arr))
print(np.argmin(arr)) # index of min element
print(np.argmax(arr)) # index of max element

15
120
3.0
1.4142135623730951
2.0
1
5
0
4


In [25]:
#power functions
arr = np.array([1, 2, 3, 4, 5])
print(np.sqrt(arr)) # square root
print(np.square(arr)) # square
print(np.pow(arr, 3)) # power of 3

[1.         1.41421356 1.73205081 2.         2.23606798]
[ 1  4  9 16 25]
[  1   8  27  64 125]


In [27]:
# logarithmic functions
arr = np.array([1, 2, 3, 4, 5])

print(np.log(arr)) # natural log
print(np.log10(arr)) # log base 10
print(np.log2(arr)) # log base 2
print(np.exp(arr)) # exponential e^x

[0.         0.69314718 1.09861229 1.38629436 1.60943791]
[0.         0.30103    0.47712125 0.60205999 0.69897   ]
[0.         1.         1.5849625  2.         2.32192809]
[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]


In [33]:
# rounding functions
arr = np.array([1, 2, -3, 4, -5])
print(np.round(arr)) # round to nearest integer
print(np.floor(arr + 0.7)) # round down
print(np.ceil(arr + 0.3)) # round up
print(np.trunc(arr + 0.9)) # truncate decimal part


# extra functions
print(np.unique(arr)) # unique elements
print(np.sort(arr)) # sorted array
print(np.abs(arr)) # absolute values

[ 1  2 -3  4 -5]
[ 1.  2. -3.  4. -5.]
[ 2.  3. -2.  5. -4.]
[ 1.  2. -2.  4. -4.]
[-5 -3  1  2  4]
[-5 -3  1  2  4]
[1 2 3 4 5]


In [34]:
print(np.round(2.678))    # 3.0
print(np.floor(2.678))    # 2.0
print(np.ceil(2.678))     # 3.0
print(np.trunc(2.678))    # 2.0

3.0
2.0
3.0
2.0
