### Python Lists vs NumPy Arrays 

In [1]:
import numpy as np
import time

In [2]:
# NumPy Arrays vs Python Lists - Execution time comparison

# large dataset
size = 100_000_000  # 100 million numbers

# Python Lists
python_list = list(range(size))

start = time.time()
list_squared = [x**2 for x in python_list]  # square of all nums
end = time.time()
print("Python list time:", end - start, "seconds")

# NumPy Arrays
numpy_array = np.array(python_list)
start = time.time()
# Vectorization
sq_array = numpy_array ** 2
end = time.time()
print("numpy array time:", end - start, "seconds")

Python list time: 7.1896727085113525 seconds
numpy array time: 0.32616233825683594 seconds


In [3]:
#memory
import sys

print(f"Python list size = {sys.getsizeof(python_list) * len(python_list)} bytes")
print(f"numpy array size = {numpy_array.nbytes} bytes")

Python list size = 80000005600000000 bytes
numpy array size = 800000000 bytes


*** 
### NumPy Arrays

In [4]:
#create - from lists
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1, type(arr1), arr1.dtype, arr1.shape)

arr2 = np.array([1, 2, 3, 4, 5, "Shree"])
print(arr2, type(arr2), arr2.dtype, arr2.shape)

arr2D = np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9], [10, 11, 12]])
print(arr2D, arr2D.shape)

[1 2 3 4 5] <class 'numpy.ndarray'> int64 (5,)
['1' '2' '3' '4' '5' 'Shree'] <class 'numpy.ndarray'> <U21 (6,)
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]] (4, 3)


In [5]:
#create
arr1 = np.zeros((2, 3), dtype = "int64") #prefill
print(arr1, arr1.shape)

arr2 = np.ones((4,), dtype = "int64") #prefill
print(arr2, arr2.shape)

arr3 = np.full((3, 4), 100) #prefill with val
print(arr3, arr3.shape)

arr4 = np.eye((3)) #identity matrix
print(arr4, arr4.shape)

arr5 = np.arange(1, 11, 2) #Start Stop Step - range elements
print(arr5, arr5.shape)

arr6 = np.linspace(1, 100, 3) #evenly spaced arrays
print(arr6, arr6.shape)

[[0 0 0]
 [0 0 0]] (2, 3)
[1 1 1 1] (4,)
[[100 100 100 100]
 [100 100 100 100]
 [100 100 100 100]] (3, 4)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] (3, 3)
[1 3 5 7 9] (5,)
[  1.   50.5 100. ] (3,)


In [6]:
 # Properties
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print(arr.shape) #dimensions = m x n
print(arr.size) #total elements - 9
print(arr.dtype) #arr type
print(arr.ndim) #arr dim

float_arr = arr.astype(np.float64)
print(float_arr, float_arr.dtype)

(3, 3)
9
int64
2
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]] float64


In [7]:
#Operations on arrays

arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr, arr.shape)

reshaped = arr.reshape((3, 2))
print(reshaped, reshaped.shape)

flattened = arr.flatten() #2D --> 1D
print(flattened, flattened.shape) 

[[1 2 3]
 [4 5 6]] (2, 3)
[[1 2]
 [3 4]
 [5 6]] (3, 2)
[1 2 3 4 5 6] (6,)


In [8]:
#Indexing
# arr = np.array([1, 2, 3, 4, 5])
# print(arr[0]) #1
# print(arr[2]) #3
# print(arr[-1]) #5

arr = np.array([[1, 2, 3],[4, 5, 6]])
print(arr)
print(arr[0][0])
print(arr[0][1])
print(arr[1][0])
print(arr[1][2])

#Fancy Indexing
arr = np.array([1, 2, 3, 4, 5])

idx = [0, 1, 3]
print(arr[idx])

#Boolean Indexing
arr = np.array([1, 2, 3, 4, 5])

print(arr[arr > 2])
print(arr[arr % 2 == 0])
print(arr[arr % 2 != 0])

[[1 2 3]
 [4 5 6]]
1
2
4
6
[1 2 4]
[3 4 5]
[2 4]
[1 3 5]


In [9]:
#Slicing
arr = np.array([1, 2, 3, 4, 5])

print(arr[1:4]) # Start : Stop : Step
print(arr[:])
print(arr[1:])
print(arr[:1])
print(arr[::-1])
print(arr[::2])

[2 3 4]
[1 2 3 4 5]
[2 3 4 5]
[1]
[5 4 3 2 1]
[1 3 5]


In [10]:
#Copy vs View
nums = [1, 2, 3, 4, 5]
sub_list = nums[1:3]
print(sub_list)
sub_list[0] = 200
print(sub_list)
print(nums)
print("\n")
arr = np.array([1, 2, 3, 4, 5])
sub_arr = arr[1:3].copy() #Here if you remove .copy() --> result would bee change in main arr
print(sub_arr)
sub_arr[0] = 200
print(sub_arr)
print(arr)

[2, 3]
[200, 3]
[1, 2, 3, 4, 5]


[2 3]
[200   3]
[1 2 3 4 5]


In [17]:
#Common NumPY Data Types
arr = np.array([1, 2, 3.5, 4, 5, "prime"])
print(arr, arr.dtype)

arr1 = np.array([3 + 5j])
arr2 = np.array([2 + 3j])

print(arr1 + arr2)
print(arr1 - arr2)

arr = np.array(["prime", {1, 2, 3}, 3.14])

print(arr, arr.dtype)

['1' '2' '3.5' '4' '5' 'prime'] <U32
[5.+8.j]
[1.+2.j]
['prime' {1, 2, 3} 3.14] object


In [23]:
# Multi-dimensional arrays

arr2D = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2D)

print(np.sum(arr2D))

sum_of_colums = np.sum(arr2D, axis = 0)
print(sum_of_colums)

sum_of_rows = np.sum(arr2D, axis = 1)
print(sum_of_rows)

print(arr2D[0:2, 1:2])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
45
[12 15 18]
[ 6 15 24]
[[2]
 [5]]


In [37]:
#3D array

arr3D = np.array([ [ [1, 2], [3, 4], [5, 6] ], [ [7, 8], [9, 10], [11, 12] ] ]) # 2 x 3 x 2
print(arr3D, arr3D.shape)

#indexing
print(arr3D[0, 1, 1])
print(arr3D[1, 1, 0])

#Slicing
print(arr3D[:, 0, :]) #first row from both layers
print(arr3D[:, :, 0]) #first col from both layers

arr3D[:,0,:] = 99
print(arr3D)


[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]] (2, 3, 2)
4
9
[[1 2]
 [7 8]]
[[ 1  3  5]
 [ 7  9 11]]
[[[99 99]
  [ 3  4]
  [ 5  6]]

 [[99 99]
  [ 9 10]
  [11 12]]]


In [40]:
#vectorization

arr = np.array([1, 2, 3, 4, 5]) 
arr2 = np.array([6, 7, 8, 9, 10]) 

print(arr ** 2)
print(arr + 10)
print(arr + arr2)

[ 1  4  9 16 25]
[11 12 13 14 15]
[ 7  9 11 13 15]


In [46]:
#broadcasting
# General broadcasting rules
# When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimension and works its way left. Two dimensions are compatible when

# they are equal, or

# one of them is 1.

# If these conditions are not met, a ValueError: operands could not be broadcast together exception is thrown, indicating that the arrays have incompatible shapes.

arr = np.array([1, 2, 3, 4, 5]) 
arr2 = np.array([ [6, 7, 8, 9, 10], [11, 12, 13, 14, 15] ]) 

print(arr.shape)  # 1 x 5
print(arr2.shape) # 2 x 5 

print(arr + arr2)

# arr = np.array([ [1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15] ]) 
# arr2 = np.array([ [6, 7, 8, 9, 10], [11, 12, 13, 14, 15] ]) 

# print(arr.shape)  # 3 x 5
# print(arr2.shape) # 2 x 5 
# print(arr + arr2)

# ValueError: operands could not be broadcast together with shapes (3,5) (2,5) 


(5,)
(2, 5)
[[ 7  9 11 13 15]
 [12 14 16 18 20]]


In [53]:
# vector normalization --> normalizing an array means transforming its values so they fit into a specific scale or range

arr = np.array([[1,2],[3,4]])

mean = np.mean(arr)
std_dev = np.std(arr)

normalized_arr = (arr-mean)/std_dev
print(normalized_arr)

print(mean)
print(std_dev)

print(np.sqrt(5/4))

[[-1.34164079 -0.4472136 ]
 [ 0.4472136   1.34164079]]
2.5
1.118033988749895
1.118033988749895


In [64]:
#Mathematical fnx

#aggregation - single output value

arr = np.array([1, 2, 3, 4, 5])

print(np.sum(arr))
print(np.prod(arr))
print(np.min(arr))
print(np.max(arr))
print(np.argmin(arr)) # min val indx
print(np.argmax(arr)) # max val indx

print(np.mean(arr))
print(np.std(arr))
print(np.median(arr))
print(np.var(arr))

15
120
1
5
0
4
3.0
1.4142135623730951
3.0
2.0


In [66]:
#power fnx
arr = np.array([1, 2, 3, 4, 5])

print(np.square(arr))
print(np.sqrt(arr))
print(np.pow(arr, 3))

[ 1  4  9 16 25]
[1.         1.41421356 1.73205081 2.         2.23606798]
[  1   8  27  64 125]


In [70]:
#log
arr = np.array([1, 2, 3, 4, 5])

print(np.log(arr))
print(np.log10(arr))
print(np.log2(arr))
print(np.exp(arr))

[0.         0.69314718 1.09861229 1.38629436 1.60943791]
[0.         0.30103    0.47712125 0.60205999 0.69897   ]
[0.         1.         1.5849625  2.         2.32192809]
[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]


In [82]:
#rounding

print(np.round(3.94))
print(np.ceil(3.15))
print(np.floor(3.94))
print(np.trunc(3.94)) # Removes decimals

arr = np.array([1, 4, -2, 2, 3, -4, 4, -5])

print(np.unique(arr))
print(np.sort(arr))

print(np.abs(arr)) # changes -ve values to +ve

4.0
4.0
3.0
3.0
[-5 -4 -2  1  2  3  4]
[-5 -4 -2  1  2  3  4  4]
[1 4 2 2 3 4 4 5]
