In [2]:
import numpy as np

# NUMPY

## The NumPy ndarray: A Multidimensional Array Object

In [167]:
np.arange(10) #1D list ranging from 0 to 9

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [182]:
#generating random data in arrays
data = np.random.randn(3,2) #2D array
print("Data:\n", data)
print("\n")
print("Shape:", data.shape) #shape of the array
print("Size:", data.size) #number of elements in array.
print("Data type:", data.dtype)

Data:
 [[ 0.86449148 -1.49420188]
 [ 0.67218771  0.72339072]
 [-0.52500236  0.27771571]]


Shape: (3, 2)
Size: 6
Data type: float64


## Creating ndarrays

In [186]:
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
print(arr1)
print("Dimension:", arr1.ndim)
print("Data type", arr1.dtype)

[6.  7.5 8.  0.  1. ]
Dimension: 1
Data type float64


In [189]:
data2 = [[1,2,3],[4,5,6]]
arr2 = np.array(data2, dtype="int64") #creating an 2D array with data type int64
print(arr2)
print("Dimension:", arr2.ndim)
print("Data type:", arr2.dtype)

[[1 2 3]
 [4 5 6]]
Dimension: 2
Data type: int64


In [207]:
print("Array of zeros:", np.zeros(10)) #1D array
print("\n")
print("Array of ones:\n", np.ones((5,5))) #2D array
print("\n")
print("Uninitialized array of garbage:\n", np.empty((2,3,3))) #3D array
print("\n")
print("Array full of 10's:\n", np.full((3,3),10))
print("\n")
print("5X5 quare identity matrix:\n", np.identity(5)) #eye could also be used intead of identity

Array of zeros: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


Array of ones:
 [[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


Uninitialized array of garbage:
 [[[6.23042070e-307 4.67296746e-307 1.69121096e-306]
  [8.90096090e-307 4.00528573e-307 7.56599128e-307]
  [1.11256817e-306 1.06811422e-306 1.42417221e-306]]

 [[1.11260619e-306 8.90094053e-307 1.86919378e-306]
  [1.06809792e-306 1.37962456e-306 1.69111861e-306]
  [1.78020169e-306 1.37961777e-306 7.56599807e-307]]]


Array full of 10's:
 [[10 10 10]
 [10 10 10]
 [10 10 10]]


5X5 quare identity matrix:
 [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


## Data types conversion

In [211]:
arr = np.array([2,4,5,6])
print("Original data type of array:", arr.dtype)
arr_float = arr.astype("float64") #convert to float64 data type
print("New data type of array:", arr_float.dtype)

Original data type of array: int32
New data type of array: float64


## Arithmetic with NumPy Arrays

In [214]:
arr = np.array([[1,2,3],[4,5,6]])
print("Element-wise multiplication:\n", arr*arr)
print("\n")
print("Element-wise addition:\n", arr+arr) #equals to arr*2

Element-wise multiplication:
 [[ 1  4  9]
 [16 25 36]]


Element-wise addition:
 [[ 2  4  6]
 [ 8 10 12]]


In [228]:
#array comparisons
arr1 = np.random.randint(1,4, size=4) #'size' is optional
arr2 = np.random.randint(1,4, 4)
print("arr1>arr2:\n", arr1>arr2)
print("\n")
print("arr1<arr2:\n", arr1<arr2)
print("\n")
print("arr1=arr2:\n", arr1==arr2)

arr1>arr2:
 [False  True False False]


arr1<arr2:
 [ True False  True  True]


arr1=arr2:
 [False False False False]


## Basic Indexing and Slicing

In [233]:
arr = np.arange(10)
print("Original array:\n", arr)
arr_slice = arr[5:8]
print("Sliced array:\n", arr_slice)
arr_slice[1] = 1234
print("Original array:\n", arr)

Original array:
 [0 1 2 3 4 5 6 7 8 9]
Sliced array:
 [5 6 7]
Original array:
 [   0    1    2    3    4    5 1234    7    8    9]


In [237]:
arr = np.random.randint(0,10, (4,4))
print("Original array:\n", arr)
print("\n")
print("Slice:\n", arr[1:3,1:3])
print("\n")
print("Slice:\n", arr[-3:-1,1:3])
print("\n")

Original array:
 [[3 0 6 3]
 [6 9 8 6]
 [2 5 2 4]
 [8 7 8 5]]


Slice:
 [[9 8]
 [5 2]]


Slice:
 [[9 8]
 [5 2]]




In [243]:
#slice assignment
arr[::2, 2:4] = -99
arr

array([[  3,   0, -99, -99],
       [  6,   9,   8,   6],
       [  2,   5, -99, -99],
       [  8,   7,   8,   5]])

## Boolean Indexing


In [254]:
names = np.array(["Yaw","Joe","Kusi", "Tumi","Yaw","Momo"])
print("Names:\n", names)
print("\n")
data = np.random.randn(6,4)
print("Data:\n", data)
print("\n")
print("Sliced data:\n", data[names=="Yaw"])
print("\n")
print("Sliced data:\n", data[names=="Yaw",2:])
#slicing everything but Yaw
print("\n")
print("Sliced data:\n", data[~(names=="Yaw")]) #data[names!="Yaw"] could also be used

Names:
 ['Yaw' 'Joe' 'Kusi' 'Tumi' 'Yaw' 'Momo']


Data:
 [[ 1.56218761 -0.75611162 -1.51774368 -1.77611609]
 [ 1.01209129 -0.26265479 -0.35389469 -0.67139901]
 [ 0.17433823 -1.85119515 -0.31622463 -0.7844617 ]
 [ 1.92182104  1.50254165 -1.13000455  0.50497028]
 [-1.34861423  1.91436841  1.16346897  0.38390086]
 [ 0.43530183 -0.51723945  1.20291674  0.45423521]]


Sliced data:
 [[ 1.56218761 -0.75611162 -1.51774368 -1.77611609]
 [-1.34861423  1.91436841  1.16346897  0.38390086]]


Sliced data:
 [[-1.51774368 -1.77611609]
 [ 1.16346897  0.38390086]]


Sliced data:
 [[ 1.01209129 -0.26265479 -0.35389469 -0.67139901]
 [ 0.17433823 -1.85119515 -0.31622463 -0.7844617 ]
 [ 1.92182104  1.50254165 -1.13000455  0.50497028]
 [ 0.43530183 -0.51723945  1.20291674  0.45423521]]


In [258]:
#multiple boolean indexing with |(or) and &(and)
mask = (names=="Yaw") | (names=="Momo")
print("Mask:\n", mask)
print("\n")
print("Sliced data:\n", data[mask])

Mask:
 [ True False False False  True  True]


Sliced data:
 [[ 1.56218761 -0.75611162 -1.51774368 -1.77611609]
 [-1.34861423  1.91436841  1.16346897  0.38390086]
 [ 0.43530183 -0.51723945  1.20291674  0.45423521]]


In [260]:
#setting data values
data[data<0] = 0
print("New data:\n", data)
print("\n")
data[names !="Yaw"] = 7
print("New data:\n",data)

New data:
 [[1.56218761 0.         0.         0.        ]
 [1.01209129 0.         0.         0.        ]
 [0.17433823 0.         0.         0.        ]
 [1.92182104 1.50254165 0.         0.50497028]
 [0.         1.91436841 1.16346897 0.38390086]
 [0.43530183 0.         1.20291674 0.45423521]]


New data:
 [[1.56218761 0.         0.         0.        ]
 [7.         7.         7.         7.        ]
 [7.         7.         7.         7.        ]
 [7.         7.         7.         7.        ]
 [0.         1.91436841 1.16346897 0.38390086]
 [7.         7.         7.         7.        ]]


## Fancy indexing

In [267]:
arr = np.empty((8,4))
for i in range(len(arr)):
    arr[i] = i
print("Array:\n", arr)
print("\n")
print("Subset of array:\n", arr[[4,3,6]]) #positive indices
print("\n")
print("Subset of array:\n", arr[[-4,-3,-6]]) #negative indices
print("\n")
print("Subset of arry:\n", arr[[1,3,5],[0,1,2]]) #passing multiple index array

Array:
 [[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]


Subset of array:
 [[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [6. 6. 6. 6.]]


Subset of array:
 [[4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [2. 2. 2. 2.]]


Subset of arry:
 [1. 3. 5.]


## Transposing Arrays and Swapping Axes

In [6]:
arr = np.arange(16).reshape(4,4)
print("Array:\n", arr)
print("\n")
print("Transpose array:\n", arr.T)

Array:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


Transpose array:
 [[ 0  4  8 12]
 [ 1  5  9 13]
 [ 2  6 10 14]
 [ 3  7 11 15]]


## Universal Functions: Fast Element-Wise Array Functions

In [8]:
arr = np.arange(10)
print(np.sqrt(arr)) #square root of array
print(np.exp(arr)) #exponential of array

[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]


In [10]:
x = np.random.randn(10)
y = np.random.randn(10)
#new array that has element-wise maximum of x and y
z = np.maximum(x,y)
print(z)

[ 0.66639624 -0.3971186   0.11542991 -0.00297773 -0.18307832  0.65965097
  0.57371737  0.31668636  0.82031099 -0.37127821]


In [11]:
#getting remainder and whole parts of elements in array
arr = np.random.randn(10)*5
print("Array:\n", arr)
print("\n")
remainder, whole = np.modf(arr)
print("Remainder:\n", remainder)
print("\n")
print("Whole:\n", whole)

Array:
 [ 2.07758793 -7.93682308 -0.93343045 -7.55386033  6.42228317  3.39034702
 -4.70031783  3.62168415  2.57070038  1.99983543]


Remainder:
 [ 0.07758793 -0.93682308 -0.93343045 -0.55386033  0.42228317  0.39034702
 -0.70031783  0.62168415  0.57070038  0.99983543]


Whole:
 [ 2. -7. -0. -7.  6.  3. -4.  3.  2.  1.]


In [34]:
x = np.arange(-5,5)
y = np.sqrt(x)
#checks if value in array is datatype nan
np.isnan(y)

  y = np.sqrt(x)


array([ True,  True,  True,  True,  True, False, False, False, False,
       False])

In [38]:
np.greater(x,y) #checks if x is greater than y
np.less_equal(x,y) #checks if x is less than or equal to y

array([False, False, False, False, False,  True,  True, False, False,
       False])

## Array-Oriented Programming with Arrays

In [52]:
#Finding magnitude of two vectors
x1 = np.array([0,6])
x2 = np.array([8,0])
x3 = x1 - x2
print("Magnitude:\n", np.sqrt(x3.dot(x3)))

Magnitude:
 10.0


In [56]:
points = np.arange(-5,5,0.01) #1000 equally spaced points
xs, ys = np.meshgrid(points, points)
print("xs:\n", xs)
print("\n")
print("ys:\n", ys)

xs:
 [[-5.   -4.99 -4.98 ...  4.97  4.98  4.99]
 [-5.   -4.99 -4.98 ...  4.97  4.98  4.99]
 [-5.   -4.99 -4.98 ...  4.97  4.98  4.99]
 ...
 [-5.   -4.99 -4.98 ...  4.97  4.98  4.99]
 [-5.   -4.99 -4.98 ...  4.97  4.98  4.99]
 [-5.   -4.99 -4.98 ...  4.97  4.98  4.99]]


ys:
 [[-5.   -5.   -5.   ... -5.   -5.   -5.  ]
 [-4.99 -4.99 -4.99 ... -4.99 -4.99 -4.99]
 [-4.98 -4.98 -4.98 ... -4.98 -4.98 -4.98]
 ...
 [ 4.97  4.97  4.97 ...  4.97  4.97  4.97]
 [ 4.98  4.98  4.98 ...  4.98  4.98  4.98]
 [ 4.99  4.99  4.99 ...  4.99  4.99  4.99]]


## Expressing Conditional Logic as Array Operations

In [59]:
arr = np.random.randn(10)*5
print("Array:\n", arr)
print("\n")
print("New array:\n", np.where(arr<0, 0, arr))

Array:
 [  1.08953995  -0.48935895  -4.14886057   2.32642039  -0.96205388
   0.59733507  -0.95487062 -11.18808335   1.65066189   2.11926738]


New array:
 [1.08953995 0.         0.         2.32642039 0.         0.59733507
 0.         0.         1.65066189 2.11926738]


arr1 = np.random.randn(10)*2
arr2 = np.random.randn(10)*2
print("Array 1:\n", arr1)
print("Array 2:\n", arr2)
print("New array:\n", np.where(arr1>arr2, arr1, arr2)) #this is the same as the np.maximum(arr1,arr2)

## Mathematical and Statistical Methods


In [72]:
arr = np.random.randn(10)*2
print("Mean:", arr.mean())
print("Median:", np.median(arr))
print("Sum:", np.sum(arr))

Mean: -0.3689152559076695
Median: -0.07220079767485349
Sum: -3.6891525590766947


In [87]:
arr_2D = np.random.randn(3,3)
print("Mean of columns only:", np.mean(arr_2D, axis=0))
print("Mean of rows only:", arr_2D.mean(axis=1))

Mean of columns only: [-0.33087634 -0.34802801  0.20394813]
Mean of rows only: [-0.26323663  0.55804957 -0.76976916]


In [91]:
arr = np.arange(5) #1D array
print("Array:\n", arr)
print("Cumulative sum:", np.cumsum(arr))
print("\n")
arr = np.arange(9).reshape(3,3) #2D array
print("Array:\n", arr)
print("\n")
print("Cumulative sum on each colum:\n", np.cumsum(arr, axis=0))
print("\n")
print("Cumulative sum on each row:\n", np.cumsum(arr, axis=1))

Array:
 [0 1 2 3 4]
Cumulative sum: [ 0  1  3  6 10]


Array:
 [[0 1 2]
 [3 4 5]
 [6 7 8]]


Cumulative sum on each colum:
 [[ 0  1  2]
 [ 3  5  7]
 [ 9 12 15]]


Cumulative sum on each row:
 [[ 0  1  3]
 [ 3  7 12]
 [ 6 13 21]]


## Methods for Boolean Arrays

In [100]:
arr = np.random.randn(10)
(arr>0).sum() #number of positive values

4

In [105]:
boo = np.array([True, False, True, True, False])
boo.all() #checks if all values are true
boo.any() #checks if any value is true

True

## Sorting