# Numpy (Numerical Python)
- ndarray, numpy data array, saves data and address continuously, making the computing faster.
- versatile N-dimensional array object for storing data
- vectorization enables a degree of computational efficiency (parallel computing), using compiled C code instead of Python
- makes easier for linear algebra and fourier transform

In [4]:
import sys
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

print(f'python ver.\t: {sys.version}\n\n'
      f'numpy ver.\t: {np.__version__}\n\t\t: {np.__file__}\n\n'
      f'pandas ver.\t: {pd.__version__}\n\t\t: {pd.__file__}\n\n'
      f'matplotlib ver.\t: {matplotlib.__version__}\n\t\t: {matplotlib.__file__}')


python ver.	: 3.11.9 (main, Apr 19 2024, 11:43:47) [Clang 14.0.6 ]

numpy ver.	: 1.24.3
		: /Users/User/anaconda3/lib/python3.11/site-packages/numpy/__init__.py

pandas ver.	: 1.5.3
		: /Users/User/anaconda3/lib/python3.11/site-packages/pandas/__init__.py

matplotlib ver.	: 3.7.1
		: /Users/User/anaconda3/lib/python3.11/site-packages/matplotlib/__init__.py


## Speed: list vs. ndarray

In [6]:
import numpy as np
my_list = list(range(1000000))
my_arr = np.arange(1000000)

# for i in my_list[:10]:
#     print(i)

%timeit -n 10 my_list2 = [x * 2 for x in my_list]
%timeit -n 10 my_arr2 = my_arr * 2

36.2 ms ± 2.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
994 µs ± 170 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Multidimensional Array Object

In [8]:
# Generate some random data
# data = np.random.randn(6)
data = np.random.randn(2,3)
print(f'data = \n{data}\n\n'
      f'type(data) : {type(data)}\n\n'
      f'data * 10 =\n{data*10}\n\n'
      f'data + data =\n {data+data}')

data = 
[[-0.35700492  1.02647674  1.22130145]
 [-0.43063031 -0.09213323 -0.07932218]]

type(data) : <class 'numpy.ndarray'>

data * 10 =
[[-3.57004921 10.26476739 12.21301454]
 [-4.30630311 -0.92133235 -0.79322175]]

data + data =
 [[-0.71400984  2.05295348  2.44260291]
 [-0.86126062 -0.18426647 -0.15864435]]


In [22]:
a = np.array([1,2,3,4])
print(f'a: {a}')
print(f'a.shape: {a.shape}') # (4,) != (4,1)'
print(f'a.reshape(4,1):\n {a.reshape(4,1)}')
print(f'a.reshape(4,1).shape: {a.reshape(4,1).shape}')

a: [1 2 3 4]
a.shape: (4,)
a.reshape(4,1):
 [[1]
 [2]
 [3]
 [4]]
a.reshape(4,1).shape: (4, 1)


## Numpy Array basic attributes
- ndarray.shape: returns a tuple representing the shape of the array (row, column ......)
- ndarray.ndim: returns the number of dimensions (axes) of the array
- ndarray.size: returns the total number of elements in the array, which is the product of 'shape' tuple
- ndarray/itemsize: returns the size in bytes of each element in the array
- ndarray.dytpe: returns the data type of the elements in the array

In [23]:
a = np.array([1,2,3,4,5], dtype=float) # int to float
a, a.dtype # default to print if not specified

(array([1., 2., 3., 4., 5.]), dtype('float64'))

## create ndarray
- np.arange(start,stop,step,dtype)

In [25]:
arr = np.array([1,2,3,4,5,6,7,8,9,10])
arr1 = np.arange(1,11, dtype=float)

print(f'arr\t\t\t = {arr}\n'
      f'type(arr)\t\t= {type(arr)})\n'
      f'arr.dtype\t\t= {arr.dtype}\n'
      f'arr.itemsize\t\t = {arr.itemsize}\n'
      f'arr.nbytes\t\t= {arr.nbytes}\n')

print(f'arr1\t\t\t = {arr1}\n'
      f'type(arr1)\t\t= {type(arr1)})\n'
      f'arr1.dtype\t\t= {arr1.dtype}\n'
      f'arr1.itemsize\t\t = {arr1.itemsize}\n'
      f'arr1.nbytes\t\t= {arr1.nbytes}\n')

arr			 = [ 1  2  3  4  5  6  7  8  9 10]
type(arr)		= <class 'numpy.ndarray'>)
arr.dtype		= int64
arr.itemsize		 = 8
arr.nbytes		= 80

arr1			 = [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
type(arr1)		= <class 'numpy.ndarray'>)
arr1.dtype		= float64
arr1.itemsize		 = 8
arr1.nbytes		= 80



## np.zeros, ones, full, eye

In [37]:
print("zeros: ")
print(np.zeros((3,2)))
print("ones: ")
print(np.ones((3,2)))
print("full: ")
print(np.full((3,2),7))
# eye k=0, k=-1, k=......
print("eye: ")
print(np.eye(3)) # creates a 2D array with ones onthe diagonal and zeros eleswhere
print("eye, k=-1: ")
print(np.eye(3, k=-1))
print("diag: ")
print(np.diag((1,2,3,4,5)))

zeros: 
[[0. 0.]
 [0. 0.]
 [0. 0.]]
ones: 
[[1. 1.]
 [1. 1.]
 [1. 1.]]
full: 
[[7 7]
 [7 7]
 [7 7]]
eye: 
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
eye, k=-1: 
[[0. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]]
diag: 
[[1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]
 [0 0 0 0 5]]


## astype(): change datatype

In [41]:
arr32 = np.array([1,2,3,4,5])
print(f'arr32{arr32}\t{arr32.dtype}')

arr64 = arr32.astype(np.float64)
print(f'arr62{arr64}\t{arr64.dtype}')

arr32[1 2 3 4 5]	int64
arr62[1. 2. 3. 4. 5.]	float64


## leverage from another array

In [43]:
int_array = np.arange(10)
calibers = np.array([.22, .270, .380, .44, .50], dtype=np.float64)
int_array, calibers.dtype, int_array.astype(calibers.dtype) # leverage calibers data type

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 dtype('float64'),
 array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]))

## Arithmetic Operation

In [44]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(f'arr =\n{arr}\n\n'
      f'arr * arr =\n {arr*arr}\n\n'
      f'arr - arr =\n {arr-arr}\n\n'
      f'1 / arr:\n{1/arr}\n\n'
      f'arr ** 0.5 =\n{arr**0.5}')

arr =
[[1 2 3]
 [4 5 6]]

arr * arr =
 [[ 1  4  9]
 [16 25 36]]

arr - arr =
 [[0 0 0]
 [0 0 0]]

1 / arr:
[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]

arr ** 0.5 =
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]


## Comparsion

In [4]:
import numpy as np
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
print(f'arr =\n{arr}\n\narr2 = \n{arr2}\n')
arr > arr, arr2 ==arr, arr2 < arr

arr =
[[1. 2. 3.]
 [4. 5. 6.]]

arr2 = 
[[ 0.  4.  1.]
 [ 7.  2. 12.]]



(array([[False, False, False],
        [False, False, False]]),
 array([[False, False, False],
        [False, False, False]]),
 array([[ True, False,  True],
        [False,  True, False]]))

In [5]:
# np.random.seed(20) # np.random.RandomState(42)
ar = np.random.randint(0, 25, 10) # 10 random numbers between 0 and 25
mask = ar%2

print(f'ar\t\t: {ar}\n'
      f'ar%2==0\t\t: {ar%2==0}\n'
      f'mask\t\t: {mask}\n'
      f'ar[mask]\t: {ar[mask]}\n' # mask as index: bask and forth in 0,1,0,1
      f'ar[ar%2]\t: {ar[ar%2]}')

ar		: [ 7 10  5 12 18 23  5  2  5 19]
ar%2==0		: [False  True False  True  True False False  True False False]
mask		: [1 0 1 0 0 1 1 0 1 1]
ar[mask]	: [10  7 10  7  7 10 10  7 10 10]
ar[ar%2]	: [10  7 10  7  7 10 10  7 10 10]


## Logical Operator

In [4]:
import numpy as np
l1 = np.array([True, False, True, False])
l2 = np.array([False, False, True, False])

In [11]:
np.logical_and(l1,l2), l1*l2

(array([False, False,  True, False]), array([False, False,  True, False]))

In [7]:
np.logical_or(l1,l2),l1+l2

(array([ True, False,  True, False]), array([ True, False,  True, False]))

In [8]:
np.logical_xor(l1,l2)

array([ True, False, False, False])

In [9]:
np.logical_not(l2)

array([ True,  True, False,  True])

## To eliminate missing values

In [12]:
ar = np.array(['Hungary', 'Nigeria', 'Guatemuala', '','Poland','','Japan'])
ar

array(['Hungary', 'Nigeria', 'Guatemuala', '', 'Poland', '', 'Japan'],
      dtype='<U10')

In [13]:
ar[ar=='']='USA'
ar

array(['Hungary', 'Nigeria', 'Guatemuala', 'USA', 'Poland', 'USA',
       'Japan'], dtype='<U10')

## Arrays of integers can be used to index an array to produce another array

In [17]:
ar = 11*np.arange(0,10)
print(f'{ar}\n'
      f'{ar[[1, 3, 4, 2, 7]]}') # means fancy index

[ 0 11 22 33 44 55 66 77 88 99]
[11 33 44 22 77]


In [18]:
# Assignment is also possibly with array indexing
ar[[1,3]] = 50
ar

array([ 0, 50, 22, 50, 44, 55, 66, 77, 88, 99])

## Indexing & Slicing (Start, Stop, Step)

In [26]:
import numpy as np
a = np.array([1,2,3,4,5]) # a = np.arange(1,6)
a

array([1, 2, 3, 4, 5])

In [24]:
print(a[1])
print(a[2:4])
print(a[-2:])
print(a[::2])
print(a[[1,3,4]]) # fancy index, which prints out the specific index

2
[3 4]
[4 5]
[1 3 5]
[2 4 5]


## index : [row, column]
## fancy index: [[3,4,1], [2,1]
## slice: [row: row, column, column, ....]

In [29]:
arr = np.arange(10)
print(arr)
print(arr[5])
print(arr[5:8])
print(arr[5:8:2])

arr[5:8] = 12
print(arr)

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
[5 7]
[ 0  1  2  3  4 12 12 12  8  9]


## Copy & Deep Copy
- slice point back to original array

In [30]:
arr_slice = arr[5:8]
arr, arr_slice

(array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9]), array([12, 12, 12]))

In [31]:
# point to original arr_slice. Leverage back to original
arr_slice[1] = 12345
arr, arr_slice

(array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
            9]),
 array([   12, 12345,    12]))

In [32]:
a = np.arange(4)
b = c = a; d = b

print(f'a = {a}\n'
      f'b = {b}\n'
      f'c = {c}\n'
      f'd = {d}\n')

d[1:3] = [22, 33]
print(f'a = {a}\n'
      f'b = {b}\n'
      f'c = {c}\n'
      f'd = {d}\n')

a = [0 1 2 3]
b = [0 1 2 3]
c = [0 1 2 3]
d = [0 1 2 3]

a = [ 0 22 33  3]
b = [ 0 22 33  3]
c = [ 0 22 33  3]
d = [ 0 22 33  3]



## Deep Copy
- .copy()

In [33]:
b = a.copy()
b[1] = 100
b, a

(array([  0, 100,  33,   3]), array([ 0, 22, 33,  3]))

## Reshape & Flatten
- ndarray.reshape(shape, order)

In [34]:
import numpy as np
a = np.array([1,2,3,4,5,6,7,8,9,10,11,12])
b = np.reshape(a, (2,-1))
c = np.reshape(a,(2,2,-1))
d = np.reshape(a, (2,3,-1))
print(f'a = \n{a}shape: {a.shape}\n\n'
      f'a = \n{b}shape: {b.shape}\n\n'
      f'a = \n{c}shape: {c.shape}\n\n'
      f'a = \n{d}shape: {d.shape}\n\n')

a = 
[ 1  2  3  4  5  6  7  8  9 10 11 12]shape: (12,)

a = 
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]shape: (2, 6)

a = 
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]shape: (2, 2, 3)

a = 
[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]shape: (2, 3, 2)




## Flatten: Numpy flatten creates a 1D array from a multi-dimensional array
- remain the original array the same, like a = b.copy()

In [35]:
arr = np.arange(15).reshape((5,3))
a = arr.flatten()
a[0] = 10
a, arr

(array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),
 array([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]]))

## Ravel: Point to the original array

In [36]:
arr = np.arange(15).reshape((5,3))
a = arr.ravel()
a[0] = 10
a, arr

(array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),
 array([[10,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]]))

## Indexing with slices

In [37]:
arr2d = np.array([[1,2,3], [4,5,6,], [7,8,9]])
arr2d, arr2d[:2] # row only

(array([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]),
 array([[1, 2, 3],
        [4, 5, 6]]))

In [38]:
arr2d[:2, 1:] # row and column

array([[2, 3],
       [5, 6]])

## Boolean Indexing

In [41]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
names, data

(array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4'),
 array([[ 0.90018319,  1.14245637,  0.43330602, -0.04470062],
        [-0.80153321, -0.57649132, -1.37496885, -2.04898672],
        [ 0.71873856,  1.96076091, -1.82102094,  0.59354976],
        [-0.57997128, -0.10988428,  0.66701748, -0.42241233],
        [-0.25707614,  0.52670165,  1.16292375,  0.45790536],
        [-1.15335259, -0.45453891,  0.31761188, -0.12324829],
        [-2.30168019, -0.4807103 , -0.60768044,  1.30164605]]))

In [43]:
names == 'Bob', data

(array([ True, False, False,  True, False, False, False]),
 array([[ 0.90018319,  1.14245637,  0.43330602, -0.04470062],
        [-0.80153321, -0.57649132, -1.37496885, -2.04898672],
        [ 0.71873856,  1.96076091, -1.82102094,  0.59354976],
        [-0.57997128, -0.10988428,  0.66701748, -0.42241233],
        [-0.25707614,  0.52670165,  1.16292375,  0.45790536],
        [-1.15335259, -0.45453891,  0.31761188, -0.12324829],
        [-2.30168019, -0.4807103 , -0.60768044,  1.30164605]]))

In [44]:
data[names == 'Bob'] # Print out row 0 and 3

array([[ 0.90018319,  1.14245637,  0.43330602, -0.04470062],
       [-0.57997128, -0.10988428,  0.66701748, -0.42241233]])

In [45]:
data[names == 'Bob', 2:], data[names == 'Bob', 3]

(array([[ 0.43330602, -0.04470062],
        [ 0.66701748, -0.42241233]]),
 array([-0.04470062, -0.42241233]))

In [46]:
names != 'Bob', data[(names != 'Bob')]

(array([False,  True,  True, False,  True,  True,  True]),
 array([[-0.80153321, -0.57649132, -1.37496885, -2.04898672],
        [ 0.71873856,  1.96076091, -1.82102094,  0.59354976],
        [-0.25707614,  0.52670165,  1.16292375,  0.45790536],
        [-1.15335259, -0.45453891,  0.31761188, -0.12324829],
        [-2.30168019, -0.4807103 , -0.60768044,  1.30164605]]))

In [49]:
mask = (names =='Bob') | (names == 'Will')
mask, data[mask]

(array([ True, False,  True,  True,  True, False, False]),
 array([[ 0.90018319,  1.14245637,  0.43330602, -0.04470062],
        [ 0.71873856,  1.96076091, -1.82102094,  0.59354976],
        [-0.57997128, -0.10988428,  0.66701748, -0.42241233],
        [-0.25707614,  0.52670165,  1.16292375,  0.45790536]]))

In [50]:
data[data < 0] = 0
data

array([[0.90018319, 1.14245637, 0.43330602, 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.71873856, 1.96076091, 0.        , 0.59354976],
       [0.        , 0.        , 0.66701748, 0.        ],
       [0.        , 0.52670165, 1.16292375, 0.45790536],
       [0.        , 0.        , 0.31761188, 0.        ],
       [0.        , 0.        , 0.        , 1.30164605]])

In [51]:
data[names != 'Joe'] = 7
names != 'Joe', data

(array([ True, False,  True,  True,  True, False, False]),
 array([[7.        , 7.        , 7.        , 7.        ],
        [0.        , 0.        , 0.        , 0.        ],
        [7.        , 7.        , 7.        , 7.        ],
        [7.        , 7.        , 7.        , 7.        ],
        [7.        , 7.        , 7.        , 7.        ],
        [0.        , 0.        , 0.31761188, 0.        ],
        [0.        , 0.        , 0.        , 1.30164605]]))

## Fancy Index

In [52]:
arr = np.zeros((8, 4))
for i in range(8):
    arr[i] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [53]:
arr[[4,3,0,6]] # arr row index[4,3,0,6]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [54]:
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [56]:
arr = np.arange(32).reshape((8,4))
arr, arr[[1,5,7,2], [0,3,1,2]] # (1,0)(5,3)(7,1)(2,2) index

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]]),
 array([ 4, 23, 29, 10]))

## Transposing Arrays and Swapping Axes

In [60]:
arr = np.arange(15).reshape((3,5))
arr, arr.T, np.transpose(arr)

(array([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]]),
 array([[ 0,  5, 10],
        [ 1,  6, 11],
        [ 2,  7, 12],
        [ 3,  8, 13],
        [ 4,  9, 14]]),
 array([[ 0,  5, 10],
        [ 1,  6, 11],
        [ 2,  7, 12],
        [ 3,  8, 13],
        [ 4,  9, 14]]))

## Universal Functions
- ufunc: universal function, which is a function that can, in some defined way, imitate all other funcitons
- e.g. sqrt(), sin(), cos(), dot(), log(), exp()

## Fast Element-wise Array Functions
sqrt(), exp()

In [63]:
import numpy as np
arr = np.arange(9).reshape(3,3)
print(f'arr = \n{arr}\n\n'
      f'np.sqrt(arr) =\n{np.sqrt(arr)}\n\n'
      f'np.sqrt(arr.round(2) =\n{np.sqrt(arr).round(2)}\n\n' # round to two decimal places
      f'np.exp(arr) =\n{np.exp(arr)}')

arr = 
[[0 1 2]
 [3 4 5]
 [6 7 8]]

np.sqrt(arr) =
[[0.         1.         1.41421356]
 [1.73205081 2.         2.23606798]
 [2.44948974 2.64575131 2.82842712]]

np.sqrt(arr.round(2) =
[[0.   1.   1.41]
 [1.73 2.   2.24]
 [2.45 2.65 2.83]]

np.exp(arr) =
[[1.00000000e+00 2.71828183e+00 7.38905610e+00]
 [2.00855369e+01 5.45981500e+01 1.48413159e+02]
 [4.03428793e+02 1.09663316e+03 2.98095799e+03]]


sin(), log()

In [64]:
arr = np.arange(1, 11)
arr, np.sin(arr, np.log(arr)), np.log10(arr)

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
 array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427,
        -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111]),
 array([0.        , 0.30103   , 0.47712125, 0.60205999, 0.69897   ,
        0.77815125, 0.84509804, 0.90308999, 0.95424251, 1.        ]))

maximum(), abs()

In [65]:
x = np.random.randn(8) # find a random number between -3 and 3
y = np.random.randn(8)
print(f'x =\n{x}\n\n'
      f'y =\n{y}\n\n'
      f'np.maximum(x, y) =\n{np.maximum(x,y)}\n\n'
      f'np.minimum(x, y) =\n{np.minimum(x,y)}\n\n'
      f'np.abs(x) =\n{np.abs(x)}')

x =
[ 1.14977307 -1.13410754 -2.59658364  1.55223913 -0.49938708 -0.53196456
 -0.91091081  0.93749998]

y =
[-0.36772485  0.05125011  1.16459596 -1.02850547 -1.41541021  0.93166874
 -2.07822247  1.30629468]

np.maximum(x, y) =
[ 1.14977307  0.05125011  1.16459596  1.55223913 -0.49938708  0.93166874
 -0.91091081  1.30629468]

np.minimum(x, y) =
[-0.36772485 -1.13410754 -2.59658364 -1.02850547 -1.41541021 -0.53196456
 -2.07822247  0.93749998]

np.abs(x) =
[1.14977307 1.13410754 2.59658364 1.55223913 0.49938708 0.53196456
 0.91091081 0.93749998]


## Expressing Conditional logic as Array Operations

In [66]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
cond

array([ True, False,  True,  True, False])

## np.where(bool, True, False)
- if true, take the second argument; otherwise, take the third one

In [70]:
result = np.where(cond, xarr, yarr) # cond true(xarr), false(yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

In [71]:
arr = np.random.randn(4,4)
arr, np.where(arr > 0, 2, -2)

(array([[ 1.22360293, -0.94361975, -0.88343594, -0.15671727],
        [ 0.82071396,  1.36347822, -0.66973938,  1.41633865],
        [-0.73519396,  0.36314334,  0.77968333,  0.45439511],
        [-0.68925173,  0.02032924,  1.50832316, -0.09759425]]),
 array([[ 2, -2, -2, -2],
        [ 2,  2, -2,  2],
        [-2,  2,  2,  2],
        [-2,  2,  2, -2]]))

In [73]:
np.where(arr > 0, 2, arr) # set only positive values to 2

array([[ 2.        , -0.94361975, -0.88343594, -0.15671727],
       [ 2.        ,  2.        , -0.66973938,  2.        ],
       [-0.73519396,  2.        ,  2.        ,  2.        ],
       [-0.68925173,  2.        ,  2.        , -0.09759425]])

## Stack & Split

In [78]:
a = np.array([1, 2 ,3])
b = np.array([4, 5, 6])

print(f'a: {a}\nb: {b}\n')

print(np.stack((a,b), axis = 0), '\nshape\t:', np.stack((a,b), axis = 0), '\n')
print(np.stack((a,b), axis = 1), '\nshape\t:', np.stack((a,b), axis = 1).shape)

a: [1 2 3]
b: [4 5 6]

[[1 2 3]
 [4 5 6]] 
shape	: [[1 2 3]
 [4 5 6]] 

[[1 4]
 [2 5]
 [3 6]] 
shape	: (3, 2)


In [79]:
np.vstack((a,b)) # vertical stack # same as np.stack((a,b), axis = 1)

array([[1, 2, 3],
       [4, 5, 6]])

In [80]:
np.hstack((a,b)) # horizontal stack

array([1, 2, 3, 4, 5, 6])

In [83]:
np.concatenate([a,b], axis = 0) 

array([1, 2, 3, 4, 5, 6])

In [85]:
arr = np.random.randn(5,2)
print(arr)
first, second, third = np.split(arr, [1,3])
first, second, third

[[ 1.82482397 -0.66491538]
 [-0.9228546   1.04377606]
 [ 0.93386315  1.04189794]
 [-0.37375057  0.54277826]
 [-0.02479118  0.77547127]]


(array([[ 1.82482397, -0.66491538]]),
 array([[-0.9228546 ,  1.04377606],
        [ 0.93386315,  1.04189794]]),
 array([[-0.37375057,  0.54277826],
        [-0.02479118,  0.77547127]]))

## Broadcasting
When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimension and works its way left. Two dimensions are compatible when

- they are equal, or
- one of them is 1.

In [87]:
arr = np.arange(5)
arr, arr*4

(array([0, 1, 2, 3, 4]), array([ 0,  4,  8, 12, 16]))

In [90]:
arr = np.random.randn(4, 3)
print(arr, '\nmean: ', arr.mean(0), '\n') # 0: row

demeaned = arr- arr.mean(0)
print(demeaned, '\ndemeaned: ', demeaned.mean(0), '\n') # almost 0

[[ 2.20176925  0.14535325 -0.88906144]
 [-0.14222404  0.37121742 -0.50355675]
 [ 1.06574787 -0.53056219  0.66630105]
 [ 0.49481133 -1.3895812  -0.28033782]] 
mean:  [ 0.9050261  -0.35089318 -0.25166374] 

[[ 1.29674315  0.49624643 -0.6373977 ]
 [-1.04725014  0.7221106  -0.25189301]
 [ 0.16072176 -0.17966901  0.91796479]
 [-0.41021478 -1.03868802 -0.02867408]] 
demeaned:  [ 1.38777878e-17 -5.55111512e-17  0.00000000e+00] 



In [91]:
row_means = arr.mean(1)

print(f'arr :\n{arr}\n\n'
      f'row_means(1)\t\t: {row_means}\n'
      f'row_means.shape\t: {row_means.shape}\n')

arr :
[[ 2.20176925  0.14535325 -0.88906144]
 [-0.14222404  0.37121742 -0.50355675]
 [ 1.06574787 -0.53056219  0.66630105]
 [ 0.49481133 -1.3895812  -0.28033782]]

row_means(1)		: [ 0.48602036 -0.09152112  0.40049557 -0.39170256]
row_means.shape	: (4,)

