# Creating NumPy arrays

In [2]:
from pathlib import Path

import numpy as np

In [2]:
data1 = [6, 7.5, 8, 0, 1]
# convert a list to a numpy array
arr1 = np.array(data1, float)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [3]:
print(type(arr1))
print(arr1)

<class 'numpy.ndarray'>
[6.  7.5 8.  0.  1. ]


In [5]:
# dtype is the element type shared by every item in the array
print(arr1.dtype)

# shape is a tuple with one entry per axis; (5,) is a one-element tuple,
# i.e. a 1-D array of length 5 (not a 5x1 or 1x5 matrix, which would be 2-D)
print(arr1.shape)

print()
# np.info() prints its report itself and returns None, so call it directly;
# wrapping it in print() would add a stray "None" line to the output
np.info(arr1)

float64
(5,)

class:  ndarray
shape:  (5,)
strides:  (8,)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  True
data pointer: 0x1a8e961bc60
byteorder:  little
byteswap:  False
type: float64
None


In [7]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
# convert a list of lists to a numpy array
# the result is a 2D array
arr2 = np.array(data2)
print(arr2)

[[1 2 3 4]
 [5 6 7 8]]


In [8]:
print(arr2.dtype)
print(arr2.size)
print(arr2.ndim)
print(arr2.shape)

int64
8
2
(2, 4)


## Special Arrays

In [9]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [11]:
np.empty((2,3,2))

array([[[9.01698631e-312, 3.16202013e-322],
        [0.00000000e+000, 0.00000000e+000],
        [1.11261502e-306, 5.73535946e+174]],

       [[1.86386121e+160, 1.67501716e-076],
        [1.28880032e+165, 2.77197945e-057],
        [2.09204411e-076, 1.39290975e+165]]])

## Arrays of Fixed Intervals

In [3]:
print(np.arange(8))
print(np.arange(0, 8, 2))
# similar to the range() function

[0 1 2 3 4 5 6 7]
[0 2 4 6]


In [4]:
print(np.linspace(0, 10, num = 5, endpoint = True, dtype = float))
print(np.linspace(0, 10, num = 5, endpoint = False, dtype = int))
# np.linspace() returns evenly spaced numbers over a specified interval


[ 0.   2.5  5.   7.5 10. ]
[0 2 4 6 8]


## Arrays of Random Numbers

In [5]:
np.random.rand(5)

array([0.72400938, 0.6967517 , 0.71837296, 0.44982913, 0.7013219 ])

In [6]:
np.random.rand(4, 2)

array([[0.99437735, 0.8712144 ],
       [0.44205426, 0.88216198],
       [0.87649054, 0.34599479],
       [0.91366175, 0.01188964]])

In [7]:
for _ in range(5):
    np.random.seed(100) # set seed so that the random number is always the same
    print(np.random.rand(1))

[0.54340494]
[0.54340494]
[0.54340494]
[0.54340494]
[0.54340494]


In [8]:
np.random.seed(100)
for _ in range(5):
    print(np.random.rand(1))
    # seed is outside the loop, so the random number is not the same

[0.54340494]
[0.27836939]
[0.42451759]
[0.84477613]
[0.00471886]


In [9]:
# draw a 5x3 array from a standard normal distribution
np.random.randn(5, 3)

array([[ 0.35467445, -0.78606433, -0.2318722 ],
       [ 0.20797568,  0.93580797,  0.17957831],
       [-0.5771615 , -0.53337271, -0.22540212],
       [-0.31491934,  0.42299678, -0.43882681],
       [-0.50141346, -1.65870284, -1.0075773 ]])

In [10]:
# draw 15 numbers from a normal distribution with mean 5 and standard deviation 10
np.random.normal(5, 10, 15)   

array([-0.08734431, 12.8148842 , -1.54381034,  5.41172468,  2.98083093,
       -3.7081315 ,  7.2893207 ,  0.91960058,  3.96074862, 20.67178785,
        9.97024715, 16.55872335, 23.38611677, 20.35726618,  7.54997727])

In [11]:
# create a 2x4 array of random integers in the interval [0, 5)
np.random.randint(0, 5, (2, 4))

array([[1, 4, 2, 3],
       [3, 4, 2, 4]], dtype=int32)

In [12]:
# draw 5 random integers from 10 to 99 (range() excludes the stop value 100)
# replace=True samples with replacement, so the same value may appear twice
np.random.choice(range(10, 100), size = 5, replace=True)


array([13, 58, 62, 53, 46])

In [13]:
# 100 independent Bernoulli trials: each draw is a single flip (n=1) of a
# biased coin with success probability p=0.1, so every value is 0 or 1.
# (10 fair flips per trial would be np.random.binomial(10, 0.5, 100).)
np.random.binomial(1,0.1, 100)   

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [14]:
np.random.choice([3,5,7,9], size=(3,5)) 

array([[7, 9, 5, 5, 3],
       [5, 9, 7, 5, 3],
       [3, 5, 9, 5, 7]])

# Array indexing & slicing

In [16]:
arr3 = np.arange(30).reshape(5,6)
arr3

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

In [18]:
arr3[0]
# retrieves the first row, result is still an array, one dimensional.

array([0, 1, 2, 3, 4, 5])

In [19]:
arr3[0:1]
# also retrieves the first row, but result is two dimensional due to slicing

array([[0, 1, 2, 3, 4, 5]])

In [20]:
arr3[1, 1]
# retrieves the value from second row and second column
# the index should follow the axes 0, 1, 2, ...

np.int64(7)

In [21]:
arr3[1:2, 1:2]
# also retrieves the value from the second row, second column, but because both
# indices are slices the result is a 2D array of shape (1, 1).
# the slice can be updated by assigning a new value to it.
# e.g. arr3[1:2, 1:2] = 100

array([[7]])

In [22]:
arr3[:, 1:2]
# retrieves all row values in the second column

array([[ 1],
       [ 7],
       [13],
       [19],
       [25]])

In [23]:
arr3[[1, 3]]
# retrieves the second and fourth rows

array([[ 6,  7,  8,  9, 10, 11],
       [18, 19, 20, 21, 22, 23]])

In [24]:
arr3[(0, 2, 4), (1, 2, 3)]
# the two sequences are paired element by element: (0, 1), (2, 2), (4, 3),
# so their lengths must match.

array([ 1, 14, 27])

In [25]:
arr3[2] > 15
# returns a boolean array

array([False, False, False, False,  True,  True])

In [26]:
arr3[3, arr3[2] > 15]
# retrieves the fourth row, keeping only the columns where the value in the third row is greater than 15

array([22, 23])

In [27]:
mask = np.array([1, 8, 0, 0, 1], dtype=bool)
mask
# non-zero values are considered True
# zero values are considered False

array([ True,  True, False, False,  True])

In [28]:
arr3[mask, 5]
# retrieves the first, second, and fifth row, and the sixth column

array([ 5, 11, 29])

# Array operations

## Basic Operations

In [29]:
arr4 = np.array([[1, 2, 3],[4, 5, 6]])
arr4

array([[1, 2, 3],
       [4, 5, 6]])

In [30]:
arr4 * arr4

array([[ 1,  4,  9],
       [16, 25, 36]])

In [31]:
arr4 - arr4

array([[0, 0, 0],
       [0, 0, 0]])

In [32]:
1 / arr4

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [33]:
arr4 * 0.5

array([[0.5, 1. , 1.5],
       [2. , 2.5, 3. ]])

In [34]:
np.append(arr4, [[-100, -999, 0]], axis=0)
# when axis is specified, the values must have the same number of dimensions
# as the array and match its shape on every axis except the one appended
# along (here a 1x3 row is added to a 2x3 array).
# if axis is omitted, both inputs are flattened before being joined.
# np.append returns a new array; it does not change the original array.

array([[   1,    2,    3],
       [   4,    5,    6],
       [-100, -999,    0]])

In [35]:
arr4.tolist()

[[1, 2, 3], [4, 5, 6]]

In [36]:
arr4.astype(str)
# astype converts the array to a str type

array([['1', '2', '3'],
       ['4', '5', '6']], dtype='<U21')

In [37]:
zero_arr = np.zeros(5, int)
zero_arr

array([0, 0, 0, 0, 0])

In [38]:
zero_arr.fill(4)
# note that .fill returns None while changing the original array

In [39]:
zero_arr

array([4, 4, 4, 4, 4])

In [40]:
zero_arr[0] = 5.8
zero_arr   # since the datatype is int, 5.8 will be truncated

array([5, 4, 4, 4, 4])

## Transposing Arrays

In [42]:
arr5 = np.arange(15).reshape((3, 5))
arr5

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [43]:
arr5.reshape(-1, 3)
# -1 tells NumPy to infer that dimension from the total size: 15 values / 3 columns = 5 rows
# 3 means 3 columns
# note that reshape leaves the shape of the original array unchanged; the result
# is a view on the same data whenever possible (otherwise a copy)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [44]:
arr5.T   
# .T returns a transposed view without copying the data
# note that .T has no effect on a one-dimensional array: it is returned unchanged

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [45]:
arr5  
# so the array stays unchanged

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [46]:
arr6 = np.random.randn(6, 3)
np.dot(arr6.T, arr6)
# dot product of the transpose of the array with itself

array([[15.83723247, -6.99496759,  0.373784  ],
       [-6.99496759,  6.85570835, -2.07541975],
       [ 0.373784  , -2.07541975,  4.60567398]])

## Mathematical and Statistical Methods

In [47]:
arr7 = np.arange(10)
print(arr7)

[0 1 2 3 4 5 6 7 8 9]


In [48]:
np.sqrt(arr7)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [49]:
np.exp(arr7)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [50]:
x, y = np.random.randn(8), np.random.randn(8)
print(x)
print(y)

[-1.59203835  0.69948798  0.11292401 -0.76978559  0.3038585  -1.2977107
 -0.32988693 -0.96689767]
[ 0.55227176 -1.96426406 -0.62976376 -0.76557812 -0.14811843  0.90492003
  0.30568476 -1.26940026]


In [51]:
np.maximum(x, y)

array([ 0.55227176,  0.69948798,  0.11292401, -0.76557812,  0.3038585 ,
        0.90492003,  0.30568476, -0.96689767])

In [52]:
arr8 = np.arange(15).reshape((3, 5))
arr8

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [53]:
print(arr8.mean())
print(arr8.sum())
print(arr8.std())

print(arr8.mean(axis=1))
# taking the average along axis 1
print(arr8.sum(axis=0))
# taking the sum along axis 0

7.0
105
4.320493798938574
[ 2.  7. 12.]
[15 18 21 24 27]


# `NaN`

In [54]:
# NaN: means not a number
# it indicates missing value

print(np.nan)
print(type(np.nan))
# note the type of NaN is float.

nan
<class 'float'>


In [55]:
# NaN is a trouble maker
np.nan + 12, np.nan > 10, np.nan * 15

(nan, False, nan)

In [56]:
# np.isnan: to find whether there is NaN
np.isnan([12, 0.9, 5, np.nan])

array([False, False, False,  True])

In [57]:
# np.sum: to find out how many NaNs
np.isnan([12, 0.9, 5, np.nan]).sum()
# True value is equivalent to 1
# False value is equivalent to 0

np.int64(1)

In [58]:
# np.argwhere: to find out the indices of non-zero elements
np.argwhere(np.arange(4).reshape((2,2)))

array([[0, 1],
       [1, 0],
       [1, 1]])

In [59]:
# np.argwhere + np.isnan: to find out the indices of NaN
np.argwhere(np.isnan([12, 0.9, 5, np.nan]))

array([[3]])

In [60]:
# np.any: returns True if there is any True value in the array
np.any([1, 0, 0, 0])
# this is equivalent to testing
# np.any([True, False, False, False])

np.True_

In [61]:
# we use np.any to determine whether there is any NaN values
np.isnan([12, 0.9, 5, np.nan]).any()

np.True_

In [62]:
# np.all: returns True if all the array elements are True
np.all([1, 1, 1])

# this is equivalent to testing
# np.all([True, True, True])

np.True_

In [63]:
np.all([1, 1, 0])

np.False_

In [64]:
# we use np.all to determine whether all values are NaN
np.isnan([np.nan, np.nan, np.nan]).all()

np.True_

# File I/O

In [79]:
# Random 10x5 matrix of floats drawn uniformly from [0, 1).
values1 = np.random.random((10, 5))

# np.savetxt does not create missing directories, so make sure the output
# folder exists before writing (a no-op when it is already there).
out_path = Path('../data_out/nparray.txt')
out_path.parent.mkdir(parents=True, exist_ok=True)
np.savetxt(out_path, values1)

In [83]:
values2 = np.loadtxt('../data_out/nparray.txt', dtype = float)
print(values2)

[[0.35635898 0.76286615 0.53281217 0.36405572 0.41126815]
 [0.24624624 0.11203632 0.21297768 0.69958934 0.92405496]
 [0.89076414 0.35501525 0.82317797 0.38149612 0.03400909]
 [0.88722056 0.4971579  0.02616028 0.8918212  0.99259084]
 [0.28025065 0.44650545 0.85163306 0.47433037 0.27746328]
 [0.84165882 0.19512497 0.8563142  0.30779172 0.11482631]
 [0.23205323 0.1901128  0.89687523 0.71286647 0.27232044]
 [0.33140446 0.13344876 0.47179231 0.87548878 0.65723473]
 [0.73897465 0.83128974 0.78972567 0.75736278 0.5927053 ]
 [0.21856877 0.48801456 0.62094638 0.95552635 0.24609705]]
