In [1]:
import numpy as np
# Build a 1d ndarray from a plain Python list
list1 = [0, 1, 2, 3, 4]
arr1d = np.array(list1)

# Print the type of the resulting object (the array contents are not printed here)
print(type(arr1d))

<class 'numpy.ndarray'>


#### The key difference between an array and a list is that arrays are designed to handle vectorized operations, while Python lists are not.<br/>
That means that if you apply a function, it is performed on every item in the array rather than on the whole array object.

In [2]:
# Add 2 to each element of arr1d (the scalar is applied element-wise).
# The result is only displayed; arr1d is not reassigned.
arr1d + 2

array([2, 3, 4, 5, 6])

#### That is not possible with a list, but you can do it on an ndarray.

#### Another characteristic is that once a numpy array is created, you cannot increase its size. To do so, you will have to create a new array. Extending the size, by contrast, is natural for a list.

#### You can also pass a list of lists to create a matrix like a 2d array.

In [3]:
# Build a 2d array (a matrix) from a list of equal-length lists:
# each inner list becomes one row
list2 = [
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
]
arr2d = np.array(list2)
print(arr2d)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


#### You may also specify the datatype by setting the dtype argument. Some of the most commonly used numpy dtypes are: 'float', 'int', 'bool', 'str' and 'object'.

<br/>To control the memory allocations you may choose to use one of ‘float32’, ‘float64’, ‘int8’, ‘int16’ or ‘int32’.

In [4]:
# Create a float 2d array: same nested list as before, but the dtype
# argument makes numpy store the items as floats
arr2d_f = np.array(list2, dtype='float')
arr2d_f

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

#### The decimal point after each number is indicative of the float datatype. <br/>You can also convert it to a different datatype using the astype() method.

In [5]:
# Convert to 'int' datatype.
# The converted array is only displayed here; arr2d_f is not reassigned
# and keeps its float dtype.
arr2d_f.astype('int')

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [6]:
# Convert to int then to str datatype by chaining two astype() calls
# (the items end up as strings such as '0', '1', ...)
arr2d_f.astype('int').astype('str')

array([['0', '1', '2'],
       ['3', '4', '5'],
       ['6', '7', '8']], dtype='<U11')

#### Unlike a list, a numpy array must have all of its items be of the same data type. This is another significant difference.

#### However, if you are uncertain about what datatype your array will hold or if you want to hold characters and numbers in the same array, you can set the dtype as 'object'.

In [7]:
# Create a boolean array: 0 becomes False, the non-zero values become True.
# NOTE: despite the '2d' in its name, arr2d_b is built from a flat list and is 1d.
arr2d_b = np.array([1, 0, 10], dtype='bool')
arr2d_b

array([ True, False,  True])

In [8]:
# Create an object array to hold numbers as well as strings.
# With dtype='object' the int 1 and the str 'a' are both kept as they are.
arr1d_obj = np.array([1, 'a'], dtype='object')
arr1d_obj

array([1, 'a'], dtype=object)

In [9]:
# Convert an array back to a list: tolist() returns a plain Python list
arr1d_obj.tolist()

[1, 'a']

#### To summarise, the main differences with python lists are:<br/>
1) Arrays support vectorised operations, while lists don’t.<br/>
2) Once an array is created, you cannot change its size. You will have to create a new array or overwrite the existing one. <br/>
3) Every array has one and only one dtype. All items in it should be of that dtype.<br/>
4) An equivalent numpy array occupies much less space than a python list of lists.

### How to inspect the size and shape of a numpy array?

<li>If it is a 1D or a 2D array or more. (ndim)</li>

<li>How many items are present in each dimension (shape)</li>

<li>What is its datatype (dtype)</li>

<li>What is the total number of items in it (size)</li>

<li>Samples of first few items in the array (through indexing)</li>

In [10]:
# Create a float 2d array with 3 rows and 4 columns.
# Note: this rebinds list2, which previously held the 3x3 nested list.
list2 = [
    [1, 2, 3, 4],
    [3, 4, 5, 6],
    [5, 6, 7, 8],
]
arr2 = np.array(list2, dtype='float')
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [11]:
# shape: number of items along each dimension, here (rows, columns)
print('Shape: ', arr2.shape)

# dtype: the single datatype shared by all items
print('Datatype: ', arr2.dtype)

# size: total number of items in the array
print('Size: ', arr2.size)

# ndim: number of dimensions
print('Num Dimensions: ', arr2.ndim)

Shape:  (3, 4)
Datatype:  float64
Size:  12
Num Dimensions:  2


### How to extract specific items from an array?

In [12]:
# Display arr2 before extracting items from it
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [13]:
# Extract the first 2 rows and the first 2 columns with one row,column slice
arr2[:2, :2]
# The same indexing applied to the nested list does not work:
# list2[:2, :2]  # error

array([[1., 2.],
       [3., 4.]])

#### Additionally, numpy arrays support boolean indexing.

<br/>A boolean index array is of the same shape as the array-to-be-filtered and it contains only True and False values. The values corresponding to True positions are retained in the output.

In [14]:
# Display arr2 again; the condition in the next cell is applied to these values
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [15]:
# Get the boolean output by applying the condition to each element.
# b has the same shape as arr2 and is True where the item is greater than 4.
b = arr2 > 4
b

array([[False, False, False, False],
       [False, False,  True,  True],
       [ True,  True,  True,  True]])

In [16]:
# Keep only the items of arr2 at the True positions of b; the result is a flat 1d array
arr2[b]

array([5., 6., 5., 6., 7., 8.])

### How to reverse the rows and the whole array?

In [17]:
# Display arr2 in its original order, for comparison with the reversed versions
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [18]:
# Flip the order of the rows (step of -1 on the first axis);
# the columns within each row are left as they are
arr2[::-1, :]

array([[5., 6., 7., 8.],
       [3., 4., 5., 6.],
       [1., 2., 3., 4.]])

In [19]:
# Reverse the row and column positions (a step of -1 on both axes)
arr2[::-1, ::-1]

array([[8., 7., 6., 5.],
       [6., 5., 4., 3.],
       [4., 3., 2., 1.]])

### How to represent missing values and infinity?

In [20]:
# Insert a nan and an inf.
# These assignments modify arr2 in place.
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., nan, inf,  6.],
       [ 5.,  6.,  7.,  8.]])

In [21]:
# Replace nan and inf with -1. Don't use arr2 == np.nan to locate the nan items.
# np.isfinite is False exactly where an item is nan or +/-inf,
# so its negation marks every position that has to be replaced.
missing_bool = ~np.isfinite(arr2)
arr2[missing_bool] = -1
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

### How to compute mean, min, max on the ndarray?

In [22]:
# mean, max and min, each computed over all items of the array (no axis is given)
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

Mean value is:  3.5833333333333335
Max value is:  8.0
Min value is:  -1.0


In [23]:
# Minimum along each axis, using the ndarray method:
# axis=0 gives one value per column, axis=1 gives one value per row
print("Column wise minimum: ", arr2.min(axis=0))
print("Row wise minimum: ", arr2.min(axis=1))

Column wise minimum:  [ 1. -1. -1.  4.]
Row wise minimum:  [ 1. -1.  5.]


In [24]:
# Running total of the items; with no axis given the result is a flat 1d array
arr2.cumsum()

array([ 1.,  3.,  6., 10., 13., 12., 11., 17., 22., 28., 35., 43.])

### How to create a new array from an existing array?

#### If you just assign a portion of an array to another array, the new array you just created actually refers to the parent array in memory.

That means, if you make any changes to the new array, it will reflect in the parent array as well.

<br/>So to avoid disturbing the parent array, you need to make a copy of it using copy(). All numpy arrays come with the copy() method.<br/>

In [25]:
# Display arr2 (the nan and inf items were replaced by -1 earlier)
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [26]:
# Slice the top-left 2x2 corner of arr2 into arr2a.
# The slice does not get its own data: arr2a refers to arr2's memory.
arr2a = arr2[:2, :2]
arr2a[0, 0] = 100  # written through to the parent, so arr2 shows 100 as well
print(arr2)
print(arr2a)

[[100.   2.   3.   4.]
 [  3.  -1.  -1.   6.]
 [  5.   6.   7.   8.]]
[[100.   2.]
 [  3.  -1.]]


In [27]:
# Take an independent copy of the top-left 2x2 corner of arr2
arr2b = arr2[:2, :2].copy()
arr2b[0, 0] = 101  # only the copy is modified; arr2 is left as it was
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

### Reshaping and Flattening Multidimensional arrays

#### Reshaping is changing the arrangement of items so that the shape of the array changes while the total number of items stays the same.
<br/>Flattening, however, always converts a multi-dimensional array to a flat 1d array, and not to any other shape.

In [28]:
# Display arr2 before reshaping it (its first item is now 100)
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

#### First, let’s reshape the arr2 array from 3×4 to 4×3 shape.

In [29]:
# Reshape a 3x4 array to 4x3 array.
# The 12 items keep their row-by-row order; the result is only displayed,
# arr2 itself is not reassigned.
arr2.reshape(4, 3)

array([[100.,   2.,   3.],
       [  4.,   3.,  -1.],
       [ -1.,   6.,   5.],
       [  6.,   7.,   8.]])

### What is the difference between flatten() and ravel()?

#### The difference between ravel() and flatten() is that the new array created using ravel() is actually a reference to the parent array, so any changes to the new array will affect the parent as well. In exchange, ravel() is memory efficient since it does not create a copy.

In [30]:
# Display arr2; it still has its 3x4 shape
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [31]:
# Flatten it to a 1d array (the items are laid out row by row)
arr2.flatten()

array([100.,   2.,   3.,   4.,   3.,  -1.,  -1.,   6.,   5.,   6.,   7.,
         8.])

In [32]:
# Changing the flattened array does not change parent,
# because flatten() hands back a copy of the data
b1 = arr2.flatten()  
b1[0] = 200  # changing b1 does not affect arr2
arr2

array([[100.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

In [33]:
# Changing the raveled array changes the parent also.
# Here b2 shares its data with arr2 (ravel() only makes a copy when it has to).
b2 = arr2.ravel()  
b2[0] = 101  # changing b2 changes arr2 also
arr2

array([[101.,   2.,   3.,   4.],
       [  3.,  -1.,  -1.,   6.],
       [  5.,   6.,   7.,   8.]])

### How to create sequences, repetitions and random numbers using numpy?

#### The np.arange() function comes in handy to create customised number sequences as ndarray.

In [34]:
# Lower limit is 0 by default; the upper limit itself is not included
print(np.arange(5))  

# 0 to 9
print(np.arange(0, 10))  

# 0 up to (not including) 10 with step of 2
print(np.arange(0, 10, 2))  

# 10 to 1, decreasing order (negative step)
print(np.arange(10, 0, -1))

[0 1 2 3 4]
[0 1 2 3 4 5 6 7 8 9]
[0 2 4 6 8]
[10  9  8  7  6  5  4  3  2  1]


#### Say you want to create an array of exactly 10 numbers between 1 and 50. Can you compute what the step value would be?

<br/>Well, I am going to use the np.linspace() instead.

In [35]:
# Start at 1 and end at 50 (both end points are part of the result), 10 values in total.
# dtype=int makes the result an integer array.
np.linspace(start=1, stop=50, num=10, dtype=int)

array([ 1,  6, 11, 17, 22, 28, 33, 39, 44, 50])

#### Notice that, since we explicitly forced the dtype to be int, the numbers are not equally spaced: the fractional part of each value is dropped when it is converted to an integer.

#### Similar to np.linspace(), there is also np.logspace() which rises in a logarithmic scale. In np.logspace(), the sequence actually starts at base^start and ends at base^stop, with a default base value of 10.

In [36]:
# Limit the number of digits after the decimal to 2.
# NOTE: this is a global print setting, so it also affects arrays printed by later cells.
np.set_printoptions(precision=2)  

# Start at 10^1 and end at 10^50, 10 values in total
np.logspace(start=1, stop=50, num=10, base=10) 

array([1.00e+01, 2.78e+06, 7.74e+11, 2.15e+17, 5.99e+22, 1.67e+28,
       4.64e+33, 1.29e+39, 3.59e+44, 1.00e+50])

#### The np.zeros() and np.ones() functions let you create arrays of desired shape where all the items are either 0’s or 1’s.

In [37]:
# 2x2 array filled with zeros (the items are floats)
np.zeros([2,2])

array([[0., 0.],
       [0., 0.]])

In [38]:
# 2x2 array filled with ones (the items are floats)
np.ones([2,2])

array([[1., 1.],
       [1., 1.]])

### How to create repeating sequences?

#### np.tile() will repeat a whole list or array n times. <br/>Whereas, np.repeat() repeats each item n times.

In [39]:
# A plain Python list is accepted as input; both calls return an ndarray
a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 2))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 2))

Tile:    [1 2 3 1 2 3]
Repeat:  [1 1 2 2 3 3]


### How to generate random numbers?

In [40]:
# Random numbers between [0,1) of shape 2,2
# (rand takes the dimensions as separate arguments)
print(np.random.rand(2,2))

[[0.47 0.98]
 [0.33 0.41]]


In [41]:
# Normal distribution with mean=0 and variance=1 of shape 2,2
# (randn takes the dimensions as separate arguments)
print(np.random.randn(2,2))

[[ 1.4   1.25]
 [ 1.23 -2.14]]


In [42]:
# Random integers between [0, 10) of shape 2,2 (the upper bound 10 is never drawn)
print(np.random.randint(0, 10, size=[2,2]))

[[5 2]
 [6 5]]


In [43]:
# One random number between [0,1); with no size given a single float is returned
print(np.random.random())

0.4870076061639439


In [44]:
# Random numbers between [0,1) of shape 2,2; here the shape is passed via the size argument
print(np.random.random(size=[2,2]))

[[0.52 0.42]
 [0.58 0.33]]


In [45]:
# Pick 10 items from a given list, with equal probability.
# More items are requested than the list holds, so items repeat in the result.
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10))  

['e' 'o' 'u' 'o' 'o' 'u' 'i' 'u' 'o' 'i']


In [46]:
# Pick 10 items from a given list with a predefined probability 'p'.
# p holds one probability per item, in the same order as the list; the five values add up to 1.
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10, p=[0.3, .1, 0.1, 0.4, 0.1]))  # picks more o's

['o' 'o' 'i' 'a' 'o' 'a' 'a' 'o' 'o' 'u']


#### If you want to repeat the same set of random numbers every time, you need to set the seed or the random state. The seed can be any value. The only requirement is you must set the seed to the same value every time you want to generate the same set of random numbers.

<br/>Once np.random.RandomState is created, all the functions of the np.random module become available to the created randomstate object.

In [47]:
# Create the random state, seeded with 100
rn = np.random.RandomState(100)

# Create random numbers between [0,1) of shape 2,2,
# drawn from the rn object rather than from np.random directly
print(rn.rand(2,2))

[[0.54 0.28]
 [0.42 0.84]]


In [48]:
# Set the random seed used by the np.random module-level functions
np.random.seed(100)

# Create random numbers between [0,1) of shape 2,2
# (same seed value as the RandomState(100) example, and the same numbers are printed)
print(np.random.rand(2,2))

[[0.54 0.28]
 [0.42 0.84]]


### How to get the unique items and the counts?

#### The np.unique() method can be used to get the unique items. If you want the repetition counts of each item, set the return_counts parameter to True.

In [49]:
# Seed first so the same integers are drawn on every run,
# then draw 10 random integers from [0, 10)
np.random.seed(100)
arr_rand = np.random.randint(low=0, high=10, size=10)
print(arr_rand)

[8 8 3 7 7 0 4 2 5 2]


In [50]:
# Get the unique items and their counts.
# With return_counts=True two arrays come back: counts[i] is the number of
# times uniqs[i] occurs in arr_rand.
uniqs, counts = np.unique(arr_rand, return_counts=True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)

Unique items :  [0 2 3 4 5 7 8]
Counts       :  [1 2 1 1 1 2 2]
