In [3]:
import numpy as np

arr = np.array([1,2,3,4,5])

print(arr)

# Check version
print(np.__version__)

[1 2 3 4 5]
2.0.1


### Creating a NumPy ndarray Object

In [4]:
arr = np.array([1,2,3,4,5,6])
print(arr)
print(type(arr))

[1 2 3 4 5 6]
<class 'numpy.ndarray'>


In [6]:
arr = np.array((1,2,3,4,5,6,7))
print(arr)

[1 2 3 4 5 6 7]


#### 0-D Array
0-D arrays, or Scalars, are the elements in an array. Each value in an array is a 0-D array.

In [7]:
arr = np.array(42)
print(arr)

42


1-D Array
An array that has 0-D arrays as its elements is called uni-dimensional or 1-D array.

These are the most common and basic arrays.

In [8]:
arr = ([1,2,3,4,5])
print(arr)

[1, 2, 3, 4, 5]


2-D Array
An array that has 1-D arrays as its elements is called a 2-D array.
These are often used to represent matrix or 2nd order tensors.

In [10]:
arr = ([[1,2,3], [4,2,3]])
print(arr)

[[1, 2, 3], [4, 2, 3]]


3-D Array
An array that has 2-D arrays (matrices) as its elements is called 3-D array.

These are often used to represent a 3rd order tensor.

In [12]:
arr = ([[[0,1,3], [4,5,6]], [[9,7,9], [5,6,8]]])
print(arr)

[[[0, 1, 3], [4, 5, 6]], [[9, 7, 9], [5, 6, 8]]]


#### Checking the number of dimensions

In [17]:
a = np.array(43)
b = np.array([1,2,3])
c = np.array([[1,2,3], [4,5,6]])
d = np.array([[[1,2,3], [4,5,6]], [[5,6,7], [8,9,5]]])

print(a.ndim)
print(b.ndim)
print(c.ndim)
print(d.ndim)

0
1
2
3


### Higher Dimension Arrays

In [18]:
arr = np.array([1,2,3,4], ndmin=5)

print(arr)
print("Number of dimension: ", arr.ndim)

[[[[[1 2 3 4]]]]]
Number of dimension:  5


## Numpy Array Indexing

In [19]:
arr = np.array([1,2,3,4,5])

print(arr[1])

2


In [20]:
arr = np.array([1,2,3,4,5,6])
print(arr[2] + arr[3])

7


### Acess 2-D Array Elements

In [22]:
arr = np.array([[0,1,2], [4,7,8]])
print("2nd element on 1st row: ",arr[0,1])


2nd element on 1st row:  1


In [23]:
arr = np.array([[0,2,3,4,5,6,8], [4,5,6,7,4,8,9]])
print("5th element in second row: ", arr[1,6])

5th element in second row:  9


In [24]:
arr = np.array([[1,2,3,4], [5,6,7,8]])
print("last element in second row: ", arr[1,-1])

last element in second row:  8


### NumPy Array Slicing

Slicing in python means taking elements from one given index to another given index.

We pass slice instead of index like this: [start:end].

We can also define the step, like this: [start:end:step].

If we don't pass start its considered 0

If we don't pass end its considered length of array in that dimension

If we don't pass step its considered 1

In [30]:
arr = np.array([1,2,3,4,5,6,7,8])
print(arr[:5])
print(arr[::2])
print(arr[:3])
print(arr[-3:-1])
print(arr[1:5:2])

[1 2 3 4 5]
[1 3 5 7]
[1 2 3]
[6 7]
[2 4]


### Slicing 2-D Arrays

In [35]:
arr = np.array([[1,2,3,4,5], [6,7,8,9,10]])
# From the second element, slice elements from index 1 to index 4 (not included):
print(arr[1,1:4])

# From both elements, return index 2:
print(arr[0:2,2])

# From both elements, slice index 1 to index 4 (not included), this will return a 2-D array:
print(arr[0:2, 1:4])

[7 8 9]
[3 8]
[[2 3 4]
 [7 8 9]]


### NumPy Data Types

By default Python have these data types:

strings - used to represent text data, the text is given under quote marks. e.g. "ABCD"
integer - used to represent integer numbers. e.g. -1, -2, -3
float - used to represent real numbers. e.g. 1.2, 42.42
boolean - used to represent True or False.
complex - used to represent complex numbers. e.g. 1.0 + 2.0j, 1.5 + 2.5j

NumPy has some extra data types, and refer to data types with one character, like i for integers, u for unsigned integers etc.

Below is a list of all data types in NumPy and the characters used to represent them.

i - integer
b - boolean
u - unsigned integer
f - float
c - complex float
m - timedelta
M - datetime
O - object
S - string
U - unicode string
V - fixed chunk of memory for other type ( void )

In [36]:
arr = np.array([1,2,3,4])
print(arr.dtype)

int64


In [38]:
# Createting an array with a defined datatype

arr = np.array([1,2,3,4], dtype='S')
print(arr.dtype)

|S1


### Converting Data type on Existing Arrays

The best way to change the data type of an existing array, is to make a copy of the array with the astype() method.

The astype() function creates a copy of the array, and allows you to specify the data type as a parameter.

The data type can be specified using a string, like 'f' for float, 'i' for integer etc. or you can use the data type directly like float for float and int for integer.

In [41]:
# Change data type from float to integer by using 'i' as parameter value:

arr = np.array([1.2, 3.4, 6, 9])
print(arr.dtype)
print(arr)

new_arr = arr.astype('i')
print(new_arr.dtype)
print(new_arr)

float64
[1.2 3.4 6.  9. ]
int32
[1 3 6 9]


In [42]:
# Change data type from float to integer by using int as parameter value:
arr = np.array([1.1, 2.1, 3.1])

new_arr = arr.astype(int)

print(new_arr)
print(new_arr.dtype)

[1 2 3]
int64


In [43]:
# Change data type from integer to boolean:

arr = np.array([1,0,3])

new_arr = arr.astype(bool)

print(new_arr)
print(new_arr.dtype)

[ True False  True]
bool


### NumPy Array Copy vs View

The main difference between a copy and a view of an array is that the copy is a new array, and the view is just a view of the original array.

The copy owns the data and any changes made to the copy will not affect original array, and any changes made to the original array will not affect the copy.

The view does not own the data and any changes made to the view will affect the original array, and any changes made to the original array will affect the view.

In [47]:
arr = np.array([1,2,3,4,5,6,7])
new_arr_1 = arr.copy()
new_arr_1[0] = 234
print("This is copy() function")
print(arr)
print(new_arr_1)

print("\nThis is the view() function:")

new_arr_2 = arr.view()
new_arr_2[0] = 234
print(arr)
print(new_arr_2)

This is copy() function
[1 2 3 4 5 6 7]
[234   2   3   4   5   6   7]

This is the view() function:
[234   2   3   4   5   6   7]
[234   2   3   4   5   6   7]


### Check if Array Owns its Data

In [49]:
arr = np.array([1,2,3,4])

x = arr.copy()
y = arr.view()
# Every NumPy array has the attribute base that returns None if the array owns the data.
# Otherwise, the base  attribute refers to the original object.
print(x.base)
print(y.base)

None
[1 2 3 4]


### NumPy Array Shape

In [51]:
arr = np.array([[1,3,4,5],[5,6,7,8]])
print(arr.shape)

(2, 4)


In [54]:
arr = np.array([1,2,3,4], ndmin=5)

print(arr)
print("Shape of array: ", arr.shape)

[[[[[1 2 3 4]]]]]
Shape of array:  (1, 1, 1, 1, 4)


### NumPy Array Reshaping

Reshaping means changing the shape of an array.

The shape of an array is the number of elements in each dimension.

By reshaping we can add or remove dimensions or change number of elements in each dimension.

### Reshape from 1-D to 2-D

In [57]:
arr = np.array([1,2,3,4,5,6,7,8,9,10,11,12])


# Reshape From 1-D to 2-D
new_arr = arr.reshape(4,3)
print(new_arr)

# Reshape From 1-D to 3-D
new_arr_1 = arr.reshape(2,3,2)
print(new_arr_1)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]


### Reshape from 1-D to 3-D

Convert the following 1-D array with 12 elements into a 3-D array.

The outermost dimension will have 2 arrays that contains 3 arrays, each with 2 elements:

In [58]:
arr = np.array([1,2,3,4,5,6,7,8,9,10,11,12])

# Reshape From 1-D to 3-D
new_arr_1 = arr.reshape(2,3,2)
print(new_arr_1)

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]]


### Unknown Dimension

You are allowed to have one "unknown" dimension.

Meaning that you do not have to specify an exact number for one of the dimensions in the reshape method.

Pass -1 as the value, and NumPy will calculate this number for you.

In [59]:
arr = np.array([1,2,3,4,5,6,7,8])

newarr = arr.reshape(2,2,-1)
print(new_arr)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


### Flattening the arrays

Flattening array means converting a multidimensional array into a 1D array.

We can use reshape(-1) to do this.

In [60]:
arr = np.array([[1, 2, 3], [4, 5, 6]])

new_arr = arr.reshape(-1)

print(new_arr)

[1 2 3 4 5 6]


### NumPy Array Iterating

#### Iterating Arrays

Iterating means going through elements one by one.

As we deal with multi-dimensional arrays in numpy, we can do this using basic for loop of python.

If we iterate on a 1-D array it will go through each element one by one.

In [61]:
arr = np.array([1,2,3])

for x in arr:
    print(x)

1
2
3


#### Iterating 2-D Arrays

In [62]:
arr = np.array([[1,2,3], [4,5,6]])

for x in arr:
    print(x)

[1 2 3]
[4 5 6]


If we iterate on a n-D array it will go through n-1th dimension one by one.

To return the actual values, the scalars, we have to iterate the arrays in each dimension.

In [65]:
arr = np.array([[1,2,3], [4,5,6]])

nums = []

for x in arr:
    for y in x:
        print(y)
        nums.append(int(y))
print(nums)


1
2
3
4
5
6
[1, 2, 3, 4, 5, 6]


### Iterating 3-D Arrays

In [66]:
arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

for x in arr:
    print(x)

[[1 2 3]
 [4 5 6]]
[[ 7  8  9]
 [10 11 12]]


In [67]:
arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

for x in arr:
    for y in x:
        for z in y:
            print(z)

1
2
3
4
5
6
7
8
9
10
11
12


### Iterating Arrays Using nditer()

The function nditer() is a helping function that can be used from very basic to very advanced iterations. It solves some basic issues which we face in iteration, lets go through it with examples.

Iterating on Each Scalar Element
In basic for loops, iterating through each scalar of an array we need to use n for loops which can be difficult to write for arrays with very high dimensionality.

In [68]:
arr = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

for x in np.nditer(arr):
    print(x)

1
2
3
4
5
6
7
8


### Iterating Array With Different Data Types

We can use op_dtypes argument and pass it the expected datatype to change the datatype of elements while iterating.

NumPy does not change the data type of the element in-place (where the element is in array) so it needs some other space to perform this action, that extra space is called buffer, and in order to enable it in nditer() we pass flags=['buffered'].

In [69]:
arr = np.array([1, 2, 3])

for x in np.nditer(arr, flags=['buffered'], op_dtypes=['S']):
    print(x)

np.bytes_(b'1')
np.bytes_(b'2')
np.bytes_(b'3')


### Iterating With Different Step Size

In [70]:
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

for x in np.nditer(arr[:, ::2]):
    print(x)

1
3
5
7


### Enumerated Iteration Using ndenumerate()

Enumeration means mentioning sequence number of somethings one by one.

Sometimes we require corresponding index of the element while iterating, the ndenumerate() method can be used for those usecases.

In [71]:
arr = np.array([1, 2, 3])

for idx, x in np.ndenumerate(arr):
    print(idx,x)

(0,) 1
(1,) 2
(2,) 3


In [72]:
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

for idx, x in np.ndenumerate(arr):
    print(idx, x)

(0, 0) 1
(0, 1) 2
(0, 2) 3
(0, 3) 4
(1, 0) 5
(1, 1) 6
(1, 2) 7
(1, 3) 8


### NumPy Joining Array

Joining NumPy Arrays
Joining means putting contents of two or more arrays in a single array.

In SQL we join tables based on a key, whereas in NumPy we join arrays by axes.

We pass a sequence of arrays that we want to join to the concatenate() function, along with the axis. If axis is not explicitly passed, it is taken as 0.

In [73]:
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])

arr = np.concatenate((arr1, arr2))
print(arr)

[1 2 3 4 5 6]


In [75]:
arr1 = np.array([[1,2,3], [4,5,6]])
arr2 = np.array([[3,4,5], [4,7,8]])

arr = np.concatenate((arr1, arr2), axis=1)
print(arr)

[[1 2 3 3 4 5]
 [4 5 6 4 7 8]]


### NumPy Splitting Array

#### Splitting NumPy Arrays

Splitting is reverse operation of Joining.

Joining merges multiple arrays into one and Splitting breaks one array into multiple.

We use array_split() for splitting arrays, we pass it the array we want to split and the number of splits.

In [77]:
arr = np.array([1,2,3,4,5,6])

n = np.array_split(arr, 3)
print(n)


[array([1, 2]), array([3, 4]), array([5, 6])]


In [79]:
# If the array has less elements than required, it will adjust from the end accordingly.

arr = np.array([1,2,3,4,5,6])

n1 = np.array_split(arr, 4)
print(n1)

[array([1, 2]), array([3, 4]), array([5]), array([6])]


### Split Into Arrays

The return value of the array_split() method is an array containing each of the split as an array.

If you split an array into 3 arrays, you can access them from the result just like any array element:

In [80]:
arr = np.array([1, 2, 3, 4, 5, 6])

n2 = np.array_split(arr, 3)

print(n2[0])
print(n2[1])
print(n2[2])

[1 2]
[3 4]
[5 6]


#### Splitting 2-D Arrays

Use the same syntax when splitting 2-D arrays.

Use the array_split() method, pass in the array you want to split and the number of splits you want to do.

In [81]:
arr = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])

n3 = np.array_split(arr, 3)
print(n3)

[array([[1, 2],
       [3, 4]]), array([[5, 6],
       [7, 8]]), array([[ 9, 10],
       [11, 12]])]


In [82]:
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])

n4 = np.array_split(arr, 3)
print(n4)

[array([[1, 2, 3],
       [4, 5, 6]]), array([[ 7,  8,  9],
       [10, 11, 12]]), array([[13, 14, 15],
       [16, 17, 18]])]


In [83]:
# Split the 2-D array into three 2-D arrays along rows.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])

n5 = np.array_split(arr, 3, axis=1)
print(n5)

[array([[ 1],
       [ 4],
       [ 7],
       [10],
       [13],
       [16]]), array([[ 2],
       [ 5],
       [ 8],
       [11],
       [14],
       [17]]), array([[ 3],
       [ 6],
       [ 9],
       [12],
       [15],
       [18]])]


An alternate solution is using hsplit() opposite of hstack()

In [84]:
# Use the hsplit() method to split the 2-D array into three 2-D arrays along rows.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])

n6 = np.hsplit(arr, 3)
print(n6)

[array([[ 1],
       [ 4],
       [ 7],
       [10],
       [13],
       [16]]), array([[ 2],
       [ 5],
       [ 8],
       [11],
       [14],
       [17]]), array([[ 3],
       [ 6],
       [ 9],
       [12],
       [15],
       [18]])]


Note: Similar alternates to vstack() and dstack() are available as vsplit() and dsplit().

### NumPy Searching Arrays

#### Searching Arrays

You can search an array for a certain value, and return the indexes that get a match.

To search an array, use the where() method.

In [85]:
arr = np.array([1, 2, 3, 4, 5, 4, 4])

x = np.where(arr == 4)

print(x)

(array([3, 5, 6]),)


In [86]:
# Find the indexes where the values are even:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])

x = np.where(arr%2 == 0)
print(x)

(array([1, 3, 5, 7]),)


In [87]:
# Find the indexes where the values are odd:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])

x = np.where(arr%2 == 1)
print(x)

(array([0, 2, 4, 6]),)


### Search Sorted

There is a method called searchsorted() which performs a binary search in the array, and returns the index where the specified value would be inserted to maintain the search order.

In [88]:
arr = np.array([6, 7, 8, 9])
#Example explained: The number 7 should be inserted on index 1 to remain the sort order.
#The method starts the search from the left and returns the first index where the number 7 is no longer larger than the next value.

x = np.searchsorted(arr, 7)
print(x)

1


#### Search From the Right Side

In [90]:
arr = np.array([6, 7, 8, 9])
#Example explained: The number 7 should be inserted on index 2 to remain the sort order.
#The method starts the search from the right and returns the first index where the number 7 is no longer less than the next value.

x = np.searchsorted(arr, 7, side='right')
print(x)

2


#### Multiple Values

In [92]:
arr = np.array([1, 3, 5, 7])

#The return value is an array: [1 2 3] containing the three indexes where 2, 4, 6 would be inserted in the original array to maintain the order.

x = np.searchsorted(arr, [2, 4, 6])

print(x)

[1 2 3]


NumPy Sorting Arrays

In [93]:
arr = np.array([3, 2, 0, 1])

print(np.sort(arr))

[0 1 2 3]


In [94]:
arr = np.array(['banana', 'cherry', 'apple'])

print(np.sort(arr))

['apple' 'banana' 'cherry']


In [95]:
arr = np.array([True, False, True])

print(np.sort(arr))

[False  True  True]


Sorting a 2-D Array

In [96]:
arr = np.array([[3, 2, 4], [5, 0, 1]])

print(np.sort(arr))

[[2 3 4]
 [0 1 5]]


NumPy Filter Array

In [97]:
arr = np.array([41, 42, 43, 44])

x = [True, False, True, False]

newarr = arr[x]

print(newarr)

[41 43]


In [98]:
arr = np.array([41, 42, 43, 44])

# Create an empty list
filter_arr = []

# go through each element in arr
for element in arr:
  # if the element is higher than 42, set the value to True, otherwise False:
  if element > 42:
    filter_arr.append(True)
  else:
    filter_arr.append(False)

newarr = arr[filter_arr]

print(filter_arr)
print(newarr)

[False, False, True, True]
[43 44]
