# Numpy

NumPy is one of the most important Python libraries. We're going
to see why, along with examples of how to use it and what it is used for.

It lets you do maths on numeric arrays at high speed, instead of using
normal Python lists; it also has a really wide set of tools to perform
operations efficiently and with less code.

Also, many of Python's data and scientific libraries are built on NumPy, for example: Pandas.

In [1]:
# By convention the library is imported under the alias "np"
import numpy as np

# The function "array()" builds an ndarray from a Python sequence. A flat list
# such as [7, 77, 777] gives an array of rank 1 (one dimension) with three
# elements; rows and columns only appear once lists are nested inside each other.
an_array = np.array([7, 77, 777])
# The resulting type is "numpy.ndarray", which stands for N-dimensional array.
print(type(an_array))

<class 'numpy.ndarray'>


In [2]:
# The shape of an array is stored as a plain Python tuple: print its type first, then the shape itself.
print (type(an_array.shape))
# Since there is only 1 dimension, the tuple holds a single number: how many elements the array has.
print(an_array.shape)
# This is how you create a two-dimensional array; every inner list (row) must have the same length.
a_two_dimension_array = np.array([[1, 2, 3], [4, 5, 6]])
print("Shape of the array:", a_two_dimension_array.shape)
print(a_two_dimension_array)

<class 'tuple'>
(3,)
Shape of the array: (2, 3)
[[1 2 3]
 [4 5 6]]


In [3]:
# How to access the elements: read them by zero-based index...
print(an_array[0])
print(an_array[1])
print(an_array[2])
# ...and assign to an index to overwrite that element in place
an_array[2] = 200
print(an_array[2])

7
77
777
200


# How to create a multidimensional array

In [4]:
# Three nested lists of three numbers each give a 3x3 array, written one row per line
multidimensional_array = np.array(
    [
        [11, 12, 13],
        [22, 23, 24],
        [35, 36, 37],
    ]
)
print("Shape of this array is:", multidimensional_array.shape)

Shape of this array is: (3, 3)


In [5]:
# A two-dimensional array is indexed as [row, column]; [2, 0] is the third row, first column
print("This is how I access a value of this two-dimension array:",multidimensional_array[2, 0])
print("This is how I print the full array")
print(multidimensional_array)

This is how I access a value of this two-dimension array: 35
This is how I print the full array
[[11 12 13]
 [22 23 24]
 [35 36 37]]


$ I^{3 \times 3} =
\left( \begin{array}{ccc}
 11 & 12 & 13 \\
 22 & 23 & 24 \\
 35 & 36 & 37 \\
\end{array} \right) $

# Multiple ways of creating nd arrays

In [6]:
# The function "zeros()" creates an array filled with zeros; its argument is the shape tuple (here 1080 rows x 1920 columns)
i1 = np.zeros((1080, 1920))
print(i1)
# How can we add colour information? With a third dimension holding one value per channel (red, green, blue, alpha, ultraviolet)
i1 = np.zeros((1080, 1920, 5))
print (i1)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 ...

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  ...
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]


In [7]:
"""
The function "full()" creates an array in which every position holds the same value; the first
argument is the shape of the array and the second is the value you want to put in each position
"""
i2 = np.full((2,2), 9.1)
print(i2)

[[9.1 9.1]
 [9.1 9.1]]


In [8]:
# The function "eye()" creates an identity matrix; the two arguments are the number of rows and of columns
i3 = np.eye(7,7)
print(i3)

[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


In [9]:
# The function "ones()" creates an array full of ones; the shape (10, 7, 2) gives 10 blocks of 7 rows x 2 columns
i4 = np.ones((10,7,2))
print (i4)

[[[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]]


In [10]:
"""
We can also create an array full of random floats in the half-open interval [0.0, 1.0); we're using the
function "random()", which lives in numpy's "random" module.
"""
i5 = np.random.random((5,5))
print(i5)

[[0.82539109 0.29124046 0.86896133 0.50633921 0.28544349]
 [0.08476579 0.33007351 0.51807201 0.38816061 0.04567514]
 [0.18234757 0.45737601 0.54162419 0.28621375 0.28030993]
 [0.37890645 0.97797965 0.56779497 0.07695961 0.16208921]
 [0.48542643 0.73219144 0.02494867 0.83697202 0.14733776]]


# Array with indexes
Sometimes it is useful to prepare a series of indexes in one array and then use them to select or modify elements of another array

In [11]:
# A new two-dimensional array (4 rows x 3 columns) that the index arrays will be applied to
array_a = np.array(
    [
        [11, 12, 13],
        [21, 22, 23],
        [31, 32, 33],
        [41, 42, 43],
    ]
)
print(array_a)
print(array_a.shape)

[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]
(4, 3)


In [12]:
# Let's create 2 arrays of integers that we'll use as indexes: "columns" holds one column index per row
columns = np.array([0, 1, 2, 0])
# The function "arange()" generates consecutive integers from 0 to n-1 (in this example n = 4, so 0 to 3)
rows = np.arange(4)
print("We choose this indexes for rows:",rows)
print("We choose this indexes for columns:",columns)

We choose this indexes for rows: [0 1 2 3]
We choose this indexes for columns: [0 1 2 0]


In [13]:
# Pairing both index arrays element by element gives the (row, column) positions they describe
for row,col in zip(rows,columns):
    print("(",row,",",col,")")

( 0 , 0 )
( 1 , 1 )
( 2 , 2 )
( 3 , 0 )


In [14]:
# Indexing with both index arrays at once picks one element per pair: array_a[rows[i], columns[i]] for every i
print("The values contained in the indexes are:",array_a[rows, columns])

The values contained in the indexes are: [11 22 33 41]


In [15]:
# Let's operate on the selected values: the same index arrays also work on the left-hand side,
# so only the selected elements get 1000 added (the array is modified in place)
print(array_a)
array_a[rows, columns] += 1000
print("\nNew array:\n")
print(array_a)

[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]

New array:

[[1011   12   13]
 [  21 1022   23]
 [  31   32 1033]
 [1041   42   43]]


# Boolean indexing
Boolean indexing lets us select and change the elements that meet a condition

In [16]:
# We'll create a 3x2 array (3 rows, 2 columns)
array_b = np.array([[11,12], [21,22], [31, 32]])
print(array_b)
print("_________")
print(array_b.shape)

[[11 12]
 [21 22]
 [31 32]]
_________
(3, 2)


In [17]:
# Creating a filter is really easy with NumPy: comparing an array with a number
# yields a boolean array of the same shape (a "mask").
# It is named "mask" rather than "filter" so Python's built-in filter() is not shadowed.
mask = (array_b > 15)
print(mask)
# Indexing with the mask gives a new one-dimensional array with only the elements where it is True
filtered = array_b[mask]
print(filtered)

[[False False]
 [ True  True]
 [ True  True]]
[21 22 31 32]


In [18]:
# The condition can also be written directly inside the brackets, so there is no need to store the
# filter in a variable first; here we select the even numbers
print(array_b[(array_b % 2 == 0)])

[12 22 32]


##### The main use of this type of filter is to modify the values inside the array that meet certain criteria

In [19]:
# Show the full array, then the even elements the condition selects,
# then add 1000 to only those elements and show the array again
print("Full array:\n",array_b)
print("\nFiltered array:\n", array_b[array_b % 2 == 0])
array_b[array_b % 2 == 0] += 1000
print("\nArray after changing only the filtered values:\n",array_b)

Full array:
 [[11 12]
 [21 22]
 [31 32]]

Filtered array:
 [12 22 32]

Array after changing only the filtered values:
 [[  11 1012]
 [  21 1022]
 [  31 1032]]


# Slicing arrays
Slicing an array is useful for isolating a "subregion" of it as another ndarray

In [20]:
# We create a normal array first
# A 4x6 array to slice, written one row per line
to_slice_array = np.array(
    [
        [11, 12, 13, 14, 15, 16],
        [21, 22, 23, 24, 25, 26],
        [31, 32, 33, 34, 35, 36],
        [41, 42, 43, 44, 45, 46],
    ]
)
print(to_slice_array)
# Cut out a 2x2 region: rows come first, columns second, and the end of each range is excluded
a_slice = to_slice_array[0:2, 1:3]
print("\nArray sliced:\n", a_slice)

[[11 12 13 14 15 16]
 [21 22 23 24 25 26]
 [31 32 33 34 35 36]
 [41 42 43 44 45 46]]

Array sliced:
 [[12 13]
 [22 23]]


In [21]:
# Let's print the value in row 0, column 0 of the slice; a slice is indexed from 0 again
print("Slice:",a_slice[0,0])
# Why 12? Because the slice starts at row 0, column 1 of the original array, so that element is now at position [0, 0].

Slice: 12


In [22]:
# Modify an element through the slice and print it back
a_slice[0,0] += 100
print("Slice:", a_slice[0,0])

Slice: 112


In [23]:
# Why did changing the slice affect the original array? Because a slice is a view onto the same data; it isn't a new array
print("Original array:", to_slice_array[0,1])

Original array: 112


In [24]:
# We confirm here that a slice is just a view of the original array: the change shows up in the
# original, and the slice itself is still an ordinary ndarray.
print(to_slice_array)
type(a_slice)

[[ 11 112  13  14  15  16]
 [ 21  22  23  24  25  26]
 [ 31  32  33  34  35  36]
 [ 41  42  43  44  45  46]]


numpy.ndarray

##### Sooo, what if we want a new array from a slice?

In [25]:
# Passing the slice to "array()" copies its data into a new, independent array
new_array = np.array(a_slice)
print(new_array)

[[112  13]
 [ 22  23]]


In [26]:
# A plain integer index on the second axis selects column 1 of every row;
# the result is a rank-1 array of shape (4,)
row_rank1 = to_slice_array[:, 1]
print(row_rank1, row_rank1.shape)

[112  22  32  42] (4,)


# Data types of arrays in numpy

In [27]:
# In this case NumPy infers the data type from the values by itself: whole numbers get the
# platform's default integer type, so the exact name printed may differ between machines
ex1 = np.array([11, 12])
print(ex1.dtype)

int32


In [28]:
# NumPy infers a floating-point data type here because the values are floats
ex2 = np.array([11.0, 12.0])
print(ex2.dtype)

float64


In [29]:
# We can also set the data type explicitly with the "dtype" argument (from integer to float and vice versa)
ex3 = np.array([11, 12], dtype=np.float64)
print(ex3.dtype)
# Converting floats to an integer type drops the fractional part (11.1 -> 11, 12.7 -> 12)
ex4 = np.array([11.1, 12.7], dtype=np.int64)
print(ex4.dtype)
print("\n",ex4)

float64
int64

 [11 12]


# Arithmetic operations

In [30]:
# Two 2x2 operands for the arithmetic examples: an integer matrix and a float matrix
x = np.array(
    [
        [111, 112],
        [121, 122],
    ],
    dtype=np.int64,
)
y = np.array(
    [
        [211.1, 212.1],
        [221.1, 222.1],
    ],
    dtype=np.float64,
)

print(x)
print("\n", y)

[[111 112]
 [121 122]]

 [[211.1 212.1]
 [221.1 222.1]]


In [31]:
# Element-wise subtraction, written both ways: with the operator and with the equivalent function
print(x - y)
print("\n",np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.1]]

 [[-100.1 -100.1]
 [-100.1 -100.1]]


In [32]:
# Element-wise multiplication (not the matrix product), written both ways: operator and function
print(x * y)
print("\n", np.multiply(x, y))

[[23432.1 23755.2]
 [26753.1 27096.2]]

 [[23432.1 23755.2]
 [26753.1 27096.2]]


In [33]:
# Element-wise division, written both ways: operator and function
print(x / y)
print("\n",np.divide(x, y))

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]

 [[0.52581715 0.52805281]
 [0.54726368 0.54930212]]


In [34]:
# Square root of every element
print(np.sqrt(x))

[[10.53565375 10.58300524]
 [11.         11.04536102]]



# Statistical basic operations

In [35]:
# Generate a random 2x5 array: "randn()" draws from a standard normal distribution, scaled here by 10
arr = 10 * np.random.randn(2,5)
print(arr)

[[-6.86604721 18.56786842  2.54175981  1.75907959 -8.86559498]
 [-7.35380304  7.17614846  0.16527285 -7.50482929  9.45115438]]


In [36]:
# Called without arguments, the method "mean()" calculates the mean of all the elements of the array
print(arr.mean())

0.9071008980582806


In [37]:
"""
You can pass the axis as a parameter to this method
Axis 0 averages down each column (one result per column)
Axis 1 averages across each row (one result per row)
Since there are 2 rows, we'll get two means in this case as printed
"""
print(arr.mean(axis = 1))

[1.42741313 0.38678867]


In [38]:
# Calculate the mean column by column (there are 5 columns, so we get 5 means)
print(arr.mean(axis = 0))

[-7.10992513 12.87200844  1.35351633 -2.87287485  0.2927797 ]


In [39]:
# We'll create a one-dimensional array of 10 random elements
unordered = np.random.randn(10)
print(unordered)

[-1.81187099 -1.1646262   0.66220732 -0.22526184 -0.61527068 -0.7166093
 -0.16886451  0.41213395  1.09291874  1.7093875 ]


In [40]:
# Let's copy it, so the original array stays untouched
ordered = np.array(unordered)

# And we'll sort the copy in place, in ascending order
ordered.sort()
print(ordered)

[-1.81187099 -1.1646262  -0.7166093  -0.61527068 -0.22526184 -0.16886451
  0.41213395  0.66220732  1.09291874  1.7093875 ]


In [41]:
# We can also look for the unique elements of an array; "unique()" returns each distinct value once,
# in sorted order, which gives a good general view of what's inside the array
array = np.array([1,2,1,4,3,4,6,2,5])
print(np.unique(array))

[1 2 3 4 5 6]


In [42]:
# Set operations: two small arrays of strings that we'll treat as sets
s1 = np.array(["desktop","chair","table"])
s2 = np.array(["desktop","chair","lamp"])
print(s1)
print(s2)

['desktop' 'chair' 'table']
['desktop' 'chair' 'lamp']


In [43]:
# Intersection (elements common to both sets) with the function "intersect1d()"; the result comes back sorted
print(np.intersect1d(s1, s2))

['chair' 'desktop']


In [44]:
# Union with the function "union1d()": every element of either set, each one appearing only once
print(np.union1d(s1, s2))

['chair' 'desktop' 'lamp' 'table']


In [45]:
# Difference: the elements of s1 that aren't in s2, with the function "setdiff1d()"
print(np.setdiff1d(s1, s2))

['table']


In [46]:
# Which elements of s1 are in s2? The function "isin()" returns a boolean array with one flag per element of s1.
# (It replaces the older "in1d()", which is deprecated since NumPy 2.0.)
print(np.isin(s1, s2))

[ True  True False]


# Some frequent operations

In [47]:
# Without an axis, "sum()" adds up every element of the array into a single number
ex1 = np.array([[11,12], [21,22]])
print(np.sum(ex1))

66


In [48]:
# With axis = 0 we sum down each column (one total per column)
print(np.sum(ex1, axis = 0))
# How do we sum each row? With axis = 1 (one total per row)
print(np.sum(ex1, axis = 1))

[32 34]
[23 43]


#### How do we format an array (IMPORTANT)

In [49]:
# Let's create a one-dimensional array with the numbers 0 to 19
arr = np.arange(20)
print(arr)
arr.shape

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


(20,)

In [50]:
# Now let's change its shape with the method "reshape()", passing the number of rows as the 1st argument
# and the number of columns as the 2nd (4 x 5 matches the 20 elements of the array)
reshped_arr = arr.reshape(4,5)
print(reshped_arr)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [51]:
# We can also transpose an array with the ".T" attribute: rows become columns and vice versa
ex1 = np.array([[11,12], [21,22]])
print(ex1)
ex1.T

[[11 12]
 [21 22]]


array([[11, 21],
       [12, 22]])

In [52]:
# The function "where()" is really useful; first we create a 4x4 matrix of random floats to try it on
mat = np.random.rand(4,4)
mat

array([[0.10075992, 0.00244835, 0.59844939, 0.68518525],
       [0.70520163, 0.3730094 , 0.26533947, 0.60004912],
       [0.58524006, 0.10507588, 0.52099084, 0.03367376],
       [0.07839485, 0.32622327, 0.71967786, 0.45084697]])

In [53]:
"""
The function "where()" takes a condition on an array: wherever the condition is True the result holds
the 2nd argument (1000), and wherever it is False the result holds the 3rd argument (-1). It returns
a new array; the original one is not modified.
"""
np.where(mat > 0.5, 1000, -1)

array([[  -1,   -1, 1000, 1000],
       [1000,   -1,   -1, 1000],
       [1000,   -1, 1000,   -1],
       [  -1,   -1, 1000,   -1]])

In [54]:
"""
The method "any()" checks whether at least one element of the array is True; 0 is considered False and
anything else apart from 0 is considered True
"""
arr_bools = np.array([0,0,0,0,0,0])
print(arr_bools.any())
arr_bools = np.array([0,0,0,0,1])
print(arr_bools.any())

False
True


In [55]:
# The method "all()" is like the previous one, but checks whether all of the elements are True
arr_bools = np.array([1,1,False])
print(arr_bools.all())
arr_bools = np.array([1,1,2])
print(arr_bools.all())

False
True


# Unifying data sets

In [56]:
"""
In the "random" module there's a function called "randint()" where you can pass the lowest int (inclusive)
and the highest int (exclusive) to generate, and also the size of the array
"""
K = np.random.randint(low=2,high=50,size=(2,2))
print(K)

M = np.random.randint(low=2,high=50,size=(2,2))
print("\n",M)

[[17  8]
 [34 49]]

 [[12 10]
 [19  3]]


In [57]:
# Now we merge them by stacking vertically (M below K) with the function "v(ertically)stack()".
np.vstack((K,M))

array([[17,  8],
       [34, 49],
       [12, 10],
       [19,  3]])

In [58]:
# We can also merge them by stacking horizontally (M to the right of K) with the function "h(orizontally)stack()"
np.hstack((K,M))

array([[17,  8, 12, 10],
       [34, 49, 19,  3]])

In [59]:
# We can also merge the arrays by concatenation: axis = 0 joins them vertically (like vstack)
# and axis = 1 joins them horizontally (like hstack)
print(np.concatenate([K,M], axis = 0))
print()
print(np.concatenate([K,M], axis = 1))

[[17  8]
 [34 49]
 [12 10]
 [19  3]]

[[17  8 12 10]
 [34 49 19  3]]


# Broadcasting
https://numpy.org/doc/stable/user/basics.broadcasting.html

In [60]:
# A 4x4 array of zeros that the smaller arrays will be broadcast onto
base = np.zeros((4,4))
print(base)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [61]:
# Let's create a one-dimensional array with 4 elements
row = np.array([1, 0, 2, 7])
print(row)
print(row.shape)

[1 0 2 7]
(4,)


In [62]:
# Broadcasting: the one-dimensional array has as many elements as each row of base (4),
# so it is added to every row
y = base + row
print(y)

[[1. 0. 2. 7.]
 [1. 0. 2. 7.]
 [1. 0. 2. 7.]
 [1. 0. 2. 7.]]


In [63]:
# Now we'll repeat the same thing, but this time adding a column to base; we start from a 1x4 array
col = np.array([[0,1,2,3]])
print(col)

[[0 1 2 3]]


In [64]:
# We transpose the 1x4 array to turn it into a 4x1 column
col = col.T
print(col)

[[0]
 [1]
 [2]
 [3]]


In [65]:
# And now we add it to base: the column is added to every column of base
y = base + col
print(y)

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]]


In [66]:
# A column with only 3 elements (a 3x1 array), which does not match the 4 rows of base
col = np.array([[0,1,2]])
col = col.T
print(col)

[[0]
 [1]
 [2]]


In [67]:
# What happens if we want to broadcast arrays with different lengths?
y = base + col
print(y)
# We cannot: the addition raises a ValueError because shapes (4,4) and (3,1) are not compatible

ValueError: operands could not be broadcast together with shapes (4,4) (3,1) 

In [None]:
# A single-element array, however, can always be broadcast: its one value is stretched to every position
arr_x = np.array([1])
print(arr_x)
print("_________")
print(base + arr_x)

# Speed test!

There are some advantages to using NumPy arrays instead of normal Python lists, and in this
section we'll see one of them: speed.

In [69]:
from numpy import arange
from timeit import Timer

# One million elements, and every timed statement will be run 1000 times
size = 1000000
timeits = 1000

# The NumPy side of the comparison: an ndarray holding the numbers 0 to size-1
nd_array = arange(size)
print("Data type:",type(nd_array))
print("Shape:",nd_array.shape)

Data type: <class 'numpy.ndarray'>
Shape: (1000000,)


In [70]:
# With the Timer class we can measure how long a statement takes; the second argument is setup
# code that runs once, here importing nd_array so the timed statement can see it

timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")
# timeit(timeits) runs the statement that many times and returns the total time in seconds
print("Time taken by numpy ndarray: %f seconds" % (timer_numpy.timeit(timeits)))

Time taken by numpy ndarray: 0.253074 seconds


In [71]:
# The plain-Python side of the comparison: a list with the same numbers; a list has no shape, so we print its length
a_list = list(range(size))
print (type(a_list))
print("Shape:",len(a_list))

<class 'list'>
Shape: 1000000


In [72]:
# The same measurement with Python's built-in sum() over the plain list.
# NOTE: this repeats a one-million-element sum 1000 times (timeits), so it can take a long while to finish.
timer_list = Timer("sum(a_list)", "from __main__ import a_list")
print("Time taken by list:  %f seconds" % (timer_list.timeit(timeits)))

KeyboardInterrupt: 

# Save to disk an array / recover an array from disk

In [73]:
x = np.array([23.23, 24.24])
# The function "save()" takes the name of the file as the 1st parameter and the array itself as the 2nd;
# the ".npy" extension is added to the file name automatically
np.save("important_array", x)

In [None]:
# Then to recover it you will use the method "load()", with the extension of the file (save does it automatically)
recovered = np.load("important_array.npy")
print("The recovered array")