In [None]:
#========================================================================================================
#     NumPy - First step in Data Analysis
#========================================================================================================
# @author: Joana Santos Martins

In [None]:
"""What is Numpy?"""

# NumPy stands for Numerical Python and is the fundamental library for scientific computing with Python. It
#is used to perform mathematical and statistical operations as well as data analysis. This free software
#package provides several features that allow the programmer to work with multidimensional arrays and
#matrix multiplication, thereby improving performance and execution.
# To use NumPy it is necessary to install the package on the system, through pip or the Anaconda Python
#distribution. The code in this post was developed with Python 3.5 and NumPy, installed with Anaconda.
#The code examples are presented in a Jupyter notebook. In this tutorial, we'll look at some examples of
#how the NumPy library can be used to perform a variety of tasks.

In [2]:
#========================================================================================================
#     Package
#========================================================================================================
# The first step is to import NumPy.
# Import numpy as np 
import numpy as np
# The code above binds the NumPy library to the alias np, so whenever we need to access any function of this package,
#we use the shorter name (np).

In [21]:
# Check installed version:
print(np.__version__)

1.14.3


In [4]:
#========================================================================================================
#     Create a NumPy Array
#========================================================================================================
# The most common way to create an array is to use a Python list and convert it to a numpy array through the np.array() function.
# Python list
pythonList = [3, 4, 6, 8, 1, 2]
# NumPy array
array = np.array(pythonList)
# In practice, these two steps can be combined without declaring a Python list. To do this, simply use the np.array() function, 
#which has as argument the list of values to insert into the array. 
newArray = np.array([3, 4, 6, 8, 1, 2])

In [7]:
# One dimensional array - The function argument corresponds to a single list of values.
oneDim = np.array([1, 3, 5])
oneDim

array([1, 3, 5])

In [8]:
# Two dimensional array - The function argument corresponds to a list of two lists of values. That is, these two lists are contained
#within a larger list (a list of lists).
twoDim = np.array([(1, 3, 5), (2, 4, 6)])
twoDim

array([[1, 3, 5],
       [2, 4, 6]])

In [14]:
# Numpy array with a specific data type
# Note that a Numpy array must consist of elements of the same type. Optionally, one of the arguments of the np.array() function can
#specify the data type through dtype parameter.
# INTEGER
intArray = np.array([1, 2, 3], dtype = 'int') 
#or
intArray2 = np.array([1, 2, 3], dtype = np.int64) 
# FLOAT
floatArray = np.array([1, 1.4, 2], dtype = 'float') 
# These are just a few examples, Numpy supports several types of data in addition to the two mentioned.

In [23]:
# Array of zeros (null array).
# Besides building custom arrays from values chosen by the programmer, NumPy also provides functions that generate
#arrays following a given pattern.
# np.zeros() returns an array filled only with zeros. Its shape argument gives the size of each dimension; for a
#two dimensional array that is (number of rows, number of columns).
zerosArray = np.zeros(shape = (2, 3))
zerosArray

array([[0., 0., 0.],
       [0., 0., 0.]])

In [26]:
# Array of ones
# In the same way, np.ones() creates an array (one dimensional, two dimensional, ...) in which every element is the
#number 1. Here: 3 rows x 3 columns of integer ones.
onesArray = np.ones(shape = (3, 3), dtype = 'int')
onesArray

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [30]:
# Identity matrix 3x3 - zeros everywhere except for ones on the main diagonal
# np.eye() builds an identity matrix, which is widely used in linear algebra operations.
eyeArray = np.eye(N = 3, dtype = 'int')
eyeArray

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [35]:
# Sequence of numbers in a one dimensional array
# np.linspace() takes three arguments: the start value, the stop value (included in the result), and the number of
#linearly spaced values to generate in that range.
# Array of 5 evenly divided values from 0 to 100.
sequenceArray = np.linspace(start = 0, stop = 100, num = 5)
sequenceArray

array([  0.,  25.,  50.,  75., 100.])

In [39]:
# Array of evenly-spaced values
# np.arange() takes three arguments: the start value, the stop value (excluded from the result), and the step
#between consecutive values.
# Array of values from 0 to less than 10 with step 2.
arangeArray = np.arange(start = 0, stop = 10, step = 2)
arangeArray

array([0, 2, 4, 6, 8])

In [41]:
# Constant array
# np.full() fills an array of the requested shape with one value: here a 2x2 array where every element is 5.
fullArray = np.full(shape = (2, 2), fill_value = 5)
fullArray

array([[5, 5],
       [5, 5]])

In [44]:
# Array with random values
# 3x3 array of random floats between 0-1
randomArray = np.random.rand(3, 3) 
randomArray

array([[0.83278785, 0.40133325, 0.26294265],
       [0.07416401, 0.80095596, 0.43026833],
       [0.73418217, 0.56337086, 0.07335982]])

In [52]:
# 3x3 array of random floats between 0-100
randomArray2 = np.random.rand(3,3) * 100 
randomArray2

array([[57.28689417, 60.94243653, 76.95566873],
       [23.56562152, 50.68314487, 38.72512986],
       [32.49635595, 34.40604281, 96.23827747]])

In [54]:
# 3x3 array with random ints between 0-9
randomArray3 = np.random.randint(10, size = (3, 3))
randomArray3

array([[8, 5, 2],
       [7, 4, 4],
       [2, 9, 8]])

In [None]:
#===================================================================================================================================
#     Attributes of a NumPy Array
#===================================================================================================================================

In [4]:
# Each array has a set of characteristics that can be exploited through Numpy attributes. To analyze these attributes, an example of 
#a Numpy array created by np.random.randint() will be considered.
exampleArray =  np.random.randint(0, 50, 10)
exampleArray

array([26, 43,  5,  1,  7, 19, 36, 13, 22,  6])

In [None]:
# To access the attributes, a dot is placed after the name of the array, followed by the name of the attribute to be analyzed.

In [5]:
# Number of array elements
exampleArray.size 
# exampleArray has 10 total elements.

10

In [6]:
# Array dimensions (rows, columns)
exampleArray.shape 
# exampleArray has 10 elements along the first axis. 

(10,)

In [7]:
# Number of array dimensions
exampleArray.ndim
# This means that exampleArray is a one dimensional array.

1

In [8]:
# Length of array (number of elements along the first axis)
len(exampleArray)

10

In [9]:
# Data type of array elements
exampleArray.dtype 

dtype('int32')

In [10]:
# Convert an array to a different type
exampleArray.astype('float') 

array([26., 43.,  5.,  1.,  7., 19., 36., 13., 22.,  6.])

In [11]:
# Convert an array to a Python list
exampleArray.tolist() 

[26, 43, 5, 1, 7, 19, 36, 13, 22, 6]

In [None]:
#===================================================================================================================================
#     Manipulate a NumPy Array
#===================================================================================================================================

In [12]:
# In this section, we intend to test some methods of manipulating arrays. For this purpose a new two dimensional array example will 
# be considered.
exampleArray2 = np.random.randint(50, size = (3, 6))
exampleArray2

array([[43,  2, 18,  0, 14, 48],
       [28, 29, 43,  5, 19,  2],
       [49, 12, 22, 45, 16, 40]])

In [13]:
# COPY
# Copy of the array to a new memory
copyArray = np.copy(exampleArray2)
copyArray

array([[43,  2, 18,  0, 14, 48],
       [28, 29, 43,  5, 19,  2],
       [49, 12, 22, 45, 16, 40]])

In [14]:
# Deep copy of the array 
copyArray2 = exampleArray2.copy()
copyArray2

array([[43,  2, 18,  0, 14, 48],
       [28, 29, 43,  5, 19,  2],
       [49, 12, 22, 45, 16, 40]])

In [15]:
# View of the array with the same data
viewArray = exampleArray2.view() 
viewArray

array([[43,  2, 18,  0, 14, 48],
       [28, 29, 43,  5, 19,  2],
       [49, 12, 22, 45, 16, 40]])

In [16]:
# SORT
# Sort the elements of an array's axis
exampleArray2.sort(axis = 0)
exampleArray2

array([[28,  2, 18,  0, 14,  2],
       [43, 12, 22,  5, 16, 40],
       [49, 29, 43, 45, 19, 48]])

In [17]:
# Sort an array
exampleArray2.sort()
exampleArray2

array([[ 0,  2,  2, 14, 18, 28],
       [ 5, 12, 16, 22, 40, 43],
       [19, 29, 43, 45, 48, 49]])

In [18]:
# TRANSPOSE
# Permute array dimensions (rows become columns and vice versa)
exampleArray2.T

array([[ 0,  5, 19],
       [ 2, 12, 29],
       [ 2, 16, 43],
       [14, 22, 45],
       [18, 40, 48],
       [28, 43, 49]])

In [19]:
# Permute array dimensions (rows become columns and vice versa)
transposeArray = np.transpose(exampleArray2)
transposeArray

array([[ 0,  5, 19],
       [ 2, 12, 29],
       [ 2, 16, 43],
       [14, 22, 45],
       [18, 40, 48],
       [28, 43, 49]])

In [20]:
# RESHAPE (Change array shape)
# The reshape() method changes how the elements are organized, thus altering the shape of the array, while keeping
#the same elements (and therefore the same total number of elements). For example, reshape() allows the conversion of
#a one dimensional array into a two dimensional array.
# Reshape the array to 6 rows, 3 columns without changing data
exampleArray2.reshape(6,3)

array([[ 0,  2,  2],
       [14, 18, 28],
       [ 5, 12, 16],
       [22, 40, 43],
       [19, 29, 43],
       [45, 48, 49]])

In [21]:
# FLATTEN (Change array shape)
# The flatten() method converts a multidimensional array into one dimensional array.
flattenArray = exampleArray2.flatten() 
flattenArray

array([ 0,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29, 43, 45, 48,
       49])

In [22]:
# The ravel() method also converts a multidimensional array into one dimensional array.
ravelArray = exampleArray2.ravel() 
ravelArray 

array([ 0,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29, 43, 45, 48,
       49])

In [136]:
# Difference between flatten() and ravel()
# Both are methods responsible for implementing flattening of arrays. The difference between flatten() and ravel() is that the new 
#array created through ravel() consists of a reference to the parent array. This way, any changes to the new array will affect the 
#parent too, without creating a copy. As shown in the following example:

In [23]:
#New array created with flatten()
# Change the new array created with flatten()
flattenArray[0] = 5
flattenArray

array([ 5,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29, 43, 45, 48,
       49])

In [24]:
# Parent array after changing the new array (flattenArray)
exampleArray2

array([[ 0,  2,  2, 14, 18, 28],
       [ 5, 12, 16, 22, 40, 43],
       [19, 29, 43, 45, 48, 49]])

In [25]:
# Change the new array created with ravel()
ravelArray[0] = 5
ravelArray 

array([ 5,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29, 43, 45, 48,
       49])

In [26]:
# Parent array after changing the new array (ravelArray)
exampleArray2

array([[ 5,  2,  2, 14, 18, 28],
       [ 5, 12, 16, 22, 40, 43],
       [19, 29, 43, 45, 48, 49]])

In [27]:
# RESIZE
# Change array shape to 6x3 
exampleArray2.resize((6,3)) 
exampleArray2

array([[ 5,  2,  2],
       [14, 18, 28],
       [ 5, 12, 16],
       [22, 40, 43],
       [19, 29, 43],
       [45, 48, 49]])

In [28]:
# APPEND
# Append values to end of array
# Syntax of NumPy append:
    # arr = original array to append the new values
    # values = new values to append to the original array  
    # axis = axis along which append the new values (optional)
np.append(arr = exampleArray2, values =  [1, 4, 8]) 

array([ 5,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29, 43, 45, 48,
       49,  1,  4,  8])

In [29]:
# INSERT
# Insert values into array before index 0
np.insert(arr = exampleArray2, obj = 0, values =  [1, 4, 8]) 

array([ 1,  4,  8,  5,  2,  2, 14, 18, 28,  5, 12, 16, 22, 40, 43, 19, 29,
       43, 45, 48, 49])

In [30]:
# DELETE
# Delete the row at index 0 of the array (axis = 0 selects rows)
np.delete(arr = exampleArray2, obj = 0, axis = 0)

array([[14, 18, 28],
       [ 5, 12, 16],
       [22, 40, 43],
       [19, 29, 43],
       [45, 48, 49]])

In [35]:
# CONCATENATE
# This method allows joining 2 arrays. In this example 2 new arrays with the same dimensions will be created.
exampleArray3 = np.random.randint(50, size = (3, 4))
exampleArray3

array([[44, 41, 40, 31],
       [46,  6, 21, 26],
       [19, 21, 32, 17]])

In [36]:
exampleArray4 = np.random.randint(50, size = (3, 4))
exampleArray4

array([[ 2, 48, 22, 37],
       [10, 40, 10, 46],
       [48,  7, 38,  5]])

In [37]:
# Adds exampleArray4 as rows to the end of exampleArray3
np.concatenate((exampleArray3, exampleArray4), axis = 0)

array([[44, 41, 40, 31],
       [46,  6, 21, 26],
       [19, 21, 32, 17],
       [ 2, 48, 22, 37],
       [10, 40, 10, 46],
       [48,  7, 38,  5]])

In [38]:
# Adds exampleArray4 as columns to end of exampleArray3
np.concatenate((exampleArray3, exampleArray4), axis = 1)

array([[44, 41, 40, 31,  2, 48, 22, 37],
       [46,  6, 21, 26, 10, 40, 10, 46],
       [19, 21, 32, 17, 48,  7, 38,  5]])

In [40]:
# SPLIT
# Splits exampleArray3 into 3 sub-arrays
np.split(exampleArray3, 3)

[array([[44, 41, 40, 31]]),
 array([[46,  6, 21, 26]]),
 array([[19, 21, 32, 17]])]

In [45]:
# Splits exampleArray3 horizontally (column-wise) into 2 equal sub-arrays
np.hsplit(exampleArray3, 2)

[array([[44, 41],
        [46,  6],
        [19, 21]]), array([[40, 31],
        [21, 26],
        [32, 17]])]

In [47]:
# Splits exampleArray3 vertically (row-wise) into 3 equal sub-arrays
np.vsplit(exampleArray3, 3)

[array([[44, 41, 40, 31]]),
 array([[46,  6, 21, 26]]),
 array([[19, 21, 32, 17]])]

In [None]:
#===================================================================================================================================
#     Index/slice/subset a NumPy Array
#===================================================================================================================================

In [51]:
# To access an element of an array, its index number is used. Like the Python lists, NumPy arrays are indexed with zero. For example,
#the third element of the array has an index equal to two.
# Returns the element at index 2
exampleArray[2]
# In this example, the third element (index 2) corresponds to the number 5.

5

In [52]:
# Like 1-D arrays, the two-dimensional NumPy arrays also follow the zero-based index, that is, to access an element of the array it 
#is necessary to consider the first row and the first column with an index equal to zero.
# Returns the 2D array element on index [2][1]
exampleArray3[2, 1]
# The number 21 will be returned as the element present in the third row (index 2) and second column (index 1) of the array.

21

In [60]:
# To replace an element in the array, consider its index and assign the new value.
# Assigns array element on index 0 the value 10
exampleArray[0] = 10
exampleArray

array([10,  4,  5,  1,  7, 19, 36, 13, 22,  6])

In [58]:
# Assigns array element on index [2][3] the value 0
exampleArray3[2, 3] = 0 
exampleArray3

array([[44, 41, 40, 31],
       [46,  6, 21, 11],
       [19, 21, 32,  0]])

In [None]:
# Returns the elements at indices 0,1,2 (On a 2D array the same slice returns rows 0,1,2)
# Uses exampleArray, the one dimensional array created earlier in the notebook.
exampleArray[0:3]

In [None]:
# Returns the elements on rows 0,1,2 at column 3
# exampleArray3 is a 3x4 array, so its valid column indices are 0-3 (column 4 would be out of bounds).
exampleArray3[0:3, 3]

In [None]:
# Returns the elements at indices 0,1 (On a 2D array the same slice returns rows 0,1)
# Uses exampleArray, the one dimensional array created earlier in the notebook.
exampleArray[:2]

In [None]:
# Returns the elements at index 1 on all rows (i.e. the second column)
# A two dimensional array is required here, so the 3x4 exampleArray3 is used.
exampleArray3[:, 1]

In [None]:
# Returns an array with boolean values: True where the element of exampleArray is smaller than 5, False otherwise
exampleArray < 5

In [None]:
# Returns an array with boolean values: True where both conditions hold at the same position
# The two arrays must have the same shape; exampleArray3 and exampleArray4 are both 3x4.
# The parentheses are required because & binds tighter than the comparison operators.
# Thresholds of 25 are used because both arrays hold values in the range 0-49.
(exampleArray3 < 25) & (exampleArray4 > 25)

In [None]:
# Inverts a boolean array
# ~ must be applied to a boolean mask: on an integer array it would compute the bitwise NOT of each value instead.
~(exampleArray < 5)

In [None]:
# Returns array elements smaller than 5
# The boolean mask (exampleArray < 5) selects only the positions where it is True.
exampleArray[exampleArray < 5]