In [None]:
#========================================================================================================
#     NumPy - First step in Data Analysis
#========================================================================================================
# @author: Joana Santos Martins

In [None]:
"""What is Numpy?"""

# NumPy stands for Numerical Python and is the fundamental library for scientific computing with Python. It 
#is used to perform mathematical and statistical operations as well as data analysis. This free software 
#package provides several features that allow the programmer to work with multidimensional arrays and 
#matrix multiplication, thereby improving performance and execution. 
# To use NumPy it is necessary to install the package in the system, through pip or Anaconda Python 
#distribution. The code in this post was developed in Python 3.5 and NumPy, installed with the Anaconda. 
#Code examples will be presented in Jupyter notebook. In this tutorial, we'll look at some examples of 
#how the NumPy library can be used to perform a variety of tasks.

In [1]:
#========================================================================================================
#     Package
#========================================================================================================
# The first step is to import NumPy.
# Import numpy as np 
import numpy as np
# Above code renames the NumPy library to np, so whenever we need to access any method of this package, 
#we indicate the shortest name (np).

In [2]:
# Check installed version:
print(np.__version__)

1.14.3


In [3]:
#========================================================================================================
#     Create a NumPy Array
#========================================================================================================
# The most common way to create an array is to use a Python list and convert it to a numpy array through 
#the np.array() function.
# Python list
pythonList = [3, 4, 6, 8, 1, 2]
# NumPy array
array = np.array(pythonList)
# In practice, these two steps can be combined without declaring a Python list. To do this, simply use 
#the np.array() function, which has as argument the list of values to insert into the array. 
newArray = np.array([3, 4, 6, 8, 1, 2])

In [4]:
# One dimensional array - The function argument corresponds to a single list of values.
oneDim = np.array([1, 3, 5])
oneDim

array([1, 3, 5])

In [5]:
# Two dimensional array - The function argument corresponds to a list of two lists of values. That is, 
#these two lists are contained within a larger list (a list of lists). Each inner list becomes one row, 
#so all inner lists must have the same length.
twoDim = np.array([[1, 3, 5], [2, 4, 6]])
twoDim

array([[1, 3, 5],
       [2, 4, 6]])

In [6]:
# NumPy array with a specific data type
# Every element of a NumPy array shares one single type. np.array() accepts an optional dtype argument 
#that selects this type explicitly instead of letting NumPy infer it from the values.
# INTEGER (the platform's default integer type)
intArray = np.array([1, 2, 3], dtype = int) 
# or, with an explicit 64-bit width
intArray2 = np.array([1, 2, 3], dtype = np.int64) 
# FLOAT (double precision)
floatArray = np.array([1, 1.4, 2], dtype = float) 
# These are only a few examples; NumPy supports many more data types than the two shown here.

In [7]:
# Null array - Array of zeros.
# Besides building arrays from values picked by the programmer, NumPy also offers constructors that 
#fill an array according to a rule.
# np.zeros() returns an array holding nothing but zeros. For a two dimensional array the shape is 
#given as (rows, columns).
zerosArray = np.zeros(shape = (2, 3))
zerosArray

array([[0., 0., 0.],
       [0., 0., 0.]])

In [8]:
# Array of ones
# np.ones() works like np.zeros(): it builds a one or two dimensional array of the requested shape 
#in which every element is the number 1.
onesArray = np.ones(shape = (3, 3), dtype = int) 
onesArray

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [9]:
# Identity matrix 3x3 - zeros everywhere except for ones on the main diagonal
# np.eye() builds an identity matrix, a staple of linear algebra operations.
eyeArray = np.eye(N = 3, dtype = int)
eyeArray

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [10]:
# Sequence of numbers in one dimensional array
# The linspace() method takes three arguments: a start value, a stop value (included by default), and 
#the number of linearly spaced values to generate in that range.
# Array of 5 evenly divided values from 0 to 100.
sequenceArray = np.linspace(0, 100, 5) 
sequenceArray

array([  0.,  25.,  50.,  75., 100.])

In [11]:
# Array of evenly-spaced values
# The arange() method takes three arguments: a start value, a stop value (excluded), and the step between 
#each number. 
# Array of values from 0 to less than 10 with step 2.
arangeArray = np.arange(0, 10, 2) 
arangeArray

array([0, 2, 4, 6, 8])

In [12]:
# Constant array
# np.full() builds an array of the given shape (here 2x2) in which every element equals fill_value (5)
fullArray = np.full(shape = (2, 2), fill_value = 5) 
fullArray

array([[5, 5],
       [5, 5]])

In [13]:
# Array with random values
# 3x3 array of random floats between 0-1
randomArray = np.random.rand(3, 3) 
randomArray

array([[0.19051233, 0.03533116, 0.47172349],
       [0.16229776, 0.57875799, 0.66099124],
       [0.38557135, 0.26095156, 0.26963973]])

In [14]:
# 3x3 array of random floats between 0-100
randomArray2 = np.random.rand(3,3) * 100 
randomArray2

array([[13.80123633, 95.61213827, 23.90626514],
       [15.47027928, 65.49241653, 14.62187917],
       [22.95599527, 29.74247541, 36.59873643]])

In [15]:
# 3x3 array with random ints between 0-9
randomArray3 = np.random.randint(10, size = (3, 3))
randomArray3

array([[4, 1, 7],
       [4, 1, 3],
       [1, 6, 8]])

In [None]:
#========================================================================================================
#     Attributes of a NumPy Array
#========================================================================================================

In [16]:
# Each array has a set of characteristics that can be exploited through Numpy attributes. To analyze 
#these attributes, an example of a Numpy array created by np.random.randint() will be considered.
exampleArray =  np.random.randint(0, 50, 10)
exampleArray

array([ 7, 29, 28, 35,  8, 30, 37, 16, 34,  8])

In [None]:
# To access the attributes, a dot is placed after the name of the array, followed by the name of the 
#attribute to be analyzed.

In [17]:
# Number of array elements
exampleArray.size 
# exampleArray has 10 total elements.

10

In [18]:
# Array dimensions (rows, columns)
exampleArray.shape 
# exampleArray has 10 elements along the first axis. 

(10,)

In [19]:
# Number of array dimensions
exampleArray.ndim
# This means that exampleArray is a one dimensional array.

1

In [20]:
# Length of array (number of elements along the first axis)
len(exampleArray)

10

In [21]:
# Data type of array elements
exampleArray.dtype 

dtype('int32')

In [22]:
# Convert an array to a different type
exampleArray.astype('float') 

array([ 7., 29., 28., 35.,  8., 30., 37., 16., 34.,  8.])

In [23]:
# Convert an array to a Python list
exampleArray.tolist() 

[7, 29, 28, 35, 8, 30, 37, 16, 34, 8]

In [None]:
#========================================================================================================
#     Manipulate a NumPy Array
#========================================================================================================

In [24]:
# In this section, we intend to test some methods of manipulating arrays. For this purpose a new two 
#dimensional array example will be considered.
exampleArray2 = np.random.randint(50, size = (3, 6))
exampleArray2

array([[36, 10, 10, 21, 22, 19],
       [45, 21, 16, 22, 23, 43],
       [11, 24, 23, 21, 27, 30]])

In [25]:
# COPY
# Copy of the array to a new memory
copyArray = np.copy(exampleArray2)
copyArray

array([[36, 10, 10, 21, 22, 19],
       [45, 21, 16, 22, 23, 43],
       [11, 24, 23, 21, 27, 30]])

In [26]:
# Deep copy of the array 
copyArray2 = exampleArray2.copy()
copyArray2

array([[36, 10, 10, 21, 22, 19],
       [45, 21, 16, 22, 23, 43],
       [11, 24, 23, 21, 27, 30]])

In [27]:
# View of the array with the same data
viewArray = exampleArray2.view() 
viewArray

array([[36, 10, 10, 21, 22, 19],
       [45, 21, 16, 22, 23, 43],
       [11, 24, 23, 21, 27, 30]])

In [28]:
# SORT
# Sort the elements of an array's axis
exampleArray2.sort(axis = 0)
exampleArray2

array([[11, 10, 10, 21, 22, 19],
       [36, 21, 16, 21, 23, 30],
       [45, 24, 23, 22, 27, 43]])

In [29]:
# Sort an array
exampleArray2.sort()
exampleArray2

array([[10, 10, 11, 19, 21, 22],
       [16, 21, 21, 23, 30, 36],
       [22, 23, 24, 27, 43, 45]])

In [30]:
# TRANSPOSE
# Permute array dimensions (rows become columns and vice versa)
exampleArray2.T

array([[10, 16, 22],
       [10, 21, 23],
       [11, 21, 24],
       [19, 23, 27],
       [21, 30, 43],
       [22, 36, 45]])

In [31]:
# Permute array dimensions (rows become columns and vice versa)
transposeArray = np.transpose(exampleArray2)
transposeArray

array([[10, 16, 22],
       [10, 21, 23],
       [11, 21, 24],
       [19, 23, 27],
       [21, 30, 43],
       [22, 36, 45]])

In [32]:
# RESHAPE (Change array shape)
# The reshape() method consists in changing the organization of the elements, thus altering the shape of 
#the array, but maintaining the same number of elements. For example, reshape() allows the conversion 
#of one dimensional array into a two dimensional array. 
# Reshape the array to 6 rows, 3 columns without changing data
exampleArray2.reshape(6,3)

array([[10, 10, 11],
       [19, 21, 22],
       [16, 21, 21],
       [23, 30, 36],
       [22, 23, 24],
       [27, 43, 45]])

In [33]:
# FLATTEN (Change array shape)
# The flatten() method converts a multidimensional array into one dimensional array.
flattenArray = exampleArray2.flatten() 
flattenArray

array([10, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23, 24, 27, 43,
       45])

In [34]:
# The ravel() method also converts a multidimensional array into one dimensional array.
ravelArray = exampleArray2.ravel() 
ravelArray 

array([10, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23, 24, 27, 43,
       45])

In [136]:
# Difference between flatten() and ravel()
# Both are methods responsible for implementing flattening of arrays. The difference between flatten() 
#and ravel() is that flatten() always returns a copy, whereas ravel() returns a view of the parent array 
#whenever possible. This way, any changes to the array returned by ravel() will affect the parent too, 
#since no copy is created. As shown in the following example:

In [35]:
#New array created with flatten()
# Change the new array created with flatten()
flattenArray[0] = 5
flattenArray

array([ 5, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23, 24, 27, 43,
       45])

In [36]:
# Parent array after changing the new array (flattenArray)
exampleArray2

array([[10, 10, 11, 19, 21, 22],
       [16, 21, 21, 23, 30, 36],
       [22, 23, 24, 27, 43, 45]])

In [37]:
# Change the new array created with ravel()
ravelArray[0] = 5
ravelArray 

array([ 5, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23, 24, 27, 43,
       45])

In [38]:
# Parent array after changing the new array (ravelArray)
exampleArray2

array([[ 5, 10, 11, 19, 21, 22],
       [16, 21, 21, 23, 30, 36],
       [22, 23, 24, 27, 43, 45]])

In [39]:
# RESIZE
# Change array shape to 6x3 
exampleArray2.resize((6,3)) 
exampleArray2

array([[ 5, 10, 11],
       [19, 21, 22],
       [16, 21, 21],
       [23, 30, 36],
       [22, 23, 24],
       [27, 43, 45]])

In [40]:
# APPEND
# Append values to end of array
# Syntax of NumPy append:
    # arr = original array to append the new values
    # values = new values to append to the original array  
    # axis = axis along which append the new values (optional)
np.append(arr = exampleArray2, values =  [1, 4, 8]) 

array([ 5, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23, 24, 27, 43,
       45,  1,  4,  8])

In [41]:
# INSERT
# Insert values into array before index 0
np.insert(arr = exampleArray2, obj = 0, values =  [1, 4, 8]) 

array([ 1,  4,  8,  5, 10, 11, 19, 21, 22, 16, 21, 21, 23, 30, 36, 22, 23,
       24, 27, 43, 45])

In [42]:
# DELETE
# Delete row on index 0 of array
np.delete(arr = exampleArray2, obj = 0, axis = 0)

array([[19, 21, 22],
       [16, 21, 21],
       [23, 30, 36],
       [22, 23, 24],
       [27, 43, 45]])

In [43]:
# CONCATENATE
# This method allows to join 2 arrays
# In this example 2 new arrays with the same dimensions will be created.
exampleArray3 = np.random.randint(50, size = (3, 4))
exampleArray3

array([[ 5,  2, 10,  0],
       [18, 39, 12, 43],
       [ 1, 27, 31,  7]])

In [44]:
exampleArray4 = np.random.randint(50, size = (3, 4))
exampleArray4

array([[21,  8, 12, 24],
       [ 2, 31,  5, 28],
       [31, 24,  6, 39]])

In [45]:
# Adds exampleArray4 as rows to the end of exampleArray3
np.concatenate((exampleArray3, exampleArray4), axis = 0)

array([[ 5,  2, 10,  0],
       [18, 39, 12, 43],
       [ 1, 27, 31,  7],
       [21,  8, 12, 24],
       [ 2, 31,  5, 28],
       [31, 24,  6, 39]])

In [46]:
# Adds exampleArray4 as columns to end of exampleArray3
np.concatenate((exampleArray3, exampleArray4), axis = 1)

array([[ 5,  2, 10,  0, 21,  8, 12, 24],
       [18, 39, 12, 43,  2, 31,  5, 28],
       [ 1, 27, 31,  7, 31, 24,  6, 39]])

In [47]:
# SPLIT
# Splits exampleArray3 into 3 sub-arrays
np.split(exampleArray3, 3)

[array([[ 5,  2, 10,  0]]),
 array([[18, 39, 12, 43]]),
 array([[ 1, 27, 31,  7]])]

In [48]:
# Splits exampleArray3 horizontally (column-wise) into 2 equal sub-arrays
np.hsplit(exampleArray3, 2)

[array([[ 5,  2],
        [18, 39],
        [ 1, 27]]), array([[10,  0],
        [12, 43],
        [31,  7]])]

In [49]:
# Splits exampleArray3 vertically (row-wise) into 3 equal sub-arrays
np.vsplit(exampleArray3, 3)

[array([[ 5,  2, 10,  0]]),
 array([[18, 39, 12, 43]]),
 array([[ 1, 27, 31,  7]])]

In [None]:
#========================================================================================================
#     Index/slice/subset a NumPy Array
#========================================================================================================

In [50]:
# To access an element of an array, its index number is used. Like the Python lists, NumPy arrays are 
#indexed with zero. For example, the third element of the array has an index equal to two.
# Returns the element at index 2
exampleArray[2]
# In this example, the third element (index 2) is the value 28.

28

In [55]:
# Like 1-D arrays, the two-dimensional NumPy arrays also follow the zero-based index, that is, to access 
#an element of the array it  is necessary to consider the first row and the first column with an index 
#equal to zero.
# Returns the 2D array element on index [1][2]
exampleArray3[1, 2]
# The value 12 is returned: it is the element in the second row (index 1) and third column (index 2).

12

In [56]:
# To replace an element in the array, consider its index and assign the new value.
# Assigns array element on index 0 the value 10
exampleArray[0] = 10
exampleArray

array([10, 29, 28, 35,  8, 30, 37, 16, 34,  8])

In [57]:
# Assigns 2D array element on index [2][3] the value 0
exampleArray3[2, 3] = 0 
exampleArray3

array([[ 5,  2, 10,  0],
       [18, 39, 12, 43],
       [ 1, 27, 31,  0]])

In [58]:
# Returns the elements at indices 0,1,2,3 (On a 2D array: returns rows 0,1,2,3)
exampleArray[0:4]

array([10, 29, 28, 35])

In [60]:
# On a 2D array, returns the elements on rows 0,1,2 at column 1
exampleArray3[0:3, 1] 

array([ 2, 39, 27])

In [61]:
# Returns the elements at indices 0,1 (On a 2D array: returns rows 0,1)
exampleArray[:2] 

array([10, 29])

In [None]:
# Returns the elements at column index 1 on all rows.
# The [:, 1] form needs a two dimensional array, so the 2D exampleArray3 is used here; applying it to 
#the one dimensional exampleArray raises "IndexError: too many indices for array".
exampleArray3[:, 1]

In [66]:
# Returns an array with boolean values
exampleArray < 15

array([ True, False, False, False,  True, False, False, False, False,
        True])

In [74]:
# Returns an array with boolean values
(exampleArray < 20) & (exampleArray > 15)

array([False, False, False, False, False, False, False,  True, False,
       False])

In [75]:
# Inverts a boolean array
# The ~ operator flips True/False only on a boolean array, so it is applied to the mask produced by the 
#comparison. On the integer array itself ~ would compute a bitwise NOT (e.g. ~10 == -11) instead.
~(exampleArray < 15)

array([False,  True,  True,  True, False,  True,  True,  True,  True,
       False])

In [76]:
# Returns array elements smaller than 20
exampleArray[exampleArray < 20]

array([10,  8, 16,  8])

In [None]:
#========================================================================================================
#     Array Mathematics
#========================================================================================================

In [78]:
# SCALAR MATH
# Add 2 to each array element
np.add(exampleArray, 2)

array([12, 31, 30, 37, 10, 32, 39, 18, 36, 10])

In [80]:
# Subtract 1 from each array element
np.subtract(exampleArray, 1) 

array([ 9, 28, 27, 34,  7, 29, 36, 15, 33,  7])

In [81]:
# Multiply each array element by 5
np.multiply(exampleArray, 5)

array([ 50, 145, 140, 175,  40, 150, 185,  80, 170,  40])

In [82]:
# Divide each array element by 2 (true division: the result is a float array; dividing by zero 
#yields inf, or nan for 0/0, together with a RuntimeWarning)
np.divide(exampleArray, 2) 

array([ 5. , 14.5, 14. , 17.5,  4. , 15. , 18.5,  8. , 17. ,  4. ])

In [84]:
# Raise each array element to the second power
np.power(exampleArray, 2) 

array([ 100,  841,  784, 1225,   64,  900, 1369,  256, 1156,   64],
      dtype=int32)

In [86]:
# VECTOR MATH
array1 = np.random.randint(10, size = (3, 3))
array1

array([[2, 4, 6],
       [4, 5, 1],
       [5, 8, 5]])

In [87]:
array2 = np.random.randint(10, size = (3, 3))
array2

array([[7, 6, 7],
       [9, 9, 9],
       [4, 9, 1]])

In [89]:
# Add array2 to array1
np.add(array1, array2) 

array([[ 9, 10, 13],
       [13, 14, 10],
       [ 9, 17,  6]])

In [None]:
# Subtract array2 from array1
np.subtract(array1, array2) 

In [None]:
# Multiply array1 by array2 element-wise (this is not the matrix product; use np.dot for that)
np.multiply(array1, array2) 

In [None]:
# Divide array1 by array2
np.divide(array1, array2) 

In [91]:
# Raise each element of array1 to the power of the corresponding element of array2
np.power(array1, array2)

array([[      128,      4096,    279936],
       [   262144,   1953125,         1],
       [      625, 134217728,         5]], dtype=int32)

In [93]:
# Square root of each element in the array
np.sqrt(array1)

array([[1.41421356, 2.        , 2.44948974],
       [2.        , 2.23606798, 1.        ],
       [2.23606798, 2.82842712, 2.23606798]])

In [98]:
# Exponential (e**x) of each element in the array
np.exp(array1)

array([[7.38905610e+00, 5.45981500e+01, 4.03428793e+02],
       [5.45981500e+01, 1.48413159e+02, 2.71828183e+00],
       [1.48413159e+02, 2.98095799e+03, 1.48413159e+02]])

In [94]:
# Sine of each element in the array
np.sin(array1) 

array([[ 0.90929743, -0.7568025 , -0.2794155 ],
       [-0.7568025 , -0.95892427,  0.84147098],
       [-0.95892427,  0.98935825, -0.95892427]])

In [99]:
# Cosine of each element in the matrix
np.cos(array1)

array([[-0.41614684, -0.65364362,  0.96017029],
       [-0.65364362,  0.28366219,  0.54030231],
       [ 0.28366219, -0.14550003,  0.28366219]])

In [95]:
# Natural log of each element in the array
np.log(array1) 

array([[0.69314718, 1.38629436, 1.79175947],
       [1.38629436, 1.60943791, 0.        ],
       [1.60943791, 2.07944154, 1.60943791]])

In [96]:
# Absolute value of each element in the array
np.abs(array1) 

array([[2, 4, 6],
       [4, 5, 1],
       [5, 8, 5]])

In [97]:
# Rounds to the nearest integer (values exactly halfway are rounded to the nearest even integer)
np.round(array1) 

array([[2, 4, 6],
       [4, 5, 1],
       [5, 8, 5]])

In [101]:
# ARRAYS COMPARISON
# Compare array1 with array2
array1 == array2

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

In [102]:
# Compare array1 with array2
array1 < array2

array([[ True,  True,  True],
       [ True,  True,  True],
       [False,  True, False]])

In [None]:
# Arrays have the same elements and shape
np.array_equal(array1, array2)

In [103]:
# STATISTICS
# Returns mean along specific axis
np.mean(array1, axis = 0)

array([3.66666667, 5.66666667, 4.        ])

In [104]:
# Returns sum of array
array1.sum() 

40

In [105]:
# Returns minimum value of array
array1.min() 

1

In [None]:
# Returns maximum value of specific axis
array1.max(axis = 0) 

In [106]:
# Returns the variance of array
np.var(array1)

3.802469135802469

In [108]:
# Returns the standard deviation of specific axis
np.std(array1, axis = 1)

array([1.63299316, 1.69967317, 1.41421356])