# Lesson: numpy Package

numpy is a very important package, described as "the fundamental package for scientific computing with Python". It provides the np.ndarray data type (array for short). Unlike lists, all the elements in an array must be the same data type (usually ints or floats). We can do element-wise operations on these arrays to make our code simpler and shorter.
Operations on arrays work a lot faster than operations on lists.


In [1]:
import numpy as np

## Create numpy arrays

In [9]:
# Build a NumPy array from a plain Python list
squares_list = [1, 4, 9, 16, 25]
np_squares = np.array(squares_list)  # the list's values are copied into a new ndarray
np_squares, type(np_squares)

(array([ 1,  4,  9, 16, 25]), numpy.ndarray)

In [4]:
# A NumPy array can also have two or more dimensions
list_of_lists = [
    [1, 2, 3],
    [4, 5, 6],
]
a2 = np.array(list_of_lists)  # each inner list becomes one row
a2, a2.shape, a2.ndim

(array([[1, 2, 3],
        [4, 5, 6]]),
 (2, 3),
 2)

In [2]:
# Create NumPy arrays with NumPy's own constructor functions
a = np.zeros(shape=3)  # three elements, all initialised to 0
b = np.ones(shape=3)  # three elements, all initialised to 1
# The elements are floats (numpy.float64), as the displayed types show
a, b, type(a), type(a[0])

(array([0., 0., 0.]), array([1., 1., 1.]), numpy.ndarray, numpy.float64)

In [3]:
# Create NumPy arrays filled with random values
c = np.random.randn(5)  # 5 samples from the standard normal distribution
# 6 integers from a discrete uniform distribution over 100..199 (high is exclusive)
d = np.random.randint(low=100, high=200, size=6)
c, d

(array([-0.47118966, -0.72924715, -2.09790491,  0.06193591,  0.85041466]),
 array([143, 116, 150, 185, 107, 167]))

In [7]:
# Create a 2D array with random values: 4 rows x 2 columns,
# sampled from the standard normal distribution.
# NOTE: this rebinds a2, which an earlier cell used for a 2x3 integer array.
a2 = np.random.randn(4, 2)
a2, a2.shape

(array([[ 0.72623372,  0.18230476],
        [-2.20169537,  0.43560881],
        [ 0.21503843,  0.41503987],
        [ 0.12581911, -0.69487287]]),
 (4, 2))

## Operate on numpy arrays
We can take advantage of element-wise operations. We don't need to loop through the elements of the array.

In [11]:
# Add (or multiply, subtract, divide...) a constant value to each element in the array.
# The tuple below displays the array next to the result of adding 10 to it.
np_squares = np.array([1, 4, 9, 16, 25])
np_squares, np_squares + 10

(array([ 1,  4,  9, 16, 25]), array([11, 14, 19, 26, 35]))

In [12]:
# Add (or multiply...) two arrays of the same length, element by element
a, b = np.array([3, 4, 5]), np.array([30, 40, 50])
a + b

array([33, 44, 55])

## Filter arrays with boolean expressions

In [14]:
np_squares = np.array([1, 4, 9, 16, 25])
# Comparing an array with a number is done element by element:
# the result is an array of booleans the same length as the array
np_squares < 15

array([ True,  True,  True, False, False])

In [15]:
# Index the array with the boolean expression: returns a smaller array with
# only those elements that meet the criteria (here, the values below 15)
np_squares[np_squares < 15]

array([1, 4, 9])

## Slicing and Indexing Arrays

In [17]:
np_squares = np.array([1, 4, 9, 16, 25])
# [2]   -> the single element at index 2
# [2:4] -> the elements from index 2 up to, but not including, index 4
indexed, sliced = np_squares[2], np_squares[2:4]
indexed, sliced

(9, array([ 9, 16]))

## Math (aggregation) operations


In [20]:
# Aggregation methods reduce the whole array to a single value
np_squares = np.array([1, 4, 9, 16, 25])
# std() here is the population standard deviation (divides by N, not N - 1):
# sqrt(374 / 5) ~= 8.6487, which matches the displayed result
np_squares.min(), np_squares.max(), np_squares.mean(), np_squares.std()

(1, 25, 11.0, 8.648699324175862)

## Reshape Arrays

In [24]:
# Even numbers from 2 up to, but not including, 26
evens = np.arange(start=2, stop=26, step=2)
evens, evens.shape

(array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24]), (12,))

In [25]:
# Reshape the 12 elements of evens (created in the previous cell) to
# 3 rows, 4 columns; the values fill the new shape row by row
evens.reshape(3, 4)

array([[ 2,  4,  6,  8],
       [10, 12, 14, 16],
       [18, 20, 22, 24]])

## Stack Arrays

In [32]:
# Create a couple of 2D arrays, each with 2 rows and 3 columns
arr1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr2 = np.array([
    [7, 8, 9],
    [10, 11, 12],
])
arr1, arr2

(array([[1, 2, 3],
        [4, 5, 6]]),
 array([[ 7,  8,  9],
        [10, 11, 12]]))

In [30]:
# Stack vertically: the rows of arr2 are placed below the rows of arr1,
# so the two (2, 3) arrays become one (4, 3) array
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
# Stack horizontally: each row of arr2 is appended to the matching row of arr1,
# so the two (2, 3) arrays become one (2, 6) array
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])