<a href="https://colab.research.google.com/github/JuniorGunner/DataScience/blob/master/PythonDataScienceHandbook/02_Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 2 - NumPy

In [0]:
import numpy as np
np.__version__

'1.17.4'

## Data Types

### Arrays from Lists

In [0]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [0]:
# Mixed ints and floats: the integers are upcast so every element is a float
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [0]:
# Request a specific element type with the dtype keyword
np.array([1, 2, 3, 4], dtype = 'float32')

array([1., 2., 3., 4.], dtype=float32)

In [0]:
# Each inner range becomes one row, so nested sequences give a 2-D array
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Arrays from Scratch

In [0]:
# Create a length-10 integer array filled with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [0]:
# Create a 3x5 floating-point array filled with 1s
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [0]:
# Create a 3x5 array filled with 3.14
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [0]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [0]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [0]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3, 3))

array([[0.9642309 , 0.37815284, 0.78464992],
       [0.29197349, 0.35196166, 0.04757175],
       [0.90839604, 0.24171761, 0.138447  ]])

In [0]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[ 0.41001042,  0.62378716, -1.5544504 ],
       [ 0.16123185, -1.42275591, -0.03861121],
       [-1.19865995,  2.5490413 , -0.36202307]])

In [0]:
# Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))

array([[5, 9, 1],
       [5, 6, 9],
       [8, 7, 1]])

In [0]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [0]:
# Create an uninitialized array of three values
# (no dtype is given, so the elements are floats, not integers)
# The values will be whatever happens to already exist at that
# memory location
np.empty(3)

array([1., 1., 1.])

## Basics of NumPy Arrays

### NumPy Array Attributes

In [0]:
# Fix the seed so the "random" arrays below are identical on every run
np.random.seed(0)

# Integers drawn from [0, 10) in one, two and three dimensions
x1 = np.random.randint(0, 10, size=6)
x2 = np.random.randint(0, 10, size=(3, 4))
x3 = np.random.randint(0, 10, size=(3, 4, 5))

In [0]:
# Number of dimensions, shape and size
print(x3.ndim, x3.shape, x3.size, sep='\n')

3
(3, 4, 5)
60


In [0]:
# Data type
x3.dtype

dtype('int64')

In [0]:
# Size in bytes of elements and array:
# itemsize is the size of a single element, nbytes the size of the whole array
print('Element size:', x3.itemsize, 'bytes')
print('Array size:', x3.nbytes, 'bytes')

Element size: 8 bytes
Array size: 480 bytes


### Array Indexing: Accessing Single Elements

In [0]:
x1

array([5, 0, 3, 3, 7, 9])

In [0]:
x1[0]

5

In [0]:
x1[4]

7

In [0]:
# Negative indices count back from the end of the array
x1[-1]

9

In [0]:
x1[-2]

7

In [0]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [0]:
x2[0, 0]

3

In [0]:
# Multidimensional access uses comma-separated (row, column) indices;
# here: last row, third column from the end
x2[-1, -3]

6

In [0]:
# Index notation also works for assignment: x2 is modified in place
x2[0, 0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [0]:
# x1 holds integers, so the float is silently truncated to 3 on assignment
# (no warning or error is raised)
x1[0] = 3.14159
x1

array([3, 0, 3, 3, 7, 9])

### Array Slicing: Accessing Subarrays

In [0]:
# x[start:stop:step]
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [0]:
# First 5 elements
x[:5]

array([0, 1, 2, 3, 4])

In [0]:
# Elements from index 5 onward (index 5 itself is included)
x[5:]

array([5, 6, 7, 8, 9])

In [0]:
# Middle subarray
x[4:7]

array([4, 5, 6])

In [0]:
# Every other element
x[::2]

array([0, 2, 4, 6, 8])

In [0]:
# Every other element, starting at index 1
x[1::2]

array([1, 3, 5, 7, 9])

In [0]:
# Reversed
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [0]:
# Reversed, every other element, starting from index 5
x[5::-2]

array([5, 3, 1])

* Multidimensional subarrays

In [0]:
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [0]:
# 2 rows, 3 columns
x2[:2, :3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [0]:
# All rows, every other column
x2[:3, ::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [0]:
# Reversed
x2[::-1, ::-1]

array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

In [0]:
# First Column
x2[:, 0]

array([12,  7,  1])

In [0]:
# First row
x2[0, :]

array([12,  5,  2,  4])

In [0]:
# Shorthand for x2[0, :]: the trailing empty slice can be omitted
x2[0]

array([12,  5,  2,  4])

In [52]:
# Subarray 2x2 no-copy view:
# the slice shares its data with x2, so writing to x2_sub also changes x2
x2_sub = x2[:2, :2]
x2_sub

array([[12,  5],
       [ 7,  6]])

In [53]:
x2_sub[0, 0] = 99
x2_sub

array([[99,  5],
       [ 7,  6]])

In [54]:
x2

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [55]:
# Copy
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[99,  5],
       [ 7,  6]])

In [56]:
x2_sub_copy[0, 0] = 42
x2_sub_copy

array([[42,  5],
       [ 7,  6]])

In [57]:
x2

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

### Reshaping of Arrays

In [58]:
# Lay the integers 1..9 out as a 3x3 grid
# (the requested shape must hold exactly as many elements as the input)
grid = np.arange(1, 10).reshape(3, 3)
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [60]:
x = np.array([1, 2, 3])
x

array([1, 2, 3])

In [61]:
# row vector via reshape
x.reshape((1, 3))
# alternative: row vector via newaxis (left commented out, not executed)
# x[np.newaxis, :]

array([[1, 2, 3]])

In [62]:
# column vector via reshape
x.reshape((3, 1))
# alternative: column vector via newaxis (left commented out, not executed)
# x[:, np.newaxis]

array([[1],
       [2],
       [3]])

### Array Concatenation and Splitting
* Concatenation

In [63]:
# Join two 1-D arrays end to end; the arrays are passed together as one list
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [64]:
z = [99, 99, 99]
np.concatenate([x, y, z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [65]:
grid = np.array([[1, 2, 3], [4, 5, 6]])

# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [66]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

* Vertical Stack and Horizontal Stack (vstack, hstack)

In [67]:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])

# vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [68]:
# horizontally stack the arrays
y = np.array([[99],
[99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

* Splitting

In [69]:
# np.split takes a list of indices marking the split points:
# the two split points [3, 5] produce three subarrays
# (np.hsplit and np.vsplit are used in the cells that follow)
# NOTE: this rebinds x1, x2 and x3, which were the random arrays created in
# the attributes section; re-run that cell before revisiting earlier examples
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [70]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [71]:
# Split the rows at index 2: rows 0-1 go to upper, rows 2-3 to lower
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [72]:
# Split the columns at index 2: columns 0-1 go to left, columns 2-3 to right
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Computation on NumPy Arrays: Universal Functions

### The Slowness of Loops