# intro to numpy

### why numpy?
#### * powerful library of tools
#### * backbone of many important data science libraries
#### * super fast -- numpy's backbone is C

### arrays in numpy : vectors & matrices
#### * vectors: 1-d arrays
#### * matrices: 2-d arrays (a matrix can still have just 1 row or just 1 column)

In [55]:
# let's import numpy and shorten the name

import numpy as np

## arrays, vectors, matrices

### convert python lists (& lists of lists) to numpy vectors and matrices

In [56]:
# an ordinary python list of five integers

new_list = [1, 2, 3, 4, 5]

# evaluate the name on its own so the notebook echoes the list
new_list

[1, 2, 3, 4, 5]

In [57]:
# turn it into a numpy array
# remember: numpy is 'np' now
# the result isn't assigned to anything here, so the notebook just displays it

np.array(new_list)

array([1, 2, 3, 4, 5])

In [58]:
# store the converted array in a variable so it can be reused later

new_list = [6, 7, 8, 9, 10]
arr = np.array(new_list)

# echo the stored array
arr

array([ 6,  7,  8,  9, 10])

In [59]:
# a list of lists: each inner list will become one row of a matrix

matrix_1 = [
    [2, 4, 6, 8],
    [5, 10, 15, 20],
    [3, 6, 9, 12],
]

matrix_1

[[2, 4, 6, 8], [5, 10, 15, 20], [3, 6, 9, 12]]

In [60]:
# now cast as a numpy matrix, ie 2d array
# note that the output is matrix-shaped (not list of lists as above)
# each inner list of matrix_1 becomes one row of the 2d array

np_matrix_1 = np.array(matrix_1)

np_matrix_1

array([[ 2,  4,  6,  8],
       [ 5, 10, 15, 20],
       [ 3,  6,  9, 12]])

### * note: this isn't the easiest (or most common) way to create an array in numpy--it just helps if you've got a list (or list of lists) to convert :)

## creating arrays from scratch: ranges, zeros/ones, & identity matrices

In [61]:
# np.arange() is one of the most common ways to build a numpy array
# it works like python's range(): start, stop (NOT inclusive), step size
# with a single argument, start defaults to zero and the argument is the non-inclusive stop

array_1 = np.arange(0, 10, 2)
array_2 = np.arange(0, 11, 2)
array_3 = np.arange(11)

# one print call, one array per line
print(array_1, array_2, array_3, sep='\n')

[0 2 4 6 8]
[ 0  2  4  6  8 10]
[ 0  1  2  3  4  5  6  7  8  9 10]


In [62]:
# np.zeros() fills a new array with zeros
# a single integer gives a 1d array of that length
# a TUPLE (note the extra parentheses) gives a 2d array

zero_arr_1d = np.zeros(shape=5)

zero_arr_1d

array([0., 0., 0., 0., 0.])

In [63]:
# making a 2d array of zeros
# takes a TUPLE--so extra parentheses!
# np.zeros() always needs a shape argument--leave it out entirely and you'll get an error like this one:
# (two bare integers fail too: np.zeros(4,5) reads the 5 as the dtype argument)

zero_arr = np.zeros()

TypeError: Required argument 'shape' (pos 1) not found

In [64]:
# now the right way: pass in a tuple using extra parentheses
# this will create a 4 x 5 array of zeros
# first number: rows
# second number: columns

zero_arr_2d = np.zeros((4,5))

# display the array we just created
# (zero_arr_2d, not zero_arr--that name was never assigned because np.zeros() raised an error)
zero_arr_2d

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [65]:
# np.ones() follows the same rules as np.zeros()
# single integer -> 1d array
# tuple -> 2d array, always given as (rows, columns)

ones_arr_1d = np.ones(shape=7)
ones_arr_2d = np.ones(shape=(3, 7))

print(ones_arr_1d)

# print a newline ('\n') between the two arrays so the output is easier to read
print('\n')

print(ones_arr_2d)

[1. 1. 1. 1. 1. 1. 1.]


[[1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1.]]


In [66]:
# now: np.linspace()
# specify a start and a stop, get out evenly spaced numbers in that range, in 1d array
# NOT necessarily integers
# the stop value IS included in the output (see the last element below)
# args go in this order: start, stop, number-of-points

np.linspace(0,643,14)

array([  0.        ,  49.46153846,  98.92307692, 148.38461538,
       197.84615385, 247.30769231, 296.76923077, 346.23076923,
       395.69230769, 445.15384615, 494.61538462, 544.07692308,
       593.53846154, 643.        ])

In [67]:
# np.eye() builds an identity matrix
# one argument N gives a square N x N matrix
# 1s along the main diagonal, 0s everywhere else

id_matrix = np.eye(N=7)

id_matrix

array([[1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1.]])

## creating arrays from scratch: randoms

#### * there are LOTS of random methods in numpy
#### * start with np.random. then hit 'tab' to see a list of all of them
#### * for np.random.rand and np.random.randn: 1 argument == 1d array, 2 arguments == 2d array
#### * rand and randn DON'T take tuples--just pass the dimensions as separate integers
#### * here are some frequently used ones



In [70]:
# np.random.rand draws from a uniform distribution between ZERO and ONE
# one argument -> 1d array with that many points
# two arguments -> 2d array (rows, columns)
# the dimensions are passed as separate integers, NOT as a tuple

random_arr_1d = np.random.rand(7)
random_arr_2d = np.random.rand(5, 5)

print(random_arr_1d)
print('\n')
print(random_arr_2d)

[0.94456301 0.30510113 0.17811534 0.17197034 0.80603649 0.80265709
 0.12103068]


[[0.70895213 0.4903918  0.31181774 0.33148979 0.74100501]
 [0.47844446 0.94199231 0.24818785 0.57819438 0.52757378]
 [0.29138369 0.29895515 0.26401993 0.10577242 0.81001331]
 [0.08247655 0.1210241  0.45727844 0.74658628 0.62174316]
 [0.59918927 0.88710625 0.30208485 0.4453576  0.64738168]]


In [74]:
# np.random.randn draws from the standard normal (gaussian) distribution
# (np.random.rand, by contrast, draws from a uniform distribution)
# same calling convention: dimensions as separate integers

gauss_random_1d = np.random.randn(5)
gauss_random_2d = np.random.randn(5, 5)

print(gauss_random_1d)
print('\n')
print(gauss_random_2d)

[-0.14470199  0.4670417  -1.34091762  2.30418039 -0.12229155]


[[ 0.64118933 -0.19713004 -0.7985031   0.63306899 -0.63221859]
 [ 1.60671126  0.69738545  0.89147712  0.77636915 -0.27021805]
 [-0.56408754 -1.20593048 -0.31523236  0.10806058 -0.31969535]
 [-0.56199858  0.62341029  0.11301159  0.92276416 -0.21749987]
 [ 0.24469593 -1.81712626 -0.13364505 -0.59619936  0.36517846]]


In [78]:
# np.random.randint() returns random INTEGERS
# args: low (inclusive), high (NOT inclusive), size (number of points)
# with no size given, a single integer comes back

single_random_int = np.random.randint(low=0, high=50)

# the high end is exclusive, so 50 can never be drawn
lots_of_random_ints = np.random.randint(low=0, high=50, size=14)

print(single_random_int)
print(lots_of_random_ints)

7
[ 3  7 12 15  9  0 20  6 46  9  6 44 42 20]


## changing the shape of arrays

#### * to go from 1d array to 2d array: use .reshape() method
#### * arguments: number of rows, number of columns
#### * IMPORTANT: total size of the 2d array has to be the same as the original 1d array

In [85]:
# build a 1d array with 25 elements: 0 through 24
# NOTE: the start (0) is included, the stop (25) is not

arr = np.arange(0, 25)

# 5 rows x 5 columns == 25 elements, so the reshape works
arr_2d = arr.reshape(5, 5)

print(arr)
print('\n')
print(arr_2d)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]


[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [88]:
# more on using .reshape():
# total number of points has to match! or else Error Town
# there are 25 points in arr, so it can't fill a 5 x 7 (ie 35-point) matrix
# this call is expected to fail with the ValueError shown below

error_arr_2d = arr.reshape(5,7)

ValueError: cannot reshape array of size 25 into shape (5,7)

In [92]:
# quick sanity check before reshaping:
# rows * columns must equal the number of elements in the original 1d array

arr = np.arange(0, 20)

# 2 * 10 == 20 and 4 * 5 == 20, so both shapes are valid
arr_2d = arr.reshape(2, 10)
another_2d_version = arr.reshape(4, 5)

print(arr)
print('\n')
print(arr_2d)
print('\n')
print(another_2d_version)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]


[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]
