In [1]:
# NUMPY IS A MULTIDIMENSIONAL ARRAY LIBRARY
# SO YOU CAN STORE LIKE
# 1D ARRAY, 2D ARRAYS, 3D ARRAYS

# NUMPY VS LISTS
# LISTS ARE SO SLOW. NUMPY IS SO FAST
# BECAUSE NUMPY USES FIXED TYPES
# FOR AN ELEMENT IN A LIST, LOTS OF INFO IS STORED ABOUT THE ELEMENT: SIZE, REFERENCE COUNT, OBJECT TYPE, OBJECT VALUE
# IT IS FASTER TO READ FEWER BYTES OF MEMORY, WHICH MAKES NUMPY FASTER
# WHEN ITERATING THROUGH EACH ITEM, NO TYPE CHECKING IS NEEDED BECAUSE NUMPY ARRAYS HAVE A FIXED TYPE
# ALSO, NUMPY USES CONTIGUOUS MEMORY
# BENEFITS: SIMD (SINGLE INSTRUCTION MULTIPLE DATA) VECTOR PROCESSING, EFFECTIVE CACHE UTILIZATION

# LISTS: INSERTION, DELETION, APPENDING, CONCATENATION
# NUMPY: INSERTION, DELETION, APPENDING, CONCATENATION, ETC.

# LISTS: a = [1,3,5], b = [1,2,3], a*b = ERROR
# NUMPY: a = np.array([1,3,5]), b = np.array([1,2,3]), a*b = np.array([1,6,15])

# applications of numpy?
# kind of like a matlab replacement, though scikit / scipy probably have more functionality
# useful for plotting (matplotlib)
# backend (pandas, connect 4, digital photography)
# machine learning

In [3]:
import numpy as np

<h3>the basics</h3>

In [4]:
# 1d array: np.array is a function, so it is called with () on a plain Python list
# dtype is passed explicitly so the values are stored as 16-bit integers
# instead of NumPy's default integer type (which depends on the platform)
a = np.array([1, 2, 3], dtype=np.int16)

In [6]:
# 2d array of floats: a list of rows, each row a list of values
top_row = [9.0, 8.0, 7.0]
bottom_row = [6.0, 5.0, 4.0]
b = np.array([top_row, bottom_row])
b

array([[9., 8., 7.],
       [6., 5., 4.]])

In [9]:
# GET THE NUMBER OF DIMENSIONS (AXES) OF A NUMPY ARRAY
# b WAS BUILT FROM A LIST OF LISTS, SO IT HAS 2
b.ndim

2

In [10]:
# GET SHAPE: A TUPLE HOLDING THE LENGTH OF EACH AXIS
# A 1D ARRAY HAS A ONE-ELEMENT TUPLE, E.G. (3,) FOR 3 VALUES
a.shape

(3,)

In [12]:
# INSPECT HOW `a` IS STORED.
# A NOTEBOOK CELL ONLY ECHOES ITS LAST EXPRESSION, SO THE INTERMEDIATE VALUES
# ARE BOUND TO NAMES AND CHECKED INSTEAD OF BEING EVALUATED AND THROWN AWAY.

# GET TYPE
elem_dtype = a.dtype

# GET SIZE OF ELEMENT IN BYTES
bytes_per_elem = a.itemsize
assert elem_dtype.itemsize == bytes_per_elem

# GET TOTAL ELEMENTS IN ARRAY (ARRAY SIZE)
n_elems = a.size

# GET TOTAL SIZE OF ARRAY IN BYTES: ELEMENT COUNT TIMES BYTES PER ELEMENT ...
assert n_elems * bytes_per_elem == a.nbytes

# ... WHICH IS EXACTLY WHAT nbytes REPORTS (THIS IS THE VALUE THE CELL DISPLAYS)
a.nbytes

24

<h3>accessing/changing specific elements, rows, columns, etc</h3>

In [13]:
# a 2x7 array holding 1..14, built one row at a time
first_row = list(range(1, 8))
second_row = list(range(8, 15))
a = np.array([first_row, second_row])

# show the contents, then the (rows, columns) shape
for shown in (a, a.shape):
    print(shown)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]
(2, 7)


In [15]:
# getting a specific element: [row, column]
# a negative index counts from the end, so with 7 columns, column -2 is column 5;
# the check makes that equivalence explicit instead of evaluating a[1,5] and discarding it
assert a[1, 5] == a[1, -2]
a[1, -2]

13

In [16]:
# get a specific row: fix the row index and use ':' to take every column
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [17]:
# get a specific column: use ':' to take every row and fix the column index
a[:, 2]

array([ 3, 10])

In [19]:
# getting a little more fancy [startindex:endindex:stepsize]
# the end index is exclusive and may be negative: with 7 columns, -1 is the same position as 6,
# so both spellings select columns 1, 3 and 5 of row 0 (checked here rather than discarded)
assert np.array_equal(a[0, 1:6:2], a[0, 1:-1:2])
a[0, 1:-1:2]

array([2, 4, 6])

In [21]:
# changing an element in an array
cell = (1, 5)          # (row, column) of the element to overwrite
a[cell] = 20
print(a)

# write 13 back so the cells that follow see the array as it was before
a[cell] = 13

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]


In [25]:
# changing multiple elements in an array
third_column = (slice(None), 2)     # the same selection as a[:, 2]

# a single scalar is written into every row of the selected column
a[third_column] = 5
print(a)

# a sequence with one value per row sets each row's entry individually
a[third_column] = [3, 10]
print(a)

[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 13 14]]
[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [26]:
# 3d example: two 2x2 blocks stacked along the first axis
first_block = [[1, 2], [3, 4]]
second_block = [[5, 6], [7, 8]]
b = np.array([first_block, second_block])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [28]:
# get specific element (work outside in): the first index picks a 2d block,
# the second picks a row inside that block, and ':' keeps every value in that row
b[0,1,:]

array([3, 4])

In [30]:
# replace the whole first block: indexing with only the leading axis selects
# everything below it, and the nested list supplies one value per position
b[0] = [[9, 10], [11, 12]]
print(b)

[[[ 9 10]
  [11 12]]

 [[ 5  6]
  [ 7  8]]]


<h3>initializing different types of arrays</h3>

In [31]:
# all 0s matrix: the shape is passed as a single tuple
# the zeros are stored as floats because no dtype is given
np.zeros((2,3,3))

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [33]:
# all 1s array: a single integer gives a 1d array of that length
# dtype overrides the float default so the ones are stored as 32-bit integers
np.ones(5, dtype = 'int32')

array([1, 1, 1, 1, 1], dtype=int32)

In [35]:
# any other number: np.full(shape, fill_value) -> here a 2x2 array filled with 101, stored as int16
np.full((2,2),101, dtype = 'int16')

array([[101, 101],
       [101, 101]], dtype=int16)

In [36]:
# any other number (full_like) <- reuses the shape (and dtype) of an array you've made before
np.full_like(a,4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [38]:
# initialize matrix of random decimal numbers drawn from [0, 1)
# rand takes the dimensions as separate arguments, not as a tuple
# NOTE(review): no seed is set in the cells shown, so the values change on every run
np.random.rand(4,2)

array([[0.11606721, 0.74057626],
       [0.89593628, 0.06665184],
       [0.04038864, 0.80701735],
       [0.03907252, 0.7290141 ]])

In [39]:
# random matrix that follows a specific shape:
# random_sample takes the shape as one tuple, so another array's .shape can be passed straight in
np.random.random_sample(a.shape)

array([[0.75784468, 0.84295253, 0.91567646, 0.75867711, 0.50460608,
        0.04163781, 0.41491521],
       [0.38605602, 0.70559996, 0.30819321, 0.44199797, 0.18436325,
        0.93303826, 0.88708506]])

In [44]:
# random integer values only
np.random.randint(8,size = (3,3))         # one bound given -> integers from 0 up to (not including) 8; with two bounds it is randint(low, high, size)

array([[4, 5, 4],
       [2, 0, 3],
       [7, 1, 5]])

In [45]:
# identity matrix: n x n floats with 1s on the main diagonal and 0s everywhere else
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [47]:
# repeating arrays
# arr is 2d with a single row, so repeating along axis 0 stacks copies of that row
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [55]:
# target pattern: a border of 1s, an interior of 0s and a 9 in the centre
# 1 1 1 1 1
# 1 0 0 0 1
# 1 0 9 0 1
# 1 0 0 0 1
# 1 1 1 1 1
border_value, centre_value = 1, 9

# start from a 5x5 grid filled with the border value
c = np.full((5, 5), border_value, dtype='int32')

# 1:-1 on both axes selects everything except the outermost rows and columns
c[1:-1, 1:-1] = 0

# finally drop the centre value into the middle cell
c[2, 2] = centre_value
print(c)

[[1 1 1 1 1]
 [1 0 0 0 1]
 [1 0 9 0 1]
 [1 0 0 0 1]
 [1 1 1 1 1]]


<h3>be careful when copying arrays!!!</h3>

In [57]:
# plain assignment does not copy any data: after b = a, both names refer to the same array
a = np.array([1,2,3])
b = a
b

array([1, 2, 3])

In [58]:
# write through the name b ...
b[0] = 100
print(b)
print(a)              # ... and a shows the change too, because b = a made both names point to the same array

[100   2   3]
[100   2   3]


In [61]:
# set the first element back to 1 before demonstrating a real copy
a[0] = 1

# np.copy allocates a new array, so b no longer shares its data with a
b = np.copy(a)
print(b)

# writing to the copy leaves the original untouched
b[0] = 100
for shown in (a, b):
    print(shown)

[1 2 3]
[1 2 3]
[100   2   3]


<h3>mathematics</h3>

In [62]:
# the integers 1..4 (arange excludes its stop value); used by the arithmetic cells below
a = np.arange(1, 5)
print(a)

[1 2 3 4]


In [63]:
# add 2 to every element: the scalar is applied to each value of the array
a + 2

array([3, 4, 5, 6])

In [64]:
# subtract 2 from every element
a - 2

array([-1,  0,  1,  2])

In [65]:
# multiply every element by 2
a * 2

array([2, 4, 6, 8])

In [66]:
# divide every element by 2; the result is a float array even though a holds integers
a / 2

array([0.5, 1. , 1.5, 2. ])

In [68]:
# adding two arrays of the same length works element by element
b = np.array([1, 0] * 2)     # the list repetition expands to [1, 0, 1, 0]
a + b

array([2, 2, 4, 4])

In [69]:
# raise every element to the power of 2
a ** 2

array([ 1,  4,  9, 16])

In [70]:
# take the sin of all values (each value is interpreted as an angle in radians)
np.sin(a)

# the other trig functions are applied element by element in the same way:
# np.cos(a)
# np.tan(a)
# etc.

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

<h3>linear algebra</h3>

In [71]:
# matrix multiplication: the inner dimensions must agree, so (2x3) times (3x2) gives (2x2)
a = np.ones(shape=(2, 3))
print(a)

b = np.full(shape=(3, 2), fill_value=2)
print(b)

# the @ operator is the infix spelling of np.matmul
a @ b

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [72]:
# find the determinant
# np.eye(3) builds the 3x3 identity matrix, whose determinant is 1
c = np.eye(3)
np.linalg.det(c)

1.0

In [None]:
# reference docs at scipy

# determinant
# trace
# singular value decomposition
# eigenvalues
# matrix norm
# inverse
# etc...

<h3>statistics</h3>

In [80]:
# small 2x3 sample used by the min / max cells below
first_row = [1, 2, 3]
second_row = [4, 5, 6]
stats = np.array([first_row, second_row])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [81]:
np.min(stats, axis=1)    # axis=1 collapses the columns, giving the minimum of each row; axis=0 would collapse the rows, giving the minimum of each column

array([1, 4])

In [76]:
# with no axis given, the maximum is taken over the whole array and a single value comes back
np.max(stats)

6

<h3>reorganizing arrays</h3>

In [85]:
# reshaping arrays
# the new shape must hold the same number of elements: 2*4 == 2*2*2 == 8
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

after = np.reshape(before, (2, 2, 2))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [88]:
# vertically stacking vectors: each 1d vector becomes one row of the result
v1 = np.arange(1, 5)
v2 = np.arange(5, 9)

# the same vector may appear more than once in the stack
rows = (v1,) + (v2,) * 3
np.vstack(rows)

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8]])

In [89]:
# horizontal stacking: the pieces are joined side by side, so they must have the same number of rows
h1 = np.ones(shape=(4, 1))
h2 = np.zeros(shape=(4, 2))

np.hstack((h1, h1, h2))

array([[1., 1., 0., 0.],
       [1., 1., 0., 0.],
       [1., 1., 0., 0.],
       [1., 1., 0., 0.]])

<h3>miscellaneous</h3>

<h5>load data from file</h5>

In [96]:
# parse the comma-separated text file; genfromtxt produces floats unless told otherwise
as_float = np.genfromtxt('data.txt', delimiter=',')

# astype is not destructive: it returns a converted copy instead of changing the array in place
# (int32 and float elements have different sizes, so converting in place would not really make sense),
# which is why the result has to be assigned to a name
filedata = as_float.astype('int32')
filedata

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]], dtype=int32)

<h5>boolean masking and advanced indexing</h5>

In [97]:
# where in filedata is the value greater than 50?
# the comparison is applied element by element and yields a boolean array of the same shape
filedata > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [99]:
filedata[filedata > 50]                   # returns the values (not the indexes) that are greater than 50, as a flat 1d array

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [100]:
# you can index with a list in numpy
a = np.arange(1, 10)        # the integers 1..9
picks = [1, 2, 8]           # each entry is a position to select, in the order listed
a[picks]

array([2, 3, 9])

In [102]:
np.any(filedata > 50, axis = 0)          # one result per column: True if any row has a value greater than 50 in that column

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [103]:
np.all(filedata > 50, axis = 0)          # one result per column: True only if every row has a value greater than 50 in that column

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [105]:
((filedata > 50) & (filedata < 100))          # elementwise AND of two masks: True where a value is greater than 50 and less than 100 (each comparison needs its own parentheses because & binds tighter than > and <)

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [None]:
#  1  2  3  4  5
#  6  7  8  9 10
# 11 12 13 14 15
# 16 17 18 19 20
# 21 22 23 24 25
# 26 27 28 29 30

# to access 11,12,16,17
# a[2:4, 0:2]

# to access 2,8,14,20
# a[[0,1,2,3],[1,2,3,4]]

# to access 4,5,24,25,29,30
# a[[0,4,5], 3:]   (equivalently a[[0,4,5], 3:5]); note a[[0,4,5],[3,4]] fails because the two index lists have different lengths