# NumPy Basics

## Why use NumPy? Because operations on a NumPy array are much faster than on a standard Python list
## NumPy also includes a large number of useful functions

## Numpy Introduction

In [4]:
import numpy as np

In [6]:
## Can use either [] or ()
## The types will stay the same

a = np.array([1,2,3,4,5,6,7,8])
b = np.array((1,2,3,4,5,6,7,8))

print(a)
print(b)

[1 2 3 4 5 6 7 8]
[1 2 3 4 5 6 7 8]


In [8]:
## Notice its type is array
type(a)

numpy.ndarray

In [10]:
## Notice its type is also array
type(b)

numpy.ndarray

In [12]:
## Use .dtype to check type inside the array

a.dtype

dtype('int32')

In [14]:
## We can also specify type inside the array's definition

## Defined as int
a = np.array([1,2,3,4,5,6,7,8], dtype = 'i')

## Defined as float
b = np.array((1,2,3,4,5,6,7,8),dtype = 'f')

print(a.dtype)
print(b.dtype)

int32
float32


## Numpy Dimensions

In [16]:
## How to create a 2 dimensional array
## .ndim reports the number of dimensions

a = np.array([[1,2,3],[4,5,6]])
print(a.ndim)

2


In [18]:
## Create a 3 dimensional array

b = np.array([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])
print(b.ndim)

3


In [28]:
## Every row MUST have the same number of elements for NumPy to build a multidimensional array
## The rows below have 3 and 4 elements, so np.array raises a ValueError (NumPy 1.24+)
## We catch the error and print it, so the notebook still runs top to bottom with "Run All"

rows = [[1,2,3],[4,5,6,7]]
try:
    a = np.array(rows)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

## Numpy: Shape, Size and Bytes

In [33]:
b = np.array([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])
b.shape

(2, 2, 3)

In [35]:
## Show the shapes (sizes) of the arrays

##               1                   2
##               1        2         1      2
##              1 2 3   1 2 3      1 2 3  1 2 3
b = np.array([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])

print(b.shape[0],b.shape[1],b.shape[2])

2 2 3


In [37]:
## Indexing a specific number from an array
## Here we pick the 3 from the second block: block index 1, row index 0, element index 2
##               0                   1
##               0        1         0      1
##              0 1 2   0 1 2      0 1 2  0 1 2
b = np.array([[[1,2,3],[4,5,6]],[[1,2,3],[4,5,6]]])

print(b[1][0][2])

3


In [41]:
## Number of elements there

b.size

12

In [45]:
## Number of bytes in memory

b.nbytes

48

## Numpy: Arange, Random, Reshape

In [2]:
import numpy as np

In [8]:
## Create an array from 0 to x number (a range) sequentially
## This works very similar to range()

A = np.arange(100)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [11]:
## Can step and start at specific numbers too!

A = np.arange(2,100,2)
A

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34,
       36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68,
       70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

In [29]:
## np.random is a package inside of numpy
## In this example we get an array holding the numbers 0-9 in a random order
## If you keep running this cell the array will change each time because it's getting a new random permutation

## We write np.random.permutation() because random is a package in np, and permutation is a part of the random package.

A = np.random.permutation(np.arange(10))
A

array([4, 5, 8, 0, 1, 2, 7, 3, 6, 9])

In [82]:
## randint picks a random integer from the range that is specified
## The lower bound (20) is included but the upper bound (30) is NOT, so the result is between 20 and 29
A = np.random.randint(20,30)
print(A)
print(type(A))

27
<class 'int'>


In [94]:
## You can create multi dimensional arrays (matrices) using random
## The arguments give the shape: 2 is the number of rows and 3 is the number of values in each row
## rand fills the array with random floats between 0 and 1
A = np.random.rand(2,3)
print(A)
print(A.ndim)

[[0.84766983 0.08493704 0.55179524]
 [0.90854003 0.58487861 0.26086194]]
2


In [96]:
## Can do larger dimensional arrays
## So this is a 4 dimensional array.
## There are 2 arrays with 3 arrays within them.
## Each of those 3 arrays has 4 arrays within them.
## Each of those 4 arrays has 2 values within them

A = np.random.rand(2,3,4,2)
print(A)
print(A.ndim)

[[[[5.61604299e-01 1.03432106e-01]
   [8.71057930e-01 6.49890138e-01]
   [1.79722458e-01 7.64836151e-01]
   [4.96472362e-01 6.25335766e-01]]

  [[8.61049353e-01 8.37171138e-01]
   [8.89617705e-01 3.16079261e-01]
   [4.07576744e-01 4.45564394e-01]
   [5.11014844e-01 6.40675723e-01]]

  [[2.22602396e-01 4.05644656e-01]
   [3.52553346e-02 9.47619911e-01]
   [4.14831406e-01 9.39345291e-01]
   [9.37804420e-01 4.41836085e-01]]]


 [[[6.45285574e-01 6.95565595e-01]
   [6.65175930e-04 1.95215924e-01]
   [1.88792925e-01 6.59440862e-02]
   [1.23726120e-01 5.16039708e-01]]

  [[9.53804404e-01 5.91158870e-01]
   [6.36726329e-01 8.24923800e-01]
   [2.86004428e-01 9.48354166e-01]
   [5.05901381e-01 7.02942888e-01]]

  [[1.51488034e-01 7.83912260e-01]
   [2.72511907e-01 4.75193466e-01]
   [4.81535920e-01 4.59097617e-01]
   [8.24679127e-02 8.31105771e-02]]]]
4


In [104]:
## Reshape
## What reshape does is breaks up the array as defined by the user.
## In this example we have a range from 0-99
## We broke this up into 4 arrays with 25 elements in each

A = np.arange(100).reshape(4,25)
print(A)
print(A.shape)
print(A.ndim)

[[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
  24]
 [25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
  49]
 [50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
  74]
 [75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
  99]]
(4, 25)
2


In [106]:
## Reshape
## What reshape does is breaks up the array as defined by the user.
## In this example we have a range from 0-99
## We broke this up into 4 arrays with 5 arrays within them.
## Each of those 5 arrays then has 5 elements within them

A = np.arange(100).reshape(4,5,5)
print(A)
print(A.shape)
print(A.ndim)

[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]
  [10 11 12 13 14]
  [15 16 17 18 19]
  [20 21 22 23 24]]

 [[25 26 27 28 29]
  [30 31 32 33 34]
  [35 36 37 38 39]
  [40 41 42 43 44]
  [45 46 47 48 49]]

 [[50 51 52 53 54]
  [55 56 57 58 59]
  [60 61 62 63 64]
  [65 66 67 68 69]
  [70 71 72 73 74]]

 [[75 76 77 78 79]
  [80 81 82 83 84]
  [85 86 87 88 89]
  [90 91 92 93 94]
  [95 96 97 98 99]]]
(4, 5, 5)
3


## Numpy: Indexing and Slicing

In [113]:
## Slicing arrays is very similar to lists!
## A[start:end:step]

A = np.arange(100)
print(A)
b = A[3:10]
print(b)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
[3 4 5 6 7 8 9]


In [115]:
## Let's try changing an element of b
## Notice how A and b both changed. This is because a slice of an array is a view: b shares the same memory as A.
## THIS IS NOT THE SAME AS LISTS!! If a list is sliced and saved to a variable, it creates its own independent list

b[0] = -1200
print(b)
print(A)

[-1200     4     5     6     7     8     9]
[    0     1     2 -1200     4     5     6     7     8     9    10    11
    12    13    14    15    16    17    18    19    20    21    22    23
    24    25    26    27    28    29    30    31    32    33    34    35
    36    37    38    39    40    41    42    43    44    45    46    47
    48    49    50    51    52    53    54    55    56    57    58    59
    60    61    62    63    64    65    66    67    68    69    70    71
    72    73    74    75    76    77    78    79    80    81    82    83
    84    85    86    87    88    89    90    91    92    93    94    95
    96    97    98    99]


In [119]:
## To make a copy of an array without associating to same memory use .copy() method

A = np.arange(100) ## resetting the original array

b = A[3:10].copy()
b[0] = -1200
print(b)
print(A)

[-1200     4     5     6     7     8     9]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [121]:
## stepping in arrays

A[::5]

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,
       85, 90, 95])

In [123]:
## Reverse stepping: start from the last element and move backwards 5 at a time

A[::-5]

array([99, 94, 89, 84, 79, 74, 69, 64, 59, 54, 49, 44, 39, 34, 29, 24, 19,
       14,  9,  4])

In [127]:
## Reverse an array

A[::-1]

array([99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83,
       82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
       65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
       48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
       31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15,
       14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [135]:
## How to find the position of a particular element in an array
## Let's say I want to know which index holds the value 1000.

A[15] = 1000 ## Put 1000 at index 15 so we already know what the answer should be
print(A)

idx = np.argwhere(A==1000)[0][0] ## argwhere returns the indices of every match; [0][0] takes the first one
print(idx)
print(A[idx])

[   0    1    2    3    4    5    6    7    8    9   10   11   12   13
   14 1000   16   17   18   19   20   21   22   23   24   25   26   27
   28   29   30   31   32   33   34   35   36   37   38   39   40   41
   42   43   44   45   46   47   48   49   50   51   52   53   54   55
   56   57   58   59   60   61   62   63   64   65   66   67   68   69
   70   71   72   73   74   75   76   77   78   79   80   81   82   83
   84   85   86   87   88   89   90   91   92   93   94   95   96   97
   98   99]
15
1000


In [141]:
## Creating a random 2 dimensional array
## Notice how the values are all small decimals. Let's round these values

A = np.random.rand(5,4)
A

array([[0.84450907, 0.35114204, 0.47340607, 0.13172016],
       [0.17104106, 0.22895491, 0.97061533, 0.58788449],
       [0.77054979, 0.8268839 , 0.53327367, 0.25150467],
       [0.30746183, 0.47556708, 0.2328896 , 0.07205304],
       [0.42353107, 0.88785107, 0.08317962, 0.58009707]])

In [143]:
## Multiply by 10 to scale the values up, then round them to whole numbers
## Note the results are still stored as floats (2. instead of 2)

A = np.round(10*np.random.rand(5,4))
A

array([[ 2.,  7.,  5.,  6.],
       [ 3.,  1.,  4.,  3.],
       [10.,  7.,  8.,  2.],
       [ 3., 10.,  7., 10.],
       [ 4.,  6.,  8.,  5.]])

In [147]:
## Now lets slice this array
## Lets say we want to see the number 7 in the 4th array

A[3][2]

7.0

In [149]:
## Lets say we want the entire 3rd array

A[2]

array([10.,  7.,  8.,  2.])

In [151]:
## Lets say you want all the first elements in every array

A[:,0]

array([ 2.,  3., 10.,  3.,  4.])

In [161]:
## Lets say you want the middle 6 elements 
## Notice how the order of the array is [rows, columns]
A[1:4,1:3]

array([[ 1.,  4.],
       [ 7.,  8.],
       [10.,  7.]])

In [163]:
## How to transpose the matrix

A.T

array([[ 2.,  3., 10.,  3.,  4.],
       [ 7.,  1.,  7., 10.,  6.],
       [ 5.,  4.,  8.,  7.,  8.],
       [ 6.,  3.,  2., 10.,  5.]])

In [167]:
## You can also use linear algebra on matrices
import numpy.linalg as la 

In [179]:
## This is how to get the inverse of the matrix
A = np.random.rand(3,3)
print(A)

la.inv(A)

[[0.90596327 0.83698888 0.13068596]
 [0.08674265 0.90352776 0.02615034]
 [0.72165721 0.60088082 0.50745328]]


array([[ 1.46763697, -1.14752069, -0.31883032],
       [-0.0833488 ,  1.21121825, -0.04095204],
       [-1.98845517,  0.19769065,  2.47253005]])

In [187]:
## Now let's say we want to sort a matrix. Start with a fresh random one

A = np.round(10*np.random.rand(5,4))
print(A)



[[ 1.  2.  3.  1.]
 [ 1.  0.  1. 10.]
 [ 5.  6.  3.  2.]
 [ 1.  5.  4.  5.]
 [ 4.  9.  1.  8.]]


In [191]:
## axis=0 sorts down each column, so every column ends up in ascending order
## .sort() works in place: A itself is changed
A.sort(axis=0)
A

array([[ 1.,  0.,  1.,  1.],
       [ 1.,  2.,  1.,  2.],
       [ 1.,  5.,  3.,  5.],
       [ 4.,  6.,  3.,  8.],
       [ 5.,  9.,  4., 10.]])

In [193]:
## axis=1 sorts across each row, so every row ends up in ascending order (A is changed in place)
A.sort(axis=1)
A

array([[ 0.,  1.,  1.,  1.],
       [ 1.,  1.,  2.,  2.],
       [ 1.,  3.,  5.,  5.],
       [ 3.,  4.,  6.,  8.],
       [ 4.,  5.,  9., 10.]])

## Numpy: Masking

In [196]:
A = np.arange(100)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [198]:
## Indexing with a list of positions! Let's say we want the elements at indices 3, 5 and 6
## Then we assign that to variable B

B = A[[3,5,6]]
B

array([3, 5, 6])

In [200]:
## Now we can change an element in B without changing A
## Unlike a slice, selecting with a list of indices (or a mask) gives back a copy of the data

B[0] = -1
print(B)
print(A)

[-1  5  6]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [202]:
## We can also mask using logic!
## Here we get all elements less than 40

B = A[A<40]
B

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39])

In [208]:
## Here we get all elements strictly between 40 and 80 (40 and 80 themselves are not included)

B = A[(A>40) & (A<80)]
B

array([41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
       58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
       75, 76, 77, 78, 79])

### What's the difference between using '&' and 'and'?
#### '&' compares arrays element by element
#### 'and' is used for single True/False values (using it on an array with more than one element raises an error)

#### This also applies for '|' and 'or', '~' and 'not'

## Numpy: BroadCasting and Concatenation and Sorting

In [218]:
A = np.round(10*np.random.rand(2,3))
A

array([[10.,  9.,  6.],
       [ 8.,  6.,  5.]])

In [220]:
## This is broadcasting. Basically 3 is added to every element of the matrix

A + 3

array([[13., 12.,  9.],
       [11.,  9.,  8.]])

In [222]:
## Broadcasting also works between arrays of different shapes
## np.arange(2).reshape(2,1) is the column [[0],[1]], so 0 is added to the first row and 1 to the second row
A+(np.arange(2).reshape(2,1))

array([[10.,  9.,  6.],
       [ 9.,  7.,  6.]])

In [226]:
B = np.round(10*np.random.rand(2,2))
B

array([[5., 4.],
       [5., 7.]])

In [228]:
## Concatenate
## A is a 2x3 array and B is a 2x2 array

print(A)
print(B)

[[10.  9.  6.]
 [ 8.  6.  5.]]
[[5. 4.]
 [5. 7.]]


In [230]:
## Concatenation can happen a few different ways.
## This example is horizontal: the columns of B are placed to the right of the columns of A

C = np.hstack((A,B))
C

array([[10.,  9.,  6.,  5.,  4.],
       [ 8.,  6.,  5.,  5.,  7.]])

In [238]:
##Sorting

A = np.random.permutation(np.arange(10))
print(A)
A.sort()
print(A)

[7 9 1 8 4 6 5 2 3 0]
[0 1 2 3 4 5 6 7 8 9]


In [242]:
## Works on strings too!

A = np.array(['D','B','A','C'])
print(A)
A.sort()
print(A)

['D' 'B' 'A' 'C']
['A' 'B' 'C' 'D']


## Numpy: ufuncs(Universal Functions) Speed test

## Lets show how fast Numpy is!

In [None]:
## Notice the times below: Python's built-in sum() compared to NumPy's np.sum()!
## The built-in sum() takes milliseconds (ms)!
## NumPy takes microseconds (µs)!! That's over 100 times faster!

In [247]:
b = np.random.rand(1000000)
%timeit sum(b)
%timeit np.sum(b)

111 ms ± 3.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
836 µs ± 50 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [251]:
## Now let's make my own function and compare time!
## Our own hand-written loop is about the same speed as Python's built-in sum()
## Not as fast as Numpy

def mySUM(G):
    """Add up the items of ``G`` one at a time with a plain Python loop.

    This is intentionally not vectorised: it exists so its running time
    can be compared against other ways of summing the same data.

    Parameters
    ----------
    G : iterable
        The values to add together.

    Returns
    -------
    The running total after every item has been added; ``0`` when ``G``
    is empty.
    """
    total = 0
    for item in G:
        total += item
    return total

%timeit mySUM(b)

151 ms ± 6.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
