# NUMPY

 - Numpy provides python with its numerical muscle.
 - This is your go-to package.
 - The package is written in C and made to deal with N-dimensional arrays, all basic mathematical operations, linear algebra operations, et cetera.
 - We will not be going through all of the power of this module. For more, see the reference documentation:
 https://numpy.org/doc/stable/reference/index.html

Numpy arrays are the base object containing a variety of powerful methods. <br>
Making a Numpy array is easy:


In [1]:
import numpy as np

In [2]:
# Wrap the list in np.array(...) so that array1 is a Numpy array
# rather than a plain Python list.
array1 = np.array([1, 2, 3])

Note that it is np.array([1,2,3]) <br>
not np.array[1, 2, 3]

In [3]:
# You can convert Lists to NP Arrays
# and believe me, you will convert lots of lists to NP arrays
list1 = [1.0, 2.0, 3.0]

In [5]:
array2 = np.array(list1)
print(list1)

[1.0, 2.0, 3.0]


All data in a Numpy array must be of a single data type (dtype). <br>
Numpy has a large number of possible data types:

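- np.str_ ==> string
- np.bool_ ==> boolean (i.e., True|False)
- np.int_ ==> integer
- np.float64 ==> floating point
- np.complex128 ==> complex (i.e., 1+1j)

(The bare aliases np.str, np.int, np.float and np.complex were removed in NumPy 1.24; use the names above or the Python built-ins str, int, float, complex.)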

In [6]:
# currently, array1 holds integers
# if we want a floating-point version of it,
# we build a new array and ask for the dtype explicitly.
# (The old np.float_ alias was removed in NumPy 2.0, so we do not use it here.)
array3 = np.asarray(array1, dtype=np.float64)

In [7]:
print(array1)
print(array3)

[1, 2, 3]
[1. 2. 3.]


## Multidimensionality

Numpy arrays can be N-dimensional, which is of particular use with tables of data (i.e. 2-D)

In [8]:
# Creating a 3x2 Array:
array4 = np.array( [[1, 2], [3, 4], [5, 6]])
print(array4)

[[1 2]
 [3 4]
 [5 6]]


In [9]:
array4.shape

(3, 2)

In [10]:
array4.size

6

In [11]:
array4[:,1]

array([2, 4, 6])

In [12]:
array4[2,:]

array([5, 6])

In [13]:
array4.flatten()

array([1, 2, 3, 4, 5, 6])

In [14]:
array4.reshape((2,3))

array([[1, 2, 3],
       [4, 5, 6]])

In [15]:
array4.reshape((-1,1))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

# Special Array Creation Functions

In [16]:
array5 = np.arange(10)
print(array5)

[0 1 2 3 4 5 6 7 8 9]


In [17]:
array6 = np.arange(0,51,5)
print(array6)

[ 0  5 10 15 20 25 30 35 40 45 50]


In [18]:
array7 = np.ones(10)
print(array7)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [19]:
array8 = np.zeros((3,5))
print(array8)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [20]:
array9 = np.identity(6)
print(array9)

[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]


## Some Built-in Numpy Functionality

In [21]:
np.arange(9).reshape((3,3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [22]:
array6

array([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50])

In [23]:
array6.min(), array6.max()

(0, 50)

In [24]:
array1 = np.arange(1,10,1.0)
array1.mean(), array1.sum(), array1.prod()

(5.0, 45.0, 362880.0)

In [25]:
array10 = np.random.randint(1,10,12).reshape((4,3))
print(array10)

[[9 4 8]
 [1 2 2]
 [5 9 2]
 [5 4 3]]


In [26]:
array10.mean()

4.5

In [27]:
# average of each column
array10.mean(axis=0)

array([5.  , 4.75, 3.75])

In [28]:
# average of each row
array10.mean(axis=1)

array([7.        , 1.66666667, 5.33333333, 4.        ])

In [29]:
# sum of columns
array10.sum(axis=0)

array([20, 19, 15])

In [30]:
# sum of rows
array10.sum(axis=1)

array([21,  5, 16, 12])

In [31]:
print(array10)

[[9 4 8]
 [1 2 2]
 [5 9 2]
 [5 4 3]]


In [32]:
array10.transpose()

array([[9, 1, 5, 5],
       [4, 2, 9, 4],
       [8, 2, 2, 3]])

In [33]:
# another method of transposing
array10.T

array([[9, 1, 5, 5],
       [4, 2, 9, 4],
       [8, 2, 2, 3]])

In [34]:
# and yet another method of transposing
array10.swapaxes(0, 1)

array([[9, 1, 5, 5],
       [4, 2, 9, 4],
       [8, 2, 2, 3]])

In [None]:
# # RESHAPING

In [35]:
np.arange(90).reshape((9, 10))

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]])

In [36]:
np.arange(90).reshape((-1, 10))
#   here -1 means "hey NumPy, you determine the length along this axis"
# so we know there will be 10 columns, and NumPy determines the number of rows

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]])

In [37]:
array1 = np.arange(90).reshape((-1, 10))
array1.shape == (9, 10)

True

In [None]:
# Let's change some elements and observe where they are

In [38]:
array1[7,3]= 1000
print(array1)

[[   0    1    2    3    4    5    6    7    8    9]
 [  10   11   12   13   14   15   16   17   18   19]
 [  20   21   22   23   24   25   26   27   28   29]
 [  30   31   32   33   34   35   36   37   38   39]
 [  40   41   42   43   44   45   46   47   48   49]
 [  50   51   52   53   54   55   56   57   58   59]
 [  60   61   62   63   64   65   66   67   68   69]
 [  70   71   72 1000   74   75   76   77   78   79]
 [  80   81   82   83   84   85   86   87   88   89]]


In [39]:
array1[5,:]= np.ones(10)
print(array1)

[[   0    1    2    3    4    5    6    7    8    9]
 [  10   11   12   13   14   15   16   17   18   19]
 [  20   21   22   23   24   25   26   27   28   29]
 [  30   31   32   33   34   35   36   37   38   39]
 [  40   41   42   43   44   45   46   47   48   49]
 [   1    1    1    1    1    1    1    1    1    1]
 [  60   61   62   63   64   65   66   67   68   69]
 [  70   71   72 1000   74   75   76   77   78   79]
 [  80   81   82   83   84   85   86   87   88   89]]


In [40]:
array1[:,4]= np.zeros(9)
print(array1)

[[   0    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30   31   32   33    0   35   36   37   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    1    1    1    0    1    1    1    1    1]
 [  60   61   62   63    0   65   66   67   68   69]
 [  70   71   72 1000    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [41]:
array1[3,1:8] = 2*np.ones(7)
print(array1)

[[   0    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    1    1    1    0    1    1    1    1    1]
 [  60   61   62   63    0   65   66   67   68   69]
 [  70   71   72 1000    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [42]:
# -1 means "last"; as the stop of a slice it is exclusive, so 5:-1 selects rows 5, 6 and 7
array1[5:-1,1:3] = [[3,3],[3,3],[3,3]]
print(array1)

[[   0    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    3    3    1    0    1    1    1    1    1]
 [  60    3    3   63    0   65   66   67   68   69]
 [  70    3    3 1000    0   75   76   77   78   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [43]:
# Non-sequential indexing!
# The two tuples are paired element-wise as (row, column):
# the first holds the row indices (5, 7), the second the column indices (6, 8),
# so this sets the two elements [5, 6] and [7, 8] to -20.
array1[(5, 7), (6, 8)] = [-20,-20]
print(array1)

[[   0    1    2    3    0    5    6    7    8    9]
 [  10   11   12   13    0   15   16   17   18   19]
 [  20   21   22   23    0   25   26   27   28   29]
 [  30    2    2    2    2    2    2    2   38   39]
 [  40   41   42   43    0   45   46   47   48   49]
 [   1    3    3    1    0    1  -20    1    1    1]
 [  60    3    3   63    0   65   66   67   68   69]
 [  70    3    3 1000    0   75   76   77  -20   79]
 [  80   81   82   83    0   85   86   87   88   89]]


In [46]:
# a = b vs a = copy(b)
array1 = np.array([1, 2, 3])
array2 = (array1)
print('Before______')
print(array2)
array1[0] = 5
print('After_______')
print(array2)

Before______
[1 2 3]
After_______
[5 2 3]


In [45]:
# Plain assignment (array2 = array1) does not copy a Numpy array: both names
# refer to the same data in memory (this minimizes the space used in memory).
# This is why when we made a change in array1 above, we also changed array2
#
# To ensure that values are independent, use the copy function:
array1 = np.array([1, 2, 3])
array2 = np.copy(array1)
print('Before______')
print(array2)
array1[0] = 5
print('After_______')
print(array2)

Before______
[1 2 3]
After_______
[1 2 3]


### Numpy Load Text Files: np.loadtxt
There are lots of options on this function, so check the docs, but some of the most used: <br>
array2 = np.loadtxt(filename, dtype=dtype, comments='#', delimiter=',', skiprows=5,
 usecols=(0, 1, 2)) <br>
 - This skips all comments (designated with a #) and the first 5 rows.
 - It then reads in columns 0, 1, and 2, delimited by a comma

When in doubt about arguments and what form they should be, check the docs:

In [47]:
np.loadtxt?

Loadtxt can read in gzipped (.gz) and Bzip2 (.bz2) files without them being unzipped.

### Mathematical Operations
Mathematical operations proceed element-wise, as follows

In [48]:
array1 = np.array([0.5, 1.0, 1.5, 2.0])
print(array1+5)

[5.5 6.  6.5 7. ]


In [49]:
print(array1*2)

[1. 2. 3. 4.]


In [50]:
print(array1**2)

[0.25 1.   2.25 4.  ]


In [51]:
array2 = np.copy(array1)
print(array1 + array2)

[1. 2. 3. 4.]


In [52]:
print(array1*array2)

[0.25 1.   2.25 4.  ]


In [53]:
np.log10(array1)

array([-0.30103   ,  0.        ,  0.17609126,  0.30103   ])

In [54]:
np.exp(array1)

array([1.64872127, 2.71828183, 4.48168907, 7.3890561 ])

In [55]:
np.sin(array1)

array([0.47942554, 0.84147098, 0.99749499, 0.90929743])

In [56]:
np.cosh(array1)

array([1.12762597, 1.54308063, 2.35240962, 3.76219569])

## Matrix Math

In [57]:
# For 2-D (and higher) matrices, you can do standard matrix math:
arr1 = np.array([[0,1],[2,3]])
arr2 = np.array([[4,5],[6,7]])
print(arr1)
print(arr2)

[[0 1]
 [2 3]]
[[4 5]
 [6 7]]


In [58]:
# Doing standard matrix math:
np.dot(arr1, arr2)  # dot product

array([[ 6,  7],
       [26, 31]])

In [59]:
# Cross product
# The arrays are treated as stacks of 2-element vectors, one per row:
# row i of arr1 is crossed with row i of arr2, and for 2-element vectors
# the result is a single number per row (the z-component), e.g. 0*5 - 1*4 = -4.
# NOTE(review): newer NumPy releases (2.0+) deprecate np.cross on 2-element
# vectors -- confirm against the installed version.
np.cross(arr1, arr2)

array([-4, -4])

In [60]:
# Eigenvalues and eigenvectors
np.linalg.eig(arr1)

(array([-0.56155281,  3.56155281]),
 array([[-0.87192821, -0.27032301],
        [ 0.48963374, -0.96276969]]))

In [61]:
# calculating inverses:
np.linalg.inv(arr1)

array([[-1.5,  0.5],
       [ 1. ,  0. ]])

In [62]:
# determinant
np.linalg.det(arr1)

-2.0

### Searching Arrays

In [63]:
arr1 = np.arange(6).reshape((2,3))
print(arr1)

[[0 1 2]
 [3 4 5]]


In [64]:
# the following command gives the indices of the elements of arr1 that are > 1:
# [0,2], [1,0], [1,1], [1,2]
# they are returned as a tuple of (row indices, column indices)
np.where(arr1 > 1)

(array([0, 1, 1, 1]), array([2, 0, 1, 2]))

In [65]:
print(arr1[0, 2])

2


In [66]:
print(arr1[1, 0])

3


In [67]:
# you can combine more than one criterion,
# but in that case each condition needs its own ()s, as follows
np.where((arr1 > 1) & (arr1<4))

(array([0, 1]), array([2, 0]))

## Vectorizing Functions

In [None]:
# Sometimes, you’ll want to make complex functions that don’t
# necessarily automatically work with numpy arrays.
# Solution: vectorization.
# Let's see how vectorization works with an example

In [68]:
def funct1(val):
    """Return sin(val) when val is below pi/2, otherwise cos(val).

    The comparison yields a single True/False only for a scalar ``val``;
    given a multi-element array it cannot pick one branch and raises
    ValueError, which is why the function needs np.vectorize for arrays.
    """
    import numpy as np
    # Scalar-only branch selection: does not work with an array.
    return np.sin(val) if val < np.pi/2 else np.cos(val)

In [69]:
# this will work
funct1(np.pi/4)

0.7071067811865475

In [70]:
# build an array input; passing it to funct1 (next cell) will fail
z = np.linspace(0,np.pi,7)
print(z)

[0.         0.52359878 1.04719755 1.57079633 2.0943951  2.61799388
 3.14159265]


In [71]:
funct1(z)

ValueError: ignored

In [72]:
# Now it will work
vfunct1 = np.vectorize(funct1)
vfunct1(z)
# because the vectorize function makes functions like this work for arrays

array([ 0.00000000e+00,  5.00000000e-01,  8.66025404e-01,  6.12323400e-17,
       -5.00000000e-01, -8.66025404e-01, -1.00000000e+00])

While this is fine to do for functions you don’t need high performance on, it is slow(ish). Consider writing the function better for speed.

## Saving your output

In [None]:
# For individual numpy arrays, there are some quick and dirty methods to save your data:

In [73]:
# Quick and Dirty in Text:
x = np.arange(100).reshape((25,4))
np.savetxt('test.dat', x)

In [74]:
# Numpy also has its own binary formats (.npy, .npz) that allow for quick reading of data
# Saving a single array:
np.save('test.npy', arr1)

# Saving multiple arrays:
x2 = np.arange(20).reshape((5,4))
np.savez('test', a1=x, a2=x2)   # .npz suffix added

# Output file extensions depend on which function you use:
# np.save writes a single array to a .npy file and
# np.savez writes multiple arrays to a .npz file

### Loading Saved Output

In [75]:
ls

[0m[01;34msample_data[0m/  test.dat  test.npy  test.npz


In [76]:
# To load a single numpy array (.npy file):
arr1a = np.load('test.npy')
print(arr1a)

[[0 1 2]
 [3 4 5]]


In [78]:
# to list current variables used so far
%who

alldata	 arr1	 arr1a	 arr2	 array1	 array10	 array2	 array3	 array4	 
array5	 array6	 array7	 array8	 array9	 funct1	 list1	 np	 vfunct1	 
x	 x2	 z	 


In [77]:
# To load multiple numpy arrays (.npz file):
alldata = np.load('test.npz')
# to learn what arrays are in your NPZ file do this
alldata.files

['a1', 'a2']

In [79]:
# The loaded object (alldata) is dictionary-like:
var1 = alldata['a1']
var2 = alldata['a2']
print(var1)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]
 [36 37 38 39]
 [40 41 42 43]
 [44 45 46 47]
 [48 49 50 51]
 [52 53 54 55]
 [56 57 58 59]
 [60 61 62 63]
 [64 65 66 67]
 [68 69 70 71]
 [72 73 74 75]
 [76 77 78 79]
 [80 81 82 83]
 [84 85 86 87]
 [88 89 90 91]
 [92 93 94 95]
 [96 97 98 99]]


## LAMBDA FUNCTIONS

In [None]:
# Sometimes you want to define a simple function without the full function syntax.
# Lambda functions exist for this exact reason:
# Defining the Function:
# NOTE: this rebinds the name funct1, replacing the sin/cos function defined
# earlier with def; run this cell before calling funct1 below.
funct1 = lambda x: x**2 # Returns the square of x

In [80]:
# Using the Function:
tmpvar1 = funct1(5)
print(tmpvar1)

25


In [81]:
# Can use multiple variables:
# funct2 takes two arguments and returns their sum
funct2 = lambda x,y: x + y

In [82]:
# Using the Function:
tmpvar2 = funct2(5, 6)
print(tmpvar2)

11


In [None]:
# LAMBDA FUNCTIONS ARE VERY USEFUL IN E.D.A

In [83]:
!pip install pipe
from pipe import select, where

Collecting pipe
  Downloading pipe-2.0-py3-none-any.whl (8.8 kB)
Installing collected packages: pipe
Successfully installed pipe-2.0


In [84]:
arr = [1, 2, 3, 4, 5, 6, 10]
list(arr| where(lambda x:x%2==0) | select(lambda x:x**2))

[4, 16, 36, 100]