In [1]:
# To print multiple outputs together
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 1000)


# What is Dimension?

Dimension is the number of coordinates needed to specify a point in space. For example, a rectangle is two-dimensional, while a cube is three-dimensional. The dimension of an object is sometimes also called its dimensionality.

# What is Matrix?

A matrix is a two-dimensional data structure where numbers are arranged into rows and columns.

<center>
<img src = 'images/Matrix.png' height=350 width=350 />
    
This matrix is a 3x4 (pronounced "three by four") matrix because it has 3 rows and 4 columns.

# Python Matrix

Python doesn't have a built-in type for matrices. However, we can treat list of a list as a matrix. For example:

In [2]:
A = [[1, 4, 5], 
    [-5, 8, 9]]
A

# We can treat this list of a list as a matrix having 2 rows and 3 columns.

[[1, 4, 5], [-5, 8, 9]]

#### Let's see how to work with a nested list.

In [3]:
A = [[1, 4, 5, 12], 
    [-5, 8, 9, 0],
    [-6, 7, 11, 19]]

print("A =", A) 
print("A[1] =", A[1])   # 2nd row
print("A[1][2] =", A[1][2])   # 3rd element of 2nd row
print("A[0][-1] =", A[0][-1])   # Last element of 1st Row


A = [[1, 4, 5, 12], [-5, 8, 9, 0], [-6, 7, 11, 19]]
A[1] = [-5, 8, 9, 0]
A[1][2] = 9
A[0][-1] = 12


#### Extract one column

In [4]:
column = []; # empty list

for row in A:
    print(row)
    print(row[2])
    column.append(row[2])   
    
print("3rd column =", column)

[1, 4, 5, 12]
5
[-5, 8, 9, 0]
9
[-6, 7, 11, 19]
11
3rd column = [5, 9, 11]


# Computations With List and Matrices - Element by Element

## Add Two List

In [5]:
def initialise_list():
    # initializing lists 
    test_list1 = [1, 3, 4, 6, 8] 
    test_list2 = [4, 5, 6, 2, 10] 

    # printing original lists 
    print("Original list 1 : " + str(test_list1)) 
    print("Original list 2 : " + str(test_list2)) 
    
    return test_list1, test_list2

### Method 1 : Naive Method

In [6]:
test_list1, test_list2 = initialise_list()
res_list = [] 

for i in range(0, len(test_list1)): 
    res_list.append(test_list1[i] + test_list2[i]) 

# printing resultant list  
print("Resultant list is : " + str(res_list)) 

Original list 1 : [1, 3, 4, 6, 8]
Original list 2 : [4, 5, 6, 2, 10]
Resultant list is : [5, 8, 10, 8, 18]


### Method 2 : Using List Comprehension

In [7]:
initialise_list()

res_list = [test_list1[i] + test_list2[i] for i in range(len(test_list1))] 
  
# printing resultant list  
print("Resultant list is : " + str(res_list)) 

Original list 1 : [1, 3, 4, 6, 8]
Original list 2 : [4, 5, 6, 2, 10]


([1, 3, 4, 6, 8], [4, 5, 6, 2, 10])

Resultant list is : [5, 8, 10, 8, 18]


### Method 3 : using map() and add()

In [8]:
from operator import add 
  
initialise_list()

res_list = list(map(add, test_list1, test_list2)) 
  
# printing resultant list  
print("Resultant list is : " + str(res_list)) 

Original list 1 : [1, 3, 4, 6, 8]
Original list 2 : [4, 5, 6, 2, 10]


([1, 3, 4, 6, 8], [4, 5, 6, 2, 10])

Resultant list is : [5, 8, 10, 8, 18]


### Method 4 : Using zip() + sum()

The `zip()` function take iterables (can be zero or more), makes iterator that aggregates elements based on the iterables passed, and returns an iterator of tuples.

Syntax - `zip(*iterables)`

**iterables** - can be built-in iterables (like: list, string, dict), or user-defined iterables (object that has __iter__ method).

The `zip()` function returns an iterator of tuples based on the iterable object.

- If no parameters are passed, `zip()` returns an empty iterator
- If a single iterable is passed, `zip()` returns an iterator of 1-tuples. Meaning, the number of elements in each tuple is 1.
- If multiple iterables are passed, ith tuple contains ith Suppose, two iterables are passed; one iterable containing 3 and other containing 5 elements. Then, the returned iterator has 3 tuples. It's because iterator stops when shortest iterable is exhaused.

In [9]:
numberList = [1, 2, 3]
strList = ['one', 'two', 'three']

# No iterables are passed
print(zip())

# Converting itertor to list
print(list(zip()))

# Two iterables are passed
print(zip(numberList, strList))
print(list(zip(numberList, strList)))

numberList = [1, 2, 3, 4]
strList = ['one', 'two', 'three']
print(list(zip(numberList, strList)))

<zip object at 0x10eed6b48>
[]
<zip object at 0x10eed6b48>
[(1, 'one'), (2, 'two'), (3, 'three')]
[(1, 'one'), (2, 'two'), (3, 'three')]


In [10]:
from operator import add 
  
initialise_list()

print("Zip Output: " + str(list(zip(test_list1, test_list2))))

res_list = [sum(i) for i in zip(test_list1, test_list2)] 
  
# printing resultant list  
print("Resultant list is : " + str(res_list)) 

Original list 1 : [1, 3, 4, 6, 8]
Original list 2 : [4, 5, 6, 2, 10]


([1, 3, 4, 6, 8], [4, 5, 6, 2, 10])

Zip Output: [(1, 4), (3, 5), (4, 6), (6, 2), (8, 10)]
Resultant list is : [5, 8, 10, 8, 18]


## Add Two Matrices

### Method 1 : Using for loop

In [11]:
X = [[1,2,3], 
    [4 ,5,6], 
    [7 ,8,9]] 
  
Y = [[9,8,7], 
    [6,5,4], 
    [3,2,1]] 

result = [[0,0,0], 
        [0,0,0], 
        [0,0,0]] 
  
# iterate through rows 
for i in range(len(X)):    
    # iterate through columns 
    for j in range(len(X[0])): 
        result[i][j] = X[i][j] + Y[i][j] 

print(result)
print(" ")
        
for r in result: 
    print(r) 

[[10, 10, 10], [10, 10, 10], [10, 10, 10]]
 
[10, 10, 10]
[10, 10, 10]
[10, 10, 10]


### Method 2 : Using nested list comprehension

In [12]:
X = [[1,2,3], 
    [4 ,5,6], 
    [7 ,8,9]] 
   
Y = [[9,8,7], 
    [6,5,4], 
    [3,2,1]] 
  
result = [[X[i][j] + Y[i][j]  for j in range(len(X[0]))] for i in range(len(X))] 
   
for r in result: 
    print(r) 

[10, 10, 10]
[10, 10, 10]
[10, 10, 10]


# Numpy

Advanatages of numpy

- Support for multi-dimension array
- Built in array operations, Has a lot of built-in functions for linear algebra
- Vectorization and Broadcasting. Faster access in reading and writing items
- Time and space complexity of tasks is much lower when compared with traditional data structures
- Absolutely free since open-sourced

## Creating NumPy arrays

In [13]:
import numpy as np
a = np.array([1,2,3,4])               # creates a 1-dimensional array
b = np.array([ [1,2,3,4], [5,6,7,8] ])    # creates a 2-dimensional array
c = np.array([
               [[1,2,3,4], [5,6,7,8], [2,4,6,8]],
               [[10,20,30,40], [50,60,70,80], [1,3,5,7]]
              ])    # creates a 3-dimensional array
print(a)
print('----')
print(b)
print('----')
print(c)

[1 2 3 4]
----
[[1 2 3 4]
 [5 6 7 8]]
----
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 2  4  6  8]]

 [[10 20 30 40]
  [50 60 70 80]
  [ 1  3  5  7]]]


In [14]:
type(c)

numpy.ndarray

## Attributes of NumPy arrays

In [15]:
a = np.array([1,2,3,4])
b = np.array([[1,2,3,4], [5,6,7,8]])
a
b

array([1, 2, 3, 4])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [16]:
print('The shape of the array a is ', a.shape)
print('The shape of the array b is ', b.shape)

The shape of the array a is  (4,)
The shape of the array b is  (2, 4)


In [17]:
print('The dimensions of array a is ', a.ndim)
print('The dimensions of array b is ', b.ndim)

The dimensions of array a is  1
The dimensions of array b is  2


In [18]:
print('The size of the array a is ', a.size)
print('The size of the array b is ', b.size)
print('The size of the array b is ', b[0].size)

The size of the array a is  4
The size of the array b is  8
The size of the array b is  4


In [19]:
print('The datatype of the array a is ', a.dtype)
print('The datatype of the array b is ', b.dtype)

The datatype of the array a is  int64
The datatype of the array b is  int64


In [20]:
print('The number of bytes in each element of the array a is  ', a.itemsize)
print('The number of bytes in each element of the array b is ', b.itemsize)

The number of bytes in each element of the array a is   8
The number of bytes in each element of the array b is  8


## Creating with Low-level ndarray Constructor

In [21]:
np.empty((3,4),dtype='int8')

array([[ 0,  0,  0,  0],
       [ 0,  0,  0, 80],
       [ 0,  0,  0,  0]], dtype=int8)

In [22]:
np.zeros((3,4),dtype='int8')

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [23]:
np.ones((3,4),dtype='int8')

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int8)

In [24]:
np.full((2,2),7)

array([[7, 7],
       [7, 7]])

In [25]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [26]:
np.eye(4, dtype = 'float32')

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]], dtype=float32)

## Creating with Existing Data

In [27]:
#python list
a=[1,2,3]

#convert to NumPy array
b=np.asarray(a)
print(b)

[1 2 3]


In [28]:
#python tuples
a=((1,2),(3,4))

#convery to NumPy array
b=np.asarray(a)
print(b)

[[1 2]
 [3 4]]


In [29]:
a=np.fromiter([1,2,3,4],dtype='int8')
b=np.fromiter((1,2,3,4),dtype='int8')
c=np.fromiter(range(1,5),dtype='int8')
d=np.fromiter('string strin2',dtype='S50')

print("Array a is ",a)
print("Array b is ",b)
print("Array c is ",c)
print("Array d is ",d)

Array a is  [1 2 3 4]
Array b is  [1 2 3 4]
Array c is  [1 2 3 4]
Array d is  [b's' b't' b'r' b'i' b'n' b'g' b' ' b's' b't' b'r' b'i' b'n' b'2']


Bytes literals are always prefixed with 'b' or 'B'; they produce an instance of the bytes type instead of the str type.
More details - https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals


## Creating with Numerical Ranges

In [30]:
# NumPy array from 1 to 19
print(np.arange(1,20,dtype='int32'))

# NumPy array from 1 to 19 with step size 2
print(np.arange(1,20,2,dtype='int8'))

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 1  3  5  7  9 11 13 15 17 19]


In [31]:
# NumPy array from 1 to 20 with 100 numbers in between
print(np.linspace(1,20,100))

[ 1.          1.19191919  1.38383838  1.57575758  1.76767677  1.95959596
  2.15151515  2.34343434  2.53535354  2.72727273  2.91919192  3.11111111
  3.3030303   3.49494949  3.68686869  3.87878788  4.07070707  4.26262626
  4.45454545  4.64646465  4.83838384  5.03030303  5.22222222  5.41414141
  5.60606061  5.7979798   5.98989899  6.18181818  6.37373737  6.56565657
  6.75757576  6.94949495  7.14141414  7.33333333  7.52525253  7.71717172
  7.90909091  8.1010101   8.29292929  8.48484848  8.67676768  8.86868687
  9.06060606  9.25252525  9.44444444  9.63636364  9.82828283 10.02020202
 10.21212121 10.4040404  10.5959596  10.78787879 10.97979798 11.17171717
 11.36363636 11.55555556 11.74747475 11.93939394 12.13131313 12.32323232
 12.51515152 12.70707071 12.8989899  13.09090909 13.28282828 13.47474747
 13.66666667 13.85858586 14.05050505 14.24242424 14.43434343 14.62626263
 14.81818182 15.01010101 15.2020202  15.39393939 15.58585859 15.77777778
 15.96969697 16.16161616 16.35353535 16.54545455 16

In [32]:
print(np.linspace(1,20,20, dtype = 'int'))

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


In [33]:
# NumPy array from 10^0 to 10^2 with 100 numbers in log scale
print(np.logspace(0,2,100))

[  1.           1.04761575   1.09749877   1.149757     1.20450354
   1.26185688   1.32194115   1.38488637   1.45082878   1.51991108
   1.59228279   1.66810054   1.7475284    1.83073828   1.91791026
   2.009233     2.10490414   2.20513074   2.3101297    2.42012826
   2.53536449   2.65608778   2.7825594    2.91505306   3.05385551
   3.19926714   3.35160265   3.51119173   3.67837977   3.85352859
   4.03701726   4.22924287   4.43062146   4.64158883   4.86260158
   5.09413801   5.33669923   5.59081018   5.85702082   6.13590727
   6.42807312   6.73415066   7.05480231   7.39072203   7.74263683
   8.11130831   8.49753436   8.90215085   9.32603347   9.77009957
  10.23531022  10.72267222  11.23324033  11.76811952  12.32846739
  12.91549665  13.53047775  14.17474163  14.84968262  15.55676144
  16.29750835  17.07352647  17.88649529  18.73817423  19.6304065
  20.56512308  21.5443469   22.5701972   23.64489413  24.77076356
  25.95024211  27.18588243  28.48035868  29.8364724   31.2571585
  32.7454916

## Indexing and Slicing

In [34]:
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(a)

# Pull out second element of third row
print(a[2][1])
print('==========')
# Pull out first two rows and columns
print(a[:2,:2])
print('==========')
# Pull all elements of the third row
print(a[2,:])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
8
[[1 2]
 [4 5]]
[7 8 9]


In [35]:
# An example of integer array indexing
a=np.array([[1,2],[3,4],[5,6]])
print(a)
print(" ")

print(a[[0,1,2],[0,1,0]])
print('==========')

# [0, 0]
# [1, 1]
# [2, 0]

print(np.array([a[0,0],a[1,1],a[2,0]]))
print('==========')

print(a[[0,0],[1,1]])
print('==========')

print(np.array([a[0,1],a[0,1]]))

[[1 2]
 [3 4]
 [5 6]]
 
[1 4 5]
[1 4 5]
[2 2]
[2 2]


In [36]:
a = np.array([[4,7,1],[2,5,7],[7,1,1]])

# Boolean condition for values greater than 3
mask = a > 3
print(mask)

# Masking for the above boolean condition in the array
print(a[mask])

[[ True  True False]
 [False  True  True]
 [ True False False]]
[4 7 5 7 7]


## Vectorization

In [37]:
%%time
a = np.array([1,2,3,4,5,6,7,8,9,10])
print(a[a > 2])

[ 3  4  5  6  7  8  9 10]
CPU times: user 161 µs, sys: 59 µs, total: 220 µs
Wall time: 189 µs


In [38]:
%%time
a = [1,2,3,4,5,6,7,8,9,10]
print([i for i in a if i > 2])

[3, 4, 5, 6, 7, 8, 9, 10]
CPU times: user 59 µs, sys: 40 µs, total: 99 µs
Wall time: 77.7 µs


In [39]:
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
b = np.array([[10,11,12],[13,14,15],[16,17,18]])

a
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

array([[10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [40]:
# element wise addition
print(a+b)
print('==========')
print(np.add(a,b))

[[11 13 15]
 [17 19 21]
 [23 25 27]]
[[11 13 15]
 [17 19 21]
 [23 25 27]]


In [41]:
# element wise subtractions
print(a-b)
print('==========')
print(np.subtract(a,b))

[[-9 -9 -9]
 [-9 -9 -9]
 [-9 -9 -9]]
[[-9 -9 -9]
 [-9 -9 -9]
 [-9 -9 -9]]


In [42]:
# element wise multiplication
print(a*b)
print('==========')
print(np.multiply(a,b))

[[ 10  22  36]
 [ 52  70  90]
 [112 136 162]]
[[ 10  22  36]
 [ 52  70  90]
 [112 136 162]]


In [43]:
# element wise division
print(a/b)
print('==========')
print(np.divide(a,b))

[[0.1        0.18181818 0.25      ]
 [0.30769231 0.35714286 0.4       ]
 [0.4375     0.47058824 0.5       ]]
[[0.1        0.18181818 0.25      ]
 [0.30769231 0.35714286 0.4       ]
 [0.4375     0.47058824 0.5       ]]


In [44]:
# element wise square root transform
a = np.array([[1,4,9],[16,25,36]])
print(np.sqrt(a))

[[1. 2. 3.]
 [4. 5. 6.]]


In [45]:
# element wise square root transform
a = np.array([[1,4,9],[16,25,36]])
print(np.log(a))

[[0.         1.38629436 2.19722458]
 [2.77258872 3.21887582 3.58351894]]


In [46]:
a = np.array([1,2,3,4])
b = np.array([1,2,3,4])
print(np.array_equal(a,b))

True


In [47]:
a = np.array([[1,2,3],[4, 5, 6]])
a

# computes sum over columns
print(a.sum(axis=0))
print('==========')

# computes sum over rows
print(a.sum(axis=1))
print('==========')

# computes total sum
print(a.sum())

array([[1, 2, 3],
       [4, 5, 6]])

[5 7 9]
[ 6 15]
21


## Broadcasting

Consider you have two matrices $𝐴$ and $𝐵$ of orders $𝑎1×𝑎2$ and $𝑏1×𝑏2$ respectively.

- Matrix **addition/subtraction** on the two matrices will be defined iff $𝑎1=𝑏1$ and $𝑎2=𝑏2$
- Matrix **multiplication** on them is defined iff $𝑎2=𝑏1$ for $𝐴𝐵$ to be defined and $𝑏2=𝑎1$ for 𝐵𝐴 to be defined. 
- $𝐴𝐵$ will be of order $𝑎1×𝑏2$ and $𝐵𝐴$ will be of order $𝑏1×𝑎2$

In [48]:
np.array([1, 2, 3]) + 4
np.array([1, 2, 3]) + np.array([10, 20, 30])

array([5, 6, 7])

array([11, 22, 33])

- Broadcasting is a powerful mechanism that allows numpy to work with arrays of different shapes when performing arithmetic operations. 
- Frequently we have a smaller array and a larger array, and we want to use the smaller array multiple times to perform some operation on the larger array.

For example, suppose that we want to add a constant vector to each row of a matrix.

In [49]:
import numpy as np

# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = np.empty_like(x)   # Create an empty matrix with the same shape as x

# Add the vector v to each row of the matrix x with an explicit loop
for i in range(4):
    y[i, :] = x[i, :] + v

# Now y is the following
# [[ 2  2  4]
#  [ 5  5  7]
#  [ 8  8 10]
#  [11 11 13]]
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


- This works; however when the matrix x is very large, computing an explicit loop in Python could be slow. 
- Note that adding the vector v to each row of the matrix x is equivalent to forming a matrix vv by stacking multiple copies of v vertically, then performing elementwise summation of x and vv.

In [50]:
import numpy as np

# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
vv = np.tile(v, (4, 1))   # Stack 4 copies of v on top of each other
print(vv)                 # Prints "[[1 0 1]
                          #          [1 0 1]
                          #          [1 0 1]
                          #          [1 0 1]]"
            
y = x + vv  # Add x and vv elementwise
print(y)  # Prints "[[ 2  2  4
          #          [ 5  5  7]
          #          [ 8  8 10]
          #          [11 11 13]]"

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


Numpy broadcasting allows us to perform this computation without actually creating multiple copies of v. Consider this version, using broadcasting:

In [51]:
import numpy as np

# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = x + v  # Add v to each row of x using broadcasting
print(y)  # Prints "[[ 2  2  4]
          #          [ 5  5  7]
          #          [ 8  8 10]
          #          [11 11 13]]"

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


The line y = x + v works even though x has shape (4, 3) and v has shape (3,) due to broadcasting; this line works as if v actually had shape (4, 3), where each row was a copy of v, and the sum was performed elementwise.

Applications of Broadcasting

In [52]:
import numpy as np

# Compute outer product of vectors
v = np.array([1,2,3])  # v has shape (3,)
w = np.array([4,5])    # w has shape (2,)
# To compute an outer product, we first reshape v to be a column
# vector of shape (3, 1); we can then broadcast it against w to yield
# an output of shape (3, 2), which is the outer product of v and w:
# [[ 4  5]
#  [ 8 10]
#  [12 15]]
print(np.reshape(v, (3, 1)) * w)

[[ 4  5]
 [ 8 10]
 [12 15]]


In [53]:
# Add a vector to each row of a matrix
x = np.array([[1,2,3], [4,5,6]])
# x has shape (2, 3) and v has shape (3,) so they broadcast to (2, 3),
# giving the following matrix:
# [[2 4 6]
#  [5 7 9]]
print(x + v)

[[2 4 6]
 [5 7 9]]


In [54]:
# Add a vector to each column of a matrix
# x has shape (2, 3) and w has shape (2,).
# If we transpose x then it has shape (3, 2) and can be broadcast
# against w to yield a result of shape (3, 2); transposing this result
# yields the final result of shape (2, 3) which is the matrix x with
# the vector w added to each column. Gives the following matrix:
# [[ 5  6  7]
#  [ 9 10 11]]
print((x.T + w).T)
# Another solution is to reshape w to be a column vector of shape (2, 1);
# we can then broadcast it directly against x to produce the same
# output.
print(x + np.reshape(w, (2, 1)))

[[ 5  6  7]
 [ 9 10 11]]
[[ 5  6  7]
 [ 9 10 11]]


# Convert from Centigrade to Fahrenheit

Lets convert degrees in Centigrade scale to Fahrenheit scale. the formula is C/5 = (F - 32)/9, 
where C is the temperature in Centigrade scale and F the temperature in Fahrenheit scale.

You will be converting the temperatures [0, 10, 25, 32, 80, 99.99] in Centigrades to Fahrenheit

