NumPy - a specialized library for numerical computation and mathematical operations (e.g. dot product, matrix multiplication).

In [2]:
import numpy as np

NumPy array - a list-like data structure

In [3]:
# Build a 1-D integer array from a plain Python sequence.
kanto = np.array((73, 67, 43))
kanto  # a bare expression on the last line is echoed as the cell output

array([73, 67, 43])

In [4]:
kanto[0]           # NumPy arrays use 0-based indexing: index 0 is the first element

73

In [5]:
# Build a 1-D floating-point array from a plain Python sequence.
weights = np.array((0.3, 0.2, 0.5))
weights  # echoed as the cell output

array([0.3, 0.2, 0.5])

In [6]:
# Confirm the Python type of the object that np.array() created
type(kanto)

numpy.ndarray

Operations on numpy arrays

In [7]:
# Dot product: multiply matching elements and add up the products,
# i.e. 73*0.3 + 67*0.2 + 43*0.5

np.dot(kanto, weights)

56.8

In [8]:
np.dot([True, False], [2,3])       # Plain lists are accepted too; booleans count as numbers (True = 1, False = 0), so this is 1*2 + 0*3

2

In [9]:
# The * operator multiplies two arrays element by element (it is not a dot
# product): res[i] == kanto[i] * weights[i]
res = kanto * weights
res

array([21.9, 13.4, 21.5])

In [10]:
# Sum of all elements in the array. Adding up the element-wise products
# gives the same value as np.dot(kanto, weights).
res.sum()

56.8

Benefits of Numpy over python lists

1. Ease of use - numpy is built for performing mathematical computations
2. Performance boost - NumPy operations are implemented internally in compiled C code rather than in Python. This makes them much faster than Python statements and loops, which are interpreted at runtime

In [11]:
# Benchmark inputs: two plain Python lists of one million consecutive
# integers each (0..999999 and 1000000..1999999) ...
l1 = list(range(1_000_000))
l2 = list(range(1_000_000, 2_000_000))

# ... and the same data held in NumPy arrays.
nparr1, nparr2 = np.array(l1), np.array(l2)

In [13]:
%%time
# Baseline: dot product of the two Python lists computed with an explicit
# interpreted loop. The %%time cell magic (which must stay on the first line
# of the cell) reports how long the whole cell takes.
res = 0
for x,y in zip(l1, l2):
    res += x*y

res

CPU times: user 164 ms, sys: 36.9 ms, total: 201 ms
Wall time: 212 ms


833332333333500000

In [17]:
%%time
# Same computation as the pure-Python loop, done in one vectorised call.
# By the wall times recorded in this notebook (212 ms for the loop vs 6.24 ms
# here) np.dot() is roughly 30x faster on this data.
np.dot(nparr1, nparr2)

CPU times: user 2.03 ms, sys: 5.72 ms, total: 7.74 ms
Wall time: 6.24 ms


833332333333500000

In [16]:
%%time
# Alternative vectorised form: element-wise product followed by a sum.
# It produces the same value as np.dot(nparr1, nparr2).
(nparr1 * nparr2).sum()

CPU times: user 2.75 ms, sys: 3.99 ms, total: 6.74 ms
Wall time: 8.51 ms


833332333333500000

Multidimensional Arrays

In [None]:
# A 2-D array (matrix) is built from a list of equal-length rows.
climate_data = np.array([
    [73, 67, 43],
    [91, 88, 64],
    [87, 134, 58],
    [102, 43, 37],
    [69, 96, 70],
])

In [19]:
# Echo the matrix to check its contents
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [22]:
# A 3-D array is a list of matrices: here two 2x2 matrices stacked together.
arr3 = np.array([
    [[1, 1],
     [1, 1]],
    [[10, 20],
     [30, 40]],
])

arr3

array([[[ 1,  1],
        [ 1,  1]],

       [[10, 20],
        [30, 40]]])

In [26]:
# The .shape attribute reports the length of the array along each dimension
# (NumPy calls dimensions "axes"): here 5 rows and 3 columns.
climate_data.shape

(5, 3)

In [24]:
# arr3 holds 2 matrices, each with 2 rows and 2 columns
arr3.shape

(2, 2, 2)

In [27]:
# Rows of different lengths cannot form a rectangular matrix, so np.array()
# rejects this input with a ValueError. The error is the point of this cell,
# so it is caught and printed rather than left to abort "Restart & Run All".
try:
    mat = np.array([[1, 2],
                    [4]])
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [29]:
# Every element of a NumPy array shares a single data type, which the
# .dtype attribute reports. Print each array followed by its dtype.
for arr in (kanto, weights):
    print(arr)
    print(arr.dtype)

[73 67 43]
int64
[0.3 0.2 0.5]
float64


In [34]:
# When the inputs mix types, NumPy still builds a homogeneous array by
# upcasting every element to one common compatible type (here: strings).
a1 = np.array([1, 'a', 2])
print(a1, a1.dtype, sep='\n')

['1' 'a' '2']
<U21


In [None]:
# Mixing an int with a float: the int is upcast so that both elements
# share a floating-point dtype.
a2 = np.array((1, 5.3))
print(a2, a2.dtype, sep='\n')

[1.  5.3]
float64


Matrix Multiplication - In NumPy this can be done using the np.matmul() function or the @ operator

In [35]:
# Display the matrix used as the left-hand operand of the multiplication
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [36]:
# Display the weight vector used as the right-hand operand
weights

array([0.3, 0.2, 0.5])

In [37]:
# Matrix times vector: each output element is the dot product of one row of
# climate_data with weights, so the result has one value per row.
np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [38]:
# The @ operator gives the same result as np.matmul()
climate_data @ weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [39]:
# The @ operator is not defined for plain Python lists, only for array types,
# so this raises a TypeError. The error is the point of this cell, so it is
# caught and printed rather than left to abort "Restart & Run All".
try:
    [[1, 1], [1, 1]] @ [[1, 1], [1, 1]]
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for @: 'list' and 'list'

In [41]:
from urllib import request

In [42]:
# Download the sample climate dataset (a CSV hosted as a GitHub gist) and
# save it as climate.csv in the current working directory.
request.urlretrieve(
    url='https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea/climate.csv',
    filename='climate.csv',
)

('climate.csv', <http.client.HTTPMessage at 0x10df83b10>)

In [45]:
# np.genfromtxt() loads delimited text (txt/csv files) into a NumPy array.
# skip_header is the NUMBER of leading lines to skip, so pass 1 explicitly
# instead of relying on True being coerced to 1.
climate_data = np.genfromtxt('climate.csv', delimiter=',', skip_header=1)

In [46]:
# Display the loaded array; NumPy abbreviates the middle rows of large arrays with "..."
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [48]:
# Number of rows and columns read from the file
climate_data.shape

(10000, 3)

In [49]:
# Apply the weights to every row at once: one weighted sum per row of climate_data
yields = climate_data @ weights
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [50]:
# The result is 1-D, with one entry per row of climate_data
yields.shape

(10000,)

np.concatenate - Joins a sequence (e.g. a tuple) of arrays along an existing axis

In [51]:
# Show the built-in documentation for np.concatenate
help(np.concatenate)

Help on function concatenate in module numpy:

concatenate(...)
    concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind")
    
    Join a sequence of arrays along an existing axis.
    
    Parameters
    ----------
    a1, a2, ... : sequence of array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined.  If axis is None,
        arrays are flattened before use.  Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.
    
        .. versionadded:: 1.20.0
    
    casting : {'no', 'equiv', 'safe', '

In [61]:
# np.concatenate requires the arrays to match in every dimension except the
# one being joined, so the yields are first turned into a single column.
# Using -1 lets NumPy infer the row count from the data instead of
# hard-coding 10000, so the cell keeps working if the dataset size changes.
# axis=1 appends the column to the right of the existing columns.
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)

In [62]:
# Display the combined array: the three original columns followed by the yields column
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

np.reshape() - Changes the shape (dimensions) of the given array without changing the data. The new shape must be compatible with the original shape, i.e. it must hold the same total number of elements

In [63]:
# Show the built-in documentation for np.reshape
help(np.reshape)

Help on function reshape in module numpy:

reshape(a, newshape, order='C')
    Gives a new shape to an array without changing its data.
    
    Parameters
    ----------
    a : array_like
        Array to be reshaped.
    newshape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    order : {'C', 'F', 'A'}, optional
        Read the elements of `a` using this index order, and place the
        elements into the reshaped array using this index order.  'C'
        means to read / write the elements using C-like index order,
        with the last axis index changing fastest, back to the first
        axis index changing slowest. 'F' means to read / write the
        elements using Fortran-like index order, with the first index
        c

In [65]:
# NOTE(review): the recorded output below is 2-D with shape (1, 10000), whereas
# `yields = climate_data @ weights` earlier in the notebook produces a 1-D
# array of shape (10000,). No visible cell performs that reshape and the
# execution counts are out of order, so this output appears to come from
# stale kernel state - re-run the notebook top to bottom to confirm.
yields

array([[72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4]])

In [64]:
# Shape of `yields` before the reshape calls below.
# NOTE(review): recorded as (1, 10000), which differs from the (10000,) shown
# right after `yields` was computed - confirm by re-running from a fresh kernel.
yields.shape

(1, 10000)

In [67]:
# Regroup the same 10000 values into 5000 rows of 2; the shapes are
# compatible because 5000 * 2 == 10000.
np.reshape(yields, (5000, 2))

array([[72.2, 59.7],
       [65.2, 56.8],
       [55.8, 69.6],
       ...,
       [56.2, 87.4],
       [49.7, 71.1],
       [80.7, 73.4]])

In [69]:
# Method form of np.reshape: arrange the values as a single column of 10000 rows
yields.reshape(10000, 1)

array([[72.2],
       [59.7],
       [65.2],
       ...,
       [71.1],
       [80.7],
       [73.4]])

In [71]:
# Display `yields` again: the recorded output matches the one from before the
# reshape calls, i.e. reshape returned new arrays and left `yields` unchanged.
yields

array([[72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4]])

In [70]:
# The shape of `yields` is likewise unchanged by the reshape calls
yields.shape

(1, 10000)

In [75]:
# Write the combined table to disk as a well-formed CSV file.
# - delimiter=',' is required: np.savetxt separates columns with a single
#   space by default, which would not match the comma-separated header or
#   the .csv extension.
# - fmt='%.2f' writes every value with two decimal places.
# - comments='' stops savetxt from prefixing the header line with '# '.
# - The header has no spaces after the commas, so column names do not pick
#   up leading blanks when the file is read back.
np.savetxt('climate_results.csv',
           climate_results,
           fmt='%.2f',
           delimiter=',',
           header='Temp,Rainfall,Humidity,Yield_apples',
           comments='')