### Numpy

In [1]:
w1, w2, w3 = 0.3, 0.2, 0.5

In [2]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [3]:
kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3
kanto_yield_apples

56.8

In [4]:
print("The expected yield of apples in Kanto region is {} tons per hectare.".format(kanto_yield_apples))

The expected yield of apples in Kanto region is 56.8 tons per hectare.


In [5]:
kanto = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]

In [6]:
kanto

[73, 67, 43]

In [8]:
weights = [w1, w2, w3]
weights

[0.3, 0.2, 0.5]

In [9]:
for  item in zip(kanto, weights):
    print(item)

(73, 0.3)
(67, 0.2)
(43, 0.5)


In [10]:
for x, w in zip(kanto, weights):
    print(x)
    print(w)

73
0.3
67
0.2
43
0.5


In [11]:
def crop_yield(region, weights):
    result=0
    for x, w in zip(kanto, weights):
        result += x*w
    return result
    

In [13]:
crop_yield(kanto, weights)

56.8

In [16]:
import numpy as np

In [17]:
kanto = np.array([73,67,43])
kanto

array([73, 67, 43])

In [18]:
weights = np.array([w1, w2, w3])

In [19]:
weights

array([0.3, 0.2, 0.5])

In [20]:
type(kanto)

numpy.ndarray

In [21]:
type(weights)

numpy.ndarray

In [22]:
weights[0]

0.3

In [23]:
help(np.dot)

Help on function dot in module numpy:

dot(...)
    dot(a, b, out=None)
    
    Dot product of two arrays. Specifically,
    
    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).
    
    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using :func:`matmul` or ``a @ b`` is preferred.
    
    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
    
    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.
    
    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of `b`::
    
        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
    
    Parameters
    ----------
    a : array_like
        First argument.
    b : array_like
        Second argument.
    out : 

In [24]:
np.dot(kanto, weights)

56.8

In [25]:
kanto

array([73, 67, 43])

In [26]:
weights

array([0.3, 0.2, 0.5])

In [28]:
(kanto * weights).sum()

56.8

In [29]:
arr1 = list(range(1000000))
arr2 = list(range(1000000, 2000000))

arr1_np = np.array(arr1)
arr2_np = np.array(arr2)

In [30]:
%%time
result = 0
for x1, x2 in zip(arr1, arr2):
    result += x1*x2
result

Wall time: 399 ms


833332333333500000

In [31]:
%%time
np.dot(arr1_np, arr2_np)

Wall time: 6.33 ms


-1942957984

## Multi-dimensional Numpy arrays 

We can now go one step further and represent the climate data for all the regions using a single 2-dimensional Numpy array.

In [32]:
climate_data = np.array([[73, 67, 43],
                         [91, 88, 64],
                         [87, 134, 58],
                         [102, 43, 37],
                         [69, 96, 70]])
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [33]:
climate_data.shape

(5, 3)

In [34]:
weights

array([0.3, 0.2, 0.5])

In [35]:
weights.shape

(3,)

In [40]:
arr3 = np.array([
    [[11,12,13],
     [13,14,15]],
    
    [[15,16,17],
     [17,18,19.5]]
])

In [39]:
arr3.shape

(2, 2)

In [43]:
weights.dtype

dtype('float64')

In [44]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [45]:
climate_data.dtype

dtype('int32')

In [46]:
arr3 = np.array([
    [[11,12,13],
     [13,14,15]],
    
    [[15,16,17],
     [17,18,19.5]]
])

In [47]:
arr3.dtype

dtype('float64')

In [48]:
arr3

array([[[11. , 12. , 13. ],
        [13. , 14. , 15. ]],

       [[15. , 16. , 17. ],
        [17. , 18. , 19.5]]])

In [49]:
np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [51]:
climate_data @ weights  # @ represents matrix multiplication in numpy

array([56.8, 76.9, 81.9, 57.7, 74.9])

## Working with CSV data files

Numpy also provides helper functions reading from & writing to files. Let's download a file `climate.txt`, which contains 10,000 climate measurements (temperature, rainfall & humidity) in the following format:


```
temperature,rainfall,humidity
25.00,76.00,99.00
39.00,65.00,70.00
59.00,45.00,77.00
84.00,63.00,38.00
66.00,50.00,52.00
41.00,94.00,77.00
91.00,57.00,96.00
49.00,96.00,99.00
67.00,20.00,28.00
...
```

This format of storing data is known as *comma-separated values* or CSV. 

> **CSVs**: A comma-separated values (CSV) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas. A CSV file typically stores tabular data (numbers and text) in plain text, in which case each line will have the same number of fields. (Wikipedia)


To read this file into a numpy array, we can use the `genfromtxt` function.

In [75]:
import urllib.request

urllib.request.urlretrieve(
    'https://hub.jovian.ml/wp-content/uploads/2020/08/climate.csv', 
    'climate.txt')

('climate.txt', <http.client.HTTPMessage at 0x262e76eb908>)

In [76]:
climate_data = np.genfromtxt('climate.txt',delimiter=',', skip_header=1)

In [77]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [78]:
climate_data.shape

(10000, 3)

In [79]:
weights = np.array([0.3, 0.2, 0.5])

In [80]:
weights.shape

(3,)

In [81]:
yields = climate_data @ weights

In [82]:
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [64]:
yields.reshape(10000,1).ndim

2

In [65]:
climate_results = np.concatenate((climate_data, yields.reshape(10000,1)), axis=1)

In [66]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [70]:
help(np.concatenate)

Help on function concatenate in module numpy:

concatenate(...)
    concatenate((a1, a2, ...), axis=0, out=None)
    
    Join a sequence of arrays along an existing axis.
    
    Parameters
    ----------
    a1, a2, ... : sequence of array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined.  If axis is None,
        arrays are flattened before use.  Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
    
    Returns
    -------
    res : ndarray
        The concatenated array.
    
    See Also
    --------
    ma.concatenate : Concatenate function that preserves input masks.
    array_split : Split an array into multiple sub-arrays of equal or
   

In [71]:
np.savetxt('climate_results.txt',
            climate_results,
            fmt= '%.2f',
            delimiter=',',
            header='temperature,rainfall,humidity,yeild_apples',
            comments='')

In [83]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

In [84]:
arr2.shape

(3, 4)

In [85]:
arr4 = np.array([4, 5, 6, 7])

In [86]:
arr4.shape

(4,)

In [87]:
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [90]:
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])

In [91]:
arr1 == arr2

array([[False,  True,  True],
       [False, False,  True]])

In [92]:
arr1 != arr2

array([[ True, False, False],
       [ True,  True, False]])

In [93]:
(arr1==arr2).sum()

3

## Array indexing and slicing

Numpy extends Python's list indexing notation using `[]` to multiple dimensions in an intuitive fashion. You can provide a comma-separated list of indices or ranges to select a specific element or a subarray (also called a slice) from a Numpy array.

In [94]:
arr3 = np.array([
    [[11,12,13,14],
     [13,14,15,19]],
    
    [[15,16,17,21],
     [63,92,36,18]],
    
    [[98, 32, 81, 23],      
     [17, 18, 19.5, 43]]
])

In [95]:
arr3.shape

(3, 2, 4)

In [97]:
arr3[2, 1, 3]

43.0

In [102]:
arr3[1: , 0: , 1]

array([[16., 92.],
       [32., 18.]])

In [103]:
arr3[1: , 0:1, :2]

array([[[15., 16.]],

       [[98., 32.]]])

In [104]:
arr3[1: , 1, 3]

array([18., 43.])

In [105]:
arr3

array([[[11. , 12. , 13. , 14. ],
        [13. , 14. , 15. , 19. ]],

       [[15. , 16. , 17. , 21. ],
        [63. , 92. , 36. , 18. ]],

       [[98. , 32. , 81. , 23. ],
        [17. , 18. , 19.5, 43. ]]])

In [106]:
arr3[:2, 1]

array([[13., 14., 15., 19.],
       [63., 92., 36., 18.]])

## Other ways of creating Numpy arrays

Numpy also provides some handy functions to create arrays of desired shapes with fixed or random values. Check out the [official documentation](https://numpy.org/doc/stable/reference/routines.array-creation.html) or use the `help` function to learn more.

In [2]:
import numpy as np

In [9]:
b = np.zeros((3,2))

In [5]:
a = np.ones([2,2,3])

In [8]:
a.ndim

3

In [10]:
b.ndim

2

In [11]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [12]:
np.random.rand(5)

array([0.91767811, 0.33444594, 0.75067199, 0.64578199, 0.5844731 ])

In [16]:
np.random.randn(3,2)

array([[ 0.51212324, -0.22110907],
       [ 1.02822864,  1.54996218],
       [ 1.47896663,  3.20198531]])

In [18]:
np.full([2,3], 72)

array([[72, 72, 72],
       [72, 72, 72]])

In [20]:
np.arange(10,90, 3).shape

(27,)

In [24]:
np.arange(10,90, 3).reshape([3,3,3])

array([[[10, 13, 16],
        [19, 22, 25],
        [28, 31, 34]],

       [[37, 40, 43],
        [46, 49, 52],
        [55, 58, 61]],

       [[64, 67, 70],
        [73, 76, 79],
        [82, 85, 88]]])

In [28]:
np.linspace(3,27, 18)

array([ 3.        ,  4.41176471,  5.82352941,  7.23529412,  8.64705882,
       10.05882353, 11.47058824, 12.88235294, 14.29411765, 15.70588235,
       17.11764706, 18.52941176, 19.94117647, 21.35294118, 22.76470588,
       24.17647059, 25.58823529, 27.        ])