In [32]:
import numpy as np
import pandas as pd

In [33]:
# Weights applied to temperature, rainfall and humidity, in that order.
w1 = 0.3
w2 = 0.2
w3 = 0.5

In [34]:
# Climate measurements recorded for the `kan` region.
kan_temp, kan_rainfall, kan_humidity = 73, 67, 43

In [35]:
# Weighted sum of the three `kan` measurements (evaluated left to right).
kan_apples = (
    w1 * kan_temp
    + w2 * kan_rainfall
    + w3 * kan_humidity
)

In [36]:
# One list per region, ordered as [temperature, rainfall, humidity].
kan = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]


In [37]:
# Pack the three scalar weights into a single NumPy vector.
wts = np.array((w1, w2, w3))

In [38]:
def crop_yield(region, wts):
    """Return the weighted sum of a region's measurements.

    Each value in ``region`` is multiplied by the weight at the same
    position in ``wts`` and the products are added up in order.

    Args:
        region: Iterable of numeric measurements for one region.
        wts: Iterable of numeric weights, one per measurement.

    Returns:
        The accumulated sum of ``x * w`` over the paired values
        (``0`` when both inputs are empty).

    Raises:
        ValueError: If ``region`` and ``wts`` differ in length. A bare
            ``zip`` would silently drop the unmatched trailing values and
            return a misleading total.
    """
    # Materialise both inputs so their lengths can be compared even when a
    # caller passes a generator or other one-shot iterable.
    region = list(region)
    wts = list(wts)
    if len(region) != len(wts):
        raise ValueError(
            "region and wts must have the same length, got "
            f"{len(region)} and {len(wts)}"
        )

    # Accumulate in input order so floating-point results match a plain loop.
    result = 0
    for x, w in zip(region, wts):
        result += x * w
    return result

In [39]:
crop_yield(sinnoh,wts)

np.float64(57.699999999999996)

In [40]:
crop_yield(kan,wts)

np.float64(56.8)

In [41]:
crop_yield(johto,wts)

np.float64(76.9)

In [42]:
kan_apples

56.8

In [43]:
# NOTE(review): this rebinds `kan` (earlier the list [73, 67, 43]) to unrelated
# demo values, so later cells that use `kan` no longer see the original measurements.
kan=np.array([12,13,14])

In [44]:
kan

array([12, 13, 14])

In [45]:
type(kan)

numpy.ndarray

In [46]:
kan[2]

np.int64(14)

In [47]:
np.dot(johto,wts)

np.float64(76.9)

 "*" operator is used for element wise multiplication

In [48]:
(kan*wts).sum()

np.float64(13.2)

In [49]:
# Two small integer vectors used to contrast element-wise `*` with `np.dot`.
arr1, arr2 = np.array([2, 7, 8]), np.array([4, 6, 7])

In [50]:
arr1*arr2

array([ 8, 42, 56])

In [51]:
np.dot(arr1,arr2)

np.int64(106)

There are a couple of important benefits of using Numpy arrays instead of Python lists for operating on numerical data:

**Ease of use:** You can write small, concise and intuitive mathematical expressions like `(kan * wts).sum()` rather than using loops & custom functions like `crop_yield`.

**Performance:** Numpy operations and functions are implemented internally in optimized, compiled C code, which makes them much faster than Python statements & loops that are interpreted at runtime.

Here's a quick comparison of dot products of two vectors with a million elements each, computed using Python loops vs. Numpy arrays.

In [52]:
#Python lists
arr1=list(range(1000000))
arr2=list(range(1000000,2000000))

arr1_np=np.array(arr1)
arr2_np=np.array(arr2)

In [53]:
%%time
# Pure-Python dot product: multiply the paired list elements and accumulate the total.
res=0
for x1,x2 in zip(arr1,arr2):
  res+=x1*x2
res

CPU times: user 123 ms, sys: 941 µs, total: 124 ms
Wall time: 124 ms


833332333333500000

In [54]:
%%time
# Dot product of the NumPy array versions of the same two sequences.
np.dot(arr1_np,arr2_np)

CPU times: user 1.59 ms, sys: 0 ns, total: 1.59 ms
Wall time: 1.59 ms


np.int64(833332333333500000)

In [55]:
# One row per region; columns are temperature, rainfall and humidity.
climate_data = np.array([
    [73, 67, 43],
    [91, 88, 64],
    [87, 134, 58],
    [102, 43, 37],
    [69, 96, 70],
])

In [56]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [57]:
climate_data.shape

(5, 3)

In [58]:
wts.shape

(3,)

In [66]:
# Three-dimensional example; the single 5.2 makes NumPy store every element as a float.
arr3 = np.array([
    [[12, 24, 67],
     [46, 23, 98]],
    [[15, 3, 2],
     [12, 67, 5.2]],
])

In [62]:
arr3.shape

(2, 2, 3)

In [63]:
wts.dtype

dtype('float64')

In [67]:
arr3.dtype

dtype('float64')

In [65]:
climate_data.dtype

dtype('int64')

In [68]:
arr3

array([[[12. , 24. , 67. ],
        [46. , 23. , 98. ]],

       [[15. ,  3. ,  2. ],
        [12. , 67. ,  5.2]]])

**We use the `np.matmul` function for matrix multiplication; the `@` operator performs the same operation.**

In [70]:
np.matmul(climate_data,wts)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [71]:
climate_data @ wts

array([56.8, 76.9, 81.9, 57.7, 74.9])

Working with CSV data files
Numpy also provides helper functions for reading from & writing to files. Let's download a file climate.txt, which contains 10,000 climate measurements (temperature, rainfall & humidity) in the following format:

temperature,rainfall,humidity
25.00,76.00,99.00
39.00,65.00,70.00
59.00,45.00,77.00
84.00,63.00,38.00
66.00,50.00,52.00
41.00,94.00,77.00
91.00,57.00,96.00
49.00,96.00,99.00
67.00,20.00,28.00
...

In [72]:
import urllib.request

# Download the CSV from a gist URL pinned to a specific revision and save it
# locally as climate.txt.
urllib.request.urlretrieve(
    url='https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea/climate.csv',
    filename='climate.txt',
)

('climate.txt', <http.client.HTTPMessage at 0x7b82dc7904d0>)

In [73]:
# Parse the downloaded CSV into a NumPy array, skipping the header row.
# This rebinds `climate_data`, replacing the small array defined earlier.
climate_data = np.genfromtxt(
    'climate.txt',
    delimiter=',',
    skip_header=1,
)

In [74]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [76]:
climate_data.shape

(10000, 3)

In [77]:
# Matrix-vector product: one weighted sum per row of `climate_data`.
yields = np.matmul(climate_data, wts)

In [78]:
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [79]:
yields.shape

(10000,)

We now have the yields we wanted; next, we append them to `climate_data` as a fourth column.

In [81]:
# Append the yields as an extra column. reshape(-1, 1) lets NumPy infer the
# row count, so this keeps working if the input file has a different number
# of rows than the 10,000 that used to be hard-coded here.
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)

In [82]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [83]:
# Write the combined table as CSV with two decimal places per value.
# The header previously misspelled the last column as "yeild_apples";
# comments='' stops NumPy from prefixing the header line with "# ".
np.savetxt('climate_results.txt',
           climate_results,
           fmt='%.2f',
           delimiter=',',
           header='temperature,rainfall,humidity,yield_apples',
           comments='')

Numpy provides hundreds of functions for performing operations on arrays. Here are some commonly used functions:

Mathematics: np.sum, np.exp, np.round, arithmetic operators
Array manipulation: np.reshape, np.stack, np.concatenate, np.split
Linear Algebra: np.matmul, np.dot, np.transpose, np.linalg.eigvals
Statistics: np.mean, np.median, np.std, np.max