In [1]:

"""

Given some climate data for a region, we can now predict the yield of apples. Here's some sample data:

<img src="https://i.imgur.com/TXPBiqv.png" style="width:360px;">

To begin, we can define some variables to record climate data for a region.


"""


# One list of three climate readings per region.
# NOTE(review): presumably [temperature, rainfall, humidity], as in the table
# image above — confirm the column order.
kanto  = [73, 67, 43]
johto  = [91, 88, 64]
hoenn  = [87, 134, 58]
sinnoh = [102, 43, 37]
unova  = [69, 96, 70]

# One weight per reading, in the same order as the lists above.
w1, w2, w3 = 0.3, 0.2, 0.5
weight = [w1, w2, w3]

    

In [2]:
def crop_yield(data, weights):
    """Return the weighted sum of the readings in ``data``.

    Args:
        data: iterable of numeric readings for one region.
        weights: iterable of numeric weights, paired positionally with ``data``.

    Returns:
        The sum of ``x * w`` over the paired items (``0`` when either iterable
        is empty). Pairing uses ``zip``, so surplus items in the longer
        iterable are ignored.
    """
    result = 0
    # Pair against the ``weights`` argument rather than the notebook-level
    # ``weight`` list, so the function honours the weights the caller passes.
    for x, w in zip(data, weights):
        result += x * w
    return result

In [3]:
crop_yield(kanto,weight)

56.8

In [4]:
crop_yield(johto,weight)

76.9

## going from python lists to numpy arrays

In [5]:
import numpy as np

In [6]:
# we can compute the dot product of two vectors using the np.dot function
# for this we convert the Python lists to numpy arrays
kanto = np.array([73,67,43])
kanto

array([73, 67, 43])

In [7]:
weight = np.array([w1,w2,w3])
weight

array([0.3, 0.2, 0.5])

In [8]:
np.dot(kanto,weight)


56.8

In [9]:
# same result is obtained using element wise multiplication of two arrays and calculating their sum using sum function

In [10]:
(kanto*weight).sum()

56.8

In [11]:
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])


In [12]:
arr=arr1*arr2
arr

array([ 4, 10, 18])

In [13]:
arr.sum()

32

### numpy array is much faster than python list

In [14]:
%%time
l1=list(range(10000000))
l2 = list(range(10000000,20000000))
result = 0
for x,y in zip(l1,l2):
    result+=x*y

CPU times: total: 1.84 s
Wall time: 2.01 s


In [15]:
%%time
n1=np.array(l1)
n2=np.array(l2)
(n1*n2).sum()

CPU times: total: 625 ms
Wall time: 755 ms


690258880

### Two dimensional numpy array

In [16]:
climate_data = np.array([[73,67,43],
                          [91,88,64],
                          [87,134,58],
                          [102,43,37],
                          [69,96,70]])                   

In [17]:
# 2D array (matrix)
# Only the last expression of a cell is displayed automatically, so the shape
# has to be printed explicitly to appear alongside the type.
print(climate_data.shape)  # (number of rows, number of columns)
print(type(climate_data))

<class 'numpy.ndarray'>


In [42]:
# 1D array(vector)
print(weight.shape)
print(type(weight))

(3,)
<class 'numpy.ndarray'>


In [19]:
np.matmul(climate_data,weight)  #performs the matrix multiplication

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [20]:
# or simply use @ operator to perform matrix multiplication
climate_data @ weight

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [21]:
# 3D array
arr3d = np.array([
        [[2.0,30,50],
         [10,11,12]],
    
        [[19,22,14],
         [15,12,10]],
    
        [[19,22,14],
         [15,12,10]]
        ])
print(arr3d.shape)
print(arr3d.dtype)
# if an array contains even a single floating point number, all other elements are converted to floats

(3, 2, 3)
float64


## Working with CSV data files

Numpy also provides helper functions for reading from & writing to files. Let's download a file `climate.txt`, which contains 10,000 climate measurements (temperature, rainfall & humidity) in the following format:


```
temperature,rainfall,humidity
25.00,76.00,99.00
39.00,65.00,70.00
59.00,45.00,77.00
84.00,63.00,38.00
66.00,50.00,52.00
41.00,94.00,77.00
91.00,57.00,96.00
49.00,96.00,99.00
67.00,20.00,28.00
...
```

This format of storing data is known as *comma-separated values* or CSV. 

> **CSVs**: A comma-separated values (CSV) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas. A CSV file typically stores tabular data (numbers and text) in plain text, in which case each line will have the same number of fields. (Wikipedia)


To read this file into a numpy array, we can use the `genfromtxt` function.

In [22]:
# Download the data file and save it locally as 'climate.txt'.
# To view it, open climate.txt from the Jupyter file browser.

import urllib.request
urllib.request.urlretrieve('https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea/climate.csv', 
    'climate.txt')

('climate.txt', <http.client.HTTPMessage at 0x1b03092fbd0>)

In [23]:
# Read the text file and load its contents as a numpy array
# (this rebinds climate_data, replacing the small array defined earlier)

climate_data=np.genfromtxt('climate.txt',delimiter =',',skip_header =1)

# genfromtxt stands for "generate from text"
# first argument: the file name (or a path to the file)
# delimiter: the separator between values on a line; in our case it is ','
# skip_header: how many lines to skip at the start of the file (here, the one header row)

In [24]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [25]:
climate_data.shape

(10000, 3)

In [26]:
weights = np.array([0.2,0.3,0.4])

In [27]:
yields=climate_data @ weights

In [28]:
yields

array([67.4, 55.3, 56.1, ..., 61.6, 71.7, 60.5])

In [29]:
yields.shape

(10000,)

In [30]:
# Concatenating arrays: np.concatenate takes the arrays as one tuple, e.g. (a, b)

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
# axis=0 (the default) joins along rows, so b is appended as the third row of a.
print(np.concatenate((a, b), axis=0))

# Joining along axis=1 appends columns instead: both arrays must then have the
# same number of rows, but their column counts may differ (a has 2, c has 4).
c = np.array([[7, 8, 6, 5], [9, 10, 11, 12]])
print()
print(np.concatenate((a, c), axis=1))

[[1 2]
 [3 4]
 [5 6]]

[[ 1  2  7  8  6  5]
 [ 3  4  9 10 11 12]]


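Let's add the `yields` to `climate_data` as a fourth column using the [`np.concatenate`](https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html) function.
`climate_data` has shape `(10000, 3)`, whereas `yields` is a 1D array of shape `(10000,)`. These shapes are incompatible for joining along columns, so we first reshape `yields` into a 2D array with 10000 rows and one column.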

In [31]:
yields.reshape(10000,1).shape

(10000, 1)

In [32]:
# reshape(-1, 1) turns the 1D yields into a single column; -1 lets numpy infer
# the number of rows from the data instead of hard-coding 10000.
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)

In [33]:
climate_results

array([[25. , 76. , 99. , 67.4],
       [39. , 65. , 70. , 55.3],
       [59. , 45. , 77. , 56.1],
       ...,
       [99. , 62. , 58. , 61.6],
       [70. , 71. , 91. , 71.7],
       [92. , 39. , 76. , 60.5]])

#### We can also save the obtained results into the file again

In [34]:
np.savetxt('climate_results.txt',   # name of the file to write
           climate_results,         # array whose rows become the lines of the file
           delimiter=',',           # plain comma, matching the header below and the input CSV
           fmt='%.2f',              # write every number with two decimal places
           header='temperature,rainfall,humidity,yield_apples',
           comments='')             # prefix for the header line (default '# '); '' writes it bare

## Arithmetic operations, broadcasting and comparison

Numpy arrays support arithmetic operators like `+`, `-`, `*`, etc. You can perform an arithmetic operation with a single number (also called scalar) or with another array of the same shape. Operators make it easy to write mathematical expressions with multi-dimensional arrays.

In [35]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

In [36]:
arr3 = np.array([[11, 12, 13, 14], 
                 [15, 16, 17, 18], 
                 [19, 11, 12, 13]])

In [37]:
# Adding a scalar
arr2 + 3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [38]:
# Element-wise subtraction
arr3 - arr2

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [39]:
# Division by scalar
arr2 / 2

array([[0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. ],
       [4.5, 0.5, 1. , 1.5]])

In [40]:
# Element-wise multiplication
arr2 * arr3

array([[ 11,  24,  39,  56],
       [ 75,  96, 119, 144],
       [171,  11,  24,  39]])

In [41]:
# Modulus with scalar
arr2 % 4

array([[1, 2, 3, 0],
       [1, 2, 3, 0],
       [1, 1, 2, 3]], dtype=int32)

### Array Broadcasting

Numpy arrays also support *broadcasting*, allowing arithmetic operations between two arrays with different numbers of dimensions but compatible shapes. Let's look at an example to see how it works.

In [50]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

In [51]:
arr2.shape

(3, 4)

In [52]:
arr4 = np.array([4, 5, 6, 7])

In [53]:
arr4.shape

(4,)

In [54]:
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

When the expression `arr2 + arr4` is evaluated, `arr4` (which has the shape `(4,)`) is replicated three times to match the shape `(3, 4)` of `arr2`. Numpy performs the replication without actually creating three copies of the smaller dimension array, thus improving performance and using lower memory.
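For this to work the shapes must be compatible: comparing the dimensions from the last one backwards, each pair must either be equal or one of them must be 1 (missing leading dimensions are treated as 1). Here the last dimension of `arr2` (4) matches the length of `arr4` (4).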

<img src="https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png" width="360">

Broadcasting only works if one of the arrays can be replicated to match the other array's shape.

In [56]:
arr1 = np.array([[[1,2,3],
                 [2,3,4],
                 [3,4,5]],
                 
                 [[4,5,6],
                  [5,6,7],
                  [7,8,9]]])

In [57]:
arr1.shape

(2, 3, 3)

In [63]:
# A 1D array of shape (3,): it is broadcast across every row of both 3x3
# blocks of the (2, 3, 3) array arr1. This is the definition that matches the
# recorded shape (3,) and the recorded result of arr1 + arr2 below.
arr2 = np.array([0, 2, 4])

In [64]:
arr2.shape

(3,)

In [65]:
arr1+arr2

array([[[ 1,  4,  7],
        [ 2,  5,  8],
        [ 3,  6,  9]],

       [[ 4,  7, 10],
        [ 5,  8, 11],
        [ 7, 10, 13]]])

### Array Comparison

Numpy arrays also support comparison operations like `==`, `!=`, `>` etc. The result is an array of booleans.

In [66]:
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])

In [67]:
arr1 == arr2

array([[False,  True,  True],
       [False, False,  True]])

In [68]:
arr1 != arr2

array([[ True, False, False],
       [ True,  True, False]])

In [69]:
arr1 >= arr2

array([[False,  True,  True],
       [ True,  True,  True]])

In [70]:
arr1 < arr2

array([[ True, False, False],
       [False, False, False]])

In [72]:
(arr1 == arr2).sum() # total 3 positions are matched in 'arr1' and 'arr2'

3

## Array indexing and slicing

Numpy extends Python's list indexing notation using `[]` to multiple dimensions in an intuitive fashion. You can provide a comma-separated list of indices or ranges to select a specific element or a subarray (also called a slice) from a Numpy array.

In [75]:
arr3 = np.array([
        [[11,12,13,14],
         [13,14,15,19]],
    
         [[15,16,17,21],
         [63,92,36,18]],
    
         [[98,32,81,23],
         [17,18,19.5,43]]
    
        ])

In [76]:
arr3.shape

(3, 2, 4)

In [77]:
# Single element: one index per dimension
arr3[1, 1, 2]

36.0

In [78]:
# Subarray using ranges
arr3[1:, 0:1, :2]

array([[[15., 16.]],

       [[98., 32.]]])

In [79]:
# Mixing indices and ranges
arr3[1:, 1, 3]

array([18., 43.])

In [80]:
# Mixing indices and ranges
arr3[1:, 1, :3]

array([[63. , 92. , 36. ],
       [17. , 18. , 19.5]])

In [81]:

# Using fewer indices
arr3[1]


array([[15., 16., 17., 21.],
       [63., 92., 36., 18.]])

In [84]:
# The maximum number of indices we can pass equals the number of dimensions
# of the array (3 for arr3). Passing more than that, e.g. arr3[1, 1, 2, 0],
# raises an IndexError.
arr3[1,1,2]

36.0

## Other ways of creating Numpy arrays

Numpy also provides some handy functions to create arrays of desired shapes with fixed or random values. Check out the [official documentation](https://numpy.org/doc/stable/reference/routines.array-creation.html) or use the `help` function to learn more.

In [85]:
# All zeros
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [86]:
# All ones
np.ones([2, 2, 3])

array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])

In [87]:
# Identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [91]:
# Random matrix of shape (5, 4), values drawn uniformly from [0, 1)
np.random.rand(5,4)

array([[0.3159624 , 0.45834771, 0.22352846, 0.73767513],
       [0.73757349, 0.16721217, 0.4653969 , 0.29880662],
       [0.59496315, 0.05247677, 0.00158921, 0.82515695],
       [0.94693176, 0.1593228 , 0.14058705, 0.82117103],
       [0.46622114, 0.44341397, 0.82571068, 0.94997851]])

In [89]:
# Random matrix
# rand vs. randn - what's the difference? rand samples uniformly from [0, 1),
# randn samples from the standard normal distribution (so values can be negative)
np.random.randn(2, 3)

array([[ 0.02527711,  0.81391877, -0.33915434],
       [ 1.46269967, -0.81810818, -1.05848704]])

In [90]:
# Fixed value--> array of given shape with value we passed
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [95]:
# Range with start, end and step
print(np.arange(10, 90, 3))
print()
# we can reshape them
print(np.arange(10,90,3).reshape(3,3,3))


[10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55 58 61 64 67 70 73 76 79
 82 85 88]

[[[10 13 16]
  [19 22 25]
  [28 31 34]]

 [[37 40 43]
  [46 49 52]
  [55 58 61]]

 [[64 67 70]
  [73 76 79]
  [82 85 88]]]


In [99]:
# Equally spaced numbers in a range
print(np.linspace(3, 27, 9)) # makes array of 9 elements from 3 to 27
print(np.linspace(3, 27, 3))

[ 3.  6.  9. 12. 15. 18. 21. 24. 27.]
[ 3. 15. 27.]
