# Numpy
* it is written in C, which is a very fast language
* using it gives us a great computational advantage

In [1]:
import numpy as np
import pandas as pd

## Datatypes & Attributes
* the main advantage of numpy is ndarray

### np.array([])
creates an array from a Python list (or from nested lists)

In [2]:
# One-dimensional array built from a flat Python list.
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

### type()
Python's built-in `type()` gives the type of the object itself; every NumPy array is a `numpy.ndarray`

In [3]:
type(a1)

numpy.ndarray

In [4]:
# Two-dimensional array: a list of two rows, each holding three values.
a2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
a2.shape

(2, 3)

In [5]:
# Three-dimensional array: two 3x3 blocks stacked along the first axis.
a3 = np.array([
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
    [[10, 11, 12],
     [13, 14, 15],
     [16, 17, 18]],
])
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

### np.shape
basically gives the shape _(a representation of the dimensions in tuple form)_ of the array  
For example: `a2` (defined above) has __2 elements__, and each of those elements has __3 elements__,  
so its shape is `(2, 3)`, as shown below

In [6]:
a2.shape

(2, 3)

## np.ndim
gives the number of dimensions (axes) of the array

In [7]:
a1.ndim , a2.ndim , a3.ndim

(1, 2, 3)

* `a1` is a 1 dimensional array
* `a2` is a 2 dimensional array
* `a3` is a 3 dimensional array

## np.dtype
used to give the data type of the elements of the array

In [8]:
a1.dtype

dtype('int32')

## np.size
gives the number of elements in the array

In [9]:
a1.size

3

## Example to show that NumPy is the base of data science


In [10]:
# A DataFrame can be built directly from a 2-D NumPy array; the rows and
# columns get default integer labels. pandas itself is imported in the
# import cell at the top of the notebook.
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## np.ones()
given a shape, it returns an array (matrix) of that shape in which every element is one (floats by default)

In [11]:
ones = np.ones(shape=a2.shape)

In [12]:
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

## np.zeros()
returns an array of the given shape in which every element is zero

In [13]:
zero = np.zeros(shape = a2.shape,dtype = int)

In [14]:
zero

array([[0, 0, 0],
       [0, 0, 0]])

## np.arange()
returns an array of evenly spaced values from `start` up to (but not including) `stop`, in increments of `step`, just like Python's `range()` used in iteration

In [15]:
# Every second integer from 0 up to (but not including) 11.
rAr = np.arange(start=0, stop=11, step=2)
rAr

array([ 0,  2,  4,  6,  8, 10])

## np.random.randint(low,high,size=None)
returns an array of the specified size  
the elements lie between `low` (inclusive) and `high` (exclusive), both chosen by the programmer  
the elements are integers

In [16]:
ranAr = np.random.randint(0,10,size=a2.shape)
ranAr

array([[8, 4, 8],
       [0, 9, 8]])

## np.random.rand(shape)
returns an array of random floats, each in the range 0.0 <= element < 1.0  
__Note__: the array shape is given as separate arguments (e.g. `rand(5, 3)`), not as a tuple

In [17]:
ran = np.random.rand(5,3)

In [18]:
ran

array([[0.24105396, 0.5414779 , 0.54195467],
       [0.24684136, 0.28740902, 0.2546237 ],
       [0.83760089, 0.44233438, 0.21796528],
       [0.73515311, 0.14406004, 0.38287159],
       [0.9281123 , 0.32892658, 0.49069838]])

## np.random.random()
* generates an array of specified size
* 0.0 <= element < 1.0

In [19]:
ranDom = np.random.random(a2.shape)
ranDom

array([[0.87038564, 0.3526474 , 0.15059649],
       [0.66265024, 0.3191938 , 0.6187473 ]])

## np.random.seed(seed=None)
used to make the random arrays reproducible  
it is called before any random function so that the generated numbers follow the seed

In [20]:
# Seeding right before the draw makes it reproducible: however many times
# this cell is run, it produces the same array.
np.random.seed(7)
arr = np.random.randint(10, size=a2.shape)
arr

array([[4, 9, 6],
       [3, 3, 7]])

## Accessing the elements of the array

In [21]:
a1[0]
# 1D array

1

In [22]:
print(a2[1][1])
print(a2[1,1])
# 2D array

5
5


In [23]:
print(a3[0][1][2])
print(a3[0,1,2])
# 3D array

6
6


## slicing the Array

In [24]:
a3[0:1]
# slicing is start-inclusive and stop-exclusive, so 0:1 keeps only index 0
# of the first axis (the result is still 3-dimensional)

array([[[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]])

In [25]:
a3 , a3.shape

(array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]),
 (2, 3, 3))

In [26]:
a3[:,:,:2]

array([[[ 1,  2],
        [ 4,  5],
        [ 7,  8]],

       [[10, 11],
        [13, 14],
        [16, 17]]])

# Manipulating and comparing Arrays

## Arithmetic

In [27]:
# Integer array of ones with the same shape as a1.
# NOTE: this rebinds `ones`, which earlier held the float (2, 3) array.
ones = np.ones(a1.shape, dtype=int)
ones, a1

(array([1, 1, 1]), array([1, 2, 3]))

### Addition

In [28]:
ones+a1

array([2, 3, 4])

### Subtraction

In [29]:
a1-ones

array([0, 1, 2])

### Multiplication
* it is element-wise, not like normal matrix multiplication.
* the smaller array is __*broadcast*__ across the larger array so that they have **compatible shapes**
* **Multiplication with constants**: same as in matrices (every element is multiplied by the constant)


In [30]:
a1*ones

array([1, 2, 3])

In [31]:
a1*a2
# a1 (3 elements) is broadcast across each row of a2 (shape (2, 3)),
# so every row of a2 is multiplied element-wise by [1, 2, 3]

array([[ 1,  4,  9],
       [ 4, 10, 18]])

In [32]:
# a2 has shape (2, 3) and a3 has shape (2, 3, 3), so a2 cannot be broadcast
# onto a3 and the product raises a ValueError. The error is caught and its
# message printed so the notebook still runs from top to bottom.
# To make the product work, a2 has to be reshaped.
try:
    a2*a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

### Division

In [33]:
a3/a1

array([[[ 1. ,  1. ,  1. ],
        [ 4. ,  2.5,  2. ],
        [ 7. ,  4. ,  3. ]],

       [[10. ,  5.5,  4. ],
        [13. ,  7. ,  5. ],
        [16. ,  8.5,  6. ]]])

### Floor Division

In [34]:
a3//a1

array([[[ 1,  1,  1],
        [ 4,  2,  2],
        [ 7,  4,  3]],

       [[10,  5,  4],
        [13,  7,  5],
        [16,  8,  6]]], dtype=int32)

### Power


In [35]:
a2**2

array([[ 1,  4,  9],
       [16, 25, 36]], dtype=int32)

In [36]:
a2**a1

array([[  1,   4,  27],
       [  4,  25, 216]], dtype=int32)

### Function for Arithmetic operations
#### Square

In [37]:
print(np.square(a1))

print(a1**2)

[1 4 9]
[1 4 9]


#### Add

In [38]:
print(np.add(a1,ones))

print(a1+ones)

[2 3 4]
[2 3 4]


#### Modulus

In [39]:
print(np.mod(a1,2))

print(a1 % 2)

[1 0 1]
[1 0 1]


## Aggregation
Performing the same operation on a number of things and reducing them to a single value (e.g. sum, min, max)

In [40]:
listy = [1,2,3]
listy

[1, 2, 3]

In [41]:
type(listy)

list

In [42]:
# python sum
sum(listy)

6

In [43]:
a1

array([1, 2, 3])

In [44]:
# numpy sum
np.sum(a1)

6

So the crux is that we should use **python sum** `sum()` when using _python Datatypes_ and  
use **NumPy sum** `np.sum()` when using _NumPy Datatypes_

#### Example

In [45]:
Massive = np.random.random(100000)
Massive[:10]

array([0.45558491, 0.30801277, 0.26387084, 0.08674343, 0.41937221,
       0.01591036, 0.52776479, 0.86880146, 0.33083925, 0.39294231])

In [46]:
%timeit sum(Massive) # Python's sum()
%timeit np.sum(Massive) # Numpy's np.sum()

5.13 ms ± 153 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
27.6 µs ± 1.12 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


So, as we can see, `np.sum()` (NumPy) is much faster than `sum()` (Python) on a NumPy array (about 28 µs versus about 5 ms here)

In [47]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

### Normal Math functions


In [48]:
np.max(a2)  # Returns max value in the array

6

In [49]:
np.min(a2)  # Returns min value in the array

1

In [50]:
np.std(a2)  # Returns the standard deviation of the array
# standard deviation = a measure of how far the values are spread out from the mean

1.707825127659933

In [51]:
np.var(a2)  # Returns the variance of the array
# variance = the average of the squared differences between each value and the mean
# higher variance = values are spread further from the mean
# lower variance = values are clustered closer to the mean
# var = std**2

2.9166666666666665

## Reshaping and Transposing
### Why do we __need__ it?
because of these broadcasting rules:  
Shapes are compared dimension by dimension, starting from the last one. Two dimensions are compatible when
* they are equal, or
* one of them is 1  
so these rules must be followed, otherwise arithmetic operations between the two arrays will not work

In [52]:
# Shapes (2, 3) and (2, 3, 3) are not broadcast-compatible, so this product
# raises a ValueError. Catch it and print the message so that running the
# whole notebook does not stop here.
try:
    a2*a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

We got an error because the two shapes are not compatible with each other, so we have to reshape the smaller array so that it can be broadcast when performing arithmetic operations

### reshape(`shape`)
used to reshape non-compatible arrays so that we can perform arithmetic operations  
`Note` : the number of elements in the array must stay the __same__, i.e. it must be equal to the __product of the elements in the new shape tuple__

In [53]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [54]:
# Add a trailing axis of length 1: (2, 3) -> (2, 3, 1). Compared with a3's
# (2, 3, 3), every pair of dimensions is now either equal or 1, so the two
# arrays can be broadcast together.
reshaped = a2.reshape(2,3,1)
reshaped

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]])

In [55]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [56]:
reshaped*a3

array([[[  1,   2,   3],
        [  8,  10,  12],
        [ 21,  24,  27]],

       [[ 40,  44,  48],
        [ 65,  70,  75],
        [ 96, 102, 108]]])

### Transpose
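swaps the axes of the array: for a 2-D array rows become columns, and in general `.T` reverses the order of the axes, so shape `(2, 3, 3)` becomes `(3, 3, 2)`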

In [57]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [58]:
a3.shape

(2, 3, 3)

In [59]:
a3.T

array([[[ 1, 10],
        [ 4, 13],
        [ 7, 16]],

       [[ 2, 11],
        [ 5, 14],
        [ 8, 17]],

       [[ 3, 12],
        [ 6, 15],
        [ 9, 18]]])

In [60]:
a3.T.shape

(3, 3, 2)

## Dot Product
It is the same as `matrix multiplication`: the number of columns of the first array must equal the number of rows of the second

In [61]:
# Fix the seed so both 5x3 matrices come out the same on every run.
np.random.seed(0)
mat1 = np.random.randint(low=0, high=10, size=(5, 3))
mat2 = np.random.randint(low=0, high=10, size=(5, 3))

In [62]:
mat1

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [63]:
mat2

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]])

In [64]:
# Element-wise multiplication
mat1*mat2

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

In [65]:
# The dot product needs the inner dimensions to match. mat1 and mat2 are both
# (5, 3), so 3 != 5 and np.dot raises a ValueError. Catch it and print the
# message so the notebook keeps running; one matrix must be transposed first.
try:
    np.dot(mat1, mat2)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

In [66]:
# Transpose mat2 from (5, 3) to (3, 5) so its shape lines up with mat1 for np.dot.
# NOTE: this rebinds mat2, so running the cell a second time transposes it
# back to (5, 3); run it exactly once after mat2 is created.
mat2 = mat2.T

In [67]:
mat2.shape, mat1.shape

((3, 5), (5, 3))

In [68]:
np.dot(mat2,mat1)

array([[121, 153, 135],
       [114, 108,  69],
       [ 77,  80,  84]])

In [69]:
np.dot(mat1,mat2)

array([[ 51,  55,  72,  20,  15],
       [130,  76, 164,  33,  44],
       [ 67,  39,  85,  27,  34],
       [115,  69, 146,  37,  47],
       [111,  77, 145,  56,  64]])

In [72]:
# Reproducible sample data: a 5x3 table of quantities and a vector of three prices.
np.random.seed(1)
quan = np.random.randint(low=0, high=20, size=(5, 3))
price = np.array([10, 8, 12])

In [85]:
quan

array([[ 5, 11, 12],
       [ 8,  9, 11],
       [ 5, 15,  0],
       [16,  1, 12],
       [ 7, 13,  6]])

In [86]:
price

array([10,  8, 12])

In [73]:
price.shape, quan.shape

((3,), (5, 3))

In [83]:
# Total per row of quan: each row (3 quantities) dotted with the 3 prices.
# price is 1-D, so price.T is just price; a plain matrix-vector product
# (5, 3) . (3,) -> (5,) gives the same result without any transposes.
tot = np.dot(quan, price)
tot

array([282, 284, 170, 312, 246])

In [84]:
tot.shape

(5,)

In [87]:
# Label the quantity table: one row per weekday, one column per item.
# pandas is imported in the import cell at the top of the notebook.
sales = pd.DataFrame(
    quan,
    index=["Mon", "Tue", "Wed", "Thu", "Fri"],
    columns=["Pasta", "Maggi", "Nacho"],
)
sales

Unnamed: 0,Pasta,Maggi,Nacho
Mon,5,11,12
Tue,8,9,11
Wed,5,15,0
Thu,16,1,12
Fri,7,13,6
