# Numpy
- NumPy is an open-source Python library for scientific computing that provides support for large, multi-dimensional arrays and a collection of high-level mathematical functions to operate on these arrays.
- It offers a powerful and efficient n-dimensional array object, enabling faster computations and serving as the foundational library for many other data science and machine learning tools like Pandas and Scikit-learn.

## Properties of numpy array

### dtype
- dtype stores the data type of the elements of the array
- A numpy array has a single data type (homogeneous)

In [4]:
import numpy as np

# Build one array from a list (with an explicit dtype code) and one from a tuple.
a = np.array([1,2,3,5,7], dtype='i')
b = np.array((2,3,5))

print(a)
print(type(a))
print(a.dtype)  # the 'i' type code shows up as int32 in the output below
print(b)
print(type(b))  # lists and tuples both produce a numpy.ndarray

[1 2 3 5 7]
<class 'numpy.ndarray'>
int32
[2 3 5]
<class 'numpy.ndarray'>


### Dimensions
- Numpy arrays can be multidimensional
- An element is accessed with one index per dimension, e.g. a 3-d array needs 3 indices - 2-d array index, row (1-d array) index and element index
- All sub-arrays at the same level must have the same size

In [13]:
a = np.array([[1,2,3],[4,5,6]])
print(a.ndim)  # 2 dimensions
print(a[0,2])  # row 0, column 2

b = np.array([[1,2,3,-1],[4,5,6,9]])
print(b.ndim)
print(b[1,2])  # row 1, column 2

# An array of 2-d arrays is a 3-d array
c = np.array([[[1,2,3],[4,5,6], [0,0,-1]], [[-1,-2,-3],[-4,-5,-6], [0,0,1]]])
print(c.ndim)
print(c[1,0,2])  # 2-d array 1, row 0, element 2

d = np.array([1])
print(d.ndim)  # a one-element list still gives a 1-d array

e = np.array(3)
print(e.ndim)  # a bare scalar gives a 0-d array

2
3
2
6
3
-3
1
0


### Shape
- `shape` returns a tuple holding the length of each dimension

In [11]:
print(c.shape)
# (no. of 2-d arrays, no. of 1-d arrays in each 2-d array, no. of elements in each 1-d array)

print(c.shape[0])  # length of the first dimension only

(2, 3, 3)
2


### Other Properties

In [16]:
print(c.size) # total no. of elements

print(c.nbytes) # total no. of bytes (18 elements x 8 bytes each in the output below)

18
144


### arange, reshape, random, zeros

In [26]:
A = np.arange(100)  # integers 0 to 99
print(A)

B = np.arange(20,100,3)  # (start, stop, step); stop is exclusive
print(B)

C = np.random.permutation(np.arange(10))  # the integers 0 to 9, but shuffled
print(C)

print(np.random.randint(20,30))  # returns a random integer from 20 (inclusive) to 30 (exclusive)

print(np.random.rand(100))  # generates 100 random floats in the range [0, 1)

D = np.random.rand(2,3)   # generates random floats in [0, 1) arranged as a 2x3 matrix
print(D)
print(D.ndim)

E = np.random.rand(2,3,4,2)  # one argument per dimension, so this is a 4-d array
print(E)
print(E.ndim)

F = np.arange(100).reshape(4,25)  # the same 100 values arranged as 4 rows x 25 columns
print(F)
print(F.shape)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
[20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 86 89
 92 95 98]
[9 0 6 1 8 7 4 5 3 2]
28
[0.67092837 0.33182519 0.67872096 0.59500951 0.75766752 0.01934145
 0.17627079 0.63337137 0.61409882 0.69279467 0.12586912 0.07755747
 0.14897148 0.05708899 0.60549432 0.33240047 0.30857369 0.90883778
 0.14345581 0.99272771 0.61313637 0.33405223 0.86269753 0.82125228
 0.69226906 0.6434854  0.88723943 0.24260548 0.56206141 0.71384101
 0.73518312 0.12834575 0.23633905 0.88112841 0.35564097 0.24758638
 0.38803474 0.20672726 0.39165789 0.49140577 0.39339931 0.03983001
 0.14197346 0.74934436 0.66353073 0.36138061 0.89426466 0.55953116
 0.55446594 0.07152543 0.28570505 0.02575699 0.69

### Slicing
- Slicing doesn't copy data; instead it returns a view that shares memory with the original array, so changes made through the slice also change the original.
- If our algorithm is not changing the elements, slicing gives very efficient and fast access to elements or subarrays

In [32]:
a = np.arange(100)
b = a[3:10]   # starting at index 3 and ending at 9 (the stop index 10 is excluded)
print(b)

b[0] = -1200  # b is a view of a, so this also overwrites a[3]
print(a)

b = a[3:10].copy() # copies the data, so this b no longer shares memory with a

print(a[::5])  # picks every 5th element, starting from index 0
print(a[::-5])  # picks every 5th element in reverse, starting from the last one. a[::-1] reverses the array

[3 4 5 6 7 8 9]
[    0     1     2 -1200     4     5     6     7     8     9    10    11
    12    13    14    15    16    17    18    19    20    21    22    23
    24    25    26    27    28    29    30    31    32    33    34    35
    36    37    38    39    40    41    42    43    44    45    46    47
    48    49    50    51    52    53    54    55    56    57    58    59
    60    61    62    63    64    65    66    67    68    69    70    71
    72    73    74    75    76    77    78    79    80    81    82    83
    84    85    86    87    88    89    90    91    92    93    94    95
    96    97    98    99]
[ 0  5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95]
[99 94 89 84 79 74 69 64 59 54 49 44 39 34 29 24 19 14  9  4]


### Indexing Functions

In [35]:
# Find where -1200 is located in array a.

idx = np.argwhere(a==-1200)[0][0]  # argwhere returns a 2-d array (one row per match); [0][0] gives the index of the first match
print(idx)

a[idx] = 3  # put the original value back
print(a)

3
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [41]:
a = np.round(10*np.random.rand(5,4))  # scales the [0, 1) values by 10 and rounds to the nearest whole number (values stay floats)
print(a)

print(a[1,2])  # single element: row 1, column 2
print(a[1,:])  # access the whole 2nd row
print(a[:,1])  # access the whole 2nd col
print(a[1:3,2:4])  # sub-matrix: rows 1-2, columns 2-3
print(a.T)  # transpose the matrix

[[6. 4. 4. 5.]
 [5. 4. 4. 2.]
 [1. 6. 4. 0.]
 [2. 7. 6. 1.]
 [3. 0. 7. 7.]]
4.0
[5. 4. 4. 2.]
[4. 4. 6. 7. 0.]
[[4. 2.]
 [4. 0.]]
[[6. 5. 1. 2. 3.]
 [4. 4. 6. 7. 0.]
 [4. 4. 4. 6. 7.]
 [5. 2. 0. 1. 7.]]


In [42]:
# Linear Algebra library

import numpy.linalg as la

print(la.inv(np.random.rand(3,3)))  # gives the matrix inverse (not the reciprocal of each element)

[[ 0.71841545 -0.7583711   0.54378711]
 [-1.62468677 -0.87233762  2.12181666]
 [ 1.2159278   1.86084555 -1.6059509 ]]


In [44]:
print(a)
a.sort(axis=0)   # sorts in place along axis 0: every col is sorted
print(a)
a.sort(axis=1)   # sorts in place along axis 1: every row is sorted
print(a)

[[6. 4. 4. 5.]
 [5. 4. 4. 2.]
 [1. 6. 4. 0.]
 [2. 7. 6. 1.]
 [3. 0. 7. 7.]]
[[1. 0. 4. 0.]
 [2. 4. 4. 1.]
 [3. 4. 4. 2.]
 [5. 6. 6. 5.]
 [6. 7. 7. 7.]]
[[0. 0. 1. 4.]
 [1. 2. 4. 4.]
 [2. 3. 4. 4.]
 [5. 5. 6. 6.]
 [6. 7. 7. 7.]]


In [63]:
# a[index_array]
a = np.arange(10)
b = a[[1,4,6]]  # picks the elements at index 1, 4 and 6. It creates a copy.
print(b)

b[0] = -4
print(b)
print(a)  # a doesn't change, because b is a copy

print(a[[True,True,False,False,True,True,True,False,True,False]])  # prints the elements where the mask is True

print(a[a<8])  # a<8 builds a boolean mask, which then selects the matching elements

b = a[(a<6) & (a>2)]  # parentheses are needed because & binds tighter than < and >
print(b)

# & combines boolean arrays element by element; Python's 'and' needs a single
# truth value, so it cannot be used to combine arrays

[1 4 6]
[-4  4  6]
[0 1 2 3 4 5 6 7 8 9]
[0 1 4 5 6 8]
[0 1 2 3 4 5 6 7]
[3 4 5]


### Broadcasting
- Broadcasting lets an operation combine operands of different shapes by stretching the smaller one to match the dimensions and size of the larger one.

In [59]:
print(a)
b = a + 5  # the scalar 5 is broadcast, i.e. added to every element of a
print(b)

[0 1 2 3 4 5 6 7 8 9]
[ 5  6  7  8  9 10 11 12 13 14]


### Horizontal, Vertical Stack and Sorting
- These NumPy functions are very fast because their implementation is vectorized (the looping is done in compiled code instead of in the Python interpreter)
- np.hstack concatenates 2 arrays horizontally if possible (the number of rows must match)
- np.vstack concatenates 2 arrays vertically if possible (the number of columns must match)
- 

In [64]:
a = np.round(10*np.random.rand(2,3))
print(a)
b = np.round(10*np.random.rand(2,2))
print(b)
c = np.hstack((a,b))  # the matrices are passed together as one tuple; both have 2 rows, so their columns are joined side by side
print(c)

[[ 7.  8.  8.]
 [ 9. 10.  2.]]
[[9. 1.]
 [7. 9.]]
[[ 7.  8.  8.  9.  1.]
 [ 9. 10.  2.  7.  9.]]


In [70]:
a = np.random.permutation(np.arange(10))
print(a)
a.sort()  # sorts in place, ascending
print(a)

a = np.array(["abc", 'how are you', 'u765', '13er'])
a.sort()  # strings are ordered character by character (the digit-led '13er' comes first below)
print(a)

[1 6 7 2 0 8 9 3 4 5]
[0 1 2 3 4 5 6 7 8 9]
['13er' 'abc' 'how are you' 'u765']


### Execution Time Comparison

In [74]:
# One million random floats used as the common input for all three timings.
b = np.random.rand(1000000)
%timeit sum(b)
%timeit np.sum(b)  # NumPy's own vectorized sum

def mySum(g):      # custom function
    """Add up the items of ``g`` one at a time with a plain Python loop.

    This is the hand-written baseline for the timing comparison, so it
    deliberately does not call any built-in or NumPy summation routine.
    Returns 0 when ``g`` is empty.
    """
    total = 0
    for item in g:
        total += item
    return total

# Time the hand-written Python loop on the same array for comparison.
%timeit mySum(b)

39.6 ms ± 2.92 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
202 μs ± 19.3 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
42.4 ms ± 718 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
