In [1]:
import numpy as np

In [3]:
arr=np.array([1,2,3,4,5])
arr

array([1, 2, 3, 4, 5])

In [25]:
np.random.rand(5)

array([0.43188515, 0.75600575, 0.42198728, 0.7853954 , 0.76968013])

In [31]:
np.random.randint(1,10,5)

array([3, 5, 5, 9, 2])

**Broadcasting**

*Broadcasting allows NumPy to perform operations on arrays of different shapes in a way that makes their shapes compatible. For example, adding a scalar to an array will add the scalar to every element in the array. Broadcasting is efficient because it avoids creating redundant arrays and performs operations directly on arrays of unequal shapes, if compatible.*

In [13]:
arr+5

array([ 6,  7,  8,  9, 10])

In [22]:
arr1=np.array([1,3,3])
arr2=np.array([[1],[3],[4]])
arr1+arr2

array([[2, 4, 4],
       [4, 6, 6],
       [5, 7, 7]])

**Mean Median Std**

In [25]:
np.mean(arr)

3.0

In [27]:
np.median(arr)

3.0

In [32]:
np.std(arr)

1.4142135623730951

**slicing**

In [36]:
arr[1:3]

array([2, 3])

In [42]:
arr2=np.array([[1,2,3],[3,4,5],[43,3,2]])
arr2[:2,:2]

array([[1, 2],
       [3, 4]])

**reshape**

In [45]:
arr=np.array([1,2,3,4,5,6])
arr.shape

(6,)

In [51]:
arr_reshape=arr.reshape(2,3)
arr_reshape.shape

(2, 3)

In [57]:
arr_reshape=arr.reshape(3,2)
print(arr_reshape.shape)
print(arr_reshape)

(3, 2)
[[1 2]
 [3 4]
 [5 6]]


**Aggregation funtions**

In [64]:
print('sum= ',np.sum(arr))
print('mean= ',np.mean(arr))
print('median= ',np.median(arr))
print('max= ',np.max(arr))
print('min= ',np.min(arr))
print('std= ',np.std(arr))


sum=  21
mean=  3.5
median=  3.5
max=  6
min=  1
std=  1.707825127659933


**Concatenate**

In [75]:
arr1=np.array([1,2,3])
arr2=np.array([3,21,1])
np.concatenate((arr1,arr2))

array([ 1,  2,  3,  3, 21,  1])

In [77]:
arr1=np.array([[1,2,3],[2,1,3]])
arr2=np.array([3,21,1])
np.concatenate((arr1,arr2))

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [79]:
arr1=np.array([[1,2,3],[2,1,3]])
arr2=np.array([3,21,1])
np.concatenate((arr1,arr2),axis=None)

array([ 1,  2,  3,  2,  1,  3,  3, 21,  1])

**handiling missing value**

In [82]:
arrna=np.array([2,3,np.nan,5,3])
arrna

array([ 2.,  3., nan,  5.,  3.])

In [96]:
np.isnan(arrna)

array([False, False,  True, False, False])

In [86]:
print('sum= ',np.nansum(arrna))
print('mean= ',np.nanmean(arrna))
print('median= ',np.nanmedian(arrna))
print('max= ',np.nanmax(arrna))
print('min= ',np.nanmin(arrna))
print('std= ',np.nanstd(arrna))

sum=  13.0
mean=  3.25
median=  3.0
max=  5.0
min=  2.0
std=  1.0897247358851685


**np.copy() and =**

In [101]:
arr=np.array([2,3,4,2,1])
arr_copy=np.copy(arr)
arr_reference=arr
arr_copy[3]=23
print("arr_copy=",arr_copy)
arr_reference[2]=44
print("arr_reference=",arr_reference)
print("arr_orginal=",arr)


arr_copy= [ 2  3  4 23  1]
arr_reference= [ 2  3 44  2  1]
arr_orginal= [ 2  3 44  2  1]


**element wise multiplication**

In [104]:
arr1=np.array([1,2,3,4,5])
arr2=np.array([1,8,2,6,4])
arr1*arr2

array([ 1, 16,  6, 24, 20])

In [106]:
np.multiply(arr1,arr2)

array([ 1, 16,  6, 24, 20])

**unique element**

In [115]:
arr2=np.array([1,8,2,2,4,6,4])
np.unique(arr2)

array([1, 2, 4, 6, 8])

In [117]:
np.unique(arr2,return_counts=True)

(array([1, 2, 4, 6, 8]), array([1, 2, 2, 1, 1], dtype=int64))

**Matrix multiplication**

In [120]:
matrix1 = np.array([[1, 2], [3, 4]])
matrix2 = np.array([[5, 6], [7, 8]])
np.dot(matrix1,matrix2)

array([[19, 22],
       [43, 50]])

**sorting by multiple columns**

In [126]:
matrix1 = np.array([[1, 2], [3, 4]])
arr[np.lexsort((matrix1[:,1],matrix1[:,0]))]

array([2, 3])

In [128]:
array = np.array([[1, 3], [2, 2], [1, 2]])
sorted_array = array[np.lexsort((array[:, 1], array[:, 0]))]
print(sorted_array)


[[1 2]
 [1 3]
 [2 2]]


**Custom funtion to perform on element-wise**

In [131]:
def cust(x):
    return x+5
vectorized_func=np.vectorize(cust)
arr2=np.array([1,8,2,2,4,6,4])
vectorized_func(arr2)


array([ 6, 13,  7,  7,  9, 11,  9])

**create memory-mapped arrays that reside on disk rather than in RAM**

In [136]:
# np.memmap()

**get indices for given condition**

In [141]:
arr2=np.array([1,8,2,2,4,6,4])
np.where(arr2>4)

(array([1, 5], dtype=int64),)

**np.all() and np.any()**

In [144]:
arr2=np.array([1,8,2,2,4,6,4])
np.all(arr2>2)

False

In [146]:
np.any(arr2>2)

True

**Euclidean distance**

In [151]:
point1=np.array([1,2])
point2=np.array([4,5])
np.linalg.norm(point1-point2)

4.242640687119285

In [153]:
np.sqrt(np.sum((point1-point2)**2))

4.242640687119285

**Normalizing**

In [161]:
arr=np.array([1,2,4,3])
new_arr=(arr-arr.min())/(arr.max()-arr.min())
new_arr

array([0.        , 0.33333333, 1.        , 0.66666667])

In [164]:
(arr-np.mean(arr))/np.std(arr)

array([-1.34164079, -0.4472136 ,  1.34164079,  0.4472136 ])

**Correlation matrix**

In [171]:
array = np.array([[1, 3], [2, 2], [1, 2]])
np.corrcoef(array,rowvar=False)

array([[ 1. , -0.5],
       [-0.5,  1. ]])

**Cumulative sum**

In [174]:
arr

array([1, 2, 4, 3])

In [176]:
np.cumsum(arr)

array([ 1,  3,  7, 10])

**Weighted avg**

In [181]:
data = np.array([10, 20, 30])
weights = np.array([1, 2, 3])
np.average(data,weights=weights)

23.333333333333332

## Linear Algebra

**Creating and Transposing Matrices**

In [185]:
A = np.array([[1, 2], [3, 4]])
A

array([[1, 2],
       [3, 4]])

In [187]:
A.T #Transpose

array([[1, 3],
       [2, 4]])

**Matrix Multiplication**

In [192]:
A*A #element wise

array([[ 1,  4],
       [ 9, 16]])

In [194]:
np.dot(A,A)

array([[ 7, 10],
       [15, 22]])

**Determinant**

In [199]:
np.linalg.det(A)

-2.0000000000000004

**Inverse**

In [202]:
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

**Eigenvalues and Eigenvectors**

In [205]:
eig_val,eig_vect=np.linalg.eig(A)

In [209]:
eig_val

array([-0.37228132,  5.37228132])

In [211]:
eig_vect

array([[-0.82456484, -0.41597356],
       [ 0.56576746, -0.90937671]])

 **Solving Linear Equations**

In [214]:
b=np.array([2,3])
x=np.linalg.solve(A,b)
x

array([-1. ,  1.5])

**Norms of a Vector or Matrix** 

*Euclidean norm & Manhattan norm*

In [217]:
vec=np.array([1,2])
np.linalg.norm(vec,ord=2) # Euclidean norm

2.23606797749979

In [219]:
np.linalg.norm(vec, ord=1)  # Manhattan norm


3.0

In [37]:
dtype=[('name','U10'),('age','i4')]
arr=np.array([('Arshid',24),('Arun',25)],dtype=dtype)

import pandas as pd
df=pd.DataFrame(arr)

In [39]:
df

Unnamed: 0,name,age
0,Arshid,24
1,Arun,25
