## Numpy
NumPy is a fundamental library for scientific computing in Python. It provides support for arrays and matrices, along with a collection of mathematical functions to operate on these data structures.

In [11]:
import numpy as np

# Build a one-dimensional NumPy array from a plain Python list
values = [1, 2, 3, 4, 5]
arr1 = np.array(values)
print(arr1, type(arr1))
arr1.shape   # last expression of the cell, so the notebook displays it

[1 2 3 4 5] <class 'numpy.ndarray'>


(5,)

In [6]:
# Start from a flat array of 6 elements, then view it as 1 row x 6 columns
flat = np.array([1, 2, 3, 4, 5, 6])
arr2 = flat.reshape((1, 6))
arr2

array([[1, 2, 3, 4, 5, 6]])

In [7]:
# Same 1 x 6 array, this time built directly from a nested list (one inner list = one row)
single_row = [1, 2, 3, 4, 5, 6]
arr2 = np.array([single_row])
arr2.shape

(1, 6)

In [10]:
# 2D array: each inner list becomes one row of the array
rows = [
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
]
arr3 = np.array(rows)
print(arr3)
arr3.shape

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]


(2, 5)

In [None]:
# Creating arrays with NumPy's built-in constructor functions
square_shape = (5, 5)   # shape shared by the zeros / ones / full examples

# integers from 1 up to (but not including) 100, in steps of 10
arange = np.arange(1, 100, 10)
print(arange)

# 5x5 array filled with 0.0
zeros = np.zeros(shape=square_shape)
print(zeros)

# 5x5 array filled with 1.0
ones = np.ones(shape=square_shape)
print(ones)

# 4x4 identity matrix (ones on the main diagonal, zeros elsewhere)
identity = np.eye(N=4)
print(identity)

# 6 evenly spaced values from 0 to 10, both endpoints included
space = np.linspace(start=0, stop=10, num=6)
print(space)

# 5x5 array where every element is the given constant
full = np.full(shape=square_shape, fill_value=5)
print(full)

[ 1 11 21 31 41 51 61 71 81 91]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[ 0.  2.  4.  6.  8. 10.]
[[5 5 5 5 5]
 [5 5 5 5 5]
 [5 5 5 5 5]
 [5 5 5 5 5]
 [5 5 5 5 5]]


In [None]:
# Array attributes
# `ones`, `arr3` and `identity` are the arrays created in earlier cells.
print('Array: \n', ones)

# Each entry pairs the printed label with the attribute it demonstrates.
attribute_report = [
    ('Shape: \n', ones.shape),          # shape of the array as a tuple (rows, columns)
    ('Size: \n', ones.size),            # total number of elements
    ('Dimensions: \n', ones.ndim),      # number of dimensions (2 for a 2D array)
    ('Transpose: \n', arr3.T),          # the array with rows and columns swapped
    ('Datatype: \n', identity.dtype),   # datatype of the elements
    ('Itemsize: \n', ones.itemsize),    # size of each element in bytes
]
for label, value in attribute_report:
    print(label, value)

Array: 
 [[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
Shape: 
 (5, 5)
Size: 
 25
Dimensions: 
 2
Transpose: 
 [[ 1  6]
 [ 2  7]
 [ 3  8]
 [ 4  9]
 [ 5 10]]
Datatype: 
 float64
Itemsize: 
 8


In [None]:
# NumPy vectorized operations: arithmetic operators act element by element
# on arrays of the same shape, with no explicit Python loop.
# Note: this rebinds arr1 and arr2, which earlier cells used for other arrays.
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([10, 20, 30, 40, 50])

# Addition
print('Addition using vectorization: ', arr1 + arr2)

# Subtraction
print('Subtraction: ', arr1 - arr2)

# Multiplication (element-wise product, not a dot product)
print('Multiplication: ', arr1 * arr2)

# Division (true division, so the result is a float array)
print('Division: ', arr1 / arr2)

Addition using vectorization:  [11 22 33 44 55]
Substraction:  [ -9 -18 -27 -36 -45]
Substraction:  [ 10  40  90 160 250]
Substraction:  [0.1 0.1 0.1 0.1 0.1]


In [50]:
# NumPy mathematical functions

arr = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
])

# Reductions: called without an axis, each one collapses the whole
# 2D array into a single scalar. They are printed in this order.
reductions = (
    np.mean,     # mean
    np.std,      # standard deviation
    np.var,      # variance
    np.min,      # minimum
    np.max,      # maximum
    np.sum,      # sum
    np.prod,     # product
    np.median,   # median
)
for reduce_fn in reductions:
    print(reduce_fn(arr))

# exponentiation: e**x applied element-wise, so the result keeps arr's shape
print(np.exp(arr))

5.5
2.8722813232690143
8.25
1
10
55
3628800
5.5
[[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01
  1.48413159e+02]
 [4.03428793e+02 1.09663316e+03 2.98095799e+03 8.10308393e+03
  2.20264658e+04]]


In [63]:
# Indexing and slicing a 2-dimensional array
new_arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
print(new_arr)

# Indexing: positions are zero-based and written as [row, col]
print(new_arr[0, 0])  # 1
print(new_arr[1, 2])  # 7
print(new_arr[2, 3])  # 12

# Slicing: take a sub-array with [row_start:row_end, col_start:col_end].
# The end index is exclusive, and an omitted bound means "from the start" / "to the end".
# A slice keeps the dimension, so each result below is still a 2D array.
print('First row: ', new_arr[:1, :])          # rows 0..0, every column
print('First column: \n', new_arr[:, :1])     # every row, columns 0..0
print('Third column: \n', new_arr[:, 2:3])    # every row, column 2 only
print("7 8 11 12 : \n", new_arr[1:, 2:])      # rows 1 onward, columns 2 onward

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
1
7
12
First row:  [[1 2 3 4]]
First column: 
 [[1]
 [5]
 [9]]
Third column: 
 [[ 3]
 [ 7]
 [11]]
7 8 11 12 : 
 [[ 7  8]
 [11 12]]


In [64]:
# Real world use case: Normalization
# What is normalization -> In machine learning, normalization is a feature scaling technique that adjusts the range of data, for example by scaling it up or down, to ensure all features have a similar scale for better model performance. (In database design the same word means something different: organizing data to reduce redundancy and improve data integrity. That is not the sense used here.)
# Process:
# Min-Max Scaling: Rescales data to a specific range, typically between 0 and 1. 
# Z-Score Normalization (Standardization): Transforms data to have a mean of 0 and a standard deviation of 1. 

In [65]:
# Z-score normalization (also known as standardization) of a small sample
data = np.array([1, 2, 3, 4, 5])

# step 1: compute the mean and standard deviation of the data
mean, std = data.mean(), data.std()

# step 2: center each value on the mean, then scale by the standard deviation
centered = data - mean
normalized_data = centered / std
print(f'Normalized data: {normalized_data}')

Normalized data: [-1.41421356 -0.70710678  0.          0.70710678  1.41421356]


In [76]:
# Logical operations
Data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# A comparison is evaluated element by element and yields a boolean array:
# True where the condition holds, False where it does not.
above_five = Data > 5
print(above_five)
print(Data < 8)

# Indexing with a boolean array returns the matching elements themselves
print(Data[above_five])

# Combine conditions with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than >= and <=.
five_to_eight = (Data >= 5) & (Data <= 8)
print(Data[five_to_eight])

[False False False False False  True  True  True  True  True]
[ True  True  True  True  True  True  True False False False]
[ 6  7  8  9 10]
[5 6 7 8]
