## **Step 1: Getting Started with NumPy**

In [1]:
!pip install numpy



In [2]:
import numpy as np

## **Step 2: Basics — Arrays**

In [4]:
# A flat Python list becomes a 1-D ndarray.
values = [1, 2, 3, 4, 5]
arr = np.array(values)

# A list of equal-length lists becomes a 2-D ndarray (2 rows x 3 columns here).
rows = [[1, 2, 3], [4, 5, 6]]
arr2d = np.array(rows)

# Show both arrays, one after the other.
print(arr, arr2d, sep="\n")

[1 2 3 4 5]
[[1 2 3]
 [4 5 6]]


In [5]:
# Inspect the 1-D array's metadata, printed in this order:
#   ndim  - number of dimensions (1D, 2D, 3D...)
#   shape - length along each dimension
#   size  - total number of elements
#   dtype - data type of the elements
for attribute in ("ndim", "shape", "size", "dtype"):
    print(getattr(arr, attribute))

1
(5,)
5
int64


In [6]:
# Same metadata for the 2-D array: dimensions, shape (rows, cols),
# total number of elements, and element data type.
print(
    arr2d.ndim,
    arr2d.shape,
    arr2d.size,
    arr2d.dtype,
    sep="\n",
)


2
(2, 3)
6
int64


In [16]:
# Special arrays: constructors for common fill patterns and numeric ranges
print(np.zeros((3, 3)))        # 3x3 matrix of zeros
print(np.ones((2, 4)))         # 2x4 matrix of ones
print(np.full((2, 2), 7))      # 2x2 matrix filled with 7
print(np.eye(3))               # 3x3 Identity matrix
print(np.arange(0, 10, 2))     # start 0, stop 10 (excluded), step 2 → [0 2 4 6 8]
print(np.linspace(0, 2, 10))   # 10 evenly spaced values from 0 to 2, both ends included (step 2/9)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[7 7]
 [7 7]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[0 2 4 6 8]
[0.         0.22222222 0.44444444 0.66666667 0.88888889 1.11111111
 1.33333333 1.55555556 1.77777778 2.        ]


## **Step 3: Indexing & Slicing**

In [17]:
# Five-element sample array for the 1-D indexing examples.
arr = np.array([10, 20, 30, 40, 50])

first = arr[0]       # index 0 is the first element
last = arr[-1]       # negative indices count back from the end
middle = arr[1:4]    # half-open slice: positions 1, 2 and 3 → [20 30 40]

print(first, last, middle, sep="\n")

10
50
[20 30 40]


In [18]:
# 2 x 3 matrix for the 2-D indexing examples.
arr2d = np.array([[1, 2, 3], [4, 5, 6]])

# Indexing is [row, column]; a bare ":" selects everything along that axis.
element = arr2d[0, 1]     # row 0, column 1 → 2
column_1 = arr2d[:, 1]    # every row, column 1 → [2 5]
row_1 = arr2d[1, :]       # row 1, every column → [4 5 6]

for selection in (element, column_1, row_1):
    print(selection)

2
[2 5]
[4 5 6]


## **Step 4: Math Operations & Vectorization**

In [19]:
# Two same-shaped operands for the element-wise arithmetic examples.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Arithmetic operators act element by element, with no explicit loop.
results = (
    a + b,     # [5 7 9]
    a - b,     # [-3 -3 -3]
    a * b,     # [ 4 10 18]
    a / b,     # [0.25 0.4  0.5 ]  (true division gives floats)
    a ** 2,    # [1 4 9]
)
for result in results:
    print(result)

[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]
[1 4 9]


In [20]:
# Universal functions (ufuncs): NumPy's fast built-in math functions.
arr = np.array([1, 2, 3, 4])

# Element-wise functions: square root, exponential, natural log, sine.
# Each one returns an array with the same shape as its input.
for elementwise in (np.sqrt, np.exp, np.log, np.sin):
    print(elementwise(arr))

# Reductions: sum, average, max, min. Each one collapses the array to one value.
for reduction in (np.sum, np.mean, np.max, np.min):
    print(reduction(arr))

[1.         1.41421356 1.73205081 2.        ]
[ 2.71828183  7.3890561  20.08553692 54.59815003]
[0.         0.69314718 1.09861229 1.38629436]
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
10
2.5
4
1


## **Step 5: Broadcasting (Superpower)**

In [21]:
# Broadcasting: NumPy combines operands of different shapes without explicit loops.
a = np.array([1, 2, 3])
b = 2

# The scalar is applied to every element of the array → [3 4 5]
shifted = a + b
print(shifted)

[3 4 5]


In [24]:
# Broadcasting a 1-D array across the rows of a 2-D array.
A = np.array([[1, 2, 3], [4, 5, 6]])   # shape (2, 3)
b = np.array([10, 20, 30])             # shape (3,)

# b is added to every row of A. Expected result:
# [[11 22 33]
#  [14 25 36]]
row_shifted = A + b
print(row_shifted)
# Rule: NumPy stretches the smaller array to match the shape of the larger one.

[[11 22 33]
 [14 25 36]]


## **Step 6: Matrix Operations (Linear Algebra)**

In [25]:
# Two 2x2 matrices for the linear-algebra examples.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

product_dot = A.dot(B)             # matrix multiplication
product_at = A @ B                 # the @ operator gives the same product
transposed = A.T                   # transpose
inverse = np.linalg.inv(A)         # inverse
determinant = np.linalg.det(A)     # determinant (floating point, so not exactly -2)
eigen = np.linalg.eig(A)           # eigenvalues & eigenvectors

for item in (product_dot, product_at, transposed, inverse, determinant, eigen):
    print(item)

[[19 22]
 [43 50]]
[[19 22]
 [43 50]]
[[1 3]
 [2 4]]
[[-2.   1. ]
 [ 1.5 -0.5]]
-2.0000000000000004
EigResult(eigenvalues=array([-0.37228132,  5.37228132]), eigenvectors=array([[-0.82456484, -0.41597356],
       [ 0.56576746, -0.90937671]]))


## **Step 7: Advanced Indexing & Reshaping**

In [26]:
# Boolean indexing: filter an array with a True/False mask.
arr = np.array([10, 20, 30, 40, 50])

# np.greater(arr, 25) is the function form of "arr > 25":
# it yields one boolean per element.
mask = np.greater(arr, 25)
print(mask)        # [False False  True  True  True]

# Indexing with the mask keeps only the positions where it is True.
selected = arr[mask]
print(selected)    # [30 40 50]

# Handy for filtering datasets (e.g., selecting rows that meet a condition).

[False False  True  True  True]
[30 40 50]


In [29]:
# Fancy indexing: select elements by passing lists of indices.
arr = np.array([10, 20, 30, 40, 50])

positions = [0, 2, 4]
print(arr[positions])   # elements at indices 0, 2, 4 → [10 30 50]

# 2-D example
arr2d = np.array([[1, 2], [3, 4], [5, 6]])

row_ids = [0, 2]
col_ids = [1, 0]
print(arr2d[row_ids])            # whole rows 0 and 2
print(arr2d[row_ids, col_ids])   # paired coordinates (0,1) and (2,0) → [2 5]

[10 30 50]
[[1 2]
 [5 6]]
[2 5]


In [31]:
# Sorting
arr = np.array([40, 10, 50, 20, 30])

sorted_values = np.sort(arr)    # sorted copy → [10 20 30 40 50]; arr itself is unchanged
sort_order = np.argsort(arr)    # indices that would sort arr → [1 3 4 0 2]
print(sorted_values)
print(sort_order)

# 2-D sort
arr2d = np.array([[3, 1], [7, 2]])

# axis=0 orders the values within each column; axis=1 orders them within each row.
for axis in (0, 1):
    print(np.sort(arr2d, axis=axis))


[10 20 30 40 50]
[1 3 4 0 2]
[[3 1]
 [7 2]]
[[1 3]
 [2 7]]


In [32]:
# Reshaping & Flattening
arr = np.arange(12)  # [0 1 2 ... 11]

# Reinterpret the 12 elements as 3 rows x 4 columns.
reshaped = arr.reshape(3, 4)
print(reshaped)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# Flatten the 2-D array back to 1-D. Calling ravel() on `arr` would show
# nothing, because `arr` is already 1-D.
print(reshaped.ravel())
# -1 tells NumPy to infer that dimension: 12 elements / 6 columns = 2 rows.
print(arr.reshape(-1, 6))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]


In [33]:
# Stacking & Splitting
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
pair = [a, b]

# np.stack  - joins along a new axis, giving a 2-D array
# np.vstack - stacks vertically (row-wise)
# np.hstack - stacks horizontally, giving one longer 1-D array
for combine in (np.stack, np.vstack, np.hstack):
    print(combine(pair))

# Split a 3x3 matrix into 3 equal parts along the first axis (one row each).
c = np.arange(9).reshape((3, 3))
parts = np.split(c, 3)
print(parts)

[[1 2 3]
 [4 5 6]]
[[1 2 3]
 [4 5 6]]
[1 2 3 4 5 6]
[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]


## **Step 8: Random Numbers (very important in ML)**

In [34]:
# Seed the global generator so every run produces the same numbers.
np.random.seed(42)

# The draws happen in this exact order; reordering them would change the values.
uniform_draws = np.random.rand(3)                     # uniform on [0, 1)
normal_draws = np.random.randn(3)                     # standard normal distribution
single_int = np.random.randint(1, 10)                 # one integer from 1 to 9
sampled = np.random.choice([1, 2, 3, 4], size=5)      # 5 picks, with replacement
shuffled = np.random.permutation(10)                  # 0..9 in random order

print(uniform_draws, normal_draws, single_int, sampled, shuffled, sep="\n")

[0.37454012 0.95071431 0.73199394]
[-1.11188012  0.31890218  0.27904129]
8
[3 2 1 2 4]
[9 2 8 6 0 4 3 7 1 5]


## **Step 9: Performance & Optimization Tricks**

In [35]:
# Vectorization vs Loops
# Bad: squaring one element at a time in a Python loop.
arr = np.arange(1_000_000)

# Slow way: every iteration goes through the Python interpreter.
squared_loop = []
for value in arr:
    squared_loop.append(value ** 2)

In [37]:
# Good: one vectorized call squares the whole array at once.
squared_vec = np.square(arr)
# NumPy's core is written in C, so vectorized operations are typically much
# faster than the equivalent Python loop (time both with %timeit to compare).
# Rule: avoid Python loops → think in whole-array operations.

In [39]:
# Broadcasting hack: all pairwise sums of two arrays, with no loops.
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])

# np.newaxis (an alias for None) turns a into a (3, 1) column vector.
a_column = a[:, np.newaxis]
print(a_column)
print(b)

# (3, 1) + (3,) broadcasts to (3, 3): entry [i, j] is a[i] + b[j].
pairwise = a_column + b
print(pairwise)
# [[11 21 31]
#  [12 22 32]
#  [13 23 33]]

[[1]
 [2]
 [3]]
[10 20 30]
[[11 21 31]
 [12 22 32]
 [13 23 33]]


In [41]:
# Memory views (no copy): a basic slice shares memory with the original array.
arr = np.arange(10)
window = slice(2, 6)

sub = arr[window]   # a view onto arr[2:6], not a copy
sub[0] = 999        # writing through the view also changes arr[2]

print(arr)  # [0 1 999 3 4 5 6 7 8 9]

# .copy() produces an independent array; later writes to it would not touch arr.
sub_copy = sub.copy()
print(sub_copy)

[  0   1 999   3   4   5   6   7   8   9]
[999   3   4   5]


In [43]:
# Strides: the number of bytes NumPy steps in memory to move along each axis.
arr = np.array([[1, 2, 3], [4, 5, 6]])

row_step, item_step = arr.strides
print((row_step, item_step))
# (24, 8) with 8-byte integers: the next row is 24 bytes away, the next element 8 bytes.
# Advanced libs (like TensorFlow/PyTorch) rely heavily on stride tricks for speed.
# Advanced libs (like TensorFlow/PyTorch) rely heavily on stride tricks for speed.

(24, 8)


In [44]:
# In-place operations (save memory)
shape = (1000, 1000)
factor = 2

arr = np.ones(shape)

# The augmented assignment writes the result back into arr's existing buffer,
# whereas "arr = arr * 2" would first build a second full-size array.
arr *= factor

In [49]:
# np.where & np.select (conditional logic)
arr = np.array([10, 20, 30, 40])

# Two-way choice: "big" where the test is True, "small" elsewhere.
is_big = arr > 20
size_labels = np.where(is_big, "big", "small")
print(size_labels)
# ['small' 'small' 'big' 'big']

# Multi-way choice: for each element the first condition that holds picks the label.
grades = np.array([85, 60, 45])

conditions = [grades >= 80, grades >= 50, grades < 50]
choices = ["A", "B", "F"]

# A string default matches the string choices; the implicit default of 0 is
# an integer, and mixing it with string labels can raise a TypeError.
letter_grades = np.select(condlist=conditions, choicelist=choices, default="Unknown")
print(letter_grades)

['small' 'small' 'big' 'big']
['A' 'B' 'F']


In [50]:
# np.unique & np.bincount (categorical tricks)
# Handy for label encoding in ML datasets.
arr = np.array([1, 2, 2, 3, 3, 3])

distinct = np.unique(arr)                            # sorted distinct values → [1 2 3]
distinct_and_counts = np.unique(arr, return_counts=True)  # (values, counts) → ([1 2 3], [1 2 3])
frequencies = np.bincount(arr)                       # index i holds the count of value i → [0 1 2 3]

for summary in (distinct, distinct_and_counts, frequencies):
    print(summary)

[1 2 3]
(array([1, 2, 3]), array([1, 2, 3]))
[0 1 2 3]


In [51]:
# Save & Load Arrays
# A single array goes into NumPy's binary .npy format (written to the current directory).
np.save("myarray.npy", arr)        # Save
loaded = np.load("myarray.npy")    # Load
assert np.array_equal(loaded, arr), "round trip through .npy changed the data"

# Several arrays can be bundled into one .npz archive, each stored under its keyword name.
# `a` and `b` are whatever earlier cells last assigned (the "Broadcasting Hacks"
# cell when the notebook is run top to bottom).
# np.savez does not compress; for large datasets np.savez_compressed takes the
# same arguments and writes a compressed archive.
np.savez("many_arrays.npz", a=a, b=b)