📦 Phase 1.2: NumPy (Numerical Python)
🔍 Why it matters:

Backbone of ML/AI: fast array operations, math, and matrix ops

Used in nearly every AI pipeline (especially with vectors & tensors)
yes

Speed: Vectorized operations using C under the hood.

Memory-efficient: Works with large datasets better than Python lists.

Base for ML: Libraries like TensorFlow, PyTorch, and scikit-learn rely heavily on NumPy arrays.

✅ NumPy Core Concepts Checklist:
Concept	Why It Matters
np.array	Create arrays (1D, 2D, ND)
Indexing/Slicing	Access & manipulate data
Array Math (add, mult, etc)	Efficient vectorized computation
Shape, Reshape, Flatten	Manage tensor shapes (important for models)
Broadcasting	Auto-matching shapes in math ops
Random module	Generate data / test data
Axis & Aggregations	Sum, mean, etc. across rows/columns

In [2]:
#create array
import numpy as np

arr_1d = np.array([1, 2, 3])
arr_2d = np.array([[3, 4], [5, 6]])

a = np.array([2, 3])
b=np.array([1, 2, 3])

print(arr_1d)
print(arr_2d)

[1 2 3]
[[3 4]
 [5 6]]


In [3]:
#index slicing
print(arr_2d[0][1])
print(arr_1d[:2])
print(arr_2d[1:, 1:])
print(arr_2d[:,0])

4
[1 2]
[[6]]
[3 5]


In [4]:
#math
print(arr_1d + b)
print(arr_1d * b)

[2 4 6]
[1 4 9]


In [8]:
#broadcasting
print(arr_2d+ a)

ar = np.array([[1, 2, 3],
                [4, 5, 6]])

# Add 1 to every element
print(ar + 1)

# Add row-wise
row = np.array([10, 20, 30])
print(ar + row)

# Add column-wise
col = np.array([[100], [200]])
print(ar + col)

[[5 7]
 [7 9]]
[[2 3 4]
 [5 6 7]]
[[11 22 33]
 [14 25 36]]
[[101 102 103]
 [204 205 206]]


In [None]:
#method
print(np.sum(arr_2d))
print(np.mean(arr_2d))
print(np.max(arr_2d))
print(np.min(arr_2d))
print(arr_2d.std())
print(arr_2d.var())
print(np.median(arr_2d))
print(np.percentile(arr_2d, 25))

print(arr_2d.shape)
print(arr_2d.dtype)
print(arr_2d.size)
print(arr_2d.ndim)
print(arr_2d.reshape(4, 1))
print(arr_2d.flatten()) #Reshaping images for models, flattening before feeding into layers, normalizing using broadcasting.

18
4.5
6
3
1.118033988749895
1.25
4.5
3.75
(2, 2)
int64
4
2
[[3]
 [4]
 [5]
 [6]]
[3 4 5 6]


In [25]:
#generate data
np.zeros((2, 3))  
print(np.zeros((2,3)))
print(np.ones((2,3)))
print(np.random.rand(2,3))
print(np.random.randn(3,3)) #normal distribution float values
print(np.random.randint(0,10, size=(2,2)))

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[0.45881033 0.16526211 0.47054054]
 [0.89370606 0.82776201 0.8738129 ]]
[[ 0.11605705  0.47561646 -1.1488125 ]
 [-0.39343248  0.34704373 -0.33473023]
 [-0.7548672  -0.21132304  0.19933317]]
[[9 9]
 [1 9]]


In [None]:
#axis aggregation
print(np.sum(arr_2d, axis=0))
print(np.sum(arr_2d, axis=1))
print(np.mean(arr_2d, axis=0))
print(np.mean(arr_2d, axis=1))

print(arr_2d.sum(axis=0))
print(arr_2d.sum(axis=1))

[ 8 10]
[ 7 11]
[4. 5.]
[3.5 5.5]


In [28]:
#combine everything
a = np.random.randint(1, 10, (3, 3))
print("Matrix:\n", a)
print("Slice center:", a[1:, 1:])
print("Sum across rows:", np.sum(a, axis=1))
print("Sum across cols:", np.sum(a, axis=0))

b = np.array([1, 2, 3])
print("Broadcast result:\n", a + b)


Matrix:
 [[5 5 8]
 [8 2 6]
 [3 7 5]]
Slice center: [[2 6]
 [7 5]]
Sum across rows: [18 16 15]
Sum across cols: [16 14 19]
Broadcast result:
 [[ 6  7 11]
 [ 9  4  9]
 [ 4  9  8]]


In [None]:
#matrix calculator
x = np.random.randint(1, 10, size=(2, 2))
y = np.random.randint(1, 10, size=(2, 2))
print(x)
print(y)

#add
print(x+y)

#substract
print(x-y)

#multiply
print(x*y)

#power
print(x**y)

#transpose
print(np.transpose(x))

# Inverse (only if matrix is invertible)
try:
    inverse_x = np.linalg.inv(x)
    print("\nInverse of X:\n", inverse_x)
except np.linalg.LinAlgError:
    print("\nX is not invertible.")

[[7 5]
 [1 7]]
[[9 5]
 [7 5]]
[[16 10]
 [ 8 12]]
[[-2  0]
 [-6  2]]
[[63 25]
 [ 7 35]]
[[7 1]
 [5 7]]

Inverse of X:
 [[ 0.15909091 -0.11363636]
 [-0.02272727  0.15909091]]


In [19]:
#🚀 STEP 5: Boolean Masking & Conditional Filtering in NumPy
#✅ Boolean Masking
arr = np.array([10, 20, 30, 40, 50])
mask = arr > 25
print(mask)         # [False False  True  True  True]
print(arr[mask])    # [30 40 50]

#✅ Direct Conditional Filter - print values less than 40
print(arr[arr < 40])   # [10 20 30]

#✅ Modify values conditionally - 0 to less than 30
arr[arr < 30] = 0
print(arr)  # [ 0  0 30 40 50]

#✅ Logical Operators
arr = np.array([10, 20, 30, 40, 50])
print(arr[(arr > 20) & (arr < 45)])   # [30 40]

grades = np.array([45, 85, 30, 90])
result = np.where(grades >= 50, 'Pass', 'Fail')
print(result)  # ['Fail' 'Pass' 'Fail' 'Pass']

print((arr > 30).sum())     # How many values > 30
print(np.all(arr > 5))      # Are all elements > 5? → True
print(np.any(arr > 100))    # Any value > 100? → False



[False False  True  True  True]
[30 40 50]
[10 20 30]
[ 0  0 30 40 50]
[30 40]
['Fail' 'Pass' 'Fail' 'Pass']
2
True
False


In [13]:
# 🚀 STEP 6: Stacking & Splitting Arrays in NumPy
# These operations simulate real-world scenarios like merging data or segmenting arrays (e.g., image channels, sensor data).

# 🔷 1. Stacking Arrays
# ✅ np.vstack (vertical stack — row-wise) = axis=0 np.stack
a = np.array([1, 2])
b = np.array([3, 4])
stacked = np.vstack((a, b))
print(stacked)
# [[1 2]
#  [3 4]]

#✅ np.hstack (horizontal stack — column-wise)
hstacked = np.hstack((a, b))
print(hstacked)  # [1 2 3 4]

#✅ np.stack (combine along a new axis)
stacked = np.stack((a, b), axis=0)
print(stacked)
# [[1 2]
#  [3 4]]

stacked_axis1 = np.stack((a, b), axis=1)
print(stacked_axis1)
# [[1 3]
#  [2 4]]


[[1 2]
 [3 4]]
[1 2 3 4]
[[1 2]
 [3 4]]
[[1 3]
 [2 4]]


In [35]:
# 🔷 2. Splitting Arrays
# ✅ np.split (split evenly)
arr = np.array([10, 20, 30, 40, 50, 60])
splits = np.split(arr, 3)
print(splits)
# [array([10, 20]), array([30, 40]), array([50, 60])]

#✅ np.array_split (allows uneven splits)
splits = np.array_split(arr, 4)
print(splits)
# [array([10, 20]), array([30, 40]), array([50]), array([60])]

#✂️ 2. Splitting: Break into chunks
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

# Split into 2 parts along columns
split = np.hsplit(arr, 2)
print("hsplit:\n", split[0], "\n", split[1])



[array([10, 20]), array([30, 40]), array([50, 60])]
[array([10, 20]), array([30, 40]), array([50]), array([60])]
hsplit:
 [[1 2]
 [5 6]] 
 [[3 4]
 [7 8]]


In [None]:
x = np.array([30, 10, 20])
sorted_x = np.sort(x)
print("Sorted:", sorted_x)

# Argsort returns the original indexes of sorted values
print("Argsort:", np.argsort(x))

# 💡 Real Use:
# Stack images/text for batching
# Split dataset into training/testing
# Sort predictions or scores

Sorted: [10 20 30]
Argsort: [1 2 0]


✅ Mini Task:
Create a 3x4 NumPy array with random ints from 0–100.
Find:

Mean of each column

Row with highest sum

% of values > 50

In [34]:
arr = np.random.randint(0, 12, size=(3, 4))
print(arr, end="\n\n")

#Mean of each column
print(arr.mean(axis=0))

#Row with highest sum
print(np.max(arr.sum(axis=1)))
#OPTIONAL print(arr[arr.sum(axis=1).argmax()])
print(arr[arr > 50])

#% of values > 50
#print(np.percentile(arr[arr > 50], 100)) #This just gives values above 50 and the max of those.
percent_above_50 = (arr > 50).sum() / arr.size * 100
print(f"% of values > 50: {percent_above_50:.2f}%")



[[ 6  4  2  6]
 [ 7  3 10 11]
 [ 8  0  1  2]]

[7.         2.33333333 4.33333333 6.33333333]
31
[]
% of values > 50: 0.00%


✅ STEP 10: Real-World NumPy Mini Project — “📊 Student Score Analytics”
🎯 Objective:
You’re given raw exam data for 5 subjects and 100 students.

Perform analysis to:

Normalize scores
Calculate student averages
Identify top & bottom students
Assign grades based on percentile

In [58]:
#✅ Step 1: Simulate the data
import numpy as np

np.random.seed(42) #default - by system value, now by 42
scores = np.random.randint(35, 100, size=(100, 5))  # 100 students, 5 subjects
print("Shape:", scores.shape)
print(scores[:5])  # Preview


Shape: (100, 5)
[[86 49 95 55 58]
 [37 56 87 36 64]
 [72 36 98 94 55]
 [67 92 56 83 93]
 [76 94 49 96 96]]


z -> 🧠 Why we use it:
To scale all values between 0 and 1 → so that each feature/subject is on the same scale and contributes equally in ML models or data analysis.

axis=0 means column-wise operation (i.e., per subject).
It ensures each subject is normalized independently.

In [None]:
# ✅ Step 2: Normalize scores (0–1 range per subject)
# z= score-min/max-min = normalization 
# # print(scores[0:5] - scores.min(axis=0))
normalized = (scores - scores.min(axis=0)) / (scores.max(axis=0) - scores.min(axis=0))
print(normalized[:5])


[[0.796875   0.21875    0.95238095 0.3125     0.36507937]
 [0.03125    0.328125   0.82539683 0.015625   0.46031746]
 [0.578125   0.015625   1.         0.921875   0.31746032]
 [0.5        0.890625   0.33333333 0.75       0.92063492]
 [0.640625   0.921875   0.22222222 0.953125   0.96825397]]


In [None]:
# ✅ Step 3: Student-wise average score
averages = scores.mean(axis=1)
print("Top 5 averages:", averages[:5])

Top 5 averages: [68.6 56.  71.  78.2 82.2]


In [None]:
#✅ Step 4: Identify Top 5 and Bottom 5 students
top5_idx = np.argsort(averages)[-5:][::-1] #[::-1] - reverses array
bottom5_idx = np.argsort(averages)[:5]

print("Top 5 Student Indexes:", top5_idx)
print("Bottom 5 Student Indexes:", bottom5_idx)


Top 5 Student Indexes: [ 5 59  4 95 60]
Bottom 5 Student Indexes: [72 20 87 44 15]


In [None]:
#✅ Step 5: Assign Grades using percentiles
grades = []
for avg in averages:
    if avg >= np.percentile(averages, 90): #💡 Meaning: It checks if the variable avg is greater than or equal to the 90th percentile value in the averages array.
        grades.append("A")
    elif avg >= np.percentile(averages, 75):
        grades.append("B")
    elif avg >= np.percentile(averages, 50):
        grades.append("C")
    else:
        grades.append("D")

grades = np.array(grades)
print("Grades for first 10 students:", grades[:10])


78.2
78.2
78.2
78.2
78.2
78.2
78.2
78.2
78.2
78.2
78.2
Grades for first 10 students: ['C' 'D' 'C' 'A' 'A' 'A' 'D' 'D' 'D' 'C']


In [None]:
# 🧠 What is a percentile?
# The 90th percentile means:

# "The value below which 90% of the data falls." - top 10% data
import numpy as np

averages = np.array([50, 60, 70, 80, 90, 100])
print(np.percentile(averages, 90))
# There are 6 values, so:

# 90th percentile is at the position:
# 𝑃 = 90/100 * (N-1) 
#   = 0.9*5 = 4.5th position

# So we take the value midway between the 5th and 6th values:
# averages[4] = 90
# averages[5] = 100
# → Interpolated result:
# 90 + 0.5 * (100 - 90) = 90 + 5 = 95.0
# np.percentile(averages, 90) ➝ 95.0


average = np.array([1,2,3,4,5])
print(np.percentile(average, 90))
# This is between:
# average[3] = 4
# average[4] = 5

# Interpolate:
# Result=4+ 0.6×(5−4) = 4+0.6 = 4.6



95.0
4.6
