
# Data 101 — Module 5, Session 1
## NumPy Foundations (Demo Notebook)

This notebook demonstrates core NumPy operations for **Working with Data in Code**.
Run cells top to bottom.


In [1]:
import time

import numpy as np

# Show which NumPy version this kernel is running.
print("NumPy version:", np.__version__)

NumPy version: 2.3.1



## Python Lists vs. NumPy Arrays
- Lists are flexible and can hold mixed types.
- NumPy arrays are homogeneous and support fast vectorized math.


In [2]:
# With a plain Python list, `*` means repetition, not arithmetic.
l = [1, 2, 3]
l * 2

[1, 2, 3, 1, 2, 3]

In [3]:
# On an ndarray, `*` multiplies every element instead of repeating the sequence.
a = np.array(l)
a * 2

array([2, 4, 6])


## Creating Arrays
Common constructors: `np.array`, `np.arange`, `np.zeros`, `np.ones`, `np.linspace`.


In [4]:
# np.array converts an existing Python sequence into an ndarray.
values = [1, 2, 3]
a = np.array(values)
print("a:", a)

a: [1 2 3]


In [9]:
# np.arange counts from start up to (but not including) stop, in steps of step.
b = np.arange(start=0, stop=10, step=2)
print("b:", b)

b: [0 2 4 6 8]


In [6]:
# np.zeros takes the desired shape as a tuple: 2 rows, 3 columns of 0.0.
z = np.zeros(shape=(2, 3))
print("z:\n", z)

z:
 [[0. 0. 0.]
 [0. 0. 0.]]


In [7]:
# np.ones works like np.zeros but fills with 1.0: 3 rows, 2 columns.
o = np.ones(shape=(3, 2))
print("o:\n", o)

o:
 [[1. 1.]
 [1. 1.]
 [1. 1.]]


In [8]:
# np.linspace returns `num` evenly spaced values and includes both endpoints.
# The result is bound to `lin` rather than `l`: `l` already names the Python
# list from the "Python Lists vs. NumPy Arrays" section, and rebinding it here
# would change what `np.array(l)` produces if that earlier cell were re-run.
lin = np.linspace(0, 1, num=5)
print("lin:", lin)

l: [0.   0.25 0.5  0.75 1.  ]



## Array Attributes
Key properties: `.shape`, `.dtype`, `.ndim`, `.size`.


In [10]:

# Twelve consecutive integers (0-11) arranged as 3 rows by 4 columns.
arr = np.arange(0, 12).reshape((3, 4))
print("arr =\n", arr)

arr =
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [11]:
# .shape: the length of each axis as a tuple — (rows, columns) for a 2-D array.
print("shape:", arr.shape)

shape: (3, 4)


In [12]:
# .dtype: the single element type shared by every entry in the array.
print("dtype:", arr.dtype)

dtype: int64


In [13]:
# .ndim: the number of axes (dimensions).
print("ndim:", arr.ndim)

ndim: 2


In [14]:
# .size: the total number of elements across all axes.
print("size:", arr.size)

size: 12



## Indexing and Slicing
Use square brackets to access elements and subarrays.


In [15]:

# A 1-D array holding the integers 0 through 9.
x = np.arange(0, 10)
print("x:", x)


x: [0 1 2 3 4 5 6 7 8 9]


In [16]:
# A single integer index picks one element; positions start at 0.
print("x[0] =", x[0])

x[0] = 0


In [17]:
# Slice start:stop — the element at the stop index (6) is not included.
print("x[2:6] =", x[2:6])

x[2:6] = [2 3 4 5]


In [18]:
# A 3x4 grid of the integers 0-11 for the 2-D indexing examples.
y = np.arange(0, 12).reshape((3, 4))
print("\ny =\n", y)


y =
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [19]:
# Two indices separated by a comma: [row, column] — here row 0, column 1.
print("y[0, 1] =", y[0, 1])

y[0, 1] = 1


In [20]:
# A bare ":" keeps everything along that axis — column 0 from every row.
print("y[:, 0] =", y[:, 0])

y[:, 0] = [0 4 8]


In [21]:
# Row 1, all columns.
print("y[1, :] =", y[1, :])

y[1, :] = [4 5 6 7]


In [22]:
# Slices on both axes: rows 0-1 and columns 1-2 (stop indices are excluded).
print("y[0:2, 1:3] =\n", y[0:2, 1:3])

y[0:2, 1:3] =
 [[1 2]
 [5 6]]



## Elementwise Operations and Broadcasting
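Vectorized math applies operations to entire arrays without explicit loops. Broadcasting extends this to operands of different shapes: a scalar or a single row is stretched to match the larger array.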


In [23]:

u = np.array([1, 2, 3, 4])
v = np.array([10, 10, 10, 10])

# Each operator is applied element by element to the two equal-length arrays.
for label, result in (
    ("u + v =", u + v),
    ("u * v =", u * v),
    ("u ** 2 =", u ** 2),
):
    print(label, result)

u + v = [11 12 13 14]
u * v = [10 20 30 40]
u ** 2 = [ 1  4  9 16]


In [24]:
# Broadcasting with a scalar: the 5 is added to every element of u.
print("u + 5 =", u + 5)

u + 5 = [6 7 8 9]


In [25]:
# Broadcasting with different shapes: a (3, 4) matrix and a length-4 vector.
row = np.array([1, 0, -1, 2])
M = np.arange(0, 12).reshape((3, 4))
print("\nM =\n", M)
print("row =", row)


M =
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
row = [ 1  0 -1  2]


In [26]:
# row has 4 values, matching M's 4 columns, so it is added to each of M's 3 rows.
print("M + row =\n", M + row)

M + row =
 [[ 1  1  1  5]
 [ 5  5  5  9]
 [ 9  9  9 13]]



## Speed: Vectorization vs. Loops
A simple single-run timing that compares a Python list comprehension with NumPy vectorization. Exact numbers will vary from machine to machine.


In [27]:

# Benchmark: double every element with a Python list comprehension and with a
# single vectorized NumPy expression. Both inputs are built before any timer
# starts, so only the doubling itself is measured. `time` is imported in the
# notebook's first cell.
n = 20_000_000  # 20M elements
py_list = list(range(n))
# Use a dedicated name here: `arr` is the 3x4 array from "Array Attributes",
# and overwriting it would change that section's output if it were re-run.
np_arr = np.arange(n)

# Python loop
t0 = time.perf_counter()
py_out = [val * 2 for val in py_list]
t1 = time.perf_counter()

# NumPy vectorized
t2 = time.perf_counter()
np_out = np_arr * 2
t3 = time.perf_counter()

print(f"Python list comprehension: {(t1 - t0):.4f} sec")
print(f"NumPy vectorized:          {(t3 - t2):.4f} sec")


Python list comprehension: 0.9000 sec
NumPy vectorized:          0.1095 sec



## Reshaping and Stacking
Change array shapes and combine arrays along axes.


In [28]:

# Two 2x3 blocks holding 0-5 and 6-11, used by the stacking examples.
block_shape = (2, 3)
A = np.arange(0, 6).reshape(block_shape)
B = np.arange(6, 12).reshape(block_shape)

print("A =\n", A)
print("B =\n", B)


A =
 [[0 1 2]
 [3 4 5]]
B =
 [[ 6  7  8]
 [ 9 10 11]]


In [29]:
# axis=0 joins along rows: B's rows are appended beneath A's.
print("\nConcatenate rows (axis=0):\n", np.concatenate([A, B], axis=0))


Concatenate rows (axis=0):
 [[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [30]:
# axis=1 joins along columns: B's columns are appended to the right of A's.
print("\nConcatenate cols (axis=1):\n", np.concatenate([A, B], axis=1))


Concatenate cols (axis=1):
 [[ 0  1  2  6  7  8]
 [ 3  4  5  9 10 11]]


In [31]:
# For these 2-D arrays, vstack gives the same result as np.concatenate(..., axis=0).
print("\nVertical stack (vstack):\n", np.vstack([A, B]))


Vertical stack (vstack):
 [[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [32]:
# For these 2-D arrays, hstack gives the same result as np.concatenate(..., axis=1).
print("\nHorizontal stack (hstack):\n", np.hstack([A, B]))


Horizontal stack (hstack):
 [[ 0  1  2  6  7  8]
 [ 3  4  5  9 10 11]]


In [33]:
# flatten collapses A to a single dimension, reading it row by row.
print("\nFlatten A:", A.flatten())


Flatten A: [0 1 2 3 4 5]


In [34]:
# reshape regroups the same 6 values, in the same order, into 3 rows of 2.
print("B =\n", B)
print("Reshape B to (3, 2):\n", B.reshape(3, 2))

B =
 [[ 6  7  8]
 [ 9 10 11]]
Reshape B to (3, 2):
 [[ 6  7]
 [ 8  9]
 [10 11]]



## Example: Study Hours
Rows = students. Columns = weeks. Compute per-student and per-week means.


In [35]:

# Rows = students, columns = weeks.
hours = np.array([
    [10, 12, 14],
    [8, 9, 7],
    [15, 16, 14],
    [6, 5, 7],
    [12, 11, 13],
])

# axis=1 averages across each row (one value per student);
# axis=0 averages down each column (one value per week).
per_student = np.mean(hours, axis=1)
per_week = np.mean(hours, axis=0)

print("Hours =\n", hours)
print("\nAverage per student:", per_student)
print("Average per week:", per_week)


Hours =
 [[10 12 14]
 [ 8  9  7]
 [15 16 14]
 [ 6  5  7]
 [12 11 13]]

Average per student: [12.  8. 15.  6. 12.]
Average per week: [10.2 10.6 11. ]



## Summary
- Arrays are fast and memory-efficient for numerical data.
- Key skills: creation, inspection, indexing, broadcasting, reshaping, stacking.
- This prepares you for tabular data with Pandas.
