In [3]:
# Imports and display settings
!pip install numpy

import numpy as np
np.set_printoptions(suppress=True, precision=3)

def describe(name, x):
    """Print a labelled array followed by its shape and dtype.

    Args:
        name: Label printed in front of the values.
        x: A NumPy array, or anything ``np.asarray`` can convert
            (list, tuple, scalar).

    Returns:
        None. Two lines are written to stdout.
    """
    # Convert first so plain Python sequences also expose .shape/.dtype;
    # an existing ndarray passes through unchanged.
    arr = np.asarray(x)
    print(f"{name}: {arr}")
    print(f"shape: {arr.shape}, dtype: {arr.dtype}")

# Report the NumPy version that this kernel actually imported.
print("NumPy version:", np.__version__)

Collecting numpy
  Downloading numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Downloading numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m54.2 MB/s[0m  [33m0:00:00[0m6m0:00:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.4.0
NumPy version: 2.4.0


## 1D Arrays (Feature Vectors)
A 1D array represents a list of simple features, such as counts or measurements.

In [4]:
# Two small feature vectors; later cells reuse them as `a` and `b`
a, b = np.array([1, 2, 3]), np.array([2, 1, 0])
for label, vec in (("a", a), ("b", b)):
    describe(label, vec)

# Indexing picks out a single element; slicing returns a sub-array
first_of_a = a[0]
tail_of_a = a[1:]
print("a[0] =", first_of_a)
print("a[1:] =", tail_of_a)

a: [1 2 3]
shape: (3,), dtype: int64
b: [2 1 0]
shape: (3,), dtype: int64
a[0] = 1
a[1:] = [2 3]


In [None]:
# TODO: Create a 1D vector with 5 elements
# Example: counts or measurements
# np.array must be *called* with a list. Assigning the bare function
# (c = np.array) leaves c as a function object, so the array operations
# below would fail.
c = np.array([    ])  # Add your 5 numbers here
describe("c", c)

# TODO: Perform basic operations on your vector
print("c + 1 =", c + 1)
print("c - 1 =", c - 1)
print("2 * c =", 2 * c)
# TODO: pass the slice expression as the second argument to print
print("Slice c[1:4] =", ) # Print elements from index 1 to 3

### Student Tasks: 1D Vectors
Create practice vectors and perform basic operations.

## 2D Arrays (Samples or Token Sequences)
Each row is a sample; columns are features.
Example: token-count vectors for a tiny vocabulary (Unit 1.3 style).

In [7]:
# Columns follow the vocabulary order: ['security', 'cloud', 'data'].
# Each row holds the token counts of one short document.
doc1 = [1, 2, 0]  # mentions security(1), cloud(2), data(0)
doc2 = [0, 1, 3]
doc3 = [2, 0, 1]
X = np.array([doc1, doc2, doc3])
describe("X", X)

cloud_col = 1  # position of 'cloud' in the vocabulary
print("First row (doc1) =", X[0, :])
print("Column 'cloud' feature across docs =", X[:, cloud_col])

X: [[1 2 0]
 [0 1 3]
 [2 0 1]]
shape: (3, 3), dtype: int64
First row (doc1) = [1 2 0]
Column 'cloud' feature across docs = [2 1 0]


In [None]:
# TODO: Create your own 2D array 'Z' with at least 4 samples and 3–5 features
# Each row is one sample (document); each column is one feature (token count).
Z = np.array([
    # Example row: [count_security, count_cloud, count_data]
    # TODO: add 4+ rows here
])
describe("Z", Z)
# Z @ Z.T multiplies Z by its transpose, the same pattern used for X in the
# Dot Product section.
# NOTE: until rows are added, Z is an empty 1-D array, so this prints a
# scalar 0.0 rather than a matrix.
print("Pairwise dot products (Z):\n", Z @ Z.T)

### Student Tasks: 2D Arrays
Create a small dataset of samples (rows) and features (columns).

## Basic Operations
Addition, subtraction, and scalar multiplication are element-wise.

In [None]:
# Element-wise arithmetic on the 1D vectors a and b
print("a + b =", a + b)
print("a - b =", a - b)
print("2 * a =", 2 * a)

# Apply operations to all samples
# The scalar is broadcast to every entry of the 2D array X.
print("X + 1 (broadcast) =", X + 1)
print("X * 2 (broadcast) =", 2 * X)

## Dot Product
Measures alignment weighted by magnitude: the dot product is large when vectors are long and point in similar directions, and zero when they are perpendicular.

In [8]:
# Dot product of the two 1D vectors
dot_ab = a.dot(b)
print("a · b =", dot_ab)

# Dot products between samples (rows of X), one line per document pair
for i, j in ((0, 1), (0, 2), (1, 2)):
    print(f"doc{i + 1} · doc{j + 1} =", X[i].dot(X[j]))

# The whole table in one step: X times its transpose
pairwise_dot = np.matmul(X, X.T)
print("Pairwise dot products:", pairwise_dot)

a · b = 4
doc1 · doc2 = 2
doc1 · doc3 = 2
doc2 · doc3 = 3
Pairwise dot products: [[ 5  2  2]
 [ 2 10  3]
 [ 2  3  5]]


## Distances & Similarities
Distances (e.g., Euclidean) quantify how far apart two vectors are.
Similarities (e.g., cosine) quantify how aligned vectors are.
These build intuition for embeddings in NLP/ML.

In [9]:
def l2_norm(x):
    """Return the Euclidean (L2) norm of a real-valued array: sqrt(sum(x_i^2)).

    Args:
        x: Array-like of real numbers (ndarray, list, or tuple).

    Returns:
        The norm as a NumPy float64 scalar.
    """
    # Square in float64: squaring a narrow integer dtype (e.g. int8) wraps
    # around silently and would give a wrong norm. This also lets plain
    # Python lists be passed in.
    x = np.asarray(x, dtype=np.float64)
    return np.sqrt(np.sum(x * x))

def euclidean(u, v):
    """Return the Euclidean distance between two real-valued vectors.

    Args:
        u: Array-like of real numbers.
        v: Array-like of real numbers, broadcast-compatible with ``u``.

    Returns:
        The L2 norm of ``u - v``.
    """
    # Subtract in float64: with unsigned integer dtypes a negative
    # difference would wrap around to a large positive value.
    diff = np.asarray(u, dtype=np.float64) - np.asarray(v, dtype=np.float64)
    return l2_norm(diff)

def cosine(u, v):
    """Return the cosine similarity of two real-valued vectors.

    Args:
        u: Array-like of real numbers.
        v: Array-like of real numbers with the same length as ``u``.

    Returns:
        ``(u · v) / (‖u‖ ‖v‖)``, or 0.0 when the product of the norms is
        zero (the similarity is undefined there; 0.0 is the fallback).
    """
    # Work in float64 so the dot product of narrow integer dtypes cannot
    # overflow; this also accepts plain Python lists.
    u = np.asarray(u, dtype=np.float64)
    v = np.asarray(v, dtype=np.float64)
    denom = l2_norm(u) * l2_norm(v)
    if denom == 0:
        return 0.0
    return np.dot(u, v) / denom

# Norms, distance and similarity for the 1D vectors a and b
print("‖a‖₂ =", l2_norm(a))
print("‖b‖₂ =", l2_norm(b))
print("Euclidean(a, b) =", euclidean(a, b))
print("Cosine(a, b) =", cosine(a, b))

# Distances/similarities between documents
# The per-pair results are named dist/cos_sim rather than d/c: names assigned
# in a cell are notebook globals, and `c` is the student vector that the
# later Euclidean(c, e) / Cosine(c, e) cell still needs.
for i in range(len(X)):
    for j in range(i + 1, len(X)):
        dist = euclidean(X[i], X[j])
        cos_sim = cosine(X[i], X[j])
        print(f"doc{i+1}-doc{j+1}: Euclidean={dist:.3f}, Cosine={cos_sim:.3f}")

‖a‖₂ = 3.7416573867739413
‖b‖₂ = 2.23606797749979
Euclidean(a, b) = 3.3166247903554
Cosine(a, b) = 0.47809144373375745
doc1-doc2: Euclidean=3.317, Cosine=0.283
doc1-doc3: Euclidean=2.449, Cosine=0.400
doc2-doc3: Euclidean=3.000, Cosine=0.424


In [None]:
# TODO: Define another vector 'e' with the same length as 'c'
e = np.array([    ])  # Add your numbers here

# Fail early with a clear message when the shapes differ: NumPy broadcasting
# would otherwise let a scalar or length-1 'c' through and print a
# meaningless distance instead of raising.
assert np.shape(c) == np.shape(e), (
    f"'c' and 'e' must have the same shape, got {np.shape(c)} and {np.shape(e)}; "
    "re-run the cell that defines 'c' if it has been overwritten"
)

# Compute distances and similarity
print("Euclidean(c, e) =", euclidean(c, e))
print("Cosine(c, e) =", cosine(c, e))

# Optional: Compare documents in your custom dataset Z (if created)
# The globals() check skips this part when the Z cell has not been run.
if 'Z' in globals():
    for i in range(len(Z)):
        for j in range(i + 1, len(Z)):
            print(f"Z doc{i+1}-doc{j+1}: d={euclidean(Z[i], Z[j]):.3f}, c={cosine(Z[i], Z[j]):.3f}")

### Student Tasks: Distances & Similarities
Compare your vectors and samples using Euclidean distance and cosine similarity.

## Mini Exercises (Unit 1.3 style)
1. Create your own 1D feature vectors (length 3–5).
2. Build a 2D array for at least 4 samples with a small vocabulary (3–5 tokens).
3. Compute pairwise dot products and interpret which samples are most similar.
4. Compute Euclidean distances and cosine similarities; compare rankings.
5. Scale one vector by a constant; observe effects on dot product and cosine.

In [None]:
# Your turn: edit and run these cells
# 1) Custom 1D vectors
u, v = np.array([1, 0, 2]), np.array([0, 3, 1])
print("u + v =", np.add(u, v))
print("u · v =", u.dot(v))
print("Euclidean(u, v) =", euclidean(u, v))
print("Cosine(u, v) =", cosine(u, v))

# 2) Custom 2D samples (replace with your own!)
Y = np.array([
    [1, 0, 1],
    [0, 2, 2],
    [2, 1, 0],
])
print("Pairwise dot (Y):", np.matmul(Y, Y.T))

# Compare every pair of distinct rows exactly once (i < j)
n_rows = len(Y)
for i in range(n_rows):
    row_i = Y[i]
    for j in range(i + 1, n_rows):
        row_j = Y[j]
        dist = euclidean(row_i, row_j)
        cos_sim = cosine(row_i, row_j)
        print(f"Y doc{i+1}-doc{j+1}: d={dist:.3f}, c={cos_sim:.3f}")

---
**Summary:**
- Vectors encode features; geometry encodes similarity.
- Dot product grows with magnitude and alignment; cosine isolates alignment.
- Distances and similarities underpin the intuition behind embeddings in Unit 1.3.

Experiment: modify counts, scale vectors, and observe changes.