# Linear Algebra from Scratch

This notebook covers the fundamental concepts of linear algebra implemented from scratch using NumPy, focusing on the mathematical foundations needed for AI.

In [1]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple, List
import sys
import os

# Add project root to path
# Notebooks do not define __file__, so the project root (two directories up) is
# resolved from the kernel's working directory. The entry is stored as an
# absolute path so later changes of working directory cannot alter what it
# points to, and it is only appended once so re-running this cell is idempotent.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Set style for plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## Mathematical Foundations of Linear Algebra in AI

Linear algebra is the backbone of machine learning and AI. Key concepts include:

1. **Vectors**: Represent data points and model parameters
2. **Matrices**: Represent datasets, transformations, and model weights
3. **Dot Product**: Measures similarity between vectors
4. **Matrix Multiplication**: Represents linear transformations
5. **Eigenvalues and Eigenvectors**: Used in dimensionality reduction
6. **SVD**: Used in recommendation systems and dimensionality reduction

In [2]:
# Implementing fundamental linear algebra operations from scratch

def vector_magnitude(v: np.ndarray) -> float:
    """
    Calculate the magnitude (Euclidean norm) of a vector.

    Integer and boolean inputs are promoted to float before squaring so that
    narrow integer dtypes (e.g. ``int8``) cannot silently wrap around, and
    complex entries contribute their modulus, so the result is always a
    non-negative real number.

    Args:
        v: Input vector (NumPy array or any array-like of numbers)

    Returns:
        Magnitude of the vector
    """
    values = np.asarray(v)
    if not np.issubdtype(values.dtype, np.inexact):
        # Squaring in a fixed-width integer dtype wraps around on overflow.
        values = values.astype(float)
    # |x|**2 equals x**2 for real input and is the correct term for complex input.
    return np.sqrt(np.sum(np.abs(values) ** 2))

def dot_product(u: np.ndarray, v: np.ndarray) -> float:
    """
    Compute the dot product of two vectors.

    The vectors are multiplied element by element and the products are
    summed.

    Args:
        u: Left-hand vector
        v: Right-hand vector

    Returns:
        Sum of the element-wise products of ``u`` and ``v``
    """
    elementwise_products = u * v
    return np.sum(elementwise_products)

def vector_angle(u: np.ndarray, v: np.ndarray) -> float:
    """
    Calculate the angle between two vectors in radians.

    Args:
        u: First vector
        v: Second vector

    Returns:
        Angle between vectors in radians, in the range [0, pi]

    Raises:
        ValueError: If either vector has zero magnitude, because the angle
            is undefined in that case (the formula would divide by zero).
    """
    mag_u = vector_magnitude(u)
    mag_v = vector_magnitude(v)

    if mag_u == 0 or mag_v == 0:
        raise ValueError("Angle is undefined for a zero-length vector")

    cos_theta = dot_product(u, v) / (mag_u * mag_v)
    # Round-off can push the ratio slightly outside [-1, 1], where arccos
    # would return NaN, so clamp it back into the valid domain.
    cos_theta = np.clip(cos_theta, -1.0, 1.0)

    return np.arccos(cos_theta)

def matrix_multiply(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Multiply two matrices A and B with the textbook triple loop.

    Args:
        A: First matrix of shape (m, n)
        B: Second matrix of shape (n, p)

    Returns:
        Product matrix of shape (m, p). The result is float64 for integer
        and real input and complex when either operand is complex.

    Raises:
        ValueError: If either operand is not 2-D, or if the inner
            dimensions do not match.
    """
    A = np.asarray(A)
    B = np.asarray(B)

    if A.ndim != 2 or B.ndim != 2:
        raise ValueError(
            f"Both operands must be 2-D matrices, got shapes {A.shape} and {B.shape}"
        )
    if A.shape[1] != B.shape[0]:
        raise ValueError(f"Cannot multiply matrices with shapes {A.shape} and {B.shape}")

    m, n = A.shape
    p = B.shape[1]

    # Work in at least float64 (the historical result dtype), widened to
    # complex when needed. Converting the operands up front also prevents the
    # element products from overflowing in narrow integer dtypes.
    work_dtype = np.result_type(A.dtype, B.dtype, np.float64)
    A = A.astype(work_dtype, copy=False)
    B = B.astype(work_dtype, copy=False)

    result = np.zeros((m, p), dtype=work_dtype)

    # result[i, j] is the dot product of row i of A with column j of B.
    for i in range(m):
        for j in range(p):
            for k in range(n):
                result[i, j] += A[i, k] * B[k, j]

    return result

def matrix_inverse(matrix: np.ndarray) -> np.ndarray:
    """
    Calculate the inverse of a matrix using Gauss-Jordan elimination
    with partial (row) pivoting.

    Args:
        matrix: Square matrix to invert (real or complex)

    Returns:
        Inverse of the matrix, as a new array (float64 for integer and real
        input, complex for complex input)

    Raises:
        ValueError: If the input is not a square 2-D matrix, or if a pivot
            smaller than 1e-10 in magnitude is encountered (the matrix is
            treated as singular).
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError("Matrix must be square to compute inverse")

    n = matrix.shape[0]
    # At least float64; complex input keeps its imaginary part instead of
    # having it discarded by a cast to float.
    work_dtype = np.result_type(matrix.dtype, np.float64)

    # Create augmented matrix [A|I]; row operations that turn the left half
    # into I turn the right half into A^-1.
    augmented = np.hstack([matrix.astype(work_dtype), np.eye(n, dtype=work_dtype)])

    for i in range(n):
        # Partial pivoting: bring the largest remaining entry of column i
        # onto the diagonal to limit round-off growth.
        max_row = np.argmax(np.abs(augmented[i:, i])) + i
        if max_row != i:
            augmented[[i, max_row]] = augmented[[max_row, i]]

        # Check for singular matrix
        if abs(augmented[i, i]) < 1e-10:
            raise ValueError("Matrix is singular and cannot be inverted")

        # Scale pivot row so the pivot becomes exactly 1
        augmented[i] = augmented[i] / augmented[i, i]

        # Eliminate column i from every other row (above and below)
        for j in range(n):
            if i != j:
                augmented[j] = augmented[j] - augmented[j, i] * augmented[i]

    # Copy so the caller gets a compact array rather than a view that keeps
    # the whole n x 2n work buffer alive.
    return augmented[:, n:].copy()

# Exercise the vector helpers on two small sample vectors
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])

print(f"Vector 1: {v1}")
print(f"Vector 2: {v2}")

# Each quantity is computed right before it is reported
magnitude_v1 = vector_magnitude(v1)
print(f"Magnitude of v1: {magnitude_v1:.4f}")

dot_v1_v2 = dot_product(v1, v2)
print(f"Dot product: {dot_v1_v2}")

angle_v1_v2 = vector_angle(v1, v2)
print(f"Angle between vectors: {angle_v1_v2:.4f} radians")

Vector 1: [1 2 3]
Vector 2: [4 5 6]
Magnitude of v1: 3.7417
Dot product: 32
Angle between vectors: 0.2257 radians


In [3]:
# Eigenvalues and Eigenvectors from scratch

from typing import Tuple

def power_iteration(matrix: np.ndarray, max_iterations: int = 1000, tolerance: float = 1e-10) -> Tuple[float, np.ndarray]:
    """
    Find the dominant eigenvalue and eigenvector using power iteration.

    The iteration starts from a random vector drawn with ``np.random.rand``
    (seed NumPy's global generator for reproducible runs) and repeatedly
    applies the matrix, renormalising after every step. It is intended for
    real matrices whose largest-magnitude eigenvalue is real and unique.

    Args:
        matrix: Square matrix
        max_iterations: Maximum number of iterations
        tolerance: Convergence tolerance on the Euclidean distance between
            successive (sign-aligned) unit iterates

    Returns:
        Tuple of (eigenvalue, eigenvector), where the eigenvector has unit
        length and the eigenvalue is its Rayleigh quotient

    Raises:
        ValueError: If the matrix is not a square 2-D array
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError("Matrix must be square")

    n = matrix.shape[0]
    x = np.random.rand(n)
    x = x / np.linalg.norm(x)  # Normalize initial vector

    # Defined before the loop so that max_iterations <= 0 still returns the
    # Rayleigh quotient of the starting vector instead of failing.
    x_new = x
    eigenvalue = x @ matrix @ x

    for _ in range(max_iterations):
        product = matrix @ x
        product_norm = np.linalg.norm(product)
        if product_norm == 0.0:
            # matrix @ x == 0 means x itself is an eigenvector for eigenvalue 0;
            # normalising would divide by zero.
            return 0.0, x

        x_new = product / product_norm
        # Rayleigh quotient of the vector that is actually returned
        # (x_new has unit length, so no denominator is needed).
        eigenvalue = x_new @ matrix @ x_new

        # A negative dominant eigenvalue flips the iterate's sign every step,
        # so compare directions rather than raw vectors.
        aligned = -x_new if np.vdot(x, x_new).real < 0 else x_new
        if np.linalg.norm(aligned - x) < tolerance:
            break

        x = x_new

    return eigenvalue, x_new

# Test power iteration
A = np.array([[4, 2], [1, 3]], dtype=float)
eigenval, eigenvec = power_iteration(A)
print(f"Dominant eigenvalue: {eigenval:.4f}")
print(f"Corresponding eigenvector: {eigenvec}")

# Verify with numpy
eigenvals_np, eigenvecs_np = np.linalg.eig(A)
# np.linalg.eig returns the eigenvalues in no particular order, so pick the
# one with the largest magnitude instead of assuming it sits in column 0.
dominant_idx = np.argmax(np.abs(eigenvals_np))
print(f"NumPy eigenvalues: {eigenvals_np}")
print(f"NumPy dominant eigenvector: {eigenvecs_np[:, dominant_idx]}")

# The eigenvector may differ from ours by sign; the eigenvalue must agree.
assert np.isclose(eigenval, eigenvals_np[dominant_idx], rtol=1e-6)

NameError: name 'Tuple' is not defined

In [None]:
# Singular Value Decomposition (SVD) from scratch

def _unit_vector_orthogonal_to(basis: np.ndarray) -> np.ndarray:
    """Return a random unit vector orthogonal to the orthonormal columns of ``basis``."""
    m = basis.shape[0]
    for _ in range(100):
        candidate = np.random.rand(m)
        # Two Gram-Schmidt passes: the second removes the round-off that the
        # first pass leaves behind.
        for _pass in range(2):
            candidate = candidate - basis @ (basis.T @ candidate)
        norm = np.linalg.norm(candidate)
        if norm > 1e-8:
            return candidate / norm
    raise RuntimeError("Could not construct a vector orthogonal to the given basis")


def svd_from_scratch(matrix: np.ndarray, max_iterations: int = 1000, tolerance: float = 1e-10) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Compute a full SVD from the eigen-decomposition of A^T A.

    The right singular vectors are the eigenvectors of A^T A (obtained with
    ``np.linalg.eigh``), each left singular vector is the normalised image
    ``A @ v_i``, and the remaining columns of U are filled with random
    vectors (``np.random.rand``) orthonormalised against the existing ones.

    Because A^T A squares the condition number, singular values below about
    ``sqrt(eps) * max(m, n) * sigma_max`` cannot be told apart from round-off
    and are reported as exactly zero.

    Args:
        matrix: Input matrix to decompose, shape (m, n)
        max_iterations: Unused; accepted for interface compatibility
        tolerance: Unused; accepted for interface compatibility

    Returns:
        Tuple of (U, S, Vt) matrices such that A = U @ S @ Vt, with U of
        shape (m, m), S of shape (m, n) holding the singular values on its
        diagonal in descending order, and Vt of shape (n, n)

    Raises:
        ValueError: If the input is not a 2-D matrix
    """
    A = np.asarray(matrix, dtype=float)
    if A.ndim != 2:
        raise ValueError(f"Expected a 2-D matrix, got shape {A.shape}")
    m, n = A.shape
    k = min(m, n)

    # Compute A^T * A for right singular vectors
    ATA = A.T @ A

    # Compute eigenvalues and eigenvectors of A^T * A
    # For simplicity, we'll use numpy here, but in a full implementation
    # we would implement eigenvalue decomposition from scratch
    eigenvals, V = np.linalg.eigh(ATA)

    # Singular values are square roots of the eigenvalues; round-off can make
    # a zero eigenvalue slightly negative, so clamp before taking the root.
    singular_vals = np.sqrt(np.clip(eigenvals, 0.0, None))

    # Sort in descending order
    idx = np.argsort(singular_vals)[::-1]
    singular_vals = singular_vals[idx]
    V = V[:, idx]

    # Anything at or below this level is numerical noise of the A^T A approach.
    sigma_max = singular_vals[0] if singular_vals.size else 0.0
    zero_tol = max(1e-10, np.sqrt(np.finfo(float).eps) * max(m, n) * sigma_max)

    # Compute left singular vectors for the non-zero singular values
    U = np.zeros((m, m))
    sigma = np.zeros(k)
    rank = 0
    for i in range(k):
        if singular_vals[i] <= zero_tol:
            break  # sorted descending, so every remaining value is zero too
        image = A @ V[:, i]
        image_norm = np.linalg.norm(image)
        if image_norm == 0.0:
            break
        # Using ||A v_i|| as the singular value keeps u_i exactly unit length
        # and makes U @ S @ Vt reproduce A to round-off.
        sigma[i] = image_norm
        U[:, i] = image / image_norm
        rank += 1

    # Complete U to an orthonormal basis (zero singular values and m > n)
    for col in range(rank, m):
        U[:, col] = _unit_vector_orthogonal_to(U[:, :col])

    # Create the rectangular diagonal matrix of singular values
    S = np.zeros((m, n))
    diag_idx = np.arange(k)
    S[diag_idx, diag_idx] = sigma

    return U, S, V.T

# Test SVD
B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=float)
U, S, Vt = svd_from_scratch(B)

# Verify reconstruction
reconstructed = U @ S @ Vt
print(f"Original matrix shape: {B.shape}")
print(f"Reconstructed matrix shape: {reconstructed.shape}")
print(f"Reconstruction error: {np.linalg.norm(B - reconstructed):.10f}")

# Compare with numpy SVD
# B is 4x3, so the default full SVD returns a 4x4 U that cannot be multiplied
# by the 3x3 np.diag(S_np). full_matrices=False returns the reduced factors
# (U: 4x3, S: 3 values, Vt: 3x3), which are shape-compatible.
U_np, S_np, Vt_np = np.linalg.svd(B, full_matrices=False)
reconstructed_np = U_np @ np.diag(S_np) @ Vt_np
print(f"NumPy reconstruction error: {np.linalg.norm(B - reconstructed_np):.10f}")

In [None]:
# Application: Principal Component Analysis (PCA) from scratch

class PCAFromScratch:
    """
    Principal Component Analysis implemented from scratch.

    The principal components are the eigenvectors of the sample covariance
    matrix, ordered by decreasing eigenvalue.

    Attributes (all ``None`` until ``fit`` has been called):
        components: Array of shape (n_features, n_components) whose columns
            are the principal directions
        mean: Per-feature mean of the training data, shape (n_features,)
        explained_variance: Variance along each kept component
        explained_variance_ratio: Fraction of the total variance captured
            by each kept component
    """

    def __init__(self, n_components: int = 2):
        self.n_components = n_components
        self.components = None
        self.mean = None
        self.explained_variance = None
        self.explained_variance_ratio = None

    def fit(self, X: np.ndarray):
        """
        Fit the PCA model to the data.

        Args:
            X: Data matrix of shape (n_samples, n_features)

        Returns:
            The fitted instance, so calls can be chained

        Raises:
            ValueError: If X is not 2-D, has fewer than two samples, or if
                n_components is not between 1 and n_features
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")

        n_samples, n_features = X.shape
        if n_samples < 2:
            # The unbiased covariance divides by (n_samples - 1).
            raise ValueError("At least two samples are required to estimate the covariance")
        if not 1 <= self.n_components <= n_features:
            raise ValueError(
                f"n_components={self.n_components} must be between 1 and n_features={n_features}"
            )

        # Center the data
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # Compute covariance matrix
        cov_matrix = (X_centered.T @ X_centered) / (n_samples - 1)

        # Compute eigenvalues and eigenvectors (eigh: the matrix is symmetric)
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Sort by eigenvalues in descending order; round-off can make a zero
        # variance slightly negative, so clamp at zero.
        idx = np.argsort(eigenvalues)[::-1]
        eigenvalues = np.clip(eigenvalues[idx], 0.0, None)
        eigenvectors = eigenvectors[:, idx]

        # Select top n_components
        self.components = eigenvectors[:, :self.n_components]
        self.explained_variance = eigenvalues[:self.n_components]

        total_variance = eigenvalues.sum()
        if total_variance > 0:
            self.explained_variance_ratio = self.explained_variance / total_variance
        else:
            # Constant data: there is no variance to explain.
            self.explained_variance_ratio = np.zeros(self.n_components)

        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Transform the data to the principal component space.

        Args:
            X: Data matrix of shape (n_samples, n_features)

        Returns:
            Transformed data of shape (n_samples, n_components)

        Raises:
            RuntimeError: If the model has not been fitted yet
        """
        if self.components is None or self.mean is None:
            raise RuntimeError("PCAFromScratch must be fitted before calling transform")

        X_centered = np.asarray(X, dtype=float) - self.mean
        return X_centered @ self.components

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        """
        Fit the PCA model and transform the data.

        Args:
            X: Data matrix of shape (n_samples, n_features)

        Returns:
            Transformed data of shape (n_samples, n_components)
        """
        return self.fit(X).transform(X)

# Test PCA
from sklearn.datasets import make_classification

# Synthetic 5-feature classification data set (fixed seed for repeatability)
X, y = make_classification(
    n_samples=100,
    n_features=5,
    n_informative=3,
    n_redundant=2,
    random_state=42,
)

# Project onto two components with the from-scratch implementation
pca_scratch = PCAFromScratch(n_components=2)
X_pca_scratch = pca_scratch.fit_transform(X)

# Reference projection from scikit-learn
from sklearn.decomposition import PCA
pca_sklearn = PCA(n_components=2)
X_pca_sklearn = pca_sklearn.fit_transform(X)

# Side-by-side scatter plots of the two projections, coloured by class label
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

for ax, projected, title in (
    (ax1, X_pca_scratch, 'PCA from Scratch'),
    (ax2, X_pca_sklearn, 'Sklearn PCA'),
):
    ax.scatter(projected[:, 0], projected[:, 1], c=y, cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('First Principal Component')
    ax.set_ylabel('Second Principal Component')

plt.tight_layout()
plt.show()

# Share of the total feature variance captured by each projected axis
total_variance = np.var(X, axis=0).sum()
variance_explained_scratch = np.var(X_pca_scratch, axis=0) / total_variance
variance_explained_sklearn = pca_sklearn.explained_variance_ratio_

print(f"Variance explained by our PCA: {variance_explained_scratch}")
print(f"Variance explained by sklearn PCA: {variance_explained_sklearn}")

## Key Takeaways

1. **Mathematical Understanding**: Understanding the mathematical foundations of linear algebra operations is crucial for AI
2. **Implementation Skills**: Implementing algorithms from scratch deepens understanding
3. **Applications**: Linear algebra concepts have direct applications in ML algorithms like PCA
4. **Numerical Stability**: Pay attention to numerical stability when implementing algorithms
5. **Verification**: Always verify your implementations against established libraries