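# Linear Algebra Concepts for Data Science

## 1. Vectors
Vectors represent data points and feature values; addition, scalar multiplication, and the dot product are their basic operations.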


In [1]:
import numpy as np

# Two example vectors in R^3
v1 = np.array((1, 2, 3))
v2 = np.array((4, 5, 6))

# Element-wise sum, scaling by a constant, and inner product
add = np.add(v1, v2)
scalar_mul = v1 * 3
dot_product = v1 @ v2

print("Addition:", add)
print("Scalar Multiplication:", scalar_mul)
print("Dot Product:", dot_product)

Addition: [5 7 9]
Scalar Multiplication: [3 6 9]
Dot Product: 32


## 2. Matrices
Matrices are used to represent data transformations, relationships, and computations in machine learning.

In [2]:
# Two 2x2 example matrices
A = np.array([
    [1, 2],
    [3, 4],
])
B = np.array([
    [2, 0],
    [1, 3],
])

# Element-wise sum, matrix product (rows of A times columns of B), and transpose of A
add = np.add(A, B)
mul = A @ B
transpose = np.transpose(A)

print("Addition:\n", add)
print("Multiplication:\n", mul)
print("Transpose:\n", transpose)

Addition:
 [[3 2]
 [4 7]]
Multiplication:
 [[ 4  6]
 [10 12]]
Transpose:
 [[1 3]
 [2 4]]


## 3. Systems of Linear Equations
Linear equations are often solved to find optimal solutions in regression and other models.

In [3]:
# Linear system written as A @ [x, y] = b:
#   2x + y = 5
#    x - y = 1
A = np.array([
    [2, 1],
    [1, -1],
])
b = np.array([5, 1])

# Solve for the unknowns [x, y]
solution = np.linalg.solve(A, b)
print("Solution:", solution)

Solution: [2. 1.]


## 4. Eigenvalues and Eigenvectors
Eigenvalues and eigenvectors describe matrix transformations and are used in PCA and other algorithms.

In [4]:
# Eigen-decomposition of a 2x2 matrix
A = np.array([
    [4, 2],
    [1, 3],
])

# Column k of `eigenvectors` is the eigenvector paired with eigenvalues[k]
eigenvalues, eigenvectors = np.linalg.eig(A)

print("Eigenvalues:", eigenvalues)
print("Eigenvectors:\n", eigenvectors)

Eigenvalues: [5. 2.]
Eigenvectors:
 [[ 0.89442719 -0.70710678]
 [ 0.4472136   0.70710678]]


## 5. Norms and Distance Metrics
Norms measure vector magnitude, and distance metrics help compute similarity or dissimilarity.

In [5]:
# Magnitude of a vector under two different norms
v = np.array((3, 4))

# Euclidean (L2) norm: square root of the sum of squared components
l2_norm = np.linalg.norm(v, ord=2)
# Manhattan (L1) norm: sum of absolute component values
l1_norm = np.linalg.norm(v, 1)

print("L2 Norm:", l2_norm)
print("L1 Norm:", l1_norm)

L2 Norm: 5.0
L1 Norm: 7.0


## 6. Projections
Projections map one vector onto another or onto a subspace, used in dimensionality reduction.

In [6]:
# Orthogonal projection of v onto the line spanned by u:
#   proj_u(v) = (v . u / u . u) * u
v = np.array((3, 4))
u = np.array((1, 2))

projection = ((v @ u) / (u @ u)) * u
print("Projection:", projection)

Projection: [2.2 4.4]


## 7. Linear Transformations
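Linear transformations such as scaling, rotation, and shearing are represented as matrix multiplication. Translation is an affine (not linear) transformation; it can only be written as a matrix operation by using homogeneous coordinates.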

In [7]:
# Uniform scaling: a diagonal matrix with 2 on the diagonal doubles every component
matrix = np.diag([2, 2])
vector = np.array((1, 1))

# Apply the transformation as a matrix-vector product
transformed = matrix @ vector
print("Transformed Vector:", transformed)

Transformed Vector: [2 2]


## 8. Singular Value Decomposition (SVD)
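SVD factorises a matrix as $A = U \Sigma V^T$, where $U$ and $V$ are orthogonal and $\Sigma$ holds the singular values; it is used in PCA, latent semantic analysis, and low-rank approximation.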

In [8]:
# Singular value decomposition of a 3x2 matrix
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])

# Full decomposition: U is 3x3, S holds the singular values, Vt is 2x2
U, S, Vt = np.linalg.svd(A, full_matrices=True)

print("U:\n", U)
print("Singular Values:", S)
print("V^T:\n", Vt)

U:
 [[-0.2298477   0.88346102  0.40824829]
 [-0.52474482  0.24078249 -0.81649658]
 [-0.81964194 -0.40189603  0.40824829]]
Singular Values: [9.52551809 0.51430058]
V^T:
 [[-0.61962948 -0.78489445]
 [-0.78489445  0.61962948]]


## 9. Basis and Dimension
A basis is a set of linearly independent vectors that span a vector space. The dimension of the space is the number of vectors in any basis.

In [9]:
# The standard basis of R^2 stacked as rows, i.e. the 2x2 identity matrix
basis = np.identity(2, dtype=int)

# The rank counts the linearly independent basis vectors
rank = np.linalg.matrix_rank(basis)

print("Rank:", rank)

Rank: 2


## 10. Rank and Determinants
The rank of a matrix is the number of linearly independent rows (equivalently, columns). A square matrix is invertible exactly when its determinant is nonzero.

In [10]:
# Rank and determinant of a 2x2 matrix
A = np.array([
    [1, 2],
    [3, 4],
])

# det(A) is computed in floating point, so expect a tiny rounding error around -2
determinant = np.linalg.det(A)
rank = np.linalg.matrix_rank(A)

print("Rank:", rank)
print("Determinant:", determinant)

Rank: 2
Determinant: -2.0000000000000004


## 11. Orthogonality and Orthogonalization
Two vectors are orthogonal (perpendicular) when their dot product is zero. Orthogonal vectors carry no overlapping information, which reduces redundancy in data representation.

In [11]:
def is_orthogonal(a, b, atol=1e-8):
    """Return True when vectors ``a`` and ``b`` are orthogonal within ``atol``.

    The dot product of floating-point vectors is rarely exactly zero because
    of rounding, so the result is compared to zero with an absolute tolerance
    instead of ``==``.

    Parameters
    ----------
    a, b : array_like
        1-D vectors of equal length.
    atol : float, optional
        Largest absolute dot product still treated as zero.

    Returns
    -------
    bool
        True if ``|a . b| <= atol``, otherwise False.
    """
    return bool(np.isclose(np.dot(a, b), 0.0, atol=atol))


# Check orthogonality of the standard basis vectors of R^2
v1 = np.array([1, 0])
v2 = np.array([0, 1])

dot_product = np.dot(v1, v2)
print("Orthogonal:", is_orthogonal(v1, v2))  # True when the dot product is (numerically) 0

Orthogonal: True


## 12. Principal Component Analysis (PCA)
PCA reduces data dimensions by projecting data onto principal components with the most variance.

In [12]:
from sklearn.decomposition import PCA

# Small example dataset: four observations (rows) of two features (columns)
data = np.array([
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [1.9, 2.2],
])

# Fit PCA and keep only the first principal component
pca = PCA(n_components=1)
reduced_data = pca.fit_transform(data)

print("Reduced Data:\n", reduced_data)

Reduced Data:
 [[ 0.75001714]
 [-1.85685726]
 [ 0.91187236]
 [ 0.19496777]]
