# Assignment 1: Dimensionality Reduction

This notebook is a starter template for your assignment. Please fill in the required functions and complete the analysis as described in the assignment instructions.


In [None]:
# libraries
import numpy as np
import matplotlib.pyplot as plt

In [4]:
## Normalization
def normalize_data(X):
    """
    Z-score every row of X independently.

    Each row is shifted by its own mean and divided by its own population
    standard deviation (ddof=0) plus a small constant.
    :param X: array-like with at least two dimensions; rows lie along axis 0
    :return: numpy array with the same shape as X, normalized row by row
    """
    rows = np.asarray(X)
    row_mean = rows.mean(axis=1, keepdims=True)
    row_std = rows.std(axis=1, keepdims=True)
    centered = rows - row_mean
    scale = row_std + 1e-8  # Avoid division by zero for constant rows
    return centered / scale

def euclidean_distance_matrix(X):
    """
    Compute the pairwise Euclidean distance matrix for X.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b so that all
    pairs are handled by a single matrix product.
    Args:
        X: array-like of shape (n_samples, n_features). Integer and boolean
           input is promoted to float64 before any arithmetic.
    Returns:
        dist_matrix: numpy array of shape (n_samples, n_samples) whose
                     diagonal is exactly zero.
    """
    X = np.asarray(X)
    if not np.issubdtype(X.dtype, np.inexact):
        # Narrow integer types (e.g. uint8 pixel data) wrap around when squared
        # or multiplied, which would silently produce wrong distances.
        X = X.astype(np.float64)
    sq_norms = np.sum(np.square(X), axis=1)
    sq_dist = sq_norms[:, None] + sq_norms[None, :] - 2 * np.dot(X, X.T)
    # Round-off can push tiny squared distances slightly below zero; clamp
    # them so sqrt never sees a negative value.
    dist_matrix = np.sqrt(np.maximum(sq_dist, 0))
    # A point's distance to itself is zero by definition; cancellation error
    # in the expansion above may otherwise leave a small residue there.
    np.fill_diagonal(dist_matrix, 0)
    return dist_matrix

# Example usage (you can uncomment this after implementing for testing):
# dist_matrix = euclidean_distance_matrix(data_norm[:100])
# print('Distance matrix shape:', dist_matrix.shape)

def haar_matrix(n):
    """
    Build the unnormalized n x n Haar matrix.

    The matrix is grown by doubling: at each step the current rows are
    stretched with the pattern [1, 1] and a block of [1, -1] difference rows
    is appended below them. Entries are -1, 0 or 1; no 1/sqrt(2) scaling is
    applied here.
    Args:
        n: size of the matrix; must be a positive power of two
    Returns:
        H: numpy float array of shape (n, n)
    Raises:
        ValueError: if n is not a positive power of two
    """
    size = int(n)
    # Reject sizes the doubling construction cannot reach (0, negatives,
    # non-integers, non-powers of two) instead of recursing forever or
    # silently returning a smaller matrix.
    if size != n or size < 1 or size & (size - 1):
        raise ValueError("n must be a positive power of two, got %r" % (n,))

    H = np.ones((1, 1))  # float base case keeps the dtype the same for every n
    while len(H) < size:
        top = np.kron(H, [1.0, 1.0])
        bottom = np.kron(np.eye(len(H)), [1.0, -1.0])
        H = np.vstack((top, bottom))
    return H


def wavelet_transform(X, haar_n, reduced_n=None):
    """
    Apply the Haar wavelet transform to X and keep the leading coefficients.

    Each coefficient is the dot product of a sample with one row of the
    haar_n x haar_n Haar matrix, divided by the number of non-zero entries in
    that row.
    Args:
        X: array-like of shape (n_samples, n_features)
        haar_n: int, size of the Haar matrix; must equal X.shape[1]
        reduced_n: int or None, number of leading coefficients to keep
                   (applied as a slice bound). None keeps all haar_n.
    Returns:
        X_wavelet: numpy array of shape (n_samples, reduced_n), or
                   (n_samples, haar_n) when reduced_n is None
    Raises:
        ValueError: if X is not 2-D or X.shape[1] differs from haar_n
    """
    X = np.asarray(X)
    if X.ndim != 2 or X.shape[1] != haar_n:
        raise ValueError(
            "X must have shape (n_samples, %r), got %r" % (haar_n, X.shape)
        )
    # Only the retained Haar rows are needed, so slice before multiplying
    # rather than computing every coefficient and discarding most of them.
    H = haar_matrix(haar_n)[:reduced_n]
    norm_factor = np.count_nonzero(H, axis=1)  # per-row support size
    return np.dot(X, H.T) / norm_factor

# Example usage (you can uncomment this after implementing for testing):
# H = haar_matrix(data_norm.shape[1])
# data_wavelet = wavelet_transform(data_norm, haar_n=data_norm.shape[1], reduced_n=4)
# print('Wavelet-transformed data shape:', data_wavelet.shape)

def pca(X, n_components):
    """
    Project X onto its leading principal components.

    Args:
        X: numpy array of shape (n_samples, n_features)
        n_components: int, number of principal components to keep
    Returns:
        X_pca: numpy array of shape (n_samples, n_components)
    """
    samples = np.asarray(X)
    # Center each feature (column) on zero; no variance scaling is applied
    centered = samples - samples.mean(axis=0)
    # Eigen-decomposition of the symmetric feature covariance matrix
    eigvals, eigvecs = np.linalg.eigh(np.cov(centered, rowvar=False))
    # Rank directions by descending eigenvalue and keep the leading ones
    order = np.argsort(eigvals)[::-1]
    components = eigvecs[:, order][:, :n_components]
    return np.dot(centered, components)

# Example usage (uncomment after implementing):
# data_pca = pca(data_znorm, 4)
# print('PCA-transformed data shape:', data_pca.shape)


In [5]:
import numpy as np

# Evaluation fixture: 8 samples x 8 features.
# Rows 0-3 sit at +1 and rows 4-7 are their exact negations. Rows 1-3 (and
# 5-7) move one adjacent pair of columns by equal and opposite amounts, so
# every row keeps a mean of +1 or -1.
eval_data = np.array([
    [1, 1, 1, 1, 1, 1, 1, 1],
    [1.1, 0.9, 1, 1, 1, 1, 1, 1],
    [0.95, 1.05, 1, 1, 1, 1, 1, 1],
    [1, 1, 1.1, 0.9, 1, 1, 1, 1],
    [-1, -1, -1, -1, -1, -1, -1, -1],
    [-1.1, -0.9, -1, -1, -1, -1, -1, -1],
    [-0.95, -1.05, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1.1, -0.9, -1, -1, -1, -1]
])

def all_close(actual, expected, tol=1e-4, no_sign=False):
    """
    Print whether `actual` matches `expected` within an absolute tolerance.

    Args:
        actual: array-like produced by the function under test
        expected: array-like reference values
        tol: float, passed to np.allclose as `atol`
        no_sign: bool, when True compare element-wise absolute values only
    Returns:
        None; the verdict and the arrays are written to stdout.
    """
    if no_sign:
        lhs, rhs = np.abs(actual), np.abs(expected)
    else:
        lhs, rhs = actual, expected
    matches = np.allclose(lhs, rhs, atol=tol)

    if matches:
        print("The function is working correctly.")
    else:
        print("The function is NOT working correctly.")
        # Only a failure shows the reference values
        print("Expected result:")
        print(expected)
    print("Actual result:")
    print(actual)


# --- Check 1: row-wise normalization ---
# Expected: the constant rows (0 and 4) become all zeros; every perturbed row
# ends up with +/-2 at its two perturbed columns and 0 elsewhere.
expected_result = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],
    [2, -2, 0, 0, 0, 0, 0, 0],
    [-2, 2, 0, 0, 0, 0, 0, 0],
    [0, 0, 2, -2, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [-2, 2, 0, 0, 0, 0, 0, 0],
    [2, -2, 0, 0, 0, 0, 0, 0],
    [0, 0, -2, 2, 0, 0, 0, 0]
])


actual_result = normalize_data(eval_data)
# Compare the normalized data with the expected result (default tol=1e-4)
all_close(actual_result, expected_result)
# --- Check 2: pairwise Euclidean distance matrix on the raw fixture ---
# Reference values are written to 4 decimals, which the default tolerance of
# all_close (1e-4) accommodates.
expected_result = np.array(
    [
        [0.    , 0.1414, 0.0707, 0.1414, 5.6569, 5.6586, 5.6573, 5.6586],
        [0.1414, 0.    , 0.2121, 0.2   , 5.6586, 5.6639, 5.6573, 5.6604],
        [0.0707, 0.2121, 0.    , 0.1581, 5.6573, 5.6573, 5.6586, 5.6591],
        [0.1414, 0.2   , 0.1581, 0.    , 5.6586, 5.6604, 5.6591, 5.6639],
        [5.6569, 5.6586, 5.6573, 5.6586, 0.    , 0.1414, 0.0707, 0.1414],
        [5.6586, 5.6639, 5.6573, 5.6604, 0.1414, 0.    , 0.2121, 0.2   ],
        [5.6573, 5.6573, 5.6586, 5.6591, 0.0707, 0.2121, 0.    , 0.1581],
        [5.6586, 5.6604, 5.6591, 5.6639, 0.1414, 0.2   , 0.1581, 0.    ]
    ]
)

actual_result = euclidean_distance_matrix(eval_data)
# Compare the distance matrix with the expected result
all_close(actual_result, expected_result)
# --- Check 3: unnormalized 8 x 8 Haar matrix ---
# Entries are -1, 0 or 1; the "-0." literals below are just signed zeros and
# compare equal to 0.
expected_result = np.array([
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1., -1., -1., -1., -1.],
    [ 1.,  1., -1., -1.,  0.,  0., -0., -0.],
    [ 0.,  0., -0., -0.,  1.,  1., -1., -1.],
    [ 1., -1.,  0., -0.,  0., -0.,  0., -0.],
    [ 0., -0.,  1., -1.,  0., -0.,  0., -0.],
    [ 0., -0.,  0., -0.,  1., -1.,  0., -0.],
    [ 0., -0.,  0., -0.,  0., -0.,  1., -1.]])

actual_result = haar_matrix(8)
# Compare the Haar matrix with the expected result
all_close(actual_result, expected_result)

The function is working correctly.
Actual result:
[[ 0.         0.         0.         0.         0.         0.
   0.         0.       ]
 [ 1.9999996 -1.9999996  0.         0.         0.         0.
   0.         0.       ]
 [-1.9999992  1.9999992  0.         0.         0.         0.
   0.         0.       ]
 [ 0.         0.         1.9999996 -1.9999996  0.         0.
   0.         0.       ]
 [ 0.         0.         0.         0.         0.         0.
   0.         0.       ]
 [-1.9999996  1.9999996  0.         0.         0.         0.
   0.         0.       ]
 [ 1.9999992 -1.9999992  0.         0.         0.         0.
   0.         0.       ]
 [ 0.         0.        -1.9999996  1.9999996  0.         0.
   0.         0.       ]]
The function is working correctly.
Actual result:
[[0.         0.14142136 0.07071068 0.14142136 5.65685425 5.65862174
  5.65729617 5.65862174]
 [0.14142136 0.         0.21213203 0.2        5.65862174 5.6639209
  5.65729617 5.66038868]
 [0.07071068 0.21213203 0.

In [6]:
# --- Check 4: Haar wavelet transform, keeping the first 4 coefficients ---
# Expected all zeros: after row normalization each sample is either all zero
# or has +2/-2 on one adjacent column pair, and the first four Haar rows give
# both columns of every such pair the same weight, so the pair cancels.
expected_result = np.array([
    [ 0.,  0.,  0.,  0.],
    [ 0.,  0.,  0.,  0.],
    [ 0.,  0.,  0.,  0.],
    [ 0.,  0., -0.,  0.],
    [ 0.,  0.,  0.,  0.],
    [-0., -0., -0.,  0.],
    [ 0.,  0.,  0.,  0.],
    [-0., -0.,  0.,  0.]]
    )

data = normalize_data(eval_data)
actual_result = wavelet_transform(data, haar_n=data.shape[1], reduced_n=4)
# Compare the wavelet-transformed data with the expected result
all_close(actual_result, expected_result)

# --- Check 5: PCA on the normalized fixture, keeping 4 components ---
# 2.8284 is approximately 2 * sqrt(2): a (+2, -2) pair projected onto the unit
# direction (1, -1) / sqrt(2).
expected_result = np.array([
    [ 0.    ,  0.    ,  0.    ,  0.    ],
    [ 2.8284,  0.    ,  0.    ,  0.    ],
    [-2.8284,  0.    ,  0.    ,  0.    ],
    [ 0.    ,  2.8284,  0.    ,  0.    ],
    [ 0.    ,  0.    ,  0.    ,  0.    ],
    [-2.8284,  0.    , -0.    ,  0.    ],
    [ 2.8284,  0.    , -0.    ,  0.    ],
    [ 0.    , -2.8284,  0.    , -0.    ]])

data = normalize_data(eval_data)
actual_result = pca(data, n_components=4)
# Compare magnitudes only (no_sign=True): the sign of an eigenvector is not
# unique, so each projected column may come out negated.
all_close(actual_result, expected_result, no_sign=True)

The function is working correctly.
Actual result:
[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 2.77555756e-16  2.77555756e-16  5.55111512e-16  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 2.77555756e-16  2.77555756e-16 -5.55111512e-16  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-2.77555756e-16 -2.77555756e-16 -5.55111512e-16  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-2.77555756e-16 -2.77555756e-16  5.55111512e-16  0.00000000e+00]]
The function is working correctly.
Actual result:
[[ 0.          0.          0.          0.        ]
 [-2.82842656  0.          0.          0.        ]
 [ 2.82842599  0.          0.          0.        ]
 [ 0.         -2.82842656  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 2.82842656  0.          0.          0.        ]
 [-2.82842599  0.          0.          0.        ]
 [ 0. 