In [4]:
%%file ../libs/findClosestCentroids.m
function idx = findClosestCentroids(X, centroids)
    % FINDCLOSESTCENTROIDS Assign every example to its nearest centroid.
    %   idx = FINDCLOSESTCENTROIDS(X, centroids) returns a column vector with
    %   one entry per row of X. Entry i is the row number of the centroid whose
    %   squared Euclidean distance to X(i, :) is smallest. Ties are resolved by
    %   min(), which reports the first minimum it encounters.

    num_examples = size(X, 1);
    idx = zeros(num_examples, 1);

    for row = 1:num_examples
        % Offsets from this example to every centroid (one centroid per row).
        offsets = bsxfun(@minus, X(row, :), centroids);
        distances = sum(offsets .^ 2, 2);
        [~, nearest] = min(distances);
        idx(row) = nearest;
    end
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/findClosestCentroids.m'.


In [7]:
%%file ../libs/computeCentroids.m
function centroids = computeCentroids(X, idx, K)
    % COMPUTECENTROIDS Recompute each centroid as the mean of its members.
    %   centroids = COMPUTECENTROIDS(X, idx, K) returns a K-by-n matrix whose
    %   row k is the mean of the rows of X for which idx equals k.
    %
    %   X    m-by-n data matrix, one example per row.
    %   idx  cluster assignment for every row of X (values in 1..K).
    %   K    number of clusters.
    %
    %   A cluster with no members gets a row of NaN (0 / 0), because the mean
    %   of an empty set is undefined.

    n = size(X, 2);
    centroids = zeros(K, n);

    for k = 1:K
        members = (idx == k);
        % Reduce explicitly along dimension 1. Without the dimension argument
        % sum() collapses a single member row (1-by-n) across its features
        % into one scalar, which would then be broadcast into the whole row.
        centroids(k, :) = sum(X(members, :), 1) / nnz(members);
    end
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/computeCentroids.m'.


In [13]:
%%file ../libs/kMeansInitCentroids.m
function centroids = kMeansInitCentroids(X, K)
    % KMEANSINITCENTROIDS Pick K distinct rows of X as starting centroids.
    %   centroids = KMEANSINITCENTROIDS(X, K) shuffles the row numbers of X
    %   with randperm and returns the rows selected by the first K of them,
    %   so no row of X is chosen twice.

    shuffled_rows = randperm(size(X, 1));
    chosen = shuffled_rows(1:K);
    centroids = X(chosen, :);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/kMeansInitCentroids.m'.


In [3]:
%%file ../libs/pca.m
function [U, S] = pca(X)
    % PCA Singular value decomposition of the matrix X' * X / m.
    %   [U, S] = PCA(X) forms Sigma = (X' * X) / m, where m is the number of
    %   rows of X, and returns the first two outputs of svd(Sigma).
    %   NOTE(review): Sigma is the covariance matrix only if the columns of X
    %   have already been mean-centred - confirm against callers.

    num_examples = size(X, 1);
    second_moment = (X' * X) / num_examples;

    % The third svd output is requested, as in the original call, but unused.
    [U, S, ~] = svd(second_moment);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/pca.m'.


In [10]:
%%file ../libs/projectData.m
function Z = projectData(X, U, K)
    % PROJECTDATA Project the rows of X onto the first K columns of U.
    %   Z = PROJECTDATA(X, U, K) returns X * U(:, 1:K). With X of size m-by-n
    %   and U having n rows, Z is m-by-K.

    basis = U(:, 1:K);
    Z = X * basis;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/projectData.m'.


In [11]:
%%file ../libs/recoverData.m
function X_rec = recoverData(Z, U, K)
    % RECOVERDATA Map projected rows back using the first K columns of U.
    %   X_rec = RECOVERDATA(Z, U, K) returns Z * U(:, 1:K)'. With Z of size
    %   m-by-K and U having n rows, X_rec is m-by-n.

    basis = U(:, 1:K);   % n-by-K
    X_rec = Z * basis';  % (m-by-K) * (K-by-n)
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/recoverData.m'.


In [4]:
%%file ../libs/pcaLoss.m
function loss = pcaLoss(X, Z, U, K)
    % PCALOSS Relative squared reconstruction error of a projection.
    %   loss = PCALOSS(X, Z, U, K) rebuilds the data with recoverData(Z, U, K)
    %   and returns the sum of squared differences between that result and X,
    %   divided by the sum of squared entries of X.

    reconstruction = recoverData(Z, U, K);
    residual = reconstruction - X;

    residual_energy = sum(sum(residual .^ 2));
    total_energy = sum(sum(X .^ 2));

    loss = residual_energy / total_energy;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/pcaLoss.m'.


In [26]:
%%file ../libs/pcaLossQuick.m
function loss = pcaLossQuick(S, K)
    % PCALOSSQUICK Fraction of the diagonal mass of S outside its first K entries.
    %   loss = PCALOSSQUICK(S, K) returns
    %       1 - (S(1,1) + ... + S(K,K)) / (S(1,1) + ... + S(n,n))
    %   where n is the number of rows of S. K = 0 gives a loss of 1.
    %   NOTE(review): S is presumably the second output of pca - confirm.

    n = size(S, 1);
    diagonal = arrayfun(@(k) S(k, k), (1:n)');

    % running(j + 1) is the sum of the first j diagonal entries.
    running = cumsum([0; diagonal]);
    loss = 1 - running(K + 1) / running(end);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/pcaLossQuick.m'.


In [32]:
%%file ../libs/pcaKinMaxLoss.m
function K = pcaKinMaxLoss(S, max_loss)
    % PCAKINMAXLOSS Smallest K whose loss does not exceed max_loss.
    %   K = PCAKINMAXLOSS(S) uses max_loss = 0.01.
    %   K = PCAKINMAXLOSS(S, max_loss) binary-searches K in 1..n, where n is
    %   the number of rows of S, for the smallest K satisfying
    %       1 - (S(1,1) + ... + S(K,K)) / (S(1,1) + ... + S(n,n)) <= max_loss
    %
    %   The search assumes the loss does not increase with K, which holds
    %   when the diagonal entries of S are non-negative.
    %   The result never exceeds n: if no K meets the bound (negative
    %   max_loss, or NaN losses from an all-zero diagonal) n is returned.

    % The default is handled with nargin instead of a default value in the
    % signature, so the file also parses under MATLAB.
    if nargin < 2 || isempty(max_loss)
        max_loss = 0.01;
    end

    n = size(S, 1);

    % psum(j + 1) is the sum of the first j diagonal entries of S.
    psum = zeros(n + 1, 1);
    for j = 1:n
        psum(j + 1) = psum(j) + S(j, j);
    end

    lo = 1;
    hi = n;  % K = n is the largest valid answer, so the search stops there.
    while lo < hi
        mid = lo + floor((hi - lo) / 2);
        loss = 1 - psum(mid + 1) / psum(end);
        if loss <= max_loss
            hi = mid;
        else
            lo = mid + 1;
        end
    end
    K = lo;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/pcaKinMaxLoss.m'.


# Helper

In [2]:
%%file ../libs/featureNormalize.m
function [X_norm, mu, sigma] = featureNormalize(X)
    % FEATURENORMALIZE Centre every column of X and scale it to unit spread.
    %   [X_norm, mu, sigma] = FEATURENORMALIZE(X) subtracts the column means
    %   mu from X and divides each column by sigma, its standard deviation.
    %
    %   mu     1-by-n row of column means.
    %   sigma  1-by-n row of column standard deviations (N-1 normalisation).
    %          Columns with zero spread report sigma = 1 so that they map to
    %          zeros instead of NaN.

    % Reduce explicitly along dimension 1. Without the dimension argument,
    % mean() and std() would collapse a single-row X across its features.
    mu = mean(X, 1);
    X_norm = bsxfun(@minus, X, mu);

    sigma = std(X_norm, 0, 1);
    sigma(sigma == 0) = 1;  % constant column: avoid dividing 0 by 0

    X_norm = bsxfun(@rdivide, X_norm, sigma);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/featureNormalize.m'.


In [12]:
%%file ../libs/runkMeans.m
function [centroids, idx] = runkMeans(X, initial_centroids, max_iters, plot_progress)
    % RUNKMEANS Run a fixed number of K-means iterations.
    %   [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, plot_progress)
    %   starts from initial_centroids and repeats max_iters times: assign every
    %   row of X with findClosestCentroids, then recompute the centroids with
    %   computeCentroids.
    %
    %   plot_progress is optional and defaults to false. When it is true, a
    %   new figure is opened and plotProgresskMeans is called every iteration.
    %
    %   The returned idx holds the assignments made in the last iteration,
    %   i.e. before the final centroid update. With max_iters = 0 it is all
    %   zeros.

    if nargin < 4 || isempty(plot_progress)
        plot_progress = false;
    end

    if plot_progress
        figure;
        hold on;
    end

    num_examples = size(X, 1);
    K = size(initial_centroids, 1);

    centroids = initial_centroids;
    previous_centroids = centroids;
    idx = zeros(num_examples, 1);

    % Only Octave needs the explicit flush of stdout below.
    running_octave = exist('OCTAVE_VERSION');

    for iter = 1:max_iters
        fprintf('K-Means iteration %d/%d...\n', iter, max_iters);
        if running_octave
            fflush(stdout);
        end

        % Assignment step.
        idx = findClosestCentroids(X, centroids);

        % Optional visualisation of this iteration's centroid movement.
        if plot_progress
            plotProgresskMeans(X, centroids, previous_centroids, idx, K, iter);
            previous_centroids = centroids;
        end

        % Update step.
        centroids = computeCentroids(X, idx, K);
    end

    if plot_progress
        hold off;
    end
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/runkMeans.m'.


In [8]:
%%file ../libs/displayData.m
function [h, display_array] = displayData(X, example_width)
    % DISPLAYDATA Show the rows of X as a grid of grayscale tiles.
    %   [h, display_array] = DISPLAYDATA(X, example_width) reshapes every row
    %   of X into an example_height-by-example_width tile, scales it by the
    %   largest absolute value in that row, and places the tiles in a grid
    %   separated by one-pixel borders of value -1. The grid is drawn with
    %   imagesc on a [-1 1] colour scale.
    %
    %   example_width is optional and defaults to round(sqrt(size(X, 2))).
    %   h is the handle returned by imagesc; display_array is the grid matrix.

    if nargin < 2 || isempty(example_width)
        example_width = round(sqrt(size(X, 2)));
    end

    colormap(gray);

    [m, n] = size(X);
    example_height = n / example_width;

    % Grid layout: close to square, with enough columns to hold all m tiles.
    display_rows = floor(sqrt(m));
    display_cols = ceil(m / display_rows);

    pad = 1;
    cell_height = example_height + pad;
    cell_width = example_width + pad;

    % Background (and the borders between tiles) is -1.
    display_array = -ones(pad + display_rows * cell_height, ...
                          pad + display_cols * cell_width);

    % Tiles are laid out row by row, left to right.
    for ex = 1:m
        grid_row = floor((ex - 1) / display_cols);
        grid_col = mod(ex - 1, display_cols);

        row_span = pad + grid_row * cell_height + (1:example_height);
        col_span = pad + grid_col * cell_width + (1:example_width);

        % Scale each tile by the largest magnitude found in its own row of X.
        max_val = max(abs(X(ex, :)));
        tile = reshape(X(ex, :), example_height, example_width);
        display_array(row_span, col_span) = tile / max_val;
    end

    h = imagesc(display_array, [-1 1]);

    axis('image', 'off');

    drawnow;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/displayData.m'.


In [9]:
%%file ../libs/plotProgresskMeans.m
function plotProgresskMeans(X, centroids, previous, idx, K, i)
    % PLOTPROGRESSKMEANS Draw one iteration of K-means on the current axes.
    %   PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data
    %   with plotDataPoints, marks every centroid with a black x, connects
    %   each centroid to the matching row of previous with drawLine, and sets
    %   the title to the iteration number i.
    %   Only the first two columns of centroids are used for the markers.

    % Data points, coloured by plotDataPoints.
    plotDataPoints(X, idx, K);

    % Current centroid positions.
    centroid_x = centroids(:, 1);
    centroid_y = centroids(:, 2);
    plot(centroid_x, centroid_y, 'x', ...
         'MarkerEdgeColor', 'k', ...
         'MarkerSize', 10, 'LineWidth', 3);

    % One segment per centroid from its current to its previous position.
    num_centroids = size(centroids, 1);
    for c = 1:num_centroids
        drawLine(centroids(c, :), previous(c, :));
    end

    title(sprintf('Iteration number %d', i));
end



Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/plotProgresskMeans.m'.


In [29]:
%%file ../libs/plotDataPoints.m
function plotDataPoints(X, idx, K)
    % PLOTDATAPOINTS Scatter-plot the first two columns of X, coloured by idx.
    %   PLOTDATAPOINTS(X, idx, K) builds an hsv palette with K + 1 colours and
    %   gives every point the palette row selected by its entry in idx, so
    %   points sharing an index share a colour. The marker size is 15.

    palette = hsv(K + 1);
    point_colors = palette(idx, :);

    x_coords = X(:, 1);
    y_coords = X(:, 2);
    scatter(x_coords, y_coords, 15, point_colors);
end


Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/plotDataPoints.m'.


In [11]:
%%file ../libs/drawLine.m
function drawLine(p1, p2, varargin)
    % DRAWLINE Plot a line segment between two 2D points.
    %   DRAWLINE(p1, p2) plots a line from p1 to p2 using their first two
    %   elements as x and y coordinates.
    %   DRAWLINE(p1, p2, ...) forwards any further arguments to plot.
    %   The function does not change the hold state itself.

    xs = [p1(1) p2(1)];
    ys = [p1(2) p2(2)];
    plot(xs, ys, varargin{:});
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex7/octave/libs/drawLine.m'.
