In [3]:
%%file ../libs/plotData.m
function plotData(X, y)
    %PLOTDATA Scatter-plot a two-feature data set, marking the two classes
    %   PLOTDATA(X, y) uses the first two columns of X as coordinates. Rows
    %   whose label in y equals 1 are drawn as black '+' markers, rows whose
    %   label equals 0 as black circles filled with yellow. The hold state is
    %   switched off again before returning.

    pos_rows = find(y == 1);
    neg_rows = find(y == 0);

    pos_pts = X(pos_rows, 1:2);
    neg_pts = X(neg_rows, 1:2);

    % Positives first; hold the axes so the negatives land on the same plot.
    plot(pos_pts(:, 1), pos_pts(:, 2), 'k+', 'LineWidth', 1, 'MarkerSize', 7);
    hold on;
    plot(neg_pts(:, 1), neg_pts(:, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
    hold off;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/plotData.m'.


In [5]:
%%file ../libs/visualizeBoundaryLinear.m
function visualizeBoundaryLinear(X, y, model)
    %VISUALIZEBOUNDARYLINEAR Plot the data and a linear decision boundary
    %   VISUALIZEBOUNDARYLINEAR(X, y, model) draws the data with plotData and
    %   overlays, as a blue line, the points where
    %   model.w(1)*x1 + model.w(2)*x2 + model.b = 0, sampled at 100 x1 values
    %   spanning the range of the first column of X.

    weights = model.w;
    bias = model.b;

    x1 = X(:, 1);
    xp = linspace(min(x1), max(x1), 100);

    % Solve w(1)*x1 + w(2)*x2 + b = 0 for x2.
    yp = -(weights(1) * xp + bias) / weights(2);

    plotData(X, y);
    hold on;
    plot(xp, yp, '-b');
    hold off;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/visualizeBoundaryLinear.m'.


In [5]:
%%file ../libs/visualizeBoundary.m
function visualizeBoundary(X, y, model, varargin)
    %VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM
    %   VISUALIZEBOUNDARY(X, y, model) plots the training data (via plotData)
    %   and overlays, in blue, the decision boundary of the SVM in model. The
    %   boundary is found by classifying a 100 x 100 grid spanning the range
    %   of the first two columns of X with svmPredict.
    %
    %   Additional arguments (varargin) are accepted but not used.

    % Plot the training data; the boundary is drawn on top of it
    plotData(X, y)

    % Make classification predictions over a grid of values
    x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)';
    x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)';
    [X1, X2] = meshgrid(x1plot, x2plot);
    vals = zeros(size(X1));
    for i = 1:size(X1, 2)
        % One grid column (fixed x1, every x2) per svmPredict call
        this_X = [X1(:, i), X2(:, i)];
        vals(:, i) = svmPredict(model, this_X);
    end

    % Plot the SVM boundary.
    % vals holds the 0/1 labels written by svmPredict, so the boundary between
    % the two classes is the 0.5 level set. A repeated value requests exactly
    % that one contour level.
    hold on
    contour(X1, X2, vals, [0.5 0.5], 'LineColor', 'b');
    hold off;

end


Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/visualizeBoundary.m'.


In [8]:
%%file ../libs/gaussianKernel.m
function sim = gaussianKernel(x1, x2, sigma)
    %GAUSSIANKERNEL Radial basis function (Gaussian) similarity of two vectors
    %   sim = GAUSSIANKERNEL(x1, x2, sigma) returns
    %   exp(-||x1 - x2||^2 / (2 * sigma^2)). Both inputs are flattened to
    %   column vectors first, so row and column vectors are interchangeable.

    delta = x1(:) - x2(:);
    sq_dist = delta' * delta;
    sim = exp(-sq_dist / (2 * sigma^2));
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/gaussianKernel.m'.


In [8]:
%%file ../libs/dataset3Params.m
function [C, sigma] = dataset3Params(X, y, Xval, yval)
    %DATASET3PARAMS Grid-search C and sigma for a Gaussian-kernel SVM
    %   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one SVM (svmTrain
    %   with gaussianKernel) on (X, y) for every pair drawn from eight
    %   candidate values, measures the misclassification rate of svmPredict
    %   on (Xval, yval), and returns the pair with the lowest rate.

    choices = [0.01; 0.03; 0.1; 0.3; 1; 3; 10; 30];
    num_choices = length(choices);

    % val_err(ci, si): validation error for C = choices(ci), sigma = choices(si)
    val_err = zeros(num_choices, num_choices);

    for ci = 1:num_choices
        for si = 1:num_choices
            kernel = @(x1, x2) gaussianKernel(x1, x2, choices(si));
            model = svmTrain(X, y, choices(ci), kernel);
            predicted = svmPredict(model, Xval);
            val_err(ci, si) = mean(double(predicted ~= yval));
        end
    end

    % min over the flattened matrix returns the first minimum in
    % column-major order; convert that linear index back to (row, column).
    [lowest_err, best] = min(val_err(:));
    [best_c, best_sigma] = ind2sub(size(val_err), best);
    C = choices(best_c);
    sigma = choices(best_sigma);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/dataset3Params.m'.


In [14]:
%%file ../libs/processEmail.m
function word_indices = processEmail(email_contents, vocabList)
    %PROCESSEMAIL Normalize an email body and map its words to vocabulary indices
    %   word_indices = PROCESSEMAIL(email_contents, vocabList) lower-cases the
    %   text, replaces HTML tags, numbers, URLs, email addresses and dollar
    %   signs with fixed placeholder words, splits the result into tokens,
    %   stems each token with porterStemmer and looks the stem up in
    %   vocabList.
    %
    %   vocabList is queried with isKey(vocabList, str) and vocabList(str)
    %   (e.g. the word => index map returned by getVocabList).
    %
    %   word_indices holds, in order of appearance, the looked-up value of
    %   every stem present in vocabList. Stems that are missing, empty, or
    %   that make porterStemmer fail are skipped.

    word_indices = [];

    % ========================== Preprocess Email ===========================

    text = lower(email_contents);

    % Any <...> tag that contains no nested < or > becomes a space
    text = regexprep(text, '<[^<>]+>', ' ');

    % Every run of digits becomes the word "number"
    text = regexprep(text, '[0-9]+', 'number');

    % Anything starting with http:// or https:// becomes "httpaddr"
    text = regexprep(text, '(http|https)://[^\s]*', 'httpaddr');

    % Non-blank text around an @ becomes "emailaddr"
    text = regexprep(text, '[^\s]+@[^\s]+', 'emailaddr');

    % Every run of $ signs becomes "dollar"
    text = regexprep(text, '[$]+', 'dollar');

    % =========================== Tokenize Email ============================

    % Characters that separate tokens: blank, punctuation, LF and CR
    delimiters = [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)];

    while ~isempty(text)

        % Peel the next token off the front of the remaining text
        [token, text] = strtok(text, delimiters);

        % Keep letters and digits only
        token = regexprep(token, '[^a-zA-Z0-9]', '');

        % porterStemmer can fail on some inputs; such tokens are dropped
        try
            token = porterStemmer(strtrim(token));
        catch
            continue;
        end

        % Nothing left to look up
        if isempty(token)
            continue;
        end

        % Record the vocabulary index when the stem is a known word
        if isKey(vocabList, token)
            word_indices(end+1) = vocabList(token);
        end
    end

end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/processEmail.m'.


In [7]:
%%file ../libs/emailFeatures.m
function x = emailFeatures(word_indices)
    %EMAILFEATURES Turn a list of word indices into a binary feature vector
    %   x = EMAILFEATURES(word_indices) returns a 1899 x 1 column of zeros
    %   with a 1 at every position listed in word_indices. Repeated indices
    %   have no additional effect.

    vocab_size = 1899;
    x = zeros(vocab_size, 1);

    % Mark every word that occurs at least once
    x(word_indices) = 1;
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/emailFeatures.m'.


# Helper

In [16]:
%%file ../libs/getVocabList.m
function [vocabList, vocabListReversed] = getVocabList(filename)
    %GETVOCABLIST Load the vocabulary file into word <=> index lookup maps
    %   [vocabList, vocabListReversed] = GETVOCABLIST(filename) reads n = 1899
    %   entries from filename. Each entry is an integer followed by a
    %   whitespace-delimited word. The integer in the file is read and
    %   discarded; a word's index is its position (1..n) in the file.
    %
    %   vocabList         containers.Map from word to index
    %   vocabListReversed containers.Map from index to word
    %
    %   Raises an error (id getVocabList:fileOpen) when filename cannot be
    %   opened.

    n = 1899;

    fd = fopen(filename);
    if fd == -1
        % fopen signals failure with -1; stop here with a clear message
        % instead of letting fscanf fail on an invalid handle.
        error('getVocabList:fileOpen', ...
              'Unable to open vocabulary file %s', filename);
    end

    vocabulary = cell(n, 1);
    for i = 1:n
        % Skip the numeric index stored in the file
        fscanf(fd, '%d', 1);
        % Brace indexing stores the char array as the content of cell i
        vocabulary{i} = fscanf(fd, '%s', 1);
    end
    fclose(fd);

    vocabList = containers.Map(vocabulary, 1:n);
    vocabListReversed = containers.Map(1:n, vocabulary);
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/getVocabList.m'.


In [9]:
%%file ../libs/readFile.m
function file_contents = readFile(filename)
    %READFILE Read an entire file into a character array
    %   file_contents = READFILE(filename) returns every character of the
    %   file, including whitespace. When the file cannot be opened, a
    %   message is printed and '' is returned.

    fd = fopen(filename);
    % fopen reports failure as -1, which is non-zero and therefore "true";
    % compare explicitly so the failure branch is actually reachable.
    if fd ~= -1
        file_contents = fscanf(fd, '%c', inf);
        fclose(fd);
    else
        file_contents = '';
        fprintf('Unable to open %s\n', filename);
    end
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/readFile.m'.


In [2]:
%%file ../libs/svmPredict.m
function pred = svmPredict(model, X)
    %SVMPREDICT returns a vector of predictions using a trained SVM model
    %(svmTrain).
    %   pred = SVMPREDICT(model, X) returns a vector of predictions using a
    %   trained SVM model (svmTrain). X is a m x n matrix where each example
    %   is a row; a single column vector is treated as one example. model is
    %   a struct with the fields kernelFunction, X, y, alphas, b and w.
    %   pred is a m x 1 column of {0, 1} values: 1 where the decision value
    %   sum_j alphas(j) * y(j) * K(x, X(j,:)) + b is >= 0, otherwise 0.

    % Check if we are getting a column vector, if so, then assume that we only
    % need to do prediction for a single example
    if (size(X, 2) == 1)
        % Examples should be in rows
        X = X';
    end

    % Dataset
    m = size(X, 1);
    p = zeros(m, 1);
    pred = zeros(m, 1);

    kernel_name = func2str(model.kernelFunction);

    if strcmp(kernel_name, 'linearKernel')
        % We can use the weights and bias directly if working with the
        % linear kernel
        p = X * model.w + model.b;
    elseif ~isempty(strfind(kernel_name, 'gaussianKernel'))
        % Vectorized RBF Kernel
        % This is equivalent to computing the kernel on every pair of examples:
        % K(i,j) starts as the squared distance between X(i,:) and model.X(j,:)
        X1 = sum(X.^2, 2);
        X2 = sum(model.X.^2, 2)';
        K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X'));
        % kernelFunction(1, 0) is the kernel value at squared distance 1;
        % raising it to the squared distance gives the kernel for each pair
        K = model.kernelFunction(1, 0) .^ K;
        K = bsxfun(@times, model.y', K);
        K = bsxfun(@times, model.alphas', K);
        % Include the bias, as the other two branches do
        p = sum(K, 2) + model.b;
    else
        % Other Non-linear kernel
        for i = 1:m
            prediction = 0;
            for j = 1:size(model.X, 1)
                prediction = prediction + ...
                    model.alphas(j) * model.y(j) * ...
                    model.kernelFunction(X(i,:)', model.X(j,:)');
            end
            p(i) = prediction + model.b;
        end
    end

    % Convert predictions into 0 / 1
    pred(p >= 0) =  1;
    pred(p <  0) =  0;

end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/svmPredict.m'.


In [10]:
%%file svmTrain.m
function [model] = svmTrain(X, Y, C, kernelFunction, ...
                            tol, max_passes)
    %SVMTRAIN Trains an SVM classifier using a simplified version of the SMO
    %algorithm.
    %   [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an
    %   SVM classifier and returns trained model. X is the matrix of training
    %   examples.  Each row is a training example, and the jth column holds the
    %   jth feature.  Y is a column matrix containing 1 for positive examples
    %   and 0 for negative examples.  C is the standard SVM regularization
    %   parameter.  kernelFunction is a handle called as
    %   kernelFunction(x1, x2) on two column vectors.  tol (default 1e-3) is a
    %   tolerance value used for determining equality of floating point
    %   numbers. max_passes (default 5) controls the number of iterations
    %   over the dataset (without changes to alpha) before the algorithm quits.
    %
    %   The returned struct has the fields
    %       X, y            examples with alpha > 0 and their labels in {-1, 1}
    %       alphas          the corresponding multipliers
    %       b               bias term
    %       w               ((alphas .* Y)' * X)' over all training examples
    %       kernelFunction  the handle that was passed in
    %
    %   An error (id svmTrain:tooFewExamples) is raised for a single training
    %   example, because the second multiplier of a pair cannot be chosen.
    %
    % Note: This is a simplified version of the SMO algorithm for training
    %       SVMs. In practice, if you want to train an SVM classifier, we
    %       recommend using an optimized package such as:
    %
    %           LIBSVM   (http://www.csie.ntu.edu.tw/~cjlin/libsvm/)
    %           SVMLight (http://svmlight.joachims.org/)
    %
    %

    if ~exist('tol', 'var') || isempty(tol)
        tol = 1e-3;
    end

    if ~exist('max_passes', 'var') || isempty(max_passes)
        max_passes = 5;
    end

    % Data parameters
    m = size(X, 1);

    % The partner index j is drawn until it differs from i, which can never
    % happen with exactly one example.
    if m == 1
        error('svmTrain:tooFewExamples', ...
              'svmTrain needs at least two training examples.');
    end

    % Map 0 to -1
    Y(Y==0) = -1;

    % Variables
    alphas = zeros(m, 1);
    b = 0;
    E = zeros(m, 1);
    passes = 0;

    % Pre-compute the Kernel Matrix since our dataset is small
    % (in practice, optimized SVM packages that handle large datasets
    %  gracefully will _not_ do this)
    %
    % We have implemented optimized vectorized version of the Kernels here so
    % that the svm training will run faster.
    kernel_name = func2str(kernelFunction);
    if strcmp(kernel_name, 'linearKernel')
        % Vectorized computation for the Linear Kernel
        % This is equivalent to computing the kernel on every pair of examples
        K = X*X';
    elseif ~isempty(strfind(kernel_name, 'gaussianKernel'))
        % Vectorized RBF Kernel
        % This is equivalent to computing the kernel on every pair of examples:
        % squared pairwise distances first, then kernelFunction(1, 0) (the
        % kernel value at squared distance 1) raised to those distances.
        X2 = sum(X.^2, 2);
        K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X')));
        K = kernelFunction(1, 0) .^ K;
    else
        % Pre-compute the Kernel Matrix
        % The following can be slow due to the lack of vectorization
        K = zeros(m);
        for i = 1:m
            for j = i:m
                K(i,j) = kernelFunction(X(i,:)', X(j,:)');
                K(j,i) = K(i,j); %the matrix is symmetric
            end
        end
    end

    % Train
    fprintf('\nTraining ...');
    dots = 12;
    while passes < max_passes

        num_changed_alphas = 0;
        for i = 1:m

            % Calculate Ei = f(x(i)) - y(i) using (2).
            E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i);

            if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0))

                % In practice, there are many heuristics one can use to select
                % the i and j. In this simplified code, we select them randomly.
                j = ceil(m * rand());
                while j == i  % Make sure i \neq j
                    j = ceil(m * rand());
                end

                % Calculate Ej = f(x(j)) - y(j) using (2).
                E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j);

                % Save old alphas
                alpha_i_old = alphas(i);
                alpha_j_old = alphas(j);

                % Compute L and H by (10) or (11).
                if (Y(i) == Y(j))
                    L = max(0, alphas(j) + alphas(i) - C);
                    H = min(C, alphas(j) + alphas(i));
                else
                    L = max(0, alphas(j) - alphas(i));
                    H = min(C, C + alphas(j) - alphas(i));
                end

                if (L == H)
                    % continue to next i.
                    continue;
                end

                % Compute eta by (14).
                eta = 2 * K(i,j) - K(i,i) - K(j,j);
                if (eta >= 0)
                    % continue to next i.
                    continue;
                end

                % Compute and clip new value for alpha j using (12) and (15).
                alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta;

                % Clip
                alphas(j) = min (H, alphas(j));
                alphas(j) = max (L, alphas(j));

                % Check if change in alpha is significant
                if (abs(alphas(j) - alpha_j_old) < tol)
                    % continue to next i.
                    % replace anyway
                    alphas(j) = alpha_j_old;
                    continue;
                end

                % Determine value for alpha i using (16).
                alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j));

                % Compute b1 and b2 using (17) and (18) respectively.
                % b1 pairs the alpha_i change with K(i,i) and the alpha_j
                % change with K(i,j); b2 uses K(i,j) and K(j,j).
                b1 = b - E(i) ...
                    - Y(i) * (alphas(i) - alpha_i_old) * K(i,i) ...
                    - Y(j) * (alphas(j) - alpha_j_old) * K(i,j);
                b2 = b - E(j) ...
                    - Y(i) * (alphas(i) - alpha_i_old) * K(i,j) ...
                    - Y(j) * (alphas(j) - alpha_j_old) * K(j,j);

                % Compute b by (19).
                if (0 < alphas(i) && alphas(i) < C)
                    b = b1;
                elseif (0 < alphas(j) && alphas(j) < C)
                    b = b2;
                else
                    b = (b1+b2)/2;
                end

                num_changed_alphas = num_changed_alphas + 1;

            end

        end

        % Only consecutive sweeps without any update count towards max_passes
        if (num_changed_alphas == 0)
            passes = passes + 1;
        else
            passes = 0;
        end

        % Progress output: one dot per sweep, wrapped after 78 characters
        fprintf('.');
        dots = dots + 1;
        if dots > 78
            dots = 0;
            fprintf('\n');
        end
        if exist('OCTAVE_VERSION')
            fflush(stdout);
        end
    end
    fprintf(' Done! \n\n');

    % Save the model
    idx = alphas > 0;
    model.X= X(idx,:);
    model.y= Y(idx);
    model.kernelFunction = kernelFunction;
    model.b= b;
    model.alphas= alphas(idx);
    model.w = ((alphas.*Y)'*X)';
end

Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/notebooks/svmTrain.m'.


In [10]:
%%file ../libs/porterStemmer.m
function stem = porterStemmer(inString)
% PORTERSTEMMER Reduce a word to its stem with the Porter algorithm.
%
%   stem = porterStemmer(inString) lower-cases inString and returns its stem
%   as a string. Words of one or two characters are returned lower-cased
%   without stemming (remove that shortcut to match the published
%   algorithm).
%
% Reference:
%   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
%   no. 3, pp 130-137
%
% Original code modeled after the C version provided at:
% http://www.tartarus.org/~martin/PorterStemmer/c.txt
%
% Conventions shared with the helper functions in this file: the word lives
% in a character array whose first letter is at index k0 (always 1 here,
% because indexing is 1-based) and whose last letter is at index k. k moves
% downwards as suffixes are stripped. The global j is a second end marker
% that the helpers read and update.

global j;

word = lower(inString);
k0 = 1;
k = length(word);
j = k;

% Short words are passed through unchanged.
stem = word;
if k <= 2
    return
end

% Each step takes and returns the pair {word, index of last letter}.
state = step1ab(word, k, k0);
state = step1c(state{1}, state{2}, k0);
state = step2(state{1}, state{2}, k0);
state = step3(state{1}, state{2}, k0);
state = step4(state{1}, state{2}, k0);
state = step5(state{1}, state{2}, k0);
stem = state{1};

% cons(j) is TRUE <=> b[j] is a consonant.
function c = cons(i, b, k0)
% CONS True when b(i) is a consonant.
%   a, e, i, o and u are vowels. A 'y' is a consonant at position k0 and
%   otherwise takes the opposite role of the letter before it. Every other
%   character is a consonant.
letter = b(i);
if any(letter == 'aeiou')
    c = false;
elseif letter == 'y'
    c = (i == k0) || ~cons(i - 1, b, k0);
else
    c = true;
end

% measure() counts the number of consonant sequences between k0 and j.  If
% c is a consonant sequence and v a vowel sequence, and <..> indicates
% arbitrary presence,

%      <c><v>       gives 0
%      <c>vc<v>     gives 1
%      <c>vcvc<v>   gives 2
%      <c>vcvcvc<v> gives 3
%      ....
function n = measure(b, k0)
% MEASURE Count the vowel-run/consonant-run pairs in b(k0..j).
%   Reads the global j as the last position to inspect. A leading run of
%   consonants is skipped; after that n is increased once for every run of
%   vowels that is followed by at least one consonant.
global j;
n = 0;
pos = k0;

% Skip the optional leading consonants.
while pos <= j && cons(pos, b, k0)
    pos = pos + 1;
end
if pos > j
    return
end
pos = pos + 1;

while true
    % Rest of the current vowel run.
    while pos <= j && ~cons(pos, b, k0)
        pos = pos + 1;
    end
    if pos > j
        return
    end
    % A consonant follows the vowels: one more pair.
    pos = pos + 1;
    n = n + 1;

    % Rest of the current consonant run.
    while pos <= j && cons(pos, b, k0)
        pos = pos + 1;
    end
    if pos > j
        return
    end
    pos = pos + 1;
end


% vowelinstem() is TRUE <=> k0,...j contains a vowel
function vis = vowelinstem(b, k0)
% VOWELINSTEM True when b(k0..j) contains at least one vowel.
%   Reads the global j as the last position of the stem.
global j;
vis = false;
pos = k0;
while pos <= j
    if ~cons(pos, b, k0)
        vis = true;
        return
    end
    pos = pos + 1;
end

%doublec(i) is TRUE <=> i,(i-1) contain a double consonant.
function dc = doublec(i, b, k0)
% DOUBLEC True when b(i-1) and b(i) are the same consonant.
%   Returns false when i is the first position (no predecessor) or when the
%   two characters differ.
dc = false;
if i >= k0 + 1 && b(i) == b(i-1)
    dc = cons(i, b, k0);
end


% cvc(j) is TRUE <=> j-2,j-1,j has the form consonant - vowel - consonant
% and also if the second c is not w,x or y. this is used when trying to
% restore an e at the end of a short word. e.g.
%
%      cav(e), lov(e), hop(e), crim(e), but
%      snow, box, tray.

function c1 = cvc(i, b, k0)
% CVC True when b(i-2), b(i-1), b(i) read consonant - vowel - consonant
%   and the final consonant is not w, x or y. Returns false when fewer
%   than three characters are available before position i.
c1 = false;
if i < k0 + 2
    return
end
if ~(cons(i, b, k0) && ~cons(i-1, b, k0) && cons(i-2, b, k0))
    return
end
c1 = ~any(b(i) == 'wxy');

% ends(s) is TRUE <=> k0,...k ends with the string s.
function s = ends(str, b, k)
% ENDS True when b(1..k) ends with the string str.
%   On a match the global j is set to the position just before the suffix
%   (k - length(str)); on a mismatch j is left untouched.
global j;
len = length(str);
s = false;

% Cheap rejection: the last characters must agree.
if str(len) ~= b(k)
    return
end
% The suffix cannot be longer than the word.
if len > k
    return
end
if strcmp(b(k-len+1:k), str)
    s = true;
    j = k - len;
end

% setto(s) sets (j+1),...k to the characters in the string s, readjusting
% k accordingly.

function so = setto(s, b, k)
% SETTO Replace everything after position j of b with the string s.
%   Reads the global j. Characters j+1 .. j+length(s) are overwritten with
%   s and any characters left between there and k are deleted. Returns
%   {new word, its length}.
global j;
len = length(s);
for offset = 1:len
    b(j + offset) = s(offset);
end
tail_start = j + len + 1;
if tail_start <= k
    b(tail_start:k) = '';
end
so = {b, length(b)};

% rs(s) is used further down.
% [Note: possible null/value for r if rs is called]
function r = rs(str, b, k, k0)
% RS Apply setto(str, b, k) only when measure(b, k0) is positive.
%   Otherwise the word and its end index are returned unchanged as {b, k}.
if measure(b, k0) > 0
    r = setto(str, b, k);
else
    r = {b, k};
end

% step1ab() gets rid of plurals and -ed or -ing. e.g.

%       caresses  ->  caress
%       ponies    ->  poni
%       ties      ->  ti
%       caress    ->  caress
%       cats      ->  cat

%       feed      ->  feed
%       agreed    ->  agree
%       disabled  ->  disable

%       matting   ->  mat
%       mating    ->  mate
%       meeting   ->  meet
%       milling   ->  mill
%       messing   ->  mess

%       meetings  ->  meet

function s1ab = step1ab(b, k, k0)
% STEP1AB Strip plural endings and -eed / -ed / -ing.
%   b  word being stemmed (char array)
%   k  index of its last letter
%   k0 index of its first letter
%   Returns {b(k0:k), k} with the updated word and end index, and leaves the
%   global j equal to the returned k.
%   Relies on ends() setting the global j to the position just before a
%   matched suffix, and on setto()/measure()/vowelinstem() reading that j.
global j;
% --- plurals ---
if b(k) == 's'
    if ends('sses', b, k)
        % drop the trailing "es"
        k = k-2;
    elseif ends('ies', b, k)
        % replace "ies" by "i"
        retVal = setto('i', b, k);
        b = retVal{1};
        k = retVal{2};
    elseif (b(k-1) ~= 's')
        % a single trailing "s" is removed; a trailing "ss" is kept
        k = k-1;
    end
end
% --- -eed, -ed, -ing ---
if ends('eed', b, k)
    % "eed" loses its final letter when the part before it has measure > 0
    if measure(b, k0) > 0;
        k = k-1;
    end
elseif (ends('ed', b, k) || ends('ing', b, k)) && vowelinstem(b, k0)
    % cut the suffix: j was set by the ends() call that matched
    k = j;
    retVal = {b, k};
    % the ends() calls below move j again, so setto() writes right after
    % the matched two-letter ending's start
    if ends('at', b, k)
        retVal = setto('ate', b(k0:k), k);
    elseif ends('bl', b, k)
        retVal = setto('ble', b(k0:k), k);
    elseif ends('iz', b, k)
        retVal = setto('ize', b(k0:k), k);
    elseif doublec(k, b, k0)
        % drop one letter of a double consonant ...
        retVal = {b, k-1};
        % ... unless the remaining letter is l, s or z
        if b(retVal{2}) == 'l' || b(retVal{2}) == 's' || ...
                b(retVal{2}) == 'z'
            retVal = {retVal{1}, retVal{2}+1};
        end
    elseif measure(b, k0) == 1 && cvc(k, b, k0)
        % append an "e" after a consonant-vowel-consonant ending
        retVal = setto('e', b(k0:k), k);
    end
    k = retVal{2};
    b = retVal{1}(k0:k);
end
j = k;
s1ab = {b(k0:k), k};

%  step1c() turns terminal y to i when there is another vowel in the stem.
function s1c = step1c(b, k, k0)
% STEP1C Change a final 'y' to 'i' when the stem before it has a vowel.
%   Returns {word, k}; the global j is left equal to k.
global j;
if ends('y', b, k)
    % ends() has moved j to just before the 'y'; vowelinstem() scans up to j
    if vowelinstem(b, k0)
        b(k) = 'i';
    end
end
j = k;
s1c = {b, k};

% step2() maps double suffixes to single ones. so -ization ( = -ize plus
% -ation) maps to -ize etc. note that the string before the suffix must give
% m() > 0.
function s2 = step2(b, k, k0)
% STEP2 Map a double suffix to a single one (e.g. -ization => -ize).
%   b  word being stemmed, k index of its last letter, k0 index of its first
%   Returns {word, new end index}. A suffix is only rewritten (via rs) when
%   the part of the word before it has measure > 0. The global j is left
%   equal to the returned end index.
global j;
s2 = {b, k};
% The candidate suffixes are grouped by the penultimate letter b(k-1). A
% one-letter word has no penultimate letter and none of the suffixes can
% match it, so it is passed through instead of indexing b(0).
if k > k0
    switch b(k-1)
        case {'a'}
            if ends('ational', b, k) s2 = rs('ate', b, k, k0);
            elseif ends('tional', b, k) s2 = rs('tion', b, k, k0); end;
        case {'c'}
            if ends('enci', b, k) s2 = rs('ence', b, k, k0);
            elseif ends('anci', b, k) s2 = rs('ance', b, k, k0); end;
        case {'e'}
            if ends('izer', b, k) s2 = rs('ize', b, k, k0); end;
        case {'l'}
            if ends('bli', b, k) s2 = rs('ble', b, k, k0);
            elseif ends('alli', b, k) s2 = rs('al', b, k, k0);
            elseif ends('entli', b, k) s2 = rs('ent', b, k, k0);
            elseif ends('eli', b, k) s2 = rs('e', b, k, k0);
            elseif ends('ousli', b, k) s2 = rs('ous', b, k, k0); end;
        case {'o'}
            if ends('ization', b, k) s2 = rs('ize', b, k, k0);
            elseif ends('ation', b, k) s2 = rs('ate', b, k, k0);
            elseif ends('ator', b, k) s2 = rs('ate', b, k, k0); end;
        case {'s'}
            if ends('alism', b, k) s2 = rs('al', b, k, k0);
            elseif ends('iveness', b, k) s2 = rs('ive', b, k, k0);
            elseif ends('fulness', b, k) s2 = rs('ful', b, k, k0);
            elseif ends('ousness', b, k) s2 = rs('ous', b, k, k0); end;
        case {'t'}
            if ends('aliti', b, k) s2 = rs('al', b, k, k0);
            elseif ends('iviti', b, k) s2 = rs('ive', b, k, k0);
            elseif ends('biliti', b, k) s2 = rs('ble', b, k, k0); end;
        case {'g'}
            if ends('logi', b, k) s2 = rs('log', b, k, k0); end;
    end
end
j = s2{2};

% step3() deals with -ic-, -full, -ness etc. similar strategy to step2.
function s3 = step3(b, k, k0)
% STEP3 Rewrite the suffixes -icate, -ative, -alize, -iciti, -ical, -ful
%   and -ness. The first suffix that matches is handed to rs(), which
%   applies the replacement only when the part before the suffix has
%   measure > 0. Returns {word, new end index}; the global j is left equal
%   to that end index.
global j;
s3 = {b, k};

% suffix, replacement -- tried from top to bottom
rules = {'icate', 'ic'; ...
         'ative', ''; ...
         'alize', 'al'; ...
         'iciti', 'ic'; ...
         'ical',  'ic'; ...
         'ful',   ''; ...
         'ness',  ''};

for r = 1:size(rules, 1)
    if ends(rules{r, 1}, b, k)
        s3 = rs(rules{r, 2}, b, k, k0);
        break;
    end
end
j = s3{2};

% step4() takes off -ant, -ence etc., in context <c>vcvc<v>.
function s4 = step4(b, k, k0)
% STEP4 Remove -ant, -ence etc. when the remaining stem has measure > 1.
%   b  word being stemmed, k index of its last letter, k0 index of its first
%   Returns {word, new end index}.
%   A matching ends() call moves the global j to just before the suffix;
%   the word is then cut at j only if measure(b, k0) > 1, otherwise it is
%   returned unchanged.
global j;
% The candidate suffixes are grouped by the penultimate letter b(k-1). A
% one-letter word has no penultimate letter and cannot carry any of the
% suffixes, so the lookup is skipped instead of indexing b(0).
if k > k0
    switch b(k-1)
        case {'a'}
            if ends('al', b, k) end;
        case {'c'}
            if ends('ance', b, k)
            elseif ends('ence', b, k) end;
        case {'e'}
            if ends('er', b, k) end;
        case {'i'}
            if ends('ic', b, k) end;
        case {'l'}
            if ends('able', b, k)
            elseif ends('ible', b, k) end;
        case {'n'}
            if ends('ant', b, k)
            elseif ends('ement', b, k)
            elseif ends('ment', b, k)
            elseif ends('ent', b, k) end;
        case {'o'}
            if ends('ion', b, k)
                % -ion is only removable after an s or t; otherwise put j
                % back at k so that nothing is cut below
                if j == 0
                elseif ~(strcmp(b(j),'s') || strcmp(b(j),'t'))
                    j = k;
                end
            elseif ends('ou', b, k) end;
        case {'s'}
            if ends('ism', b, k) end;
        case {'t'}
            if ends('ate', b, k)
            elseif ends('iti', b, k) end;
        case {'u'}
            if ends('ous', b, k) end;
        case {'v'}
            if ends('ive', b, k) end;
        case {'z'}
            if ends('ize', b, k) end;
    end
end
if measure(b, k0) > 1
    s4 = {b(k0:j), j};
else
    s4 = {b(k0:k), k};
end

% step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
function s5 = step5(b, k, k0)
% STEP5 Drop a final 'e' and reduce a final 'll' to 'l'.
%   The 'e' goes when the measure is above 1, or equals 1 and the letters
%   before it do not form a consonant-vowel-consonant ending (cvc). A final
%   double 'l' loses one letter when the measure is above 1.
%   Sets the global j to the incoming k and returns {word, new end index}.
global j;
j = k;
if b(k) == 'e'
    m = measure(b, k0);
    if m > 1
        k = k - 1;
    elseif m == 1 && ~cvc(k-1, b, k0)
        k = k - 1;
    end
end
if b(k) == 'l'
    if doublec(k, b, k0) && measure(b, k0) > 1
        k = k - 1;
    end
end
s5 = {b(k0:k), k};


Created file '/Users/jchien/workspace/courses/coursera_ml/ex6/octave/libs/porterStemmer.m'.
