#### Input: A matrix X of size NxD(N is number of sample, D is number of features); y is colunm vector of size Nx1 of values 1, 2, ..., K
#### Output: Conditional probability matrix theta_jk of size DxK and prior distribution theta, which is a vecto of size K.

In [1]:
function [theta_jk, theta] = train(X, y)
    [N, D] = size(X);
    K = length(unique(y));
    %prior distribution
    theta = zeros(K, 1);
    
    %conditional likelihood
    theta_jk = zeros(K, D);
    
    for k=1:K
        theta(k) = sum(y == k);
        idx = (y == k); %a colunm logic vecto of size N
        
        X_k = X(idx, :); %extract all samples in X that have label k
        u_k= sum(X_k); %a row vector of size D
        
        theta_jk(k, :)= u_k/theta(k);
    endfor
    theta = theta/N; %sum(theta) = N
endfunction

In [2]:
function y = classify(x, theta_jk, theta_k)
    [K, D] = size(theta_jk);
    y = 1;
    r = zeros(K, 1);
    for k = 1: K
        p_k = 1
        for i = 1:D
            if (x(i) == 1)
                p_k = p_k * theta_jk(k, i);
            else
                p_k = p_k * (1 - theta_jk(k, i));
            endif
        endfor
        r(k) = p_k * theta_k(k)
        if (r(k) > r(y))
             y = k
        endif
    endfor
endfunction

#### Evaluates the accuracy of our model on a new test set.

In [3]:
function accuracy = eval(X_test, y_test, theta_jk, theta_k)
    M = length(y_test);
    prediction = zeros(M, 1);
    for i = 1:M
        x = X_test(i, :);
        prediction(i) = classify(x, theta_jk, theta_k)
    endfor
    % compare y_test and prediction
    match = (y_test == prediction); % vector logic
    accuracy = sum(match)/M * 100;
endfunction

In [4]:
function naiveBayes
    A = load('weather.txt');
    y = A(:, end);
    X = A(:, 1 : (end - 1));
    [theta_jk, theta] = train(X, y)
    % prediction for a new sample
    x = [1, 0, 1, 0];
    y = classify(x, theta_jk, theta)
endfunction

In [5]:
naiveBayes

theta_jk =

   0.25000   0.25000   0.50000   0.25000
   0.50000   0.75000   0.75000   0.75000

theta =

   0.50000
   0.50000

p_k =  1
r =

   0.035156
   0.000000

p_k =  1
r =

   0.035156
   0.011719

y =  1
