In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal

In [2]:
# Load the wine data: column 0 is used as the class label and columns 1..13
# as the 13 features named below.
data = np.loadtxt('./winery-multivariate/wine.data.txt', delimiter=',')
featurenames = ['Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash','Magnesium', 'Total phenols',
                'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue',
                'OD280/OD315 of diluted wines', 'Proline']

# Fixed seed so the shuffled train/test split is reproducible.
np.random.seed(0)
# Shuffle every row actually present in the file instead of assuming 178 rows.
perm = np.random.permutation(data.shape[0])
train_size = 130
# Exclusive end column of the feature slice (columns 1..13), not a feature count.
feature_size = 14
trainx = data[perm[0:train_size],1:feature_size]
trainy = data[perm[0:train_size],0]
testx = data[perm[train_size:], 1:feature_size]
testy = data[perm[train_size:],0]

In [3]:
# Sanity check: show the shape of each train/test array on one line.
print(*(split.shape for split in (trainx, trainy, testx, testy)))

(130, 13) (130,) (48, 13) (48,)


In [4]:
def fit_generative_model(x, y, num_class=3):
    """Fit one full-covariance Gaussian per class, plus class priors.

    Labels are matched against the integers 1..num_class. Row 0 of every
    returned array is an unused placeholder left at zero, so the arrays can
    be indexed directly by label.

    Parameters
    ----------
    x : array-like, shape (n, d)
        Feature matrix, one sample per row.
    y : array-like, shape (n,)
        Class label of each sample.
    num_class : int, optional
        Number of classes to fit (default 3).

    Returns
    -------
    mu : ndarray, shape (num_class + 1, d)
        Per-class feature means.
    sigma : ndarray, shape (num_class + 1, d, d)
        Per-class covariance matrices, normalised by N (bias=True).
    pi : ndarray, shape (num_class + 1,)
        Fraction of samples carrying each label.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    k = num_class
    d = x.shape[1]
    mu = np.zeros((k + 1, d))
    sigma = np.zeros((k + 1, d, d))
    pi = np.zeros(k + 1)
    for label in range(1, k + 1):
        indices = (y == label)
        members = x[indices]
        mu[label] = members.mean(axis=0)
        sigma[label] = np.cov(members, rowvar=False, bias=True)
        # Use the array's own reduction rather than the builtin sum(), which
        # would iterate over the boolean mask element by element.
        pi[label] = indices.sum() / len(y)
    return mu, sigma, pi

In [5]:
# Estimate per-class means, covariances and class priors from the training split.
mu, sigma, pi = fit_generative_model(trainx, trainy)

In [9]:
# Scratch check of NumPy indexing on a small 3x3x3 array: slices keep the
# axis (length 1), while index lists select several entries along an axis.
a3 = np.array([[[10 * (i + 1) + 3 * j + k for k in range(3)]
                for j in range(3)]
               for i in range(3)])
f1 = [1, 2]

# Basic slicing on the last two axes.
m1 = a3[:, 1:2, 1:2]
m2 = a3[:, 1:2, 1:2]
# List indexing applied one axis at a time.
m3 = a3[:, [1, 2], :]
m4 = m3[:, :, [1, 2]]

print(a3.shape)
print(m1.shape)
print(m2.shape)
print(m2)
print(m3.shape)
print(m4.shape)

(3, 3, 3)
(3, 1, 1)
(3, 1, 1)
[[[14]]

 [[24]]

 [[34]]]
(3, 2, 3)
(3, 2, 2)


In [12]:
# Scratch check: pick a feature subset out of the fitted parameters, first
# along the rows of each covariance matrix and then along its columns.
print(*(arr.shape for arr in (mu, sigma, pi)))

f1 = [2, 4, 6]
print('mu subset = \n', mu[:, f1])
s1 = sigma[:, f1]
print('s1 = \n', s1.shape)
s1 = s1[..., f1]
print('s2 shape: ', s1.shape)
print('cov')
print(type(sigma))

(4, 13) (4, 13, 13) (4,)
mu subset = 
 [[  0.           0.           0.        ]
 [  2.42790698 105.8372093    2.99627907]
 [  2.22703704  95.83333333   2.10907407]
 [  2.40090909  99.03030303   0.75727273]]
s1 = 
 (4, 3, 13)
s2 shape:  (4, 3, 3)
cov
<class 'numpy.ndarray'>


In [16]:
def test_model(mu, sigma, pi, features, tx, ty):
    """Classify `tx` with the fitted Gaussians restricted to `features` and print the error count.

    Each sample is assigned the label maximising
    log(pi[label]) + log N(x; mu[label], sigma[label]), evaluated on the
    selected feature columns only.

    Parameters
    ----------
    mu, sigma, pi : ndarray
        Model parameters indexed by label, with index 0 unused (the layout
        returned by fit_generative_model).
    features : list of int
        Column indices of the features to use; also used to look up names
        in the module-level `featurenames`.
    tx : ndarray, shape (n, d)
        Samples to classify.
    ty : ndarray, shape (n,)
        True labels of the samples.

    Returns
    -------
    None
        The selected feature names and the error count are printed.
    """
    # Index 0 of pi is a placeholder, so the class count is one less than its length.
    num_class = len(pi) - 1
    sub_mu = mu[:, features]
    # Restrict each covariance matrix to the chosen rows, then the chosen columns.
    sub_sigma = sigma[:, features][:, :, features]
    samples = tx[:, features]
    score = np.zeros((len(ty), num_class + 1))
    for label in range(1, num_class + 1):
        # logpdf accepts a batch of points, so score all samples for this
        # class in one call instead of looping over them in Python.
        score[:, label] = np.log(pi[label]) + multivariate_normal.logpdf(
            samples, mean=sub_mu[label], cov=sub_sigma[label])
    # Column 0 is the placeholder; shift argmax back to the 1-based labels.
    predictions = np.argmax(score[:, 1:], axis=1) + 1
    errors = np.sum(predictions != ty)
    print("Test error using features: ")
    for f in features:
        print("'" + featurenames[f] + "'" + " ")
    print("Errors: " + str(errors) + "/" + str(len(ty)))

In [17]:
# Evaluate on the held-out split using only feature columns 2, 4 and 6
# ('Ash', 'Magnesium' and 'Flavanoids' in featurenames).
test_model(mu, sigma, pi, [2,4,6], testx, testy)

Test error using features: 
'Ash' 
'Magnesium' 
'Flavanoids' 
Errors: 7/48


In [18]:
# Use single feature 'Ash'