In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, multivariate_normal
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, interactive, fixed, interact_manual, IntSlider

In [4]:
# Load the wine data: column 0 is the class label, columns 1-13 are the features.
data = np.loadtxt('./wine.data.txt', delimiter=',')
featurenames = [
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# Shuffle the rows with a fixed seed so the train/test split is reproducible.
train_size = 130
np.random.seed(0)
perm = np.random.permutation(data.shape[0])
train_idx, test_idx = perm[:train_size], perm[train_size:]

# The first `train_size` shuffled rows form the training set, the rest the test set.
trainx, trainy = data[train_idx, 1:14], data[train_idx, 0]
testx, testy = data[test_idx, 1:14], data[test_idx, 0]

In [5]:
# Sanity check: show the shapes of the train/test features and labels on one line.
print(*(arr.shape for arr in (trainx, trainy, testx, testy)))

(130, 13) (130,) (48, 13) (48,)


In [8]:
#help(np.cov)
def fit_guassian(x, features):
    """Fit a Gaussian to the selected feature columns of ``x``.

    Parameters
    ----------
    x : 2-D array with one sample per row.
    features : list of column indices to use.

    Returns
    -------
    (mu, covar) : the per-column mean and the covariance matrix of the
        selected columns. The covariance is normalised by N rather than
        N - 1 (``bias`` is enabled).
    """
    selected = x[:, features]
    mean_vec = selected.mean(axis=0)
    # Columns are variables (rowvar off); divide by N, not N - 1 (bias on).
    cov_mat = np.cov(selected, rowvar=False, bias=True)
    return mean_vec, cov_mat

In [10]:
# Fit a 2-D Gaussian to features 0 and 6 of the class-1 training points.
f1, f2 = 0, 6
label = 1
class_rows = trainx[trainy == label, :]
mu, covar = fit_guassian(class_rows, [f1, f2])
print("Mean:", mu)
print("Covariance matrix:", covar)

Mean: [13.78534884  2.99627907]
Covariance matrix: [[0.23325279 0.07526874]
 [0.07526874 0.15240941]]


In [11]:
def find_range(x):
    """Return ``(lower, upper)`` bounds that enclose ``x`` with extra margin.

    The margin added on each side is 20% of the spread ``max(x) - min(x)``.
    """
    lo, hi = min(x), max(x)
    margin = 0.2 * (hi - lo)
    return lo - margin, hi + margin

In [12]:
def plot_contours(mu, cov, x1g, x2g, col):
    """Draw three log-density contours of a 2-D Gaussian on the current figure.

    Parameters
    ----------
    mu : length-2 mean vector.
    cov : 2x2 covariance matrix.
    x1g, x2g : 1-D grids of coordinates along the horizontal and vertical
        axes. They may have different lengths.
    col : matplotlib colour used for the contour lines.

    The contours are drawn where the log-density is 1, 2 and 3 below its
    value at the mean.
    """
    rv = multivariate_normal(mean=mu, cov=cov)

    # Evaluate the log-density on the whole grid in one call. With meshgrid's
    # default 'xy' indexing the result has shape (len(x2g), len(x1g)): rows
    # follow x2g and columns follow x1g, which is the layout contour expects.
    g1, g2 = np.meshgrid(x1g, x2g)
    z = np.reshape(rv.logpdf(np.dstack((g1, g2))), g1.shape)

    # Log-density at the mean: -0.5 * (d*log(2*pi) + log|cov|) with d = 2.
    sign, logdet = np.linalg.slogdet(cov)
    peak = -0.5 * (2 * np.log(2 * np.pi) + sign * logdet)

    # contour requires increasing levels, hence the 3, 2, 1 ordering.
    levels = [peak - offset for offset in (3, 2, 1)]
    plt.contour(x1g, x2g, z, levels=levels, colors=col,
                linewidths=2.0, linestyles='solid')

In [14]:
@interact_manual(f1=IntSlider(0,0,12,1), f2=IntSlider(6,0,12,1), label=IntSlider(1,1,3,1))
def two_features_plot(f1, f2, label):
    """Scatter one class over two features and overlay its fitted Gaussian.

    Parameters
    ----------
    f1, f2 : column indices into ``trainx``; they must differ.
    label : class label whose training points are plotted.
    """
    if f1 == f2:
        print("Please choose different features for f1 and f2.")
        return

    # Select the rows of the requested class once and reuse them below.
    class_points = trainx[trainy == label, :]
    xs = class_points[:, f1]
    ys = class_points[:, f2]

    # Axis limits: the data range padded by find_range.
    x1_lower, x1_upper = find_range(xs)
    x2_lower, x2_upper = find_range(ys)
    plt.xlim(x1_lower, x1_upper)
    plt.ylim(x2_lower, x2_upper)
    plt.plot(xs, ys, 'ro')

    # Grid on which the Gaussian's contours are evaluated.
    res = 200
    x1g = np.linspace(x1_lower, x1_upper, res)
    x2g = np.linspace(x2_lower, x2_upper, res)

    mu, cov = fit_guassian(class_points, [f1, f2])
    plot_contours(mu, cov, x1g, x2g, 'k')

    plt.xlabel(featurenames[f1], fontsize=14, color='red')
    plt.ylabel(featurenames[f2], fontsize=14, color='red')
    plt.title('Class ' + str(label), fontsize=14, color='blue')
    plt.show()

In [18]:
def fit_generative_model(x, y, features, k=3):
    """Fit one Gaussian per class together with the class prior weights.

    Parameters
    ----------
    x : 2-D array with one sample per row.
    y : 1-D array of class labels taking values 1..k.
    features : list of column indices of ``x`` to model.
    k : number of classes (default 3).

    Returns
    -------
    mu : array of shape (k+1, d) holding the per-class means.
    covar : array of shape (k+1, d, d) holding the per-class covariances.
    pi : array of shape (k+1,) holding the fraction of samples in each class.

    Here ``d = len(features)``. Index 0 of every returned array is left at
    zero so the arrays can be indexed directly by the 1-based class label.
    """
    d = len(features)
    mu = np.zeros((k + 1, d))
    covar = np.zeros((k + 1, d, d))
    pi = np.zeros(k + 1)
    n_samples = float(len(y))
    for label in range(1, k + 1):
        indices = (y == label)
        mu[label, :], covar[label, :, :] = fit_guassian(x[indices, :], features)
        # Class prior: fraction of the samples that carry this label.
        pi[label] = np.count_nonzero(indices) / n_samples
    return mu, covar, pi

In [20]:
@interact_manual(f1=IntSlider(0,0,12,1), f2=IntSlider(6,0,12,1))
def three_class_plot(f1, f2):
    """Scatter all three classes over two features with their fitted Gaussians.

    Parameters
    ----------
    f1, f2 : column indices into ``trainx``; they must differ.
    """
    if f1 == f2:
        print("Please choose different features for f1 and f2.")
        return

    # Axis limits cover every training point, padded by find_range.
    x1_lower, x1_upper = find_range(trainx[:, f1])
    x2_lower, x2_upper = find_range(trainx[:, f2])
    plt.xlim(x1_lower, x1_upper)
    plt.ylim(x2_lower, x2_upper)

    # One colour per class; labels are 1-based, so class `label` uses colors[label-1].
    colors = ['r', 'k', 'g']
    for label, color in enumerate(colors, start=1):
        in_class = (trainy == label)
        plt.plot(trainx[in_class, f1], trainx[in_class, f2],
                 marker='o', ls='None', c=color)

    # Grid on which each class's contours are evaluated.
    res = 200
    x1g = np.linspace(x1_lower, x1_upper, res)
    x2g = np.linspace(x2_lower, x2_upper, res)

    mu, covar, pi = fit_generative_model(trainx, trainy, [f1, f2])
    for label, color in enumerate(colors, start=1):
        plot_contours(mu[label, :], covar[label, :, :], x1g, x2g, color)

    plt.xlabel(featurenames[f1], fontsize=14, color='red')
    plt.ylabel(featurenames[f2], fontsize=14, color='red')
    # Title previously read 'Wind data'; the data set is the wine data.
    plt.title('Wine data', fontsize=14, color='blue')
    plt.show()