In [5]:
from scipy import linalg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors
import pickle
from random import sample
import matplotlib.lines as mlines

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Two-class colormap for the probability backgrounds drawn below:
# light red at 0 (RGB 1, .7, .7) blending to light blue at 1 (RGB .7, .7, 1).
cmap = colors.LinearSegmentedColormap(
    'red_blue_classes',
    {'red': [(0, 1, 1), (1, 0.7, 0.7)],
     'green': [(0, 0.7, 0.7), (1, 0.7, 0.7)],
     'blue': [(0, 0.7, 0.7), (1, 1, 1)]})
# plt.cm.register_cmap was deprecated in Matplotlib 3.7 and removed in 3.9, so
# prefer the colormap registry when it exists. force=True keeps this cell
# re-runnable: registering the same name twice would otherwise raise.
if hasattr(mpl, 'colormaps'):
    mpl.colormaps.register(cmap, force=True)
else:
    # Older Matplotlib without the registry object.
    plt.cm.register_cmap(cmap=cmap)


In [2]:
def dataset_fixed_cov():
    '''Build a two-class 2-D toy set whose classes share one covariance.

    Both classes are standard-normal draws pushed through the same linear
    map; the second class is additionally shifted by (1, 1).  The global
    NumPy RNG is re-seeded with 0 on every call, so the output is always
    the same.

    Returns
    -------
    X : ndarray, shape (600, 2)
        Class-0 rows first, then class-1 rows.
    y : ndarray, shape (600,)
        300 zeros followed by 300 ones (float).
    '''
    samples_per_class, n_features = 300, 2
    np.random.seed(0)
    mixing = np.array([[0., -0.23], [0.83, .23]])

    # Draw order matters for reproducibility: class 0 first, then class 1.
    class0 = np.random.randn(samples_per_class, n_features).dot(mixing)
    class1 = np.random.randn(samples_per_class, n_features).dot(mixing)
    class1 = class1 + np.array([1, 1])

    X = np.vstack((class0, class1))
    y = np.repeat([0., 1.], samples_per_class)
    return X, y


def dataset_cov():
    '''Build a two-class 2-D toy set whose classes have different covariances.

    Class 0 is standard-normal noise mapped through ``C``; class 1 uses the
    transpose ``C.T`` and a zero shift, so both classes are centred on the
    origin.  The global NumPy RNG is re-seeded with 0 on every call.

    Returns
    -------
    X : ndarray, shape (600, 2)
        Class-0 rows first, then class-1 rows.
    y : ndarray, shape (600,)
        300 zeros followed by 300 ones (float).
    '''
    samples_per_class, n_features = 300, 2
    np.random.seed(0)

    # Reference: an earlier variant used C = [[0., -1.], [2.5, .7]] * 2. with
    # the second class shifted by [1, 4].  The original inline note tagged the
    # current setting with "0.9" (presumably an accuracy figure -- unverified).
    mixing = np.array([[0., -1.], [2.5, 1.7]]) * 2.
    class1_shift = np.array([0, 0])

    # Draw order matters for reproducibility: class 0 first, then class 1.
    class0 = np.random.randn(samples_per_class, n_features).dot(mixing)
    class1 = np.random.randn(samples_per_class, n_features).dot(mixing.T)
    class1 = class1 + class1_shift

    X = np.vstack((class0, class1))
    y = np.repeat([0., 1.], samples_per_class)
    return X, y

def plot_hyperplane_data(lda, X, y, X_test, y_pred, y_truth):
    '''Plot a classifier's decision regions over the training and test data.

    Draws two side-by-side panels on the current pyplot figure:

    * left  -- training points (class 0 red, class 1 blue) over the
      class-1 probability surface, with the 0.5 contour in white;
    * right -- test points over the same surface, with every wrongly
      predicted sample marked by a black cross, and the accuracy in the title.

    The probability surface is evaluated once on a fixed 600 x 300 grid
    covering [-20, 20] x [-20, 20] and reused for both panels.

    Parameters
    ----------
    lda : object
        Fitted classifier exposing ``predict_proba``; column 1 of its output
        is used as the class-1 probability.
    X, y : array-like
        Training points (two columns are plotted) and their 0/1 labels.
    X_test : array-like
        Test points.
    y_pred, y_truth : array-like
        Predicted and true labels for ``X_test``.  They must support
        element-wise ``==`` and boolean-mask indexing (e.g. NumPy arrays).

    Returns
    -------
    splot
        The axes of the right-hand (test) panel.
    accuracy : float
        Fraction of test samples with ``y_pred == y_truth`` (``nan`` if the
        test set is empty).
    '''
    lim_lo, lim_hi = -20, 20
    nx, ny = 600, 300

    # ---- left panel: training data -------------------------------------
    plt.subplot(1, 2, 1)
    plt.title('Training, %i samples' % X.shape[0])
    plt.ylabel('Data with\n fixed covariance')
    X_neg = X[y == 0]
    X_pos = X[y == 1]

    plt.scatter(X_neg[:, 0], X_neg[:, 1], marker='.', color='red')
    plt.scatter(X_pos[:, 0], X_pos[:, 1], marker='.', color='blue')

    plt.xlim(lim_lo, lim_hi)
    plt.ylim(lim_lo, lim_hi)

    # Class-1 probability on a regular grid; this is the only predict_proba
    # call -- the right panel reuses xx / yy / Z instead of recomputing.
    xx, yy = np.meshgrid(np.linspace(lim_lo, lim_hi, nx),
                         np.linspace(lim_lo, lim_hi, ny))
    Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:, 1].reshape(xx.shape)
    plt.pcolormesh(xx, yy, Z, cmap='red_blue_classes',
                   norm=colors.Normalize(0., 1.), zorder=0)
    plt.contour(xx, yy, Z, [0.5], linewidths=2., colors='white')

    # ---- right panel: test data ----------------------------------------
    splot = plt.subplot(1, 2, 2)
    plt.ylabel('Data with\n fixed covariance')

    # Mask of correctly classified test samples (either class), and the
    # resulting accuracy, computed once and used for both title and return.
    correct = (y_truth == y_pred)
    accuracy = np.mean(correct)

    correct0, correct1 = correct[y_truth == 0], correct[y_truth == 1]
    X0, X1 = X_test[y_truth == 0], X_test[y_truth == 1]
    X0_ok, X0_bad = X0[correct0], X0[~correct0]
    X1_ok, X1_bad = X1[correct1], X1[~correct1]

    # class 0: dots for correct predictions, crosses for errors
    plt.scatter(X0_ok[:, 0], X0_ok[:, 1], marker='.', color='red')
    plt.scatter(X0_bad[:, 0], X0_bad[:, 1], marker='x',
                s=20, color='black')

    # class 1: dots for correct predictions, crosses for errors
    plt.scatter(X1_ok[:, 0], X1_ok[:, 1], marker='.', color='blue')
    plt.scatter(X1_bad[:, 0], X1_bad[:, 1], marker='x',
                s=20, color='black')

    # class 0 and 1 : areas (same surface as the training panel)
    plt.xlim(lim_lo, lim_hi)
    plt.ylim(lim_lo, lim_hi)
    plt.pcolormesh(xx, yy, Z, cmap='red_blue_classes',
                   norm=colors.Normalize(0., 1.), zorder=0)
    plt.contour(xx, yy, Z, [0.5], linewidths=2., colors='white')

    plt.title('Testing, {0:.2%} Accuracy'.format(accuracy))

    # Proxy artists for the legend: blue / red dots are correctly classified
    # class-1 / class-0 samples, black crosses are errors of either class.
    TPs = mlines.Line2D([], [], color='blue', marker='.', linestyle='None',
                        markersize=5, label='True Positive')
    TNs = mlines.Line2D([], [], color='red', marker='.', linestyle='None',
                        markersize=5, label='True Negatives')
    misclassified = mlines.Line2D([], [], color='black', marker='x',
                                  linestyle='None', markersize=5,
                                  label='missclassified')
    plt.legend(handles=[TPs, TNs, misclassified], loc='lower right')

    return splot, accuracy

In [3]:
def plot_ellipse(splot, mean, cov, color):
    '''Draw the covariance ellipse of a 2-D Gaussian on ``splot``.

    The ellipse is centred on ``mean``; its two axes follow the eigenvectors
    of ``cov`` and have total lengths ``2 * sqrt(eigenvalue)``, i.e. each
    semi-axis is one standard deviation long.

    Parameters
    ----------
    splot : matplotlib axes
        Axes to draw on.  As a side effect its x and y ticks are removed.
    mean : sequence of two floats
        Centre of the ellipse.
    cov : array-like, shape (2, 2)
        Symmetric covariance matrix.
    color : matplotlib color
        Face color of the (20 % opaque) ellipse; the edge is black.
    '''
    # eigh returns eigenvalues in ascending order and the matching
    # eigenvectors as COLUMNS, already normalised to unit length.
    eigvals, eigvecs = linalg.eigh(cov)
    first_axis = eigvecs[:, 0]

    # Orientation of the axis that belongs to eigvals[0].  arctan2 avoids the
    # division by zero that arctan(y / x) hits for axis-aligned covariances.
    angle = np.degrees(np.arctan2(first_axis[1], first_axis[0]))

    # width / height are full axis lengths, so 2 * sqrt(eigenvalue) places the
    # outline one standard deviation from the mean along each principal axis.
    # `angle` is passed by keyword: newer Matplotlib no longer accepts it
    # positionally.
    ell = mpl.patches.Ellipse(mean, 2 * eigvals[0] ** 0.5, 2 * eigvals[1] ** 0.5,
                              angle=180 + angle, facecolor=color,
                              edgecolor='black', linewidth=2)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.2)
    splot.add_artist(ell)
    splot.set_xticks(())
    splot.set_yticks(())


def plot_lda_cov(lda, splot):
    '''Draw one covariance ellipse per class for an LDA-style model.

    Both ellipses use the same matrix ``lda.covariance_`` and are centred on
    ``lda.means_[0]`` (red) and ``lda.means_[1]`` (blue).
    NOTE(review): presumably the estimator must have been fitted so that
    ``covariance_`` is populated -- confirm against the caller.
    '''
    for class_idx, ellipse_color in enumerate(('red', 'blue')):
        plot_ellipse(splot, lda.means_[class_idx], lda.covariance_,
                     ellipse_color)


def plot_qda_cov(qda, splot):
    '''Draw one covariance ellipse per class for a QDA-style model.

    Class ``i`` uses its own matrix ``qda.covariance_[i]`` and centre
    ``qda.means_[i]``; class 0 is drawn in red, class 1 in blue.
    NOTE(review): presumably the estimator must have been fitted so that
    ``covariance_`` is populated -- confirm against the caller.
    '''
    for class_idx, ellipse_color in enumerate(('red', 'blue')):
        plot_ellipse(splot, qda.means_[class_idx],
                     qda.covariance_[class_idx], ellipse_color)


In [4]:
def _plot_labelled_points(X, y, title):
    '''Scatter 2-D points by binary label on the current pyplot axes.'''
    plt.title(title)
    plt.ylabel('Data with\n fixed covariance')
    X_neg = X[y == 0]
    X_pos = X[y == 1]

    plt.scatter(X_neg[:, 0], X_neg[:, 1], marker='.', color='red')
    plt.scatter(X_pos[:, 0], X_pos[:, 1], marker='.', color='blue')

    # Fixed window so both panels share the same scale.
    plt.xlim(-20, 20)
    plt.ylim(-20, 20)

    # Proxy artists: the legend shows one dot per class rather than one
    # entry per scatter call.
    positives = mlines.Line2D([], [], color='blue', marker='.',
                              linestyle='None', markersize=5,
                              label='Positives')
    negatives = mlines.Line2D([], [], color='red', marker='.',
                              linestyle='None', markersize=5,
                              label='Negatives')
    plt.legend(handles=[positives, negatives], loc='lower right')


def plot_raw_data(X, y, X_test, y_test):
    '''Plot the raw training and test points side by side.

    Draws a 1 x 2 grid on the current pyplot figure: training data on the
    left, test data on the right.  In both panels label 0 is red and label 1
    is blue, and the view is fixed to [-20, 20] on both axes.

    Parameters
    ----------
    X, y : array-like
        Training points (two columns are plotted) and their 0/1 labels.
    X_test, y_test : array-like
        Test points and their 0/1 labels.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, so callers can chain e.g.
        ``.show()`` on the result.
    '''
    plt.subplot(1, 2, 1)
    _plot_labelled_points(X, y, 'Training, %i samples' % X.shape[0])

    plt.subplot(1, 2, 2)
    _plot_labelled_points(X_test, y_test,
                          'Testing, %i samples' % X_test.shape[0])

    return plt


In [None]:
# Leftover smoke-test cell: prints a fixed greeting and uses nothing defined
# in the cells above.
print('hello world')