In [4]:
import numpy as np
from numpy.linalg import inv
import matplotlib.pyplot as plt

%matplotlib inline

In [222]:
# Linear classification models: least squares, perceptron, and discriminant analysis

In [279]:
class DiscriminantModel:
    """Common base class for the discriminant-function classifiers; adds no behaviour."""
    pass


class LeastSquares(DiscriminantModel):
    """Multi-class linear classifier fitted by least squares on one-hot targets.

    After ``fit`` the instance exposes:
        classes -- sorted unique labels found in ``y``
        K, N, p -- number of classes, samples and input features
        X       -- design matrix of shape (N, p + 1) with a leading column of ones
        T       -- one-hot target matrix of shape (K, N)
        W       -- weight matrix of shape (K, p + 1)
    """

    def __init__(self):
        pass

    def setUp(self, X, y):
        """Store the bias-augmented design matrix and the one-hot targets.

        Parameters
        ----------
        X : 2-D array of shape (N, p)
        y : 1-D array of N class labels
        """
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.K = len(self.classes)

        self.N, self.p = X.shape
        # Prepend a constant column so the first weight acts as the bias.
        self.X = np.insert(X, 0, 1, axis=1)

        # Row i is the 0/1 indicator of class i. The labels come from the `y`
        # argument, never from a notebook-level variable.
        self.T = np.zeros((self.K, len(y)))
        for i, c in enumerate(self.classes):
            self.T[i] = np.array(y == c, int)

    def fit(self, X, y):
        """Solve min_W ||X W^T - T^T||^2 and store the result in ``self.W``."""
        self.setUp(X, y)

        # lstsq returns the same solution as the normal equations when X^T X is
        # invertible and the minimum-norm solution when it is not.
        solution = np.linalg.lstsq(self.X, self.T.T, rcond=None)[0]
        self.W = solution.T

    def loss(self):
        """Return half the sum of squared residuals on the training data."""
        diff = np.matmul(self.W, self.X.T) - self.T
        # Same value as trace(diff diff^T) / 2.
        return np.sum(diff * diff) / 2

    def predict_discriminant_function(self, X):
        """Return the (n, K) matrix of discriminant values for the rows of ``X``.

        A column of ones is prepended when the number of columns of ``X``
        differs from the number of fitted weights per class.
        """
        n_w, p_w = self.W.shape
        n_x, p_x = X.shape
        if p_x != p_w:
            X = np.insert(X, 0, 1, axis=1)
        return np.matmul(self.W, X.T).T

    def predict(self, X):
        """Return the predicted class label for each row of ``X``.

        The argmax index is mapped back through ``self.classes``; for labels
        0..K-1 this equals the plain argmax.
        """
        disc = self.predict_discriminant_function(X)
        return self.classes[np.argmax(disc, 1)]
    
class Perceptron:
    """
    Binary case

    Linear classifier trained with batch updates on the perceptron criterion.
    Labels are expected to be 0 / 1: label 0 is mapped to target -1 and every
    other label is used as the target unchanged.
    """
    def __init__(self):
        pass

    def setUp(self, X, y):
        """Store the bias-augmented data, the +/-1 targets and the initial weights."""
        self.classes = np.unique(y)
        self.K = len(self.classes)

        self.y = y
        self.N, self.p = X.shape
        # Prepend a constant column so W[0] acts as the bias.
        self.X = np.insert(X, 0, 1, axis=1)
        # 0 -> -1, 1 -> +1.
        self.T = self.y - np.array(self.y == 0, int)

        # Initial weights: the mean of the augmented training rows.
        self.W = self.X.mean(0)

    def _find_misclassifications(self):
        """Return the ``np.where`` index tuple of training rows whose sign is wrong.

        A score of exactly zero counts as the positive class.
        """
        scores = np.matmul(self.W, self.X.T)
        res = np.array(scores >= 0, int) - np.array(scores < 0, int)
        misclassified_indices = np.where(res != self.T)
        return misclassified_indices

    def loss(self):
        """Perceptron criterion: minus the sum of score * target over misclassified rows."""
        misclassified_indices = self._find_misclassifications()
        X_part = np.matmul(self.W, self.X[misclassified_indices].T)
        T_part = self.T[misclassified_indices]

        return -np.sum(np.multiply(X_part, T_part))

    def fit(self, X, y, learning_rate = 0.001, max_iterations = 10000, print_every = 1000, tolerance = 10e-8,
            verbose = True):
        """Run batch perceptron updates until the loss is zero or the weights stop moving.

        Parameters
        ----------
        X : 2-D array of shape (N, p)
        y : 1-D array of N labels (0 / 1)
        learning_rate : step size of each batch update
        max_iterations : upper bound on the number of updates
        print_every : print the loss every this many iterations when ``verbose``
        tolerance : stop when the L1 size of one weight update falls below this
        verbose : whether to print progress and the stopping reason
        """
        self.setUp(X, y)
        for iteration in range(max_iterations):
            # Remember the weights of the previous step so the tolerance test
            # measures the size of THIS update, not the drift since initialisation.
            previous_W = self.W

            misclassified_indices = self._find_misclassifications()
            X_part = self.X[misclassified_indices]
            T_part = self.T[misclassified_indices]
            self.W = previous_W + learning_rate*np.matmul(X_part.T, T_part)

            loss = self.loss()
            if verbose and iteration % print_every == 0:
                print(loss)

            # Checked first: a perfect fit also produces a zero update, and
            # reporting it as "loss = 0" is the more informative message.
            if loss == 0:
                if verbose:
                    print('Loss = 0 reached')
                break

            # Sum of absolute changes; a signed sum could cancel to zero while
            # the weights are still moving.
            step = np.sum(abs(self.W - previous_W))
            if step < tolerance:
                if verbose:
                    print('Tolerance reached at iteration {} with weight change {}'.
                          format(iteration, step))
                break

    def predict_discriminant(self, X):
        """Return the score W . [1, x] for each row of ``X``.

        A column of ones is prepended when the number of columns of ``X``
        differs from the number of fitted weights.
        """
        n_w = self.W.shape[0]  # W is 1-D; compare against its length, not the shape tuple
        n_x, p_x = X.shape
        if p_x != n_w:
            X = np.insert(X, 0, 1, axis=1)
        return np.matmul(self.W, X.T)

    def predict(self, X):
        """Return 1 where the score is >= 0 and 0 elsewhere (same rule as training)."""
        disc = self.predict_discriminant(X)
        return np.array(disc >= 0, int)
    
    
class DiscriminantAnalysis:
    """Gaussian discriminant analysis with covariances blended between LDA and QDA.

    Each class covariance used for prediction is
    ``alpha * class_covariance + (1 - alpha) * pooled_covariance``.
    Class labels must be the integers 0, 1, ..., K-1.
    """

    def __init__(self, alpha = 1):
        # alpha = 1 : full QDA
        # alpha = 0 : LDA (pooled covariance matrices)
        # The default of 1 gives the quadratic discriminant.
        self.alpha = alpha

    def setUp(self, X, y):
        """Store the training data and validate the class labels.

        Raises
        ------
        ValueError
            If the sorted unique labels are not exactly 0, 1, ..., K-1.
        """
        self.y = np.asarray(y)
        self.X = np.asarray(X)
        self.classes = np.unique(self.y)  # already sorted

        # Reject as soon as ANY label differs from its expected position;
        # labels are later used directly as column indices.
        if (self.classes != np.arange(len(self.classes))).any():
            raise ValueError('Please make class labels increasing integers')

        self.K = len(self.classes)
        self.n, self.p = self.X.shape

    def _compute_k_data(self):
        """Compute the prior, mean, covariance and sample count of every class."""
        self.prior = {}
        self.means = {}
        self.covariances = {}
        self.Nk = {}

        for k in self.classes:
            # Use the data handed to fit(), never notebook-level variables.
            X_temp = self.X[self.y == k]
            self.Nk[k] = X_temp.shape[0]
            self.prior[k] = self.Nk[k]/self.n
            self.means[k] = X_temp.mean(0)
            # np.cov returns a 0-d array for a single feature; force (p, p).
            self.covariances[k] = np.cov(X_temp.T).reshape(self.p, self.p)

    def _compute_pooled_covariance(self):
        """Pool the class covariances: sum_k (N_k - 1) * S_k / (n - K)."""
        scatter = 0
        for k, v in self.covariances.items():
            # np.cov normalises by N_k - 1, so that factor recovers the
            # within-class scatter matrix.
            scatter = scatter + v*(self.Nk[k] - 1)
        self.pooled_covariance = scatter*(1/(self.n-self.K))

    def _compute_reg_covariance(self):
        """Replace each class covariance by its alpha-blend with the pooled one."""
        for k in self.covariances.keys():
            self.covariances[k] = self.alpha*self.covariances[k] + (1-self.alpha)*self.pooled_covariance

    def fit(self, X, y):
        """Estimate priors, means and regularised covariances from ``X`` and ``y``."""
        self.setUp(X,y)
        self._compute_k_data()
        self._compute_pooled_covariance()
        self._compute_reg_covariance()

    def _delta(self,k,x):
        """Return the Gaussian log-discriminant of class ``k`` at the point(s) ``x``.

        ``x`` may be a single point of shape (p,) or a batch of shape (n, p).
        """
        first = 0.5*np.log(np.linalg.det(self.covariances[k]))
        diff = (x - self.means[k])
        # Row-wise quadratic form diff^T Sigma^{-1} diff.
        second = 0.5*np.sum(diff*np.matmul(diff, np.linalg.inv(self.covariances[k])),-1)
        third = np.log(self.prior[k])

        return -first - second + third

    def predict_discriminant(self, X):
        """Return the (n, K) matrix of discriminant values for the rows of ``X``."""
        n,p = X.shape
        delta = np.zeros((n,self.K))

        for k in self.classes:
            delta[:,k] = self._delta(k,X)

        return delta

    def predict(self, X):
        """Return the class with the largest discriminant for each row of ``X``."""
        return np.argmax(self.predict_discriminant(X),1)
    


In [280]:
# generate data with many classes:
def generate_data(classes, method = 'hard', n_samples = 1000, seed = None):
    """Generate a 2-D toy classification set with roughly balanced classes.

    Points are drawn uniformly from [-1, 1]^2. Each point gets a scalar score
    (the sum over both coordinates of ``sin(x)**2 + tanh(x)`` for
    ``method == 'hard'``, or of the raw coordinates for any other value), and
    the scores are cut at their ``i / classes`` quantiles into ``classes`` bins.

    Parameters
    ----------
    classes : int
        Number of classes to produce.
    method : str
        ``'hard'`` for the non-linear score, anything else for the linear one.
    n_samples : int
        Number of points to draw.
    seed : int or None
        When given, points come from ``np.random.default_rng(seed)`` so the
        result is reproducible. When ``None``, the global NumPy random state is
        used.

    Returns
    -------
    X : ndarray of shape (n_samples, 2)
    Y : ndarray of shape (n_samples,) with integer labels in 0..classes-1
    """
    if seed is None:
        X = np.random.uniform(-1, 1, (n_samples, 2))
    else:
        X = np.random.default_rng(seed).uniform(-1, 1, (n_samples, 2))

    # random function
    if method == 'hard':
        score = np.sin(X)**2 + np.tanh(X)
    else:
        score = X
    score = np.sum(score, 1)

    # Interior cut points only: everything below the first one is class 0 and
    # everything at or above the last one is class ``classes - 1``, so no
    # explicit minimum / maximum sentinel is needed.
    thresholds = np.array([np.quantile(score, i/classes) for i in range(1, classes)], dtype=float)

    # side='right' counts the thresholds <= score, i.e. the index i of the bin
    # with thresholds[i-1] <= score < thresholds[i].
    Y = np.searchsorted(thresholds, score, side='right')
    return X, Y

In [281]:
# Least squares

In [282]:
# Two classes split by a linear score: fit least squares and show the
# fraction of training points that are classified correctly.
X, Y = generate_data(2, method='easy')
model = LeastSquares()
model.fit(X, Y)
np.mean(model.predict(X) == Y)

0.995

In [283]:
# Three classes split by the non-linear score: training accuracy of least squares.
X, Y = generate_data(3, method='hard')
model = LeastSquares()
model.fit(X, Y)
np.mean(model.predict(X) == Y)

0.643

In [284]:
# Perceptron (binary case only): training accuracy on the linear two-class data.
X, Y = generate_data(2, method='easy')
model = Perceptron()
model.fit(X, Y, verbose=False)
np.mean(model.predict(X) == Y)

1.0

In [285]:
# Perceptron on the non-linear two-class data: training accuracy.
X, Y = generate_data(2, method='hard')
model = Perceptron()
model.fit(X, Y, verbose=False)
np.mean(model.predict(X) == Y)

0.863

In [286]:
# Discriminant analysis (DA): training accuracy on the linear two-class data
# for 20 evenly spaced values of alpha between 0 and 1.
X, Y = generate_data(2, method='easy')

alpha_grid = np.linspace(0, 1, 20)
result = {}
for alpha in alpha_grid:
    model = DiscriminantAnalysis(alpha=alpha)
    model.fit(X, Y)
    # Accuracy is stored under the alpha that produced it.
    result[alpha] = np.mean(model.predict(X) == Y)

result

{0.0: 0.994,
 0.05263157894736842: 0.993,
 0.10526315789473684: 0.993,
 0.15789473684210525: 0.992,
 0.21052631578947367: 0.992,
 0.2631578947368421: 0.992,
 0.3157894736842105: 0.992,
 0.3684210526315789: 0.992,
 0.42105263157894735: 0.992,
 0.47368421052631576: 0.992,
 0.5263157894736842: 0.992,
 0.5789473684210527: 0.992,
 0.631578947368421: 0.992,
 0.6842105263157894: 0.992,
 0.7368421052631579: 0.992,
 0.7894736842105263: 0.992,
 0.8421052631578947: 0.991,
 0.894736842105263: 0.991,
 0.9473684210526315: 0.991,
 1.0: 0.991}

In [288]:
# Discriminant analysis on the non-linear three-class data: training accuracy
# for 20 evenly spaced values of alpha between 0 and 1.
X, Y = generate_data(3, method='hard')

alpha_grid = np.linspace(0, 1, 20)
result = {}
for alpha in alpha_grid:
    model = DiscriminantAnalysis(alpha=alpha)
    model.fit(X, Y)
    # Accuracy is stored under the alpha that produced it.
    result[alpha] = np.mean(model.predict(X) == Y)
result

{0.0: 0.741,
 0.05263157894736842: 0.748,
 0.10526315789473684: 0.75,
 0.15789473684210525: 0.751,
 0.21052631578947367: 0.755,
 0.2631578947368421: 0.757,
 0.3157894736842105: 0.759,
 0.3684210526315789: 0.76,
 0.42105263157894735: 0.76,
 0.47368421052631576: 0.764,
 0.5263157894736842: 0.764,
 0.5789473684210527: 0.766,
 0.631578947368421: 0.771,
 0.6842105263157894: 0.772,
 0.7368421052631579: 0.775,
 0.7894736842105263: 0.778,
 0.8421052631578947: 0.78,
 0.894736842105263: 0.784,
 0.9473684210526315: 0.787,
 1.0: 0.792}