# ML Assignment 1 : Fischer’s Linear Discriminant
### Anirudh Agrawal: 2018A7PS0099H | Aviral Agarwal: 2018A7PS0192H | Vikramjeet Das: 2018A7PS0280H

In [1]:
import numpy as np
import pandas as pd
import random
import math
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm

%matplotlib inline

In [2]:
ds = pd.read_csv('dataset_FLD.csv', header=None)
X = ds[[0,1,2]]
y = ds[3]

In [3]:
class LDA():
    
    def __init__(self):
        self.w = None
        self.threshold = None
        self.less_class = None
        self.more_class = None
        self.X_transformed0 = None
        self.X_transformed1 = None
    
    def fit(self, X, y):
        
        X0 = X[y == 0]
        X1 = X[y == 1]
        
        mean0 = X0.mean(axis=0)
        mean1 = X1.mean(axis=0)
        diff_means = mean0 - mean1
        
        N = np.shape(X)[0]
        
        sw0 = np.array((1 / N) * (X0 - X0.mean(axis=0)).T.dot(X0 - X0.mean(axis=0)))
        sw1 = np.array((1 / N) * (X1 - X1.mean(axis=0)).T.dot(X1 - X1.mean(axis=0)))
        sw = sw0 + sw1
        
        self.w = np.linalg.pinv(sw).dot(diff_means)
        X_t = self.transform(X)
        self.fit_normal_dist(X, y)
        
    def fit_transform(self, X, y):
        self.fit(X, y)
        X_transform = self.transform(X)
        return X_transform
    
    def transform(self, X):
        # Project data onto vector
        X_transform = X.dot(self.w)
        return X_transform

    def fit_normal_dist(self, X, y):
        X_transformed = self.transform(X)
        self.X_transformed0 = X_transformed[y == 0]
        self.X_transformed1 = X_transformed[y == 1]

        mean_transformed0 = self.X_transformed0.mean(axis=0)
        mean_transformed1 = self.X_transformed1.mean(axis=0)

        variation_transformed0 = np.var(self.X_transformed0)
        variation_transformed1 = np.var(self.X_transformed1)

        a = 0.5 * ((1/variation_transformed0) - (1/variation_transformed1))
        b = 2 * ((mean_transformed1/variation_transformed1) - (mean_transformed0/variation_transformed0))
        c = (((mean_transformed0 ** 2) / variation_transformed0) - ((mean_transformed1 ** 2) / variation_transformed1)) + np.log(variation_transformed0 / variation_transformed1)

        D = np.sqrt((b ** 2) - (4 * a * c))

        root1 = ((-b) + D) / (2 * a)
        root2 = ((-b) - D) / (2 * a) 

        if((root1 > mean_transformed1 and root1 < mean_transformed0) or (root1 > mean_transformed0 and root1 < mean_transformed1)):
            self.threshold = root1
        elif((root2 > mean_transformed1 and root2 < mean_transformed0) or (root2 > mean_transformed0 and root2 < mean_transformed1)):
            self.threshold = root2

        if((mean_transformed0 <= self.threshold) and (mean_transformed1 >= self.threshold)):
            self.less_class = 0
            self.more_class = 1
        if((mean_transformed1 <= self.threshold) and (mean_transformed0 >= self.threshold)):
            self.less_class = 1
            self.more_class = 0

    def evaluate(self, X, y):
        y_pred = self.transform(X)
        more_idx = np.where(y_pred > self.threshold)[0]
        less_idx = np.where(y_pred <= self.threshold)[0]
        y_pred[more_idx] = self.more_class
        y_pred[less_idx] = self.less_class
        return (np.sum(y == y_pred) / len(y))

In [4]:
l = LDA()
l.fit_transform(X, y)
print(f'Accuracy is: {l.evaluate(X, y)}')

NameError: name 'X_transformed' is not defined

In [None]:
print(f'The discriminant vector in 1-D is {l.threshold} a single point.') # this is the discriminant vector(a single point) in 1-D

In [None]:
def normal_pdf(x, mu, var):
    return (1 / np.sqrt(2 * np.pi * var)) * np.exp(-0.5 * ((x - mu)**2) / var)

In [None]:
from matplotlib.lines import Line2D
fig = plt.figure(figsize=(20, 10))
plt.ylim(-.01, 0.15)

lines = [Line2D([0], [0], color='red', linewidth=3, linestyle='-'),
        Line2D([0], [0], color='blue', linewidth=3, linestyle='-')]
labels = ['0', '1']
plt.legend(lines, labels)

plt.plot(l.X_transformed0, np.zeros_like(l.X_transformed0), 'x', color='red')
plt.plot(l.X_transformed1, np.zeros_like(l.X_transformed1), 'o', color='blue')
x_plot = np.linspace(-50, 50, 10000)
y_0 = normal_pdf(x_plot, np.mean(l.X_transformed0), np.var(l.X_transformed0))
y_1 = normal_pdf(x_plot, np.mean(l.X_transformed1), np.var(l.X_transformed1))
plt.fill_between(x_plot, y_0, color='red', alpha=0.2)
plt.fill_between(x_plot, y_1, color='blue', alpha=0.2)
plt.plot([l.threshold for x in x_plot], x_plot, color='green')

In [None]:
fig = plt.figure(figsize=(20, 10))
ax3D=fig.add_subplot(121, projection='3d')
collection = ax3D.scatter(X[0], X[1], X[2], c=y, vmin=min(y), vmax=max(y),marker='o', cmap=cm.Spectral)

ax3D.view_init(5, 130)

transformation = l.w
x_plot = np.linspace(-10,10,1000)
y_plot = np.linspace(-3,3,1000)

X_plot,Y_plot = np.meshgrid(x_plot,y_plot)
Z_plot = (-transformation[0]*X_plot - transformation[1]*Y_plot + l.threshold) / transformation[2]

ax3D.plot_surface(X_plot, Y_plot, Z_plot, alpha=0.2, color='green')

lines = [Line2D([0], [0], color='red', linewidth=3, linestyle='-'),
        Line2D([0], [0], color='blue', linewidth=3, linestyle='-')]
labels = ['0', '1']
plt.legend(lines, labels)