In [1]:
import numpy as np  
import pandas as pd  

In [2]:
# Load the Iris data set straight from the UCI repository.
# The raw file carries no header row, so the column labels are supplied here.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'Class',
]
dataset = pd.read_csv(url, header=None, names=names)

In [3]:
# Separate the frame by position: the first four columns are the model
# inputs, the fifth column holds the label to predict.
feature_frame = dataset.iloc[:, :4]
label_series = dataset.iloc[:, 4]

X = feature_frame.values
y = label_series.values

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [5]:
# Feature Scaling
# As in the PCA walkthrough, the features are standardized before LDA is applied.

from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only ...
sc = StandardScaler()
sc.fit(X_train)

# ... then apply that same fitted scaler to both splits.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [65]:
# Performing LDA
# It requires only four lines of code to perform LDA with Scikit-Learn. 
# The LinearDiscriminantAnalysis class of the sklearn.discriminant_analysis 
# module is used for this purpose.

In [6]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# LDA is supervised: fitting needs the class labels as well as the features.
# Two discriminant axes are requested.
lda = LDA(n_components=2)
lda.fit(X_train, y_train)

# Project both splits onto the discriminants learned from the training data.
X_train = lda.transform(X_train)
X_test = lda.transform(X_test)

The LinearDiscriminantAnalysis class is imported as LDA. 

Like PCA, we have to pass the value for the n_components parameter of the LDA, 
which refers to the number of linear discriminants that we want to retrieve. 

In this case we set n_components to 2, so the classifier is trained on the 
first two linear discriminants (the maximum available for a three-class problem). 

Finally we execute the fit and transform methods to actually retrieve the linear discriminants.

Notice, in case of LDA, the fit_transform method takes two parameters: 

the X_train and the y_train. 

However in the case of PCA, the fit_transform method only requires one parameter i.e. X_train. 

This reflects the fact that LDA takes the output class labels into account while 
selecting the linear discriminants, while PCA doesn't depend upon the output labels.

In [7]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest on the LDA-projected training data.
# fit() returns the estimator itself, so construction and training are chained.
classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
).fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = classifier.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix built from the true test labels and the predictions.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Fraction of test samples whose predicted label matches the true one.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy ' + str(accuracy))

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
Accuracy 1.0


In [69]:
# With two linear discriminants, the algorithm achieved an accuracy of 100%, 
# which is greater than the accuracy achieved with one principal component, which was 93.33%.