# Linear Classifiers: An Overview

# Setup

In [44]:
# Import necessary modules
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

# Load the data and split it into train and test sets.
# The file is read with header=None, so its first line is treated as data.
spam_data = pd.read_csv('spam.txt', header=None)
X_train, X_test, y_train, y_test = train_test_split(
    spam_data.iloc[:, :-1],  # every column except the last -> features
    spam_data.iloc[:, -1],   # last column -> target
    test_size=0.2,           # hold out 20% of the rows for testing
    random_state=42,         # fixed seed so the split is the same on every run
)

# Linear Discriminant Analysis

In [41]:
# Fit LDA (least-squares solver) on the training split, predict the test
# split, and report the fraction of test labels predicted correctly.
lda_model = LinearDiscriminantAnalysis(solver='lsqr')
lda_model.fit(X_train, y_train)
lda_preds = lda_model.predict(X_test)
lda_acc = accuracy_score(y_true=y_test, y_pred=lda_preds)
print('LDA Accuracy: {}'.format(lda_acc))

LDA Accuracy: 0.8816503800217155


# Quadratic Discriminant Analysis

<img src="img/lda_vs_qda.png" width="750" height="750">

In [43]:
# Fit QDA with default settings on the training split, predict the test
# split, and report the fraction of test labels predicted correctly.
qda_model = QuadraticDiscriminantAnalysis()
qda_model.fit(X_train, y_train)
qda_preds = qda_model.predict(X_test)
qda_acc = accuracy_score(y_true=y_test, y_pred=qda_preds)
print('QDA Accuracy: {}'.format(qda_acc))

QDA Accuracy: 0.8371335504885994


# Logistic Regression

In [46]:
# Fit logistic regression with default settings on the training split,
# predict the test split, and report the fraction of test labels predicted
# correctly.
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)
logreg_preds = logreg_model.predict(X_test)
logreg_acc = accuracy_score(y_true=y_test, y_pred=logreg_preds)
print('Logistic Regression Accuracy: {}'.format(logreg_acc))

Logistic Regression Accuracy: 0.9229098805646037


# Recap & Conclusions

# Sources

1. Hastie, T., Tibshirani, R., & Friedman, J. H. (2009). The elements of statistical learning: data mining, inference, and prediction. 2nd ed. New York: Springer.
2. https://scikit-learn.org/stable/modules/lda_qda.html