<a href="https://colab.research.google.com/github/LongNguyen1984/TimeSeriesWithPython/blob/main/LDAandQDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import confusion_matrix, classification_report, precision_score
from sklearn.model_selection import train_test_split

%matplotlib inline

In [5]:
# Load the Smarket data. Columns 1-9 are read; the first of them (Year) becomes
# the index and is parsed as a date, which lets later cells slice rows by year.
df = pd.read_csv(
    'https://raw.githubusercontent.com/JWarmenhoven/ISLR-python/master/Notebooks/Data/Smarket.csv',
    usecols=range(1, 10),
    index_col=0,
    parse_dates=True,
)
df.head()

Unnamed: 0_level_0,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2001-01-01,0.381,-0.192,-2.624,-1.055,5.01,1.1913,0.959,Up
2001-01-01,0.959,0.381,-0.192,-2.624,-1.055,1.2965,1.032,Up
2001-01-01,1.032,0.959,0.381,-0.192,-2.624,1.4112,-0.623,Down
2001-01-01,-0.623,1.032,0.959,0.381,-0.192,1.276,0.614,Up
2001-01-01,0.614,-0.623,1.032,0.959,0.381,1.2057,0.213,Up


In [26]:
# Chronological split on the date index: rows up to and including 2004 are
# used for fitting, rows from 2005 onwards are held out for evaluation.
# Only the first two lag columns are used as predictors.
X_train = df.loc[:'2004', ['Lag1', 'Lag2']]
y_train = df.loc[:'2004', 'Direction']

X_test = df.loc['2005':, ['Lag1', 'Lag2']]
y_test = df.loc['2005':, 'Direction']

# scikit-learn's fit() returns the estimator itself, so `model` and `lda`
# refer to the same fitted object.
lda = LinearDiscriminantAnalysis()
model = lda.fit(X_train, y_train)

# Class prior probabilities estimated from the training labels.
print(model.priors_)

[0.49198397 0.50801603]


In [9]:
# Class-wise means of the predictors for the fitted LDA model
# (one row per class, one column per feature).
print(model.means_)

[[ 0.04279022  0.03389409]
 [-0.03954635 -0.03132544]]


In [10]:
# Weight vector of the fitted LDA model's linear decision function
# (one weight per feature).
print(model.coef_)

[[-0.05544078 -0.0443452 ]]


### Count the predicted labels for each class

In [11]:
# Predict the held-out rows, then tally how often each class label is predicted.
pred = model.predict(X_test)
pred_labels, pred_counts = np.unique(pred, return_counts=True)
print((pred_labels, pred_counts))

(array(['Down', 'Up'], dtype='<U4'), array([ 70, 182]))


### Confusion matrices

In [13]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred). Pass the
# arguments in that documented order (as classification_report does below) and
# transpose, so the printed table has rows = predicted class and
# columns = true class.
print(confusion_matrix(y_test, pred).T)
# Per-class precision / recall / F1 on the held-out data.
print(classification_report(y_test, pred, digits=3))

[[ 35  35]
 [ 76 106]]
              precision    recall  f1-score   support

        Down      0.500     0.315     0.387       111
          Up      0.582     0.752     0.656       141

    accuracy                          0.560       252
   macro avg      0.541     0.534     0.522       252
weighted avg      0.546     0.560     0.538       252



## Quadratic Discriminant Analysis

In [14]:
# Fit QDA on the same training split. fit() returns the estimator itself,
# so `model2` and `qda` refer to the same fitted object.
qda = QuadraticDiscriminantAnalysis()
model2 = qda.fit(X_train, y_train)
# Show the estimated class priors followed by the class-wise feature means.
print(model2.priors_, model2.means_, sep='\n')

[0.49198397 0.50801603]
[[ 0.04279022  0.03389409]
 [-0.03954635 -0.03132544]]


In [15]:
# Predict the held-out rows with the QDA model and tally the predicted labels.
pred2 = model2.predict(X_test)
print(np.unique(pred2, return_counts=True))
# scikit-learn's signature is confusion_matrix(y_true, y_pred). Pass the
# arguments in that documented order and transpose, so the printed table has
# rows = predicted class and columns = true class.
print(confusion_matrix(y_test, pred2).T)
# Per-class precision / recall / F1 on the held-out data.
print(classification_report(y_test, pred2, digits=3))

(array(['Down', 'Up'], dtype=object), array([ 50, 202]))
[[ 30  20]
 [ 81 121]]
              precision    recall  f1-score   support

        Down      0.600     0.270     0.373       111
          Up      0.599     0.858     0.706       141

    accuracy                          0.599       252
   macro avg      0.600     0.564     0.539       252
weighted avg      0.599     0.599     0.559       252



## An Application to Carseats Data

In [20]:
# Load the Carseats data, using the first CSV column as the row index,
# and preview the first 20 rows.
df2 = pd.read_csv(
    'https://raw.githubusercontent.com/JWarmenhoven/ISLR-python/master/Notebooks/Data/Carseats.csv',
    index_col=0,
)
df2.head(20)

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
1,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
2,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
3,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
4,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
5,4.15,141,64,3,340,128,Bad,38,13,Yes,No
6,10.81,124,113,13,501,72,Bad,78,16,No,Yes
7,6.63,115,105,0,45,108,Medium,71,15,Yes,No
8,11.85,136,81,15,425,120,Good,67,10,Yes,Yes
9,6.54,132,110,0,108,124,Medium,76,10,No,No
10,4.69,132,113,0,131,124,Medium,76,17,No,Yes


In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# Carseats columns.
df2.describe()

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,Age,Education
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,7.496325,124.975,68.6575,6.635,264.84,115.795,53.3225,13.9
std,2.824115,15.334512,27.986037,6.650364,147.376436,23.676664,16.200297,2.620528
min,0.0,77.0,21.0,0.0,10.0,24.0,25.0,10.0
25%,5.39,115.0,42.75,0.0,139.0,100.0,39.75,12.0
50%,7.49,125.0,69.0,5.0,272.0,117.0,54.5,14.0
75%,9.32,135.0,91.0,12.0,398.5,131.0,66.0,16.0
max,16.27,175.0,120.0,29.0,509.0,191.0,80.0,18.0


In [119]:
# Predictors: four numeric columns. Target: the shelf-location category.
X = df2.loc[:, ['Sales', 'Price', 'Advertising', 'Income']]
y = df2.loc[:, 'ShelveLoc']

In [120]:
# Hold out 30% of the Carseats rows for evaluation; random_state fixes the
# shuffle so the split is reproducible. `train_test_split` is imported in the
# notebook's import cell at the top.
# NOTE: this rebinds X_train / X_test / y_train / y_test, which the Smarket
# section earlier in the notebook also uses -- re-run the Smarket split cell
# before revisiting that section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

In [121]:
# Fit a *fresh* LDA estimator on the Carseats split. Calling lda.fit(...) here
# would refit the very object that `model` (the Smarket LDA model) refers to,
# because fit() returns the estimator itself -- silently replacing the Smarket
# fit and making earlier cells print Carseats results when re-run.
model1 = LinearDiscriminantAnalysis().fit(X_train, y_train)
# Class prior probabilities estimated from the training labels.
print(model1.priors_)

[0.25       0.21428571 0.53571429]


In [122]:
# Class-wise feature means followed by the decision-function weights
# (one row per class, one column per feature).
print(model1.means_, model1.coef_, sep='\n')

[[  5.40814286 114.77142857   6.05714286  71.18571429]
 [ 10.13766667 118.48333333   7.55        65.81666667]
 [  7.45473333 114.85333333   7.          68.97333333]]
[[-6.68750658e-01 -3.89475269e-02  4.93313406e-02  1.18072896e-02]
 [ 8.75182253e-01  5.42982559e-02 -7.38858178e-02 -1.52111463e-02]
 [-3.79892611e-02 -3.54378981e-03  6.53303484e-03  5.74390025e-04]]


In [123]:
# Predict the held-out Carseats rows, then tally the predicted labels per class.
pred = model1.predict(X_test)
pred_labels, pred_counts = np.unique(pred, return_counts=True)
print((pred_labels, pred_counts))

(array(['Bad', 'Good', 'Medium'], dtype='<U6'), array([21, 21, 78]))


In [124]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred). Pass the
# arguments in that documented order (as classification_report does below) and
# transpose, so the printed table has rows = predicted class and
# columns = true class.
print(confusion_matrix(y_test, pred).T)
# Per-class precision / recall / F1 on the held-out data.
print(classification_report(y_test, pred, digits=3))

[[11  0 10]
 [ 0 16  5]
 [15  9 54]]
              precision    recall  f1-score   support

         Bad      0.524     0.423     0.468        26
        Good      0.762     0.640     0.696        25
      Medium      0.692     0.783     0.735        69

    accuracy                          0.675       120
   macro avg      0.659     0.615     0.633       120
weighted avg      0.670     0.675     0.669       120



### Using Quadratic Discriminant Analysis

In [125]:
# Fit a *fresh* QDA estimator on the Carseats split. Calling qda.fit(...) here
# would refit the Smarket QDA estimator in place, because fit() returns the
# estimator itself; a new instance leaves `qda` holding the Smarket fit.
model2 = QuadraticDiscriminantAnalysis().fit(X_train, y_train)
# Predict the held-out rows and tally the predicted labels per class.
pred2 = model2.predict(X_test)
print(np.unique(pred2, return_counts=True))

(array(['Bad', 'Good', 'Medium'], dtype=object), array([19, 20, 81]))


In [126]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred). Pass the
# arguments in that documented order (as classification_report does below) and
# transpose, so the printed table has rows = predicted class and
# columns = true class.
print(confusion_matrix(y_test, pred2).T)
# Per-class precision / recall / F1 on the held-out data.
print(classification_report(y_test, pred2, digits=3))

[[11  0  8]
 [ 0 17  3]
 [15  8 58]]
              precision    recall  f1-score   support

         Bad      0.579     0.423     0.489        26
        Good      0.850     0.680     0.756        25
      Medium      0.716     0.841     0.773        69

    accuracy                          0.717       120
   macro avg      0.715     0.648     0.673       120
weighted avg      0.714     0.717     0.708       120

