### Machine Learning Lab06 Exercise: LDA-based Handwritten Number Recognition

### Exercise 1 Linear Discriminant Analysis from Scratch using numpy (50 points )
- <font size=4> Complete the missing code in the LDA class below<font>

In [3]:
import numpy as np
import matplotlib.pyplot as plt


class LDA:
    # n_components:Number of components (<= min(n_classes - 1, n_features)) for dimensionality reduction.
    def __init__(self, n_components=None):
        self.n_components = n_components
        self.eigenvalues = None
        self.eigenvectors = None

    def fit(self, X, y):
        if self.n_components is None or self.n_components > X.shape[1]:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        n_features = np.shape(X)[1]
        labels = np.unique(y)

        u_k = np.array([np.mean(X[y == label], axis=0) for label in labels])

        # Calculate within-class scatter matrix S_W
        S_W = np.zeros((n_features, n_features))
        for label in labels:
            class_data = X[y == label]
            diff = class_data - u_k[label]
            S_W += diff @ diff.T

        # Calculate between-class scatter matrix S_B
        u = np.mean(X, axis=0)
        S_B = np.zeros((n_features, n_features))
        for label in labels:
            n = class_data.shape[0]
            diff = u_k[label] - u
            S_B += n * (diff @ diff.T)

        # Calculate SW^-1 * SB
        S_W_inv = np.linalg.inv(S_W)
        SW_SB = np.dot(S_W_inv, S_B)

        # Get eigenvalues and eigenvectors of SW^-1 * SB
        eigenvalues, eigenvectors = np.linalg.eig(SW_SB)

        # Sort the eigenvalues and corresponding eigenvectors
        idx = eigenvalues.argsort()[::-1]
        eigenvalues = eigenvalues[idx][:n_components]
        eigenvectors = eigenvectors[:, idx][:, :n_components]

        self.eigenvalues = eigenvalues
        self.eigenvectors = eigenvectors

    def fit_transform(self, X):
        if self.eigenvectors is not None:
            return X @ self.eigenvectors
        else:
            raise ValueError("You must call fit() before fit_transform()")

    def transform(self, X):
        if self.eigenvectors is not None:
            return X @ self.eigenvectors
        else:
            raise ValueError("You must call fit() before transform()")

- <font size=4> Dimensionality reduction visualization<font>

In [4]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets

np.random.seed(5)


iris = datasets.load_iris()
X = iris.data
y = iris.target
pca = LDA(n_components=3)
pca.fit(X, y)
X = pca.transform(X)

fig = plt.figure(1, figsize=(4, 3))
ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
ax.set_position([0, 0, 0.95, 1])
for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]:    
    ax.text3D(
        X[y == label, 0].mean(),
        X[y == label, 1].mean(),
        X[y == label, 2].mean() + 2,
        name,
        horizontalalignment="center",
        bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
    )
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(float)
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, 
        edgecolor='k')

ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])

ax.set_xlabel("Petal width")
ax.set_ylabel("Sepal length")
ax.set_zlabel("Petal length")
# ax.set_title("Ground Truth")
ax.dist = 12

plt.show()

ValueError: operands could not be broadcast together with shapes (4,4) (50,50) (4,4) 

### Exercise 2 Recognize handwritten numbers with LDA (50 points ). 

Your task in this section is to recognize handwritten numbers, and you can use the linear discriminant analysis model from the Scikit-Learn library to fit, evaluate, and predict them.

<font color='red' size=4>Note that your accuracy in this section will directly determine your score.</font>

### Loading the MNIST Dataset


In [None]:
########### Write Your Code Here ###########

############################################

###  Visualization of MINIST

In [None]:
# Visualize MINIST
########### Write Your Code Here ###########

############################################

### Fit  Model

In [2]:
########### Write Your Code Here ###########

############################################

## Evaluate model

In [3]:
########### Write Your Code Here ###########

############################################


### Exercise 3: Qustions
- Q1:What are the advantages and disadvantages of LDA?


- Q2 LDA can be used for dimensionality reduction, so can PCA. Please tell the difference between them?