# Chapter 3 - Classification
The following notebook consists of different code examples for graphs seen in Chapter 3.

In [1]:
import sys
sys.path.append("../")
from utils import *

## Model Evaluations

In [2]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

models = [LogisticRegression(), SVC(kernel='rbf', probability=True), KNeighborsClassifier(n_neighbors=4)]
model_names = [r"$\text{Logistic Regression}$", r"$\text{Gaussian Kernel SVM}$", r"$k-\text{NN}$"]

from sklearn.datasets import make_moons
X, y = make_moons(n_samples=500, noise=0.2, random_state=1)

In [3]:
lims = np.array([X.min(axis=0), X.max(axis=0)]).T + np.array([-.2, .2])

fig = make_subplots(rows=1, cols=3, subplot_titles=model_names, horizontal_spacing = 0.01)
for i, m in enumerate(models):
    fig.add_traces([decision_surface(m.fit(X, y).predict, lims[0], lims[1], showscale=False),
                    go.Scatter(x=X[:,0], y=X[:,1], mode="markers", showlegend=False,
                               marker=dict(color=y, symbol=class_symbols[y], colorscale=class_colors(3), 
                                           line=dict(color="black", width=1)))], 
                   rows=1, cols=i+1)

fig.update_layout(width=1000, height=300).update_xaxes(visible=False).update_yaxes(visible=False)
fig.write_image(f"../figures/decision_boundary.png")
fig.show()

FileNotFoundError: [Errno 2] No such file or directory: '../figures/decision_boundary.png'

In [None]:
from sklearn import metrics

fig = go.Figure(layout=go.Layout(title=r"$\text{ROC Curves Of Models - Moons Dataset}$", 
                                 xaxis=dict(title=r"$\text{False Positive Rate (FPR)}$"),
                                 yaxis=dict(title=r"$\text{True Positive Rate (TPR)}$")),
                data=[go.Scatter(x=[0,1], y=[0,1], mode="lines", showlegend=False, line_color="black", line_dash='dash')])

for i, model in enumerate(models):
    fpr, tpr, th = metrics.roc_curve(y, model.predict_proba(X)[:, 1])
    fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=model_names[i], text=th,
                             hovertemplate="<b>Threshold:</b>%{text:.3f}<br>FPR: %{x:.3f}<br>TPR: %{y:.3f}"))

    
fig.update_layout(width=800, height=500, yaxis=dict(range=[0,1.1]))
fig.write_image("../figures/roc.png")
fig.show()

## Perceptron

In [None]:
from sklearn.datasets import make_classification, make_blobs
np.random.seed(9)

def create_linearly_separable_dataset(n=30, scale=10):
    # Create a dataset that is linearly separable
    separable = False
    while not separable:
        samples = make_classification(n_samples=n, n_features=2, n_informative=2, n_repeated = 0, 
                                  n_redundant=0,n_clusters_per_class=1, class_sep = 0.5, scale=scale)
        red, blue = samples[0][samples[1] == 0], samples[0][samples[1] == 1]
        separable = any([red[:, k].max() < blue[:, k].min() or red[:, k].min() > blue[:, k].max() for k in range(2)])
        
    X, y = samples[0], samples[1]
    
    # Make sure that the first two samples are from both classes
    idx = np.array([np.where(y == 0)[0][0], np.where(y == 1)[0][0]])
    msk = np.array([False]*X.shape[0])
    msk[idx] = True
    return np.r_[X[msk], X[~msk]], np.r_[y[msk], y[~msk]]

X, y = create_linearly_separable_dataset(16)

In [None]:
from sklearn.linear_model import Perceptron as Perceptron
lim = np.array([X.min(axis=0), X.max(axis=0)]).T + np.array([-.5,.5])

frames = []
per = Perceptron()
per.partial_fit([X[0]], [y[0]], np.unique(y))

rnd = 1
while per.score(X, y) != 1:
    for i in range(X.shape[0]):
        # Perform another fitting over new sample
        per.partial_fit([X[i]], [y[i]], np.unique(y))

        # Get Perceptron separator
        w = per.coef_[0]
        yy = (-w[0] / w[1]) * lim[0] - (per.intercept_[0] / w[1])

        # Create animation frame
        frames.append(go.Frame(
            data = [
                go.Scatter(x = X[:,0], y=X[:, 1], mode = 'markers', showlegend=False,
                           marker = dict(size = 10, color = y, line=dict(color="black", width=1),
                                         symbol=class_symbols[y], colorscale=class_colors(2), opacity = [1]*(i+1) + [0.2]*(X.shape[0] - i-1))),
                go.Scatter(x = lim[0], y = [yy[0], yy[1]], mode = 'lines', line_color="black", showlegend=False)],
            traces=[0, 1],
            layout = go.Layout(title=rf"$\text{{Perceptron Fit - Round {rnd} After {i+1} Samples}}$")))
    rnd += 1


fig = go.Figure(data=frames[0]["data"],
                frames=frames,
                layout = go.Layout(
                    title=frames[0]["layout"]["title"],
                    xaxis=dict(range=lim[0], autorange=False),
                    yaxis=dict(range=lim[1], autorange=False),
                    updatemenus=[dict(type="buttons", buttons=[AnimationButtons.play(frame_duration=100), AnimationButtons.pause()])]))


animation_to_gif(fig, "../figures/perceptron_fit.gif", 500, width=700, height=700) 
fig.show()

## Generative Models - Simulation and Decision Boundaries

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

# Generate dataset according to LDA/QDA generative model
mu = np.array([[0, 0], [2.5, 5], [5, 0]])
pi = [.33, .34, .33]

learner = "QDA"
if learner == "LDA":
    cov = np.repeat(np.array([[.5, 0], [0, .5]])[None,:, :], 3, axis=0)
    model = LDA()
else:
    cov = np.array([[[.5, 0], [0, .5]], [[.5, 0], [0, .5]], [[1, 0], [0, .5]],])
    model = QDA()

# y = np.random.binomial(n=1, p=pi, size=500)
y = np.random.choice([0,1,2], size=500, p=pi)
X = np.array([np.random.multivariate_normal(mu[yi], cov[yi]) for yi in y])


# Plotting dataset and LAD decision boundaries
lims = np.array([X.min(axis=0), X.max(axis=0)]).T + np.array([-.5, .5])


fig = go.Figure([
    decision_surface(model.fit(X, y).predict, lims[0], lims[1], showscale=False, colorscale=class_colors(3), density=300),
    go.Scatter(x=X[:,0], y=X[:,1], mode="markers", showlegend=False, 
               marker=dict(color=y, symbol=class_symbols[y], colorscale=class_colors(3), 
                           line=dict(color="black", width=1)))],
                layout=go.Layout(xaxis=dict(range=lims[0], autorange=False, visible=False),
                                 yaxis=dict(range=lims[1], autorange=False, visible=False),
                                 width=600, height=400,
                                 title=rf"$\text{{Multi-class {learner} Decision Boundary}}$"))
fig.write_image(f"../figures/{learner}_decision_boundary.png")
fig.show()

## Decision Boundaries and Accuracy As Function Of Complexity

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

classifier, params, title, name = {
    "knn": (lambda k: KNeighborsClassifier(n_neighbors=k),
            list(range(1, 41, 2)),
            "Fitting k-NN Classifier - Number of Neighbors",
            "KNN"),
    "decision_tree": (lambda d: DecisionTreeClassifier(max_depth=d, random_state=42),
                      list(range(1, 21)),
                      "Fitting Decision Tree Classifier - Max Depth",
                      "Decision Tree")}["knn"] # "decision_tree"


# Generate data and split into train and test sets
np.random.seed(1)
X, y = make_moons(n_samples=300, noise=0.4, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y)


# Set variables used to create plots
lims = np.array([X.min(axis=0), X.max(axis=0)]).T + np.array([-.2, .2])
scatter_settings = [("Train - Class 0", X_train[y_train == 0,:], class_colors(2)[0][1], f"{class_symbols[0]}-open"),
                    ("Train - Class 1", X_train[y_train == 1,:], class_colors(2)[1][1], f"{class_symbols[1]}-open"),
                    ("Test  - Class 0", X_test[y_test == 0,:],   class_colors(2)[0][1], f"{class_symbols[0]}"),
                    ("Test  - Class 1", X_test[y_test == 1,:],   class_colors(2)[1][1], f"{class_symbols[1]}")]


# Run model for different parameters (governing complexity) and create animation frames
frames, train_error, test_error = [], [], []
for i, k in enumerate(params):
    # Fit model
    m = classifier(k).fit(X_train, y_train)
  
    # Evaluate over data sets
    train_error.append(1 - np.mean(m.predict(X_train) == y_train))
    test_error.append( 1 - np.mean(m.predict(X_test)  == y_test))

    # Create scatter objects of train/test data of different classes
    scatters = [
        go.Scatter(x=data[:,0], y=data[:,1], mode="markers", name=name, showlegend=True,
                   marker=dict(color=color, symbol=symbol, line=dict(color="black", width=.75)))
        for (name, data, color, symbol) in scatter_settings]
    
    
    # Create animation frame
    frames.append(go.Frame(
        data=[decision_surface(m.fit(X_train, y_train).predict, lims[0], lims[1], showscale=False)] + 
             scatters + 
             [go.Scatter(x=params[:i], y=train_error[:i], name="Train Error", xaxis="x2", yaxis="y2", marker_color="black"),
              go.Scatter(x=params[:i], y=test_error[:i], name="Test Error", xaxis="x2", yaxis="y2", marker_color="red")],
        layout=go.Layout(title=rf"$\text{{{title}}}={k}$"),
        traces=[0,1,2,3,4,5,6]))

    
# Create figure and populate with frames defined above
fig = make_subplots(rows=1, cols=2, 
                    subplot_titles=(r"$\text{Decisions Boundaries}$", r"$\text{Accuracy}$"),
                    horizontal_spacing=0.1)\
    .add_traces(data=frames[0]["data"], rows=[1]*7, cols=[1]*5 + [2,2])\
    .update(frames=frames)\
    .update_layout(updatemenus = [dict(type="buttons", buttons=[AnimationButtons.play(), AnimationButtons.pause()])],
                   width=1100, height=500, margin=dict(t=80), 
                   title=frames[0]["layout"]["title"])

fig.update_yaxes(visible=False, row=1, col=1)\
    .update_xaxes(visible=False, row=1, col=1)
fig.update_yaxes(range=[-0.05, max(train_error+test_error)+.1], row=1, col=2)\
    .update_xaxes(range=[params[0], params[-1]], row=1, col=2)


animation_to_gif(fig, f"../figures/{name}_animation.gif", 1000, width=1000, height=500) 
fig.show()