# Kernels en SVM

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import sklearn.neural_network
import sklearn.svm

import mlutils
warnings.filterwarnings("ignore")
%matplotlib inline


In [None]:
def load_dataset_disks(size, seed=39):
    """Generate a two-class toy dataset: an inner disk surrounded by a ring.

    The first ``size // 2`` points are drawn with radius in [0, 1) and
    labelled ``+1``; the remaining points are drawn with radius in [1, 2) and
    labelled ``-1``. Radius and angle are each sampled uniformly.

    Args:
        size: Total number of points to generate (non-negative integer).
        seed: Value passed to ``np.random.seed`` before sampling. This reseeds
            NumPy's global generator, so later code that relies on the global
            state is affected as well.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(size, 2)`` with the
        Cartesian coordinates and ``Y`` has shape ``(size,)`` with the labels
        ``1.0`` / ``-1.0``.
    """
    def get_sample_disk(from_, to, size):
        # Sample in polar coordinates, then convert to Cartesian.
        length = np.random.uniform(from_, to, size)
        angle = np.pi * np.random.uniform(0, 2, size)
        # column_stack always yields an (n, 2) array, even for n == 0, so the
        # two classes can be concatenated whatever their sizes.
        return np.column_stack((length * np.cos(angle), length * np.sin(angle)))

    np.random.seed(seed)
    size1 = size // 2
    size2 = size - size1
    inner = get_sample_disk(0, 1, size1)
    outer = get_sample_disk(1, 2, size2)
    X = np.concatenate((inner, outer))
    Y = np.concatenate((np.ones(size1), -np.ones(size2)))
    return X, Y

# SVMs (kernels)

In [None]:
# Build the 500-point dataset and plot it, colouring each point by its label.
X, Y = load_dataset_disks(size=500)
plt.scatter(*X.T, c=Y, s=50, cmap=plt.cm.Spectral);

### Probemos utilizar algunos kernels

#### Sigmoide

$$\tanh(\gamma \cdot \langle x, x' \rangle + r)$$

In [None]:
# Fit an SVM with the sigmoid kernel on the whole dataset; the bare `clf`
# expression makes the notebook display the fitted estimator.
clf = sklearn.svm.SVC(kernel="sigmoid", gamma="auto").fit(X, Y)
clf

In [None]:
# Plot the decision regions of the fitted classifier over the dataset.
mlutils.plot_decision_boundary(clf.predict, X.T, Y.T)

# Accuracy on the same data the model was fitted on (training accuracy).
predictions = clf.predict(X)
accuracy = 100.0 * np.mean(predictions == Y)
# Use %.2f rather than %d so the percentage is not truncated to an integer.
print('Accuracy: %.2f%%' % accuracy)

#### RBF 

$$\exp(-\gamma \cdot \| x - x' \|^2)$$

In [None]:
# Fit an SVM with the RBF kernel on the whole dataset; the bare `clf`
# expression makes the notebook display the fitted estimator.
clf = sklearn.svm.SVC(kernel="rbf", gamma="auto").fit(X, Y)
clf

In [None]:
# Plot the decision regions of the fitted classifier over the dataset.
mlutils.plot_decision_boundary(clf.predict, X.T, Y.T)

# Accuracy on the same data the model was fitted on (training accuracy).
predictions = clf.predict(X)
accuracy = 100.0 * np.mean(predictions == Y)
# Use %.2f rather than %d so the percentage is not truncated to an integer.
print('Accuracy: %.2f%%' % accuracy)

#### Polinomial

$$(\gamma \cdot \langle x, x' \rangle + r)^d$$

In [None]:
# Fit an SVM with a degree-6 polynomial kernel on the whole dataset.
# Assigning the result of fit() keeps the cell from echoing the estimator.
clf = sklearn.svm.SVC(kernel="poly", degree=6, gamma="auto").fit(X, Y)

In [None]:
# Plot the decision regions of the fitted classifier over the dataset.
mlutils.plot_decision_boundary(clf.predict, X.T, Y.T)

# Accuracy on the same data the model was fitted on (training accuracy).
predictions = clf.predict(X)
accuracy = 100.0 * np.mean(predictions == Y)
# Use %.2f rather than %d so the percentage is not truncated to an integer.
print('Accuracy: %.2f%%' % accuracy)

In [None]:
# IPython help syntax (not plain Python): shows the documentation of SVC.
sklearn.svm.SVC?

Un ejemplo interesante de uso de SVM: https://www.kaggle.com/pierra/credit-card-dataset-svm-classification

## Usemos conjunto de entrenamiento y de prueba

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

Dividimos el conjunto de datos

In [None]:
# Hold out 20% of the samples for testing. A fixed random_state makes the split
# reproducible regardless of how many cells ran before this one, and
# stratifying on Y keeps the class proportions the same in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=39, stratify=Y
)

In [None]:
# Scatter plot of the training subset, coloured by class label.
plt.scatter(*X_train.T, c=y_train, s=50, cmap=plt.cm.Spectral)
plt.xlabel('x1'); plt.ylabel('x2')
plt.show()

In [None]:
# Scatter plot of the held-out test subset, coloured by class label.
plt.scatter(*X_test.T, c=y_test, s=50, cmap=plt.cm.Spectral)
plt.xlabel('x1'); plt.ylabel('x2')
plt.show()

## Reporte de clasificación usando el conjunto de datos de prueba

In [None]:
# Display the confusion-matrix illustration stored next to the notebook,
# rendered 500 pixels wide.
from IPython.display import Image
Image('./images/confusion_matrix.png',width=500) 

In [None]:
# Train a sigmoid-kernel SVM on the training subset only, then report
# per-class metrics on the held-out test subset.
clf = sklearn.svm.SVC(kernel="sigmoid", gamma="auto")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Decision regions drawn over the test points.
mlutils.plot_decision_boundary(clf.predict, X_test.T, y_test.T)

In [None]:
# Train an RBF-kernel SVM (with verbose output enabled) on the training subset
# only, then report per-class metrics on the held-out test subset.
clf = sklearn.svm.SVC(kernel="rbf", gamma="auto", verbose=2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Decision regions drawn over the test points.
mlutils.plot_decision_boundary(clf.predict, X_test.T, y_test.T)

## Referencias:
 * Scikit: [Support Vector Machines](https://scikit-learn.org/stable/modules/svm.html#support-vector-machines)