In [1]:
from sklearn.datasets import load_iris

# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()
# Show the textual description stored alongside the data.
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [None]:
from sklearn.model_selection import train_test_split

# Examples (X) and the class value of each example (y).
X, y = iris.data, iris.target
# Preview the first five examples and their class values.
print(X[:5])
print(y[:5])
# Split the dataset in two: 70% for training, 30% for evaluation.
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

In [None]:
from sklearn.svm import SVC

# SVM classification
# Classifier definition.
# probability=True is needed because the notebook later calls
# svm_classifier.predict_proba(), which SVC only exposes when probability
# estimates were enabled at construction time. random_state pins the internal
# shuffling used to calibrate those probabilities, keeping the output
# reproducible.
svm_classifier = SVC(kernel="linear", probability=True, random_state=0)
# Train the classifier on the training examples and their class values.
svm_classifier.fit(X_train, y_train)
# Predict the class of every example in the evaluation set.
y_predict = svm_classifier.predict(X_test)
y_predict

In [None]:
# Performance measures for the classifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Compute every score first; precision, recall and F-score are macro-averaged
# over the classes.
accuracy = accuracy_score(y_test, y_predict)
precision = precision_score(y_test, y_predict, average="macro")
recall = recall_score(y_test, y_predict, average="macro")
f_score = f1_score(y_test, y_predict, average="macro")

print("Accuracy: {}".format(accuracy))
print("Precision: {}".format(precision))
print("Recall: {}".format(recall))
print("F-score: {}".format(f_score))

In [None]:
from sklearn.metrics import classification_report

# Display names for the three classes, matched positionally to the labels.
target_names = ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica']
# Build the per-class precision / recall / F-score table, then print it.
report = classification_report(y_test, y_predict, target_names=target_names)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# ConfusionMatrixDisplay.from_estimator is its replacement and accepts the
# same estimator / data / display arguments.
from sklearn.metrics import ConfusionMatrixDisplay

# Raw confusion matrix (rows: true class, columns: predicted class).
print(confusion_matrix(y_test, y_predict))
# Same matrix rendered as a heat map, computed from the fitted classifier
# on the evaluation set.
ConfusionMatrixDisplay.from_estimator(
    svm_classifier,
    X_test,
    y_test,
    cmap=plt.cm.Blues,
    display_labels=['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],
)

In [None]:
from sklearn.manifold import TSNE

# Project the evaluation examples down to two dimensions for plotting.
# t-SNE is stochastic: without a fixed random_state the embedding (and the
# scatter plot built from it) changes on every run.
X_test_embedded = TSNE(n_components=2, random_state=0).fit_transform(X_test)
X_test_embedded.shape

In [None]:
import pandas as pd
import seaborn as sns

# Gather the two embedding coordinates and the true class of each evaluation
# example in a single frame so seaborn can colour the points by class.
tsne_result_df = pd.DataFrame(
    {
        'tsne_1': X_test_embedded[:, 0],
        'tsne_2': X_test_embedded[:, 1],
        'label': y_test,
    }
)

# Draw on a fresh figure.
plt.figure()
sns.scatterplot(data=tsne_result_df, x='tsne_1', y='tsne_2', hue='label')

In [None]:
# Example prediction and class probabilities for one hand-written sample
# (four feature values, in the same order as the columns of X).
sample = [[10.0, 5.0, 7.8, 0.1]]

# Store the result under its own name: reusing y_predict would overwrite the
# evaluation-set predictions and break the metrics cells if they are re-run.
sample_prediction = svm_classifier.predict(sample)
print(sample_prediction)

# Probabilities for the classes.
# SVC only provides predict_proba when it was constructed with
# probability=True; otherwise the call raises. In that case show the
# decision-function scores instead of failing.
if svm_classifier.probability:
    prob = svm_classifier.predict_proba(sample)
    print(prob)
else:
    print("predict_proba is unavailable (SVC was built with probability=False); decision scores:")
    print(svm_classifier.decision_function(sample))

In [None]:
# Read the support-vector information from the fitted classifier.
support_counts = svm_classifier.n_support_       # number of support vectors per class
support_indices = svm_classifier.support_        # indices of the support vectors
support_vectors = svm_classifier.support_vectors_  # the support vectors themselves

print(support_counts)
print(support_indices)
print(support_vectors)

In [None]:
# Hyperparameters of the SVM model, shown as the cell's output.
hyperparameters = svm_classifier.get_params()
hyperparameters

In [2]:
# Print the whole object returned by load_iris(); this writes every field,
# including the complete data array, to the cell output.
print(iris)

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     