# **Práctica 2**
* **Alumno 1**: Bolinches Segovia, Jorge
* **Alumno 2**: Cerezo Pomykol, Jan
***

## **Carga de datasets.**

In [34]:
import numpy as np
import pandas as pd
import os.path
from sklearn.datasets import load_iris, load_wine, load_breast_cancer, fetch_openml

def _load_openml_cached(name, x_path, y_path, version=1):
    """Return ``(X, y)`` for an OpenML dataset, using local pickles as a cache.

    Parameters
    ----------
    name : str
        OpenML dataset name passed to ``fetch_openml``.
    x_path, y_path : str
        Paths of the pickle files holding the features and the labels.
    version : int, default 1
        OpenML dataset version.

    Returns
    -------
    (X, y)
        The objects read from the cache, or the freshly downloaded
        DataFrame / Series pair (which is then written to the cache).
    """
    # Both files must exist: checking only the X file would make the read of y
    # fail if a previous save was interrupted or one file was deleted.
    if os.path.exists(x_path) and os.path.exists(y_path):
        # NOTE(review): unpickling executes arbitrary code; only load cache
        # files that this notebook created itself.
        return pd.read_pickle(x_path), pd.read_pickle(y_path)

    # Download from https://www.openml.org . as_frame=True is explicit because
    # the caching below relies on the pandas ``to_pickle`` method.
    features, labels = fetch_openml(name, version=version, return_X_y=True,
                                    as_frame=True, cache=False)
    # Store the data locally so it is not downloaded again.
    features.to_pickle(x_path)
    labels.to_pickle(y_path)
    return features, labels


# Iris
dataset = load_iris()
X_iris = dataset.data
y_iris = dataset.target

# Wine
dataset = load_wine()
X_wine = dataset.data
y_wine = dataset.target

# Cancer
dataset = load_breast_cancer()
X_cancer = dataset.data
y_cancer = dataset.target


# Isolet
X, y = _load_openml_cached("isolet", "isolet_X.pickle", "isolet_y.pickle")
X_isolet = np.array(X)
# factorize replaces each distinct label by an integer code
y_isolet = pd.factorize(y)[0]

# MNIST
X, y = _load_openml_cached("mnist_784", "mnist_X.pickle", "mnist_y.pickle")
X_mnist = np.array(X)
y_mnist = pd.factorize(y)[0]

# Every dataset as a (features, labels) pair, keyed by a short name.
datasets = {"iris": (X_iris, y_iris),
            "wine": (X_wine, y_wine),
            "cancer": (X_cancer, y_cancer),
            "isolet": (X_isolet, y_isolet),
            "mnist": (X_mnist, y_mnist)}

In [19]:
# Show the (n_samples, n_features) shape of every loaded dataset.
for name, (features, _labels) in datasets.items():
    print(f"{name}:\t{features.shape}")

iris:	(150, 4)
wine:	(178, 13)
cancer:	(569, 30)
isolet:	(7797, 617)
mnist:	(70000, 784)


## **Entrenamiento y evaluación del perceptrón.**

In [21]:
from sklearn.linear_model import Perceptron
from sklearn.model_selection import cross_val_score
from time import time

#### **Parámetros por defecto**

In [28]:
# 10-fold cross-validation of a Perceptron with default hyper-parameters on
# every dataset; reports mean accuracy, its standard deviation and wall time.
for name, (features, labels) in datasets.items():
    t0 = time()
    clf = Perceptron(random_state=0)
    scores = cross_val_score(clf, features, labels, cv=10)
    elapsed = time() - t0
    print(f"{name}:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:8.4f}s")

iris:	0.7067, std: 0.0611, time:   0.0150s
wine:	0.6134, std: 0.1230, time:   0.0160s
cancer:	0.8964, std: 0.0345, time:   0.0100s
isolet:	0.9433, std: 0.0181, time:  19.3106s
mnist:	0.8671, std: 0.0123, time:  75.9903s


#### **Parámetro $\delta$**

##### Dataset iris

In [29]:
# 10-fold cross-validation on iris with learning rate 0.5 and early stopping.
t0 = time()
clf = Perceptron(eta0=0.5, early_stopping=True, random_state=0)
scores = cross_val_score(clf, X_iris, y_iris, cv=10)
elapsed = time() - t0
print(f"iris:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:.4f}s")

iris:	0.6667, std: 0.0000, time: 0.0480s


***
##### Dataset wine

In [30]:
# 10-fold cross-validation on wine with learning rate 0.5 and early stopping.
t0 = time()
clf = Perceptron(eta0=0.5, early_stopping=True, random_state=0)
scores = cross_val_score(clf, X_wine, y_wine, cv=10)
elapsed = time() - t0
print(f"wine:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:.4f}s")

wine:	0.5297, std: 0.1177, time: 0.0540s


***
##### Dataset cancer

In [31]:
# 10-fold cross-validation on cancer with learning rate 0.5 and early stopping.
t0 = time()
clf = Perceptron(eta0=0.5, early_stopping=True, random_state=0)
scores = cross_val_score(clf, X_cancer, y_cancer, cv=10)
elapsed = time() - t0
print(f"cancer:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:.4f}s")

cancer:	0.7823, std: 0.2063, time: 0.0300s


***
##### Dataset isolet

In [32]:
# 10-fold cross-validation on isolet with learning rate 0.5 and early stopping.
t0 = time()
clf = Perceptron(eta0=0.5, early_stopping=True, random_state=0)
scores = cross_val_score(clf, X_isolet, y_isolet, cv=10)
elapsed = time() - t0
print(f"isolet:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:.4f}s")

isolet:	0.9259, std: 0.0147, time: 14.6223s


***
##### Dataset mnist

In [33]:
# 10-fold cross-validation on mnist with learning rate 0.5 and early stopping.
t0 = time()
clf = Perceptron(eta0=0.5, early_stopping=True, random_state=0)
scores = cross_val_score(clf, X_mnist, y_mnist, cv=10)
elapsed = time() - t0
print(f"mnist:\t{np.mean(scores):.4f}, std: {np.std(scores):.4f}, time: {elapsed:.4f}s")

mnist:	0.8687, std: 0.0144, time: 69.6371s


***
#### **Resumen de resultados**

|dataset|ratio de aprendizaje ($\delta$, `eta0`)|fracción de validación interna (`early_stopping=True`)|tasa de aciertos|
|---|---|---|---|
|iris|0.5|default (0.1)|0.6667|
|wine|0.5|default (0.1)|0.5297|
|cancer|0.5|default (0.1)|0.7823|
|isolet|0.5|default (0.1)|0.9259|
|mnist|0.5|default (0.1)|0.8687|


## **Ensemble del clasificador.**