# Deep learning (Intro)

## Preparación de ambiente

### Carga de módulos

In [1]:
# Data wrangling
import h5py
import numpy as np
import pandas as pd

# Plotting
import cufflinks as cf

# Modeling
from keras import metrics, Input
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential, Model, load_model
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from keras.layers import Embedding, Dense, Flatten, Dropout, Conv2D, MaxPooling2D

# Preprocessing
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler

# Model performance
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score

cf.go_offline()

### Funciones relevantes

In [2]:
def print_data(data):
    for row in data:
        print(''.join('{:3}'.format(value) for value in row))

## Data Wrangling

### Carga de datos

In [3]:
df = pd.read_csv("letters.csv")

In [4]:
#La primer columna contiene índices
df.columns[0]

'Unnamed: 0'

### EDA

In [5]:
df.drop(columns='Unnamed: 0', inplace=True)
df

Unnamed: 0,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,1x10,...,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28,letter
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,G
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,P
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124795,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,B
124796,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,R
124797,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M
124798,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O


In [6]:
for _ in range(5):
    letters_sample = df.sample()
    print(letters_sample["letter"].values[0])
    letters_sample = letters_sample.drop(columns=["letter"]).to_numpy().reshape((28, 28))
    print_data(letters_sample)

J
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 18 74  7  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 37197 34  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 37229 82  2  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 37247172 21  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 37250215 37  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 32245217 37  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  9220217 37  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3183220 51  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2170242112  3  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0129234 95  3

### Limpieza básica

In [7]:
# Missings
df.isnull().sum().sum()

0

In [8]:
# Duplicados
duplicate_rows = df[df.duplicated()].index
duplicate_rows

Int64Index([32048, 87685, 95535, 97607, 102354], dtype='int64')

In [9]:
df.drop(duplicate_rows, inplace=True)

### Comprobando que todos los valores se encuentran entre 0 y 255

In [10]:
#Buscando si existe alguna fila que contenga un valor menor a 0
df[(df.loc[:,df.columns != 'letter'] < 0).any(axis=1)]

Unnamed: 0,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,1x10,...,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28,letter


In [11]:
#Buscando si existe alguna fila que contenga un valor mayor a 255
df[(df.loc[:,df.columns != 'letter'] > 255).any(axis=1)]

Unnamed: 0,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,1x10,...,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28,letter


No existen valores fuera de rango.

### Eliminando columnas univariantes

In [12]:
def get_univariate(df):
    univariate_col = []
    for col in df.columns:
        if len(np.unique(df[col])) == 1:
            univariate_col.append(col)
    return univariate_col

In [13]:
univariate_columns = get_univariate(df)
len(univariate_columns)

36

In [14]:
df.drop(columns=univariate_columns, inplace=True)
df

Unnamed: 0,1x5,1x6,1x7,1x8,1x9,1x10,1x11,1x15,1x16,1x17,...,28x6,28x9,28x10,28x13,28x15,28x16,28x17,28x18,28x19,letter
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,G
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,P
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124795,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,B
124796,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,R
124797,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M
124798,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O


### Eliminando columnas de baja variabilidad

In [15]:
def get_low_std(df):
    low_std_col = []
    for col in df.columns:
        if df[col].std() < 1:
            low_std_col.append(col)
    return low_std_col

In [16]:
low_std_columns = get_low_std(df.loc[:,df.columns != 'letter'])
len(low_std_columns)

70

In [17]:
df.drop(columns=low_std_columns, inplace=True)
df

Unnamed: 0,2x5,2x6,2x7,2x8,2x9,2x10,2x11,2x12,2x13,2x14,...,27x17,27x18,27x19,27x20,27x21,27x22,27x23,27x24,27x25,letter
0,3,4,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,G
2,0,0,0,0,0,0,0,0,0,20,...,0,0,0,0,0,0,0,0,0,P
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O
4,0,0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124795,32,8,0,0,0,0,0,0,0,0,...,37,37,37,32,5,2,0,0,0,B
124796,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,R
124797,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,M
124798,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,O


### Buscando registros univariantes (imagenes con el mismo valor en cada pixel)

In [18]:
def get_univariate_row(df):
    df = df.transpose()
    univariate_col = []
    for col in df.columns:
        if len(np.unique(df[col])) == 1:
            univariate_col.append(col)
    return univariate_col

In [19]:
get_univariate_row(df.loc[:,df.columns != 'letter'])

[]

### Preparación de sets

In [20]:
#obteniendo muestra
df_sample = df.sample(52_000, random_state=42)

In [21]:
X = df_sample[[x for x in df.columns if x!= "letter"]].astype(float).to_numpy()
y = df_sample["letter"].to_numpy()

In [22]:
df["letter"].value_counts(True)

Y    0.038463
K    0.038463
O    0.038463
G    0.038463
Q    0.038463
X    0.038463
V    0.038463
M    0.038463
A    0.038463
I    0.038463
H    0.038463
B    0.038463
U    0.038463
N    0.038463
C    0.038463
E    0.038463
P    0.038463
D    0.038463
T    0.038463
S    0.038463
L    0.038463
Z    0.038455
R    0.038455
F    0.038455
J    0.038455
W    0.038455
Name: letter, dtype: float64

In [23]:
print(len(np.unique(y)))
y

26


array(['C', 'O', 'L', ..., 'J', 'U', 'J'], dtype=object)

In [24]:
#pasando de letras a números (A= 0 ... Z=26)
y =[num-65 for num in map(ord,y)]
y

[2,
 14,
 11,
 15,
 15,
 4,
 24,
 10,
 7,
 7,
 8,
 16,
 19,
 23,
 9,
 21,
 8,
 2,
 6,
 13,
 12,
 3,
 21,
 6,
 19,
 19,
 19,
 17,
 15,
 19,
 2,
 16,
 8,
 16,
 21,
 3,
 3,
 17,
 14,
 15,
 18,
 15,
 18,
 25,
 18,
 18,
 3,
 24,
 0,
 1,
 16,
 0,
 18,
 6,
 14,
 14,
 17,
 7,
 7,
 15,
 15,
 1,
 23,
 23,
 24,
 2,
 4,
 10,
 3,
 12,
 19,
 9,
 6,
 25,
 13,
 8,
 5,
 18,
 9,
 22,
 17,
 12,
 17,
 19,
 19,
 4,
 25,
 23,
 25,
 4,
 24,
 5,
 13,
 3,
 25,
 6,
 10,
 15,
 24,
 0,
 11,
 23,
 0,
 12,
 13,
 16,
 6,
 0,
 15,
 15,
 5,
 8,
 3,
 4,
 10,
 15,
 15,
 10,
 24,
 20,
 2,
 10,
 4,
 2,
 11,
 24,
 10,
 8,
 3,
 3,
 24,
 8,
 3,
 16,
 15,
 19,
 14,
 15,
 18,
 21,
 24,
 21,
 4,
 18,
 19,
 23,
 8,
 19,
 22,
 11,
 2,
 0,
 22,
 22,
 9,
 16,
 2,
 2,
 6,
 4,
 10,
 11,
 23,
 24,
 23,
 2,
 21,
 0,
 22,
 14,
 22,
 25,
 19,
 7,
 9,
 9,
 17,
 21,
 24,
 3,
 8,
 12,
 6,
 22,
 13,
 13,
 1,
 7,
 12,
 17,
 11,
 25,
 2,
 10,
 3,
 17,
 20,
 22,
 3,
 22,
 5,
 24,
 16,
 23,
 21,
 10,
 16,
 23,
 18,
 7,
 11,
 0,
 21,
 0,
 5,
 18,

In [25]:
y = to_categorical(y)

In [26]:
y

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

### Escalado de datos

In [28]:
sc = MinMaxScaler()

In [29]:
Xs = sc.fit_transform(X_train)

In [30]:
Xs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

## Modelado

### Generación de modelo

In [31]:
model = Sequential()

In [32]:
X_train.shape

(39000, 678)

### Capa de entrada

In [33]:
X_train.shape[1]

678

In [34]:
model.add(Input(shape=(X_train.shape[1],)))



In [35]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


### Capas ocultas

In [36]:
model.add(Dense(20, activation="tanh"))
model.add(Dense(40, activation="relu"))
model.add(Dense(60, activation="tanh"))
model.add(Dense(40, activation="sigmoid"))
model.add(Dense(20, activation="relu"))

### Capa de salida

In [37]:
# # Clasificación binaria
# model.add(Dense(1, activation="sigmoid"))

In [38]:
# # Regresión
# model.add(Dense(1, activation="linear"))

In [39]:
model.add(Dense(26, activation="softmax"))

In [40]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                13580     
_________________________________________________________________
dense_1 (Dense)              (None, 40)                840       
_________________________________________________________________
dense_2 (Dense)              (None, 60)                2460      
_________________________________________________________________
dense_3 (Dense)              (None, 40)                2440      
_________________________________________________________________
dense_4 (Dense)              (None, 20)                820       
_________________________________________________________________
dense_5 (Dense)              (None, 26)                546       
Total params: 20,686
Trainable params: 20,686
Non-trainable params: 0
____________________________________________________

### Configuración del modelo

* Regression Loss Functions
 * Mean Squared Error Loss ("mean_squared_error")
 * Mean Squared Logarithmic Error Loss ("mean_squared_logarithmic_error")
 * Mean Absolute Error Loss ("mean_absolute_error")
* Binary Classification Loss Functions
 * Binary Cross-Entropy ("binary_crossentropy")
 * Hinge Loss ("hinge")
 * Squared Hinge Loss ("squared_hinge")
* Multi-Class Classification Loss Functions
 * Multi-Class Cross-Entropy Loss ("categorical_crossentropy")
 * Sparse Multiclass Cross-Entropy Loss ("sparse_categorical_crossentropy")
 * Kullback Leibler Divergence Loss ("kullback_leibler_divergence")

In [41]:
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics=["accuracy"])

### Entrenamiento del modelo

In [42]:
Xs.shape

(39000, 678)

In [43]:
Xs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [44]:
model.fit(x = Xs, y=y_train, epochs = 100, batch_size = 100, validation_data=(sc.transform(X_test), y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f3a7fbb4040>

### Análisis de resultados

In [45]:
dc_history = model.history.history

In [46]:
dc_history.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

In [47]:
list(zip(dc_history["accuracy"], dc_history["val_accuracy"]))

[(0.37148717045783997, 0.5712307691574097),
 (0.6247949004173279, 0.6629999876022339),
 (0.6914358735084534, 0.698846161365509),
 (0.7207179665565491, 0.7197692394256592),
 (0.7435128092765808, 0.7311538457870483),
 (0.7584359049797058, 0.7480000257492065),
 (0.7714871764183044, 0.7549999952316284),
 (0.7833333611488342, 0.7715384364128113),
 (0.7929487228393555, 0.7647692561149597),
 (0.8017435669898987, 0.7802307605743408),
 (0.8077436089515686, 0.785076916217804),
 (0.8099230527877808, 0.7857692241668701),
 (0.8190512657165527, 0.7896153926849365),
 (0.822282075881958, 0.7827692031860352),
 (0.8301538228988647, 0.7884615659713745),
 (0.8315641283988953, 0.7914615273475647),
 (0.8362563848495483, 0.7943077087402344),
 (0.8371794819831848, 0.7973076701164246),
 (0.8411282300949097, 0.795769214630127),
 (0.8435384631156921, 0.7943845987319946),
 (0.843999981880188, 0.7997692227363586),
 (0.8474358916282654, 0.8019999861717224),
 (0.8501538634300232, 0.7989230751991272),
 (0.85225641727

In [48]:
accuracy = pd.DataFrame(data=zip(dc_history["accuracy"], dc_history["val_accuracy"]), columns=["accuracy", "val_accuracy"])

In [49]:
accuracy.iplot(kind="scatter")

### Predicción

In [50]:
model.predict_classes(Xs)


`model.predict_classes()` is deprecated and will be removed after 2021-01-01. Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).



array([24,  8, 13, ..., 11, 18, 20])

In [51]:
model.predict(Xs)

array([[2.05370121e-09, 1.36707987e-08, 7.27776450e-10, ...,
        3.77262640e-03, 9.95077729e-01, 3.92899260e-08],
       [4.39631123e-07, 2.85576229e-09, 1.50751758e-10, ...,
        1.58493785e-05, 1.39741605e-05, 3.97310203e-08],
       [2.03019499e-05, 1.21343573e-05, 3.25333431e-08, ...,
        4.71718735e-08, 4.91660387e-07, 1.97373110e-13],
       ...,
       [1.03776117e-07, 2.08354534e-09, 1.42066545e-08, ...,
        8.42546942e-05, 1.36162314e-04, 2.18466266e-06],
       [8.97850058e-11, 2.40129139e-10, 1.22316879e-09, ...,
        1.56019350e-15, 4.84920228e-08, 7.28170135e-10],
       [5.82553403e-05, 9.27462452e-07, 6.95348854e-06, ...,
        5.54214949e-08, 3.57560140e-07, 2.51950723e-07]], dtype=float32)

In [52]:
list(map(np.argmax, model.predict(Xs)))

[24,
 8,
 13,
 11,
 17,
 13,
 20,
 8,
 20,
 9,
 5,
 7,
 9,
 13,
 22,
 16,
 25,
 24,
 7,
 19,
 19,
 2,
 14,
 11,
 20,
 0,
 22,
 1,
 9,
 12,
 19,
 13,
 15,
 11,
 20,
 14,
 22,
 16,
 11,
 5,
 6,
 19,
 17,
 19,
 20,
 3,
 10,
 25,
 1,
 7,
 13,
 17,
 8,
 10,
 7,
 9,
 3,
 18,
 12,
 7,
 5,
 7,
 6,
 9,
 22,
 4,
 6,
 3,
 9,
 9,
 6,
 13,
 1,
 11,
 14,
 18,
 25,
 2,
 3,
 1,
 13,
 6,
 16,
 19,
 8,
 18,
 24,
 15,
 10,
 5,
 25,
 22,
 14,
 1,
 17,
 16,
 9,
 3,
 0,
 20,
 21,
 1,
 14,
 17,
 11,
 21,
 7,
 1,
 5,
 1,
 23,
 19,
 4,
 5,
 7,
 16,
 15,
 24,
 22,
 15,
 15,
 0,
 19,
 6,
 17,
 22,
 24,
 13,
 11,
 21,
 12,
 8,
 1,
 9,
 7,
 1,
 24,
 11,
 22,
 22,
 3,
 14,
 3,
 6,
 20,
 18,
 21,
 6,
 12,
 22,
 22,
 14,
 25,
 0,
 9,
 5,
 14,
 19,
 22,
 8,
 13,
 14,
 17,
 11,
 11,
 23,
 2,
 10,
 17,
 25,
 19,
 1,
 8,
 21,
 5,
 5,
 12,
 1,
 8,
 2,
 22,
 14,
 25,
 5,
 5,
 13,
 10,
 5,
 12,
 0,
 21,
 16,
 16,
 4,
 22,
 16,
 15,
 20,
 8,
 13,
 3,
 6,
 1,
 3,
 7,
 22,
 10,
 23,
 6,
 6,
 7,
 1,
 17,
 15,
 20,
 23,
 17,
 0,

In [53]:
list(map(np.argmax, y_train))

[24,
 8,
 13,
 8,
 17,
 13,
 20,
 8,
 20,
 9,
 5,
 7,
 9,
 13,
 22,
 16,
 25,
 24,
 7,
 19,
 19,
 2,
 14,
 11,
 20,
 0,
 22,
 1,
 9,
 12,
 19,
 13,
 15,
 11,
 20,
 14,
 22,
 16,
 11,
 5,
 6,
 19,
 17,
 19,
 20,
 1,
 10,
 25,
 1,
 7,
 13,
 17,
 8,
 10,
 7,
 9,
 3,
 18,
 12,
 7,
 5,
 7,
 6,
 9,
 22,
 4,
 6,
 3,
 9,
 8,
 16,
 13,
 1,
 11,
 14,
 18,
 19,
 2,
 3,
 1,
 13,
 6,
 6,
 19,
 8,
 18,
 24,
 15,
 10,
 5,
 25,
 22,
 14,
 1,
 17,
 16,
 9,
 3,
 0,
 20,
 21,
 1,
 14,
 17,
 11,
 21,
 7,
 1,
 5,
 1,
 23,
 5,
 4,
 5,
 7,
 16,
 15,
 24,
 22,
 15,
 15,
 0,
 19,
 6,
 17,
 22,
 24,
 13,
 11,
 21,
 12,
 8,
 3,
 9,
 7,
 1,
 24,
 11,
 22,
 22,
 3,
 14,
 3,
 16,
 20,
 18,
 21,
 15,
 12,
 22,
 22,
 14,
 25,
 0,
 9,
 5,
 14,
 19,
 22,
 8,
 13,
 1,
 17,
 11,
 11,
 23,
 2,
 10,
 17,
 25,
 19,
 1,
 8,
 21,
 5,
 5,
 12,
 4,
 11,
 2,
 22,
 14,
 25,
 5,
 5,
 13,
 10,
 5,
 12,
 0,
 21,
 16,
 16,
 4,
 22,
 16,
 15,
 20,
 11,
 13,
 16,
 16,
 1,
 3,
 7,
 22,
 10,
 23,
 16,
 6,
 0,
 1,
 17,
 15,
 20,
 23,
 17,

In [54]:
accuracy_score(y_pred=model.predict_classes(Xs), y_true=list(map(np.argmax, y_train)))

0.9167692307692308

In [55]:
accuracy_score(y_pred=model.predict_classes(sc.transform(X_test)), y_true=list(map(np.argmax, y_test)))


`model.predict_classes()` is deprecated and will be removed after 2021-01-01. Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).



0.7940769230769231

In [56]:
validate = pd.read_csv("digits_test_sample.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'digits_test_sample.csv'

In [None]:
X_val = validate[[x for x in validate.columns if x!= "label"]].astype(float).to_numpy()
y_val = validate["label"].to_numpy()

In [None]:
accuracy_score(y_pred=model.predict_classes(sc.transform(X_val)), y_true=list(map(np.argmax, to_categorical(y_val))))

### Preservación del modelo

In [None]:
model.save('./deep_model.h5')

In [None]:
new_model = load_model('./deep_model.h5')

In [None]:
f = h5py.File('./deep_model.h5', 'r')
print(list(f.keys()))