In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn import metrics
import matplotlib.pyplot as plt
%matplotlib inline

# Deep Learning
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense , Dropout , Lambda, Flatten
from keras import  backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import BatchNormalization, Convolution2D , MaxPooling2D
from keras.preprocessing import image
from keras.layers import Convolution2D, MaxPooling2D

# Turn off pandas' chained-assignment warning for the whole notebook.
# NOTE(review): the cells visible here assign through .loc, so this global
# override may not be needed - confirm before keeping it.
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
# Load the training and test CSV files from the working directory.
train = pd.read_csv('./train.csv', encoding='utf8')
test = pd.read_csv('./test.csv', encoding='utf8')

# Names of every column after the first one; the first column is read as the
# label further down the notebook.
train_column_headers = train.columns.values.tolist()[1:]

# Análisis Exploratorio

In [None]:
# Binarise a copy of the training data for exploration: pixel values below
# `threshold` become 0 and all others become 1. `train` itself is not modified,
# so later sections can still read the raw intensities from it.
exploratory = train.copy()
threshold = 15

# A single vectorised comparison over all pixel columns replaces the previous
# loop, which performed two masked writes for every column.
exploratory[train_column_headers] = (
    train[train_column_headers] >= threshold
).astype('int64')

In [None]:
# Number of dark (0) vs. light (1) pixels in the first image.
# The count is taken from the binarised `exploratory` frame and restricted to
# the pixel columns: the raw `train` row would produce one bucket per intensity
# value and would also count the label as if it were a pixel.
exploratory[train_column_headers].iloc[0].value_counts()

In [None]:
# Split the thresholded frame into a float32 pixel matrix (every column but the
# first) and an int32 label vector (the first column).
binary_pixels = exploratory.iloc[:, 1:]
binary_labels = exploratory.iloc[:, 0]
x_train = binary_pixels.values.astype('float32')
y_train = binary_labels.values.astype('int32')

In [None]:
# Convert the dataset to (number of images, pixels wide, pixels long).
image_count = x_train.shape[0]
x_train = x_train.reshape((image_count, 28, 28))

# Draw the image at index 1 in the first slot of a 3x3 grid, titled with its label.
sample_index = 1
plt.subplot(3, 3, 1)
plt.imshow(x_train[sample_index], cmap=plt.get_cmap('gray'))
plt.title(y_train[sample_index])

In [None]:
# Find how many white pixels (value 1 after thresholding) each image has.
exploratory = exploratory.astype('int32')

# Sum only the pixel columns. Summing the whole frame would add the value of
# the `label` column to every row's pixel count.
pixels = exploratory.drop(columns='label').sum(axis=1)
white_pixels = pd.DataFrame(data={'label': exploratory['label'], 'pixels': pixels})

# Summary statistics of the white-pixel count for each label.
white_pixels.groupby(by='label').describe()

In [None]:
# Number of images recorded for each label.
labels_per_image = white_pixels['label']
labels_per_image.value_counts()

In [None]:
# Use a large figure for the per-label box plots of white-pixel counts, then
# set the figure size to (6, 4) for the plots that follow.
plt.rcParams.update({"figure.figsize": (20, 20)})
white_pixels.groupby(by='label').boxplot(column='pixels')
plt.rcParams.update({"figure.figsize": (6, 4)})

# Redes Neuronales

In [None]:
# Rebuild the arrays from the raw `train` / `test` frames: float32 pixels from
# every column but the first, int32 labels from the first column.
train_pixels = train.iloc[:, 1:]
train_labels = train.iloc[:, 0]
x_train = train_pixels.values.astype('float32')
y_train = train_labels.values.astype('int32')
x_test = test.values.astype('float32')

In [None]:
# Convert the training set to (num_images, img_rows, img_cols) format.
x_train = x_train.reshape((x_train.shape[0], 28, 28))

# Show the first nine images on a 3x3 grid, each titled with its label.
for position in range(1, 10):
    image_index = position - 1
    plt.subplot(3, 3, position)
    plt.imshow(x_train[image_index], cmap=plt.get_cmap('gray'))
    plt.title(y_train[image_index])

In [None]:
# Add a trailing axis of size 1 so every training image has shape (28, 28, 1).
train_image_count = x_train.shape[0]
x_train = x_train.reshape((train_image_count, 28, 28, 1))
x_train.shape

In [None]:
# Reshape the test pixels the same way: one (28, 28, 1) array per image.
test_image_count = x_test.shape[0]
x_test = x_test.reshape((test_image_count, 28, 28, 1))
x_test.shape

In [None]:
# Mean and standard deviation over every value in x_train, kept as float32.
# `standardize` reads these two module-level names when it is called.
mean_px = np.float32(x_train.mean())
std_px = np.float32(x_train.std())

def standardize(x):
  """Return `x` shifted by `mean_px` and divided by `std_px`.

  Both statistics are looked up as global names at call time, so they must be
  defined before this function is invoked.
  """
  centered = x - mean_px
  return centered / std_px

In [None]:
# One-hot encode the integer labels; the width of the encoded matrix is the
# number of classes.
# NOTE(review): the cell overwrites y_train, so running it twice would encode
# an already encoded array.
y_train = to_categorical(y_train)
num_classes = y_train.shape[-1]
num_classes

In [None]:
# Seed NumPy's global random number generator.
seed = 43
np.random.seed(seed=seed)

In [None]:
# Baseline classifier: standardise the 28x28x1 input, flatten it and feed it to
# a single 10-unit softmax layer.
model = Sequential([
    Lambda(standardize, input_shape=(28,28,1)),
    Flatten(),
    Dense(10, activation='softmax'),
])
print('input shape', model.input_shape)
print('output shape', model.output_shape)

In [None]:
# Compile with RMSprop, categorical cross-entropy and accuracy tracking.
compile_options = dict(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

# Image generator created without any augmentation arguments.
gen = image.ImageDataGenerator()

In [None]:
# Keep references to the full arrays in x / y, then hold out 20% of them as a
# validation set.
# NOTE(review): this cell rebinds x_train / y_train, so running it a second
# time would split the already reduced training set again.
x, y = x_train, y_train
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [None]:
# Fit the model on the training split. `validation_split=0.1` asks Keras to
# hold out a further 10% of the arrays passed here for per-epoch validation;
# x_val / y_val are not used by this call.
training_options = dict(epochs=6, batch_size=16, validation_split=0.1, verbose=2)
history = model.fit(x=x_train, y=y_train, **training_options)

In [None]:
# Metrics recorded by the fit call above; the keys are listed as the cell output.
history_dict = history.history
history_dict.keys()

In [None]:
# Training vs. validation loss per epoch.
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
# 1-based epoch numbers; the accuracy plot in the next cell reuses `epochs`.
epochs = range(1, len(loss_values) + 1)

# Label both curves and add a legend so they can be told apart.
plt.plot(epochs, loss_values, label='Training loss')
plt.plot(epochs, val_loss_values, label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.clf()   # clear figure
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']

# `epochs` comes from the loss-plot cell. Label both curves and add a legend so
# they can be told apart.
plt.plot(epochs, acc_values, label='Training accuracy')
plt.plot(epochs, val_acc_values, label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

In [None]:
# Model outputs for the held-out validation images (one row per image).
y_preds = model.predict(x=x_val, verbose=0)

In [None]:
# Predicted class per validation image: the index of the largest score in each
# row of y_preds. np.argmax returns the first maximum, exactly like the
# max() + list.index() lookup it replaces, without a Python-level loop.
y_prediction = np.argmax(y_preds, axis=1).astype('int64')

In [None]:
# True class per validation image: the position of the largest entry in each
# one-hot row of y_val. np.argmax returns the first maximum, exactly like the
# max() + list.index() lookup it replaces, without a Python-level loop.
y_validation = np.argmax(y_val, axis=1).astype('int64')

In [None]:
# Confusion matrix of true vs. predicted classes on the validation set.
result = confusion_matrix(y_true=y_validation, y_pred=y_prediction)
result

In [None]:
# Fraction of validation images whose predicted class matches the true class.
accuracy = metrics.accuracy_score(y_true=y_validation, y_pred=y_prediction)
accuracy

# Deep Learning

In [None]:
# Start the CNN section from the raw frames again: float32 pixels reshaped to
# (images, 28, 28, 1) and int32 labels taken from the first column.
raw_pixels = train.iloc[:, 1:].values
raw_labels = train.iloc[:, 0].values

x_train = raw_pixels.astype('float32')
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))

x_test = test.values.astype('float32')
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))

# Statistics over every training value; `standardize` reads these globals.
mean_px = np.float32(x_train.mean())
std_px = np.float32(x_train.std())

# One-hot encode the labels; the encoded width is the number of classes.
y_train = to_categorical(raw_labels.astype('int32'))
num_classes = y_train.shape[-1]

# Seed NumPy's global random number generator.
seed = 43
np.random.seed(seed=seed)

In [None]:
def get_cnn_model():
    """Build and compile the convolutional classifier.

    Layers, in order: input standardisation, two 3x3 ReLU convolutions with 32
    filters, max pooling, two 3x3 ReLU convolutions with 64 filters, max
    pooling, flatten, a 512-unit ReLU layer and a 10-unit softmax output.
    The model is compiled with RMSprop, categorical cross-entropy and accuracy.

    Returns:
        The compiled Sequential model.
    """
    cnn = Sequential()
    cnn.add(Lambda(standardize, input_shape=(28,28,1)))

    # Two convolutional stages that differ only in their filter count.
    for filter_count in (32, 64):
        cnn.add(Convolution2D(filter_count, (3,3), activation='relu'))
        cnn.add(Convolution2D(filter_count, (3,3), activation='relu'))
        cnn.add(MaxPooling2D())

    cnn.add(Flatten())
    cnn.add(Dense(512, activation='relu'))
    cnn.add(Dense(10, activation='softmax'))

    cnn.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Build a fresh CNN and set its learning rate to 0.01.
model = get_cnn_model()
# Write the value into the optimizer's learning-rate variable through the
# backend. Plain attribute assignment (`model.optimizer.lr = ...`) is not
# reliably propagated to training across Keras versions.
K.set_value(model.optimizer.lr, 0.01)

In [None]:
# Keep references to the full arrays in x / y (a later cell trains on them),
# then hold out 20% as a validation set.
# NOTE(review): this cell rebinds x_train / y_train, so running it a second
# time would split the already reduced training set again.
x, y = x_train, y_train
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [None]:
# Fit the CNN on the training split. `validation_split=0.1` asks Keras to hold
# out a further 10% of the arrays passed here for per-epoch validation;
# x_val / y_val are not used by this call.
training_options = dict(epochs=6, batch_size=16, validation_split=0.1, verbose=2)
history = model.fit(x=x_train, y=y_train, **training_options)

In [None]:
# Augmenting generator (small rotations, shifts, shear and zoom) used for the
# training batches.
gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(x_train, y_train, batch_size=64)

# Validation images go through a generator with no augmentation and no
# shuffling, so validation metrics are measured on the unmodified images
# rather than on randomly distorted copies.
val_batches = ImageDataGenerator().flow(x_val, y_val, batch_size=64, shuffle=False)

In [None]:
# Lower the learning rate through the backend so the change reaches the
# optimizer's variable, then train for one epoch on generator batches.
K.set_value(model.optimizer.lr, 0.001)

# steps_per_epoch / validation_steps are counted in batches, not samples.
# len(iterator) is the number of batches per pass; `.n` is the number of
# samples and would make a single "epoch" loop over the data about
# batch_size times.
history = model.fit_generator(generator=batches, steps_per_epoch=len(batches), epochs=1,
                              validation_data=val_batches, validation_steps=len(val_batches))

In [None]:
# Raise the learning rate through the backend so the change reaches the
# optimizer's variable.
K.set_value(model.optimizer.lr, 0.01)

# Train for three more epochs on non-augmented batches of x / y.
# NOTE(review): x / y were saved before the train/validation split, so they
# include the validation rows; validation metrics computed after this cell are
# measured on data the model has trained on - confirm this is intended.
gen = image.ImageDataGenerator()
batches = gen.flow(x, y, batch_size=64)

# steps_per_epoch is counted in batches: len(batches) is one pass over the
# data, whereas `batches.n` is the sample count.
history = model.fit_generator(generator=batches, steps_per_epoch=len(batches), epochs=3)

In [None]:
# BatchNormalization is already imported in the notebook's first cell, so the
# duplicate import that used to sit here has been dropped.

def get_bn_model():
    """Build and compile the CNN variant with batch normalisation.

    Same convolutional layout as `get_cnn_model`, with BatchNormalization
    layers inserted between stages and before each Dense layer.

    The inputs are reshaped to (28, 28, 1) elsewhere in the notebook, i.e. the
    channel is the last axis, so the convolutional BatchNormalization layers
    normalise over axis=-1. With axis=1 they would normalise over image rows.

    The optimizer is given by name ('adam') because the `Adam` class is never
    imported in this notebook, so `Adam()` would raise NameError when called.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        Lambda(standardize, input_shape=(28,28,1)),
        Convolution2D(32,(3,3), activation='relu'),
        BatchNormalization(axis=-1),
        Convolution2D(32,(3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=-1),
        Convolution2D(64,(3,3), activation='relu'),
        BatchNormalization(axis=-1),
        Convolution2D(64,(3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
        ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Model outputs for the test images (one row per image).
predictions = model.predict(x=x_test, verbose=0)

In [None]:
# Predicted class per validation image for the current model.
# The predictions are recomputed here: the loop this replaces iterated over
# `y_preds`, which was last assigned from the earlier dense model, so the
# confusion matrix and accuracy below did not reflect the CNN.
y_preds = model.predict(x_val, verbose=0)

# Index of the largest score in each row (first maximum, as with list.index).
y_prediction = np.argmax(y_preds, axis=1).astype('int64')

In [None]:
# True class per validation image: the position of the largest entry in each
# one-hot row of y_val. np.argmax returns the first maximum, exactly like the
# max() + list.index() lookup it replaces, without a Python-level loop.
y_validation = np.argmax(y_val, axis=1).astype('int64')

In [None]:
# Confusion matrix of true vs. predicted classes on the validation set.
result = confusion_matrix(y_true=y_validation, y_pred=y_prediction)
result

In [None]:
# Fraction of validation images whose predicted class matches the true class.
accuracy = metrics.accuracy_score(y_true=y_validation, y_pred=y_prediction)
accuracy